1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
|
From 7e6bcdfeed54500ca533d2f0eb12078248c43c77 Mon Sep 17 00:00:00 2001
Message-ID: <7e6bcdfeed54500ca533d2f0eb12078248c43c77.1743682382.git.ngraves@ngraves.fr>
From: Nicolas Graves <ngraves@ngraves.fr>
Date: Thu, 3 Apr 2025 11:33:58 +0200
Subject: [PATCH] samples: Use deterministic samples in Guix.
---
bed_reader/_sample_data.py | 86 +++++++++-----------------------------
1 file changed, 19 insertions(+), 67 deletions(-)
diff --git a/bed_reader/_sample_data.py b/bed_reader/_sample_data.py
index 6ca4cc0..6a1146e 100644
--- a/bed_reader/_sample_data.py
+++ b/bed_reader/_sample_data.py
@@ -1,33 +1,8 @@
+import os
import tempfile
from pathlib import Path, PurePath
from typing import Union
-try:
- import pooch
-
- """
- Load sample data.
- """
-
- POOCH = pooch.create(
- # Use the default cache folder for the OS
- path=pooch.os_cache("bed_reader"),
- # The remote data is on Github
- base_url="https://raw.githubusercontent.com/"
- + "fastlmm/bed-sample-files/main/",
- # If this is a development version, get the data from the master branch
- version_dev="main",
- # The registry specifies the files that can be fetched
- env="BED_READER_DATA_DIR",
- )
-
- # Get registry file from package_data
- registry_file = Path(__file__).parent / "tests/registry.txt"
- # Load this registry file
- POOCH.load_registry(registry_file)
-except ImportError:
- pooch = None
-
def sample_file(filepath: Union[str, Path]) -> str:
"""Retrieve a sample .bed file. (Also retrieves associated .fam and .bim files).
@@ -40,50 +15,41 @@ def sample_file(filepath: Union[str, Path]) -> str:
Returns
-------
str
- Local name of sample .bed file.
-
-
- .. note::
- This function requires the :mod:`pooch` package. Install `pooch` with:
-
- .. code-block:: bash
-
- pip install --upgrade bed-reader[samples]
-
-
- By default this function puts files under the user's cache directory.
- Override this by setting
- the `BED_READER_DATA_DIR` environment variable.
+ Local path of sample .bed file.
Example
--------
.. doctest::
- >>> # pip install bed-reader[samples] # if needed
>>> from bed_reader import sample_file
>>>
>>> file_name = sample_file("small.bed")
>>> print(f"The local file name is '{file_name}'")
The local file name is '...small.bed'
-
"""
- if pooch is None:
- raise ImportError(
- "The function sample_file() requires pooch. "
- + "Install it with 'pip install --upgrade bed-reader[samples]'.",
+ filepath = Path(filepath)
+ sample_dir = os.environ.get("BED_READER_DATA_DIR")
+ if sample_dir is None:
+ raise EnvironmentError(
+ "BED_READER_DATA_DIR environment variable is not set. "
+ "This should point to the directory containing the sample files."
)
- filepath = Path(filepath)
- file_string = str(filepath)
- if file_string.lower().endswith(".bed"):
- POOCH.fetch(file_string[:-4] + ".fam")
- POOCH.fetch(file_string[:-4] + ".bim")
- return POOCH.fetch(file_string)
+ file_path = Path(sample_dir) / filepath
+
+ # Check if file exists
+ if not file_path.exists():
+ raise FileNotFoundError(
+ f"Sample file '{filepath}' not found in {sample_dir}. "
+ f"Make sure you're using the latest samples in BED_READER_DATA_DIR."
+ )
+
+ return str(file_path)
def sample_url(filepath: Union[str, Path]) -> str:
- """Retrieve a URL to a sample .bed file. (Also makes ready associated .fam and .bim files).
+ """Retrieve a URL to a sample .bed file.
Parameters
----------
@@ -95,25 +61,11 @@ def sample_url(filepath: Union[str, Path]) -> str:
str
URL to sample .bed file.
-
- .. note::
- This function requires the :mod:`pooch` package. Install `pooch` with:
-
- .. code-block:: bash
-
- pip install --upgrade bed-reader[samples]
-
-
- By default this function puts files under the user's cache directory.
- Override this by setting
- the `BED_READER_DATA_DIR` environment variable.
-
Example
--------
.. doctest::
- >>> # pip install bed-reader[samples] # if needed
>>> from bed_reader import sample_url
>>>
>>> url = sample_url("small.bed")
--
2.49.0
|