From 7e6bcdfeed54500ca533d2f0eb12078248c43c77 Mon Sep 17 00:00:00 2001 Message-ID: <7e6bcdfeed54500ca533d2f0eb12078248c43c77.1743682382.git.ngraves@ngraves.fr> From: Nicolas Graves Date: Thu, 3 Apr 2025 11:33:58 +0200 Subject: [PATCH] samples: Use deterministic samples in Guix. --- bed_reader/_sample_data.py | 86 +++++++++----------------------------- 1 file changed, 19 insertions(+), 67 deletions(-) diff --git a/bed_reader/_sample_data.py b/bed_reader/_sample_data.py index 6ca4cc0..6a1146e 100644 --- a/bed_reader/_sample_data.py +++ b/bed_reader/_sample_data.py @@ -1,33 +1,8 @@ +import os import tempfile from pathlib import Path, PurePath from typing import Union -try: - import pooch - - """ - Load sample data. - """ - - POOCH = pooch.create( - # Use the default cache folder for the OS - path=pooch.os_cache("bed_reader"), - # The remote data is on Github - base_url="https://raw.githubusercontent.com/" - + "fastlmm/bed-sample-files/main/", - # If this is a development version, get the data from the master branch - version_dev="main", - # The registry specifies the files that can be fetched - env="BED_READER_DATA_DIR", - ) - - # Get registry file from package_data - registry_file = Path(__file__).parent / "tests/registry.txt" - # Load this registry file - POOCH.load_registry(registry_file) -except ImportError: - pooch = None - def sample_file(filepath: Union[str, Path]) -> str: """Retrieve a sample .bed file. (Also retrieves associated .fam and .bim files). @@ -40,50 +15,41 @@ def sample_file(filepath: Union[str, Path]) -> str: Returns ------- str - Local name of sample .bed file. - - - .. note:: - This function requires the :mod:`pooch` package. Install `pooch` with: - - .. code-block:: bash - - pip install --upgrade bed-reader[samples] - - - By default this function puts files under the user's cache directory. - Override this by setting - the `BED_READER_DATA_DIR` environment variable. + Local path of sample .bed file. Example -------- .. doctest:: - >>> # pip install bed-reader[samples] # if needed >>> from bed_reader import sample_file >>> >>> file_name = sample_file("small.bed") >>> print(f"The local file name is '{file_name}'") The local file name is '...small.bed' - """ - if pooch is None: - raise ImportError( - "The function sample_file() requires pooch. " - + "Install it with 'pip install --upgrade bed-reader[samples]'.", + filepath = Path(filepath) + sample_dir = os.environ.get("BED_READER_DATA_DIR") + if sample_dir is None: + raise EnvironmentError( + "BED_READER_DATA_DIR environment variable is not set. " + "This should point to the directory containing the sample files." ) - filepath = Path(filepath) - file_string = str(filepath) - if file_string.lower().endswith(".bed"): - POOCH.fetch(file_string[:-4] + ".fam") - POOCH.fetch(file_string[:-4] + ".bim") - return POOCH.fetch(file_string) + file_path = Path(sample_dir) / filepath + + # Check if file exists + if not file_path.exists(): + raise FileNotFoundError( + f"Sample file '{filepath}' not found in {sample_dir}. " + f"Make sure you're using the latest samples in BED_READER_DATA_DIR." + ) + + return str(file_path) def sample_url(filepath: Union[str, Path]) -> str: - """Retrieve a URL to a sample .bed file. (Also makes ready associated .fam and .bim files). + """Retrieve a URL to a sample .bed file. Parameters ---------- @@ -95,25 +61,11 @@ def sample_url(filepath: Union[str, Path]) -> str: str URL to sample .bed file. - - .. note:: - This function requires the :mod:`pooch` package. Install `pooch` with: - - .. code-block:: bash - - pip install --upgrade bed-reader[samples] - - - By default this function puts files under the user's cache directory. - Override this by setting - the `BED_READER_DATA_DIR` environment variable. - Example -------- .. doctest:: - >>> # pip install bed-reader[samples] # if needed >>> from bed_reader import sample_url >>> >>> url = sample_url("small.bed") -- 2.49.0