aboutsummaryrefslogtreecommitdiff
From 7e6bcdfeed54500ca533d2f0eb12078248c43c77 Mon Sep 17 00:00:00 2001
Message-ID: <7e6bcdfeed54500ca533d2f0eb12078248c43c77.1743682382.git.ngraves@ngraves.fr>
From: Nicolas Graves <ngraves@ngraves.fr>
Date: Thu, 3 Apr 2025 11:33:58 +0200
Subject: [PATCH] samples: Use deterministic samples in Guix.

---
 bed_reader/_sample_data.py | 86 +++++++++-----------------------------
 1 file changed, 19 insertions(+), 67 deletions(-)

diff --git a/bed_reader/_sample_data.py b/bed_reader/_sample_data.py
index 6ca4cc0..6a1146e 100644
--- a/bed_reader/_sample_data.py
+++ b/bed_reader/_sample_data.py
@@ -1,33 +1,8 @@
+import os
 import tempfile
 from pathlib import Path, PurePath
 from typing import Union
 
-try:
-    import pooch
-
-    """
-    Load sample data.
-    """
-
-    POOCH = pooch.create(
-        # Use the default cache folder for the OS
-        path=pooch.os_cache("bed_reader"),
-        # The remote data is on Github
-        base_url="https://raw.githubusercontent.com/"
-        + "fastlmm/bed-sample-files/main/",
-        # If this is a development version, get the data from the master branch
-        version_dev="main",
-        # The registry specifies the files that can be fetched
-        env="BED_READER_DATA_DIR",
-    )
-
-    # Get registry file from package_data
-    registry_file = Path(__file__).parent / "tests/registry.txt"
-    # Load this registry file
-    POOCH.load_registry(registry_file)
-except ImportError:
-    pooch = None
-
 
 def sample_file(filepath: Union[str, Path]) -> str:
     """Retrieve a sample .bed file. (Also retrieves associated .fam and .bim files).
@@ -40,50 +15,41 @@ def sample_file(filepath: Union[str, Path]) -> str:
     Returns
     -------
     str
-        Local name of sample .bed file.
-
-
-    .. note::
-        This function requires the :mod:`pooch` package. Install `pooch` with:
-
-        .. code-block:: bash
-
-            pip install --upgrade bed-reader[samples]
-
-
-    By default this function puts files under the user's cache directory.
-    Override this by setting
-    the `BED_READER_DATA_DIR` environment variable.
+        Local path of sample .bed file.
 
     Example
     --------
 
     .. doctest::
 
-        >>> # pip install bed-reader[samples]  # if needed
         >>> from bed_reader import sample_file
         >>>
         >>> file_name = sample_file("small.bed")
         >>> print(f"The local file name is '{file_name}'")
         The local file name is '...small.bed'
-
     """
-    if pooch is None:
-        raise ImportError(
-            "The function sample_file() requires pooch. "
-            + "Install it with 'pip install --upgrade bed-reader[samples]'.",
+    filepath = Path(filepath)
+    sample_dir = os.environ.get("BED_READER_DATA_DIR")
+    if sample_dir is None:
+        raise EnvironmentError(
+            "BED_READER_DATA_DIR environment variable is not set. "
+            "This should point to the directory containing the sample files."
         )
 
-    filepath = Path(filepath)
-    file_string = str(filepath)
-    if file_string.lower().endswith(".bed"):
-        POOCH.fetch(file_string[:-4] + ".fam")
-        POOCH.fetch(file_string[:-4] + ".bim")
-    return POOCH.fetch(file_string)
+    file_path = Path(sample_dir) / filepath
+
+    # Check if file exists
+    if not file_path.exists():
+        raise FileNotFoundError(
+            f"Sample file '{filepath}' not found in {sample_dir}. "
+            f"Make sure you're using the latest samples in BED_READER_DATA_DIR."
+        )
+
+    return str(file_path)
 
 
 def sample_url(filepath: Union[str, Path]) -> str:
-    """Retrieve a URL to a sample .bed file. (Also makes ready associated .fam and .bim files).
+    """Retrieve a URL to a sample .bed file.
 
     Parameters
     ----------
@@ -95,25 +61,11 @@ def sample_url(filepath: Union[str, Path]) -> str:
     str
         URL to sample .bed file.
 
-
-    .. note::
-        This function requires the :mod:`pooch` package. Install `pooch` with:
-
-        .. code-block:: bash
-
-            pip install --upgrade bed-reader[samples]
-
-
-    By default this function puts files under the user's cache directory.
-    Override this by setting
-    the `BED_READER_DATA_DIR` environment variable.
-
     Example
     --------
 
     .. doctest::
 
-        >>> # pip install bed-reader[samples]  # if needed
         >>> from bed_reader import sample_url
         >>>
         >>> url = sample_url("small.bed")
-- 
2.49.0