diff options
author | Nicolas Graves <ngraves@ngraves.fr> | 2025-04-04 21:16:55 +0200 |
---|---|---|
committer | Andreas Enge <andreas@enge.fr> | 2025-04-16 11:46:28 +0200 |
commit | db14ce5c4d413d513f25e41119fca522af4acd94 (patch) | |
tree | 730ed4b513dc4c2cc2d4c0199894f05c997cedd8 | |
parent | e8e46bbeb599f608627c869f654b1598bc09837f (diff) | |
download | guix-db14ce5c4d413d513f25e41119fca522af4acd94.tar.gz guix-db14ce5c4d413d513f25e41119fca522af4acd94.zip |
gnu: python-bed-reader: Fix build and enable tests.
* gnu/packages/bioinformatics.scm (python-bed-reader): Fix build and enable tests.
[source]{snippet}: Delete bundled website-related javascript.
{patches}: Use the store-cached samples instead of the pooch-cached ones.
[arguments]{tests?}: Enable them.
{cargo-test-flags}: Skip doc tests. Skip failing tests.
{cargo-inputs}: Improve style.
{cargo-development-inputs}: Improve style.
{phases}: Add phases 'set-data-path, 'patch-data-path to use
store-cached samples for library and tests. Rewrite phase
'prepare-python-module to rely more on the existing info in
pyproject.toml. Rewrite phase 'check-python entirely, and
marginally rewrite phase 'install-python-library to match 'check-python
phase style.
{modules}: Adapt accordingly.
{propagated-inputs}: Remove python-pooch. Add python-scipy.
(bed-sample-files): Add origin, used in python-bed-reader.
* gnu/packages/patches/python-bed-reader-use-store-samples.patch: Add
patch.
* gnu/local.mk: Record patch.
Signed-off-by: Sharlatan Hellseher <sharlatanus@gmail.com>
-rw-r--r-- | gnu/local.mk | 1 | ||||
-rw-r--r-- | gnu/packages/bioinformatics.scm | 195 | ||||
-rw-r--r-- | gnu/packages/patches/python-bed-reader-use-store-samples.patch | 147 |
3 files changed, 284 insertions, 59 deletions
diff --git a/gnu/local.mk b/gnu/local.mk index ce4226e5d8..aa9f279a3c 100644 --- a/gnu/local.mk +++ b/gnu/local.mk @@ -2009,6 +2009,7 @@ dist_patch_DATA = \ %D%/packages/patches/python-3.12-fix-tests.patch \ %D%/packages/patches/python-accupy-use-matplotx.patch \ %D%/packages/patches/python-accupy-fix-use-of-perfplot.patch \ + %D%/packages/patches/python-bed-reader-use-store-samples.patch \ %D%/packages/patches/python-chai-drop-python2.patch \ %D%/packages/patches/python-clarabel-blas.patch \ %D%/packages/patches/python-docrepr-fix-tests.patch \ diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index b5839f9763..2f2269b9ec 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -2185,6 +2185,20 @@ Format (GFF) with Biopython integration.") (modify-inputs (package-propagated-inputs python-bcbio-gff) (replace "python-biopython" python-biopython-1.73)))))) +(define bed-sample-files + (let* ((name "bed-sample-files") + (commit "a06dc0450e484090f15656ffd5d317813a5e1e01") + (revision "0") + (version (git-version "0.0.0" revision commit))) + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/fastlmm/bed-sample-files") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "1ldr2lvgbcykxa9i2s2298mhfh0sz96aaxs5dx217aipa9vsrjwk"))))) + (define-public python-bed-reader (package (name "python-bed-reader") @@ -2194,80 +2208,124 @@ Format (GFF) with Biopython integration.") (method url-fetch) (uri (pypi-uri "bed_reader" version)) (sha256 - (base32 "1c8ibwvz3b069w7ffh9aasz16lfkmx4z0249c2v909a21mrkkd6n")))) + (base32 "1c8ibwvz3b069w7ffh9aasz16lfkmx4z0249c2v909a21mrkkd6n")) + (modules '((guix build utils))) + ;; Bundled unused javascript & co. 
+ (snippet #~(delete-file-recursively "_static")) + (patches + (search-patches "python-bed-reader-use-store-samples.patch")))) (build-system cargo-build-system) (arguments (list - ;; Many of the tests (both the Rust tests and the Python tests) require - ;; Internet access to fetch samples. - #:tests? #false #:install-source? #false #:features '(list "extension-module") - #:cargo-test-flags '(list "--features=extension-module") + #:cargo-test-flags + '(list "--features=extension-module" + ;; Skip doc tests. + "--lib" "--bins" "--tests" "--" + ;; This test is the only one not matched by our regexp. + "--skip=http_one" + ;; These test require a 84 GB file. + "--skip=http_two" + "--skip=http_cloud_urls_md_3") #:cargo-inputs - `(("rust-anyinput" ,rust-anyinput-0.1) - ("rust-bytecount" ,rust-bytecount-0.6) - ("rust-byteorder" ,rust-byteorder-1) - ("rust-bytes" ,rust-bytes-1) - ("rust-cloud-file" ,rust-cloud-file-0.2) - ("rust-derive-builder" ,rust-derive-builder-0.20) - ("rust-dpc-pariter" ,rust-dpc-pariter-0.4) - ("rust-fetch-data" ,rust-fetch-data-0.2) - ("rust-futures-util" ,rust-futures-util-0.3) - ("rust-itertools" ,rust-itertools-0.13) - ("rust-ndarray" ,rust-ndarray-0.16) - ("rust-ndarray-npy" ,rust-ndarray-npy-0.9) - ("rust-num-traits" ,rust-num-traits-0.2) - ("rust-numpy" ,rust-numpy-0.22) - ("rust-pyo3" ,rust-pyo3-0.22) - ("rust-pyo3-build-config" ,rust-pyo3-build-config-0.22) - ("rust-rayon" ,rust-rayon-1) - ("rust-statrs" ,rust-statrs-0.17) - ("rust-thiserror" ,rust-thiserror-1) - ("rust-tokio" ,rust-tokio-1)) + (list rust-anyinput-0.1 + rust-bytecount-0.6 + rust-byteorder-1 + rust-bytes-1 + rust-cloud-file-0.2 + rust-derive-builder-0.20 + rust-dpc-pariter-0.4 + rust-fetch-data-0.2 + rust-futures-util-0.3 + rust-itertools-0.13 + rust-ndarray-0.16 + rust-ndarray-npy-0.9 + rust-num-traits-0.2 + rust-numpy-0.22 + rust-pyo3-0.22 + rust-pyo3-build-config-0.22 + rust-rayon-1 + rust-statrs-0.17 + rust-thiserror-1 + rust-tokio-1) #:cargo-development-inputs - 
`(("rust-anyhow" ,rust-anyhow-1) - ("rust-ndarray-rand" ,rust-ndarray-rand-0.15) - ("rust-rusoto-credential" ,rust-rusoto-credential-0.48) - ("rust-temp-testdir" ,rust-temp-testdir-0.2) - ("rust-thousands" ,rust-thousands-0.2)) + (list rust-anyhow-1 + rust-ndarray-rand-0.15 + rust-rusoto-credential-0.48 + rust-temp-testdir-0.2 + rust-thousands-0.2) #:imported-modules (append %cargo-build-system-modules %pyproject-build-system-modules) #:modules '((guix build cargo-build-system) ((guix build pyproject-build-system) #:prefix py:) - (guix build utils)) + (guix build utils) + (ice-9 match) + (ice-9 rdelim)) #:phases #~(modify-phases %standard-phases + (add-after 'configure 'set-data-path + (lambda _ + ;; This var is still necessary despite the patch-data-path phase. + ;; Otherwise more tests fail with a read-only filesystem error. + (setenv "BED_READER_DATA_DIR" #+bed-sample-files))) + (add-after 'unpack 'patch-data-path + (lambda _ + ;; If BED_READER_DATA_DIR is unset, default to bed-sample-files. + (substitute* "bed_reader/_sample_data.py" + (("os\\.environ\\.get\\(\"BED_READER_DATA_DIR\"" all) + (format #f "~a, ~s" all #+bed-sample-files))) + ;; XXX: More work is necessary to use another + ;; version of sample files with BED_READER_DATA_DIR + ;; Currently, only the hardcoded Guix version is working. + (substitute* '("bed_reader/tests/test_open_bed_cloud.py" + "src/bed_cloud.rs" + "src/lib.rs" + "src/supplemental_documents/cloud_urls_etc.md" + "tests/tests_api_cloud.rs") + (("\ +https://raw\\.githubusercontent\\.com/fastlmm/bed-sample-files/main") + (string-append "file://" #+bed-sample-files))) + (substitute* "src/tests.rs" + (("bed_reader/tests/data") + #+bed-sample-files)))) (add-after 'install 'prepare-python-module (lambda _ - ;; We don't use maturin. - (delete-file "pyproject.toml") - (call-with-output-file "pyproject.toml" - (lambda (port) - (format port "\ + ;; We don't use maturin. 
Conveniently, what we want to drop + ;; from pyproject.toml is at the end of the file. + (rename-file "pyproject.toml" "pyproject.toml.bak") + (call-with-input-file "pyproject.toml.bak" + (lambda (in) + (call-with-output-file "pyproject.toml" + (lambda (out) + (let loop () + (match (read-line in) + ((? eof-object? eof) + eof) + ("[build-system]" + (and (format out "\ [build-system] build-backend = 'setuptools.build_meta' requires = ['setuptools'] -"))) - (call-with-output-file "setup.cfg" - (lambda (port) - (format port "\ -[metadata] -name = bed-reader -version = ~a -[options] -packages = find: - -[options.packages.find] -exclude = - src - docs - tests - Cargo.toml -" #$version))))) +[tool.setuptools.packages.find] +where = [\".\"] +exclude = [\"src\", \"docs\", \"tests\", \"Cargo.toml\"] +"))) + ("samples = [\"pooch>=1.5.0\"]" + (and (format out "samples = []~%") + (loop))) + ("[project]" + (and (format out "\ +[project] +version = ~s +" #$version) + (loop))) + (line + (and (format out "~a~%" line) + (loop))))))))))) (add-after 'prepare-python-module 'enable-bytecode-determinism (assoc-ref py:%standard-phases 'enable-bytecode-determinism)) (add-after 'enable-bytecode-determinism 'build-python-module @@ -2279,16 +2337,35 @@ exclude = (let ((site (string-append #$output "/lib/python" #$(version-major+minor (package-version python)) - "/site-packages"))) - (mkdir-p site) + "/site-packages/"))) + (mkdir-p (string-append site "bed_reader")) (copy-file "target/release/libbed_reader.so" - (string-append site "/bed_reader/bed_reader.so"))))) + (string-append site "bed_reader/bed_reader.so"))))) (add-after 'install-python-library 'add-install-to-pythonpath (assoc-ref py:%standard-phases 'add-install-to-pythonpath)) (add-after 'add-install-to-pythonpath 'check-python - (lambda* (#:key tests? test-flags #:allow-other-keys) + (lambda* (#:key tests? #:allow-other-keys) (when tests? 
- (apply invoke "pytest" "-v" #$output test-flags))))))) + (let ((site (string-append #$output "/lib/python" + #$(version-major+minor + (package-version python)) + "/site-packages/")) + (data-dir "bed_reader/tests/data")) + (symlink (canonicalize-path data-dir) + (string-append site data-dir)) + (invoke "pytest" "-v" #$output + ;; These test require a 84 GB file. + "-k" (string-join + (list "not test_http_two" + "test_http_cloud_urls_rst_3" + "test_http_cloud_urls_rst_4" + ;; XXX: python-pooch dependency removed + "test_optional_dependencies") + " and not ")) + (delete-file-recursively + (string-append site "bed_reader/tests")) + (delete-file-recursively + (string-append #$output "/.pytest_cache"))))))))) (native-inputs (list python-pytest python-pytest-cov python-pytest-datadir @@ -2296,7 +2373,7 @@ exclude = python-recommonmark python-sphinx)) (inputs (list python-wrapper)) - (propagated-inputs (list python-numpy python-pandas python-pooch)) + (propagated-inputs (list python-numpy python-pandas python-scipy)) (home-page "https://fastlmm.github.io/") (synopsis "Read and write the PLINK BED format, simply and efficiently") (description diff --git a/gnu/packages/patches/python-bed-reader-use-store-samples.patch b/gnu/packages/patches/python-bed-reader-use-store-samples.patch new file mode 100644 index 0000000000..813f155225 --- /dev/null +++ b/gnu/packages/patches/python-bed-reader-use-store-samples.patch @@ -0,0 +1,147 @@ +From 7e6bcdfeed54500ca533d2f0eb12078248c43c77 Mon Sep 17 00:00:00 2001 +Message-ID: <7e6bcdfeed54500ca533d2f0eb12078248c43c77.1743682382.git.ngraves@ngraves.fr> +From: Nicolas Graves <ngraves@ngraves.fr> +Date: Thu, 3 Apr 2025 11:33:58 +0200 +Subject: [PATCH] samples: Use deterministic samples in Guix. 
+ +--- + bed_reader/_sample_data.py | 86 +++++++++----------------------------- + 1 file changed, 19 insertions(+), 67 deletions(-) + +diff --git a/bed_reader/_sample_data.py b/bed_reader/_sample_data.py +index 6ca4cc0..6a1146e 100644 +--- a/bed_reader/_sample_data.py ++++ b/bed_reader/_sample_data.py +@@ -1,33 +1,8 @@ ++import os + import tempfile + from pathlib import Path, PurePath + from typing import Union + +-try: +- import pooch +- +- """ +- Load sample data. +- """ +- +- POOCH = pooch.create( +- # Use the default cache folder for the OS +- path=pooch.os_cache("bed_reader"), +- # The remote data is on Github +- base_url="https://raw.githubusercontent.com/" +- + "fastlmm/bed-sample-files/main/", +- # If this is a development version, get the data from the master branch +- version_dev="main", +- # The registry specifies the files that can be fetched +- env="BED_READER_DATA_DIR", +- ) +- +- # Get registry file from package_data +- registry_file = Path(__file__).parent / "tests/registry.txt" +- # Load this registry file +- POOCH.load_registry(registry_file) +-except ImportError: +- pooch = None +- + + def sample_file(filepath: Union[str, Path]) -> str: + """Retrieve a sample .bed file. (Also retrieves associated .fam and .bim files). +@@ -40,50 +15,41 @@ def sample_file(filepath: Union[str, Path]) -> str: + Returns + ------- + str +- Local name of sample .bed file. +- +- +- .. note:: +- This function requires the :mod:`pooch` package. Install `pooch` with: +- +- .. code-block:: bash +- +- pip install --upgrade bed-reader[samples] +- +- +- By default this function puts files under the user's cache directory. +- Override this by setting +- the `BED_READER_DATA_DIR` environment variable. ++ Local path of sample .bed file. + + Example + -------- + + .. 
doctest:: + +- >>> # pip install bed-reader[samples] # if needed + >>> from bed_reader import sample_file + >>> + >>> file_name = sample_file("small.bed") + >>> print(f"The local file name is '{file_name}'") + The local file name is '...small.bed' +- + """ +- if pooch is None: +- raise ImportError( +- "The function sample_file() requires pooch. " +- + "Install it with 'pip install --upgrade bed-reader[samples]'.", ++ filepath = Path(filepath) ++ sample_dir = os.environ.get("BED_READER_DATA_DIR") ++ if sample_dir is None: ++ raise EnvironmentError( ++ "BED_READER_DATA_DIR environment variable is not set. " ++ "This should point to the directory containing the sample files." + ) + +- filepath = Path(filepath) +- file_string = str(filepath) +- if file_string.lower().endswith(".bed"): +- POOCH.fetch(file_string[:-4] + ".fam") +- POOCH.fetch(file_string[:-4] + ".bim") +- return POOCH.fetch(file_string) ++ file_path = Path(sample_dir) / filepath ++ ++ # Check if file exists ++ if not file_path.exists(): ++ raise FileNotFoundError( ++ f"Sample file '{filepath}' not found in {sample_dir}. " ++ f"Make sure you're using the latest samples in BED_READER_DATA_DIR." ++ ) ++ ++ return str(file_path) + + + def sample_url(filepath: Union[str, Path]) -> str: +- """Retrieve a URL to a sample .bed file. (Also makes ready associated .fam and .bim files). ++ """Retrieve a URL to a sample .bed file. + + Parameters + ---------- +@@ -95,25 +61,11 @@ def sample_url(filepath: Union[str, Path]) -> str: + str + URL to sample .bed file. + +- +- .. note:: +- This function requires the :mod:`pooch` package. Install `pooch` with: +- +- .. code-block:: bash +- +- pip install --upgrade bed-reader[samples] +- +- +- By default this function puts files under the user's cache directory. +- Override this by setting +- the `BED_READER_DATA_DIR` environment variable. +- + Example + -------- + + .. 
doctest:: + +- >>> # pip install bed-reader[samples] # if needed + >>> from bed_reader import sample_url + >>> + >>> url = sample_url("small.bed") +-- +2.49.0 + |