diff options
author | Troy Figiel <troy@troyfigiel.com> | 2024-01-11 22:17:50 +0100 |
---|---|---|
committer | Mathieu Othacehe <othacehe@gnu.org> | 2024-01-17 11:18:00 +0100 |
commit | dac977ea4ea208a14e3b4615c386e9b8d0f4ed6f (patch) | |
tree | 8c5b805b8957d8ec169fa0d812db54b7b8744e3a /gnu | |
parent | 23cb6187f6ed6605c84753fd0e0783f812b69fc7 (diff) | |
download | guix-dac977ea4ea208a14e3b4615c386e9b8d0f4ed6f.tar.gz guix-dac977ea4ea208a14e3b4615c386e9b8d0f4ed6f.zip |
gnu: Add python-pandera.
* gnu/packages/python-science.scm (python-pandera): New variable.
Diffstat (limited to 'gnu')
-rw-r--r-- | gnu/packages/python-science.scm | 75 |
1 files changed, 75 insertions, 0 deletions
diff --git a/gnu/packages/python-science.scm b/gnu/packages/python-science.scm
index 8e492bbaa5..c57fd1f23c 100644
--- a/gnu/packages/python-science.scm
+++ b/gnu/packages/python-science.scm
@@ -634,6 +634,81 @@ a convention of suggesting best recommended practices for using
 @code{python-pandas}.")
     (license license:bsd-3)))
 
+(define-public python-pandera
+  (package
+    (name "python-pandera")
+    (version "0.17.2")
+    (source
+     (origin
+       ;; No tests in the PyPI tarball.
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/unionai-oss/pandera")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "1mnqk583z90k1n0z3lfa4rd0ng40v7hqfk7phz5gjmxlzfjbxa1x"))
+       (modules '((guix build utils)))
+       ;; These tests require PySpark. We need to remove the entire directory,
+       ;; since the conftest.py in this directory contains a PySpark import.
+       ;; (See: https://github.com/pytest-dev/pytest/issues/7452)
+       (snippet '(delete-file-recursively "tests/pyspark"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:test-flags '(list "-k"
+                          (string-append
+                           ;; Needs python-pandas >= 1.5
+                           "not test_python_std_list_dict_generics"
+                           " and not test_python_std_list_dict_empty_and_none"
+                           " and not test_pandas_modules_importable"))))
+    ;; Pandera comes with a lot of extras. We test as many as possible, but do
+    ;; not include all of them in the propagated-inputs. Currently, we have to
+    ;; skip the pyspark and io tests due to missing packages python-pyspark
+    ;; and python-frictionless.
+    (propagated-inputs (list python-hypothesis ;strategies extra
+                             python-multimethod
+                             python-numpy
+                             python-packaging
+                             python-pandas
+                             python-pandas-stubs ;mypy extra
+                             python-pydantic
+                             python-scipy ;hypotheses extra
+                             python-typeguard-4
+                             python-typing-inspect
+                             python-wrapt))
+    (native-inputs (list python-dask ;dask extra
+                         python-fastapi ;fastapi extra
+                         python-geopandas ;geopandas extra
+                         python-modin ;modin extra
+                         python-pyarrow ;needed to run fastapi tests
+                         python-pytest
+                         python-pytest-asyncio
+                         python-sphinx
+                         python-uvicorn)) ;needed to run fastapi tests
+    (home-page "https://github.com/unionai-oss/pandera")
+    (synopsis "Perform data validation on dataframe-like objects")
+    (description
+     "@code{python-pandera} provides a flexible and expressive API for
+performing data validation on dataframe-like objects to make data processing
+pipelines more readable and robust. Dataframes contain information that
+@code{python-pandera} explicitly validates at runtime. This is useful in
+production-critical data pipelines or reproducible research settings. With
+@code{python-pandera}, you can:
+
+@itemize
+@item Define a schema once and use it to validate different dataframe types.
+@item Check the types and properties of columns.
+@item Perform more complex statistical validation like hypothesis testing.
+@item Seamlessly integrate with existing data pipelines via function decorators.
+@item Define dataframe models with the class-based API with pydantic-style syntax.
+@item Synthesize data from schema objects for property-based testing.
+@item Lazily validate dataframes so that all validation rules are executed.
+@item Integrate with a rich ecosystem of tools like @code{python-pydantic},
+@code{python-fastapi} and @code{python-mypy}.
+@end itemize")
+    (license license:expat)))
+
 (define-public python-pythran
   (package
     (name "python-pythran")