;;;
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2015-2024 Ricardo Wurmus <rekado@elephly.net>
;;; Copyright © 2016, 2017, 2018, 2020, 2021 Roel Janssen <roel@gnu.org>
;;; Copyright © 2016 Pjotr Prins <pjotr.guix@thebird.nl>
;;; Copyright © 2016 Ben Woodcroft <donttrustben@gmail.com>
;;; Copyright © 2017, 2022, 2024 Efraim Flashner <efraim@flashner.co.il>
;;; Copyright © 2017, 2018, 2019, 2020, 2021 Tobias Geerinckx-Rice <me@tobias.gr>
;;; Copyright © 2019, 2020, 2021, 2022, 2023 Simon Tournier <zimon.toutoune@gmail.com>
;;; Copyright © 2020 Peter Lo <peterloleungyau@gmail.com>
;;; Copyright © 2020-2023 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
;;; Copyright © 2021 Hong Li <hli@mdc-berlin.de>
;;; Copyright © 2021 Tim Howes <timhowes@lavabit.com>
;;; Copyright © 2021 Nicolas Vallet <nls.vallet@gmail.com>
;;; Copyright © 2023 Navid Afkhami <Navid.Afkhami@mdc-berlin.de>
;;; Copyright © 2024 Spencer King <spencer.king@geneoscopy.com>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

(define-module (gnu packages bioconductor)
  #:use-module ((guix licenses) #:prefix license:)
  #:use-module (guix packages)
  #:use-module (guix download)
  #:use-module (guix gexp)
  #:use-module (guix utils)
  #:use-module (guix git-download)
  #:use-module (guix build-system r)
  #:use-module (gnu packages)
  #:use-module (gnu packages algebra)
  #:use-module (gnu packages autotools)
  #:use-module (gnu packages base)
  #:use-module (gnu packages bioinformatics)
  #:use-module (gnu packages boost)
  #:use-module (gnu packages chemistry)
  #:use-module (gnu packages cran)
  #:use-module (gnu packages compression)
  #:use-module (gnu packages curl)
  #:use-module (gnu packages docker)
  #:use-module (gnu packages gcc)
  #:use-module (gnu packages graph)
  #:use-module (gnu packages graphviz)
  #:use-module (gnu packages haskell-xyz)
  #:use-module (gnu packages image)
  #:use-module (gnu packages java)
  #:use-module (gnu packages javascript)
  #:use-module (gnu packages maths)
  #:use-module (gnu packages ncurses)
  #:use-module (gnu packages netpbm)
  #:use-module (gnu packages python)
  #:use-module (gnu packages perl)
  #:use-module (gnu packages pkg-config)
  #:use-module (gnu packages protobuf)
  #:use-module (gnu packages statistics)
  #:use-module (gnu packages tls)
  #:use-module (gnu packages web)
  #:use-module (gnu packages xml)
  #:use-module ((srfi srfi-1) #:hide (zip)))


;;; Annotations

;; Masked hg38 genome data package.  It propagates the unmasked
;; r-bsgenome-hsapiens-ucsc-hg38 package, on which the masks are layered
;; according to the description below.
(define-public r-bsgenome-hsapiens-ucsc-hg38-masked
  (package
    (name "r-bsgenome-hsapiens-ucsc-hg38-masked")
    (version "1.4.5")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri
                    "BSgenome.Hsapiens.UCSC.hg38.masked"
                    version
                    'annotation))
              (sha256
               (base32
                "0j71hdxqvvc0s8mc6jp6zk502mrf095qazj95yzzb4rm6sjvd20m"))))
    ;; The upstream name differs from the Guix package name.
    (properties
     '((upstream-name . "BSgenome.Hsapiens.UCSC.hg38.masked")))
    (build-system r-build-system)
    (propagated-inputs
     (list r-bsgenome
           r-bsgenome-hsapiens-ucsc-hg38
           r-genomeinfodb))
    (home-page "https://bioconductor.org/packages/BSgenome.Hsapiens.UCSC.hg38.masked")
    (synopsis "Full masked genomic sequences for Homo sapiens (UCSC version hg38)")
    (description "This package provides the complete genome sequences for Homo sapiens as
provided by UCSC (genome hg38, based on assembly GRCh38.p14 since 2023/01/31).
The sequences are the same as in BSgenome.Hsapiens.UCSC.hg38, except that each
of them has the 4 following masks on top:

@enumerate
@item the mask of assembly gaps (AGAPS mask);
@item the mask of intra-contig ambiguities (AMB mask);
@item the mask of repeats from @code{RepeatMasker} (RM mask);
@item the mask of repeats from Tandem Repeats Finder (TRF mask).
@end enumerate

Only the AGAPS and AMB masks are \"active\" by default.  The sequences are stored
in @code{MaskedDNAString} objects.")
    (license license:artistic2.0)))

;; Ensembl-derived annotation database package; r-ensembldb is propagated
;; alongside it.
(define-public r-ensdb-hsapiens-v79
  (package
    (name "r-ensdb-hsapiens-v79")
    (version "2.99.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "EnsDb.Hsapiens.v79"
                                     version
                                     'annotation))
              (sha256
               (base32
                "0k94iml0417m3k086d0bzd83fndyb2kn7pimsfzcdmafgy6sxwgg"))))
    (properties
     '((upstream-name . "EnsDb.Hsapiens.v79")))
    (build-system r-build-system)
    (propagated-inputs
     (list r-ensembldb))
    (home-page
     "https://bioconductor.org/packages/EnsDb.Hsapiens.v79")
    (synopsis
     "Ensembl based annotation package")
    (description
     "This package exposes an annotation database generated from
Ensembl.")
    (license license:artistic2.0)))

;; Annotation package for the Human Phenotype Ontology.  The SQLite database
;; is supplied as the "HPO.sqlite" native input and copied into the output
;; during the build; "R/zzz.R" is patched to use that copy whenever the
;; NIX_BUILD_TOP environment variable is non-empty.
(define-public r-hpo-db
  (package
    (name "r-hpo-db")
    (version "0.99.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "HPO.db" version
                              'annotation))
       (sha256
        (base32 "1brzrnafvyh76h8a663gk5lprhixxpi9xi65vwgxwf7jh6yw0was"))))
    (properties `((upstream-name . "HPO.db")))
    (build-system r-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'avoid-internet-access
            ;; The phase takes no information from its keyword arguments;
            ;; everything it needs is spliced in through the gexp.
            (lambda _
              (let* ((cache (string-append #$output "/share/HPO.db/cache"))
                     ;; "118333" matches the final component of the
                     ;; AnnotationHub fetch URL of the "HPO.sqlite" input.
                     (file (string-append cache "/118333")))
                (mkdir-p cache)
                (copy-file #$(this-package-native-input "HPO.sqlite") file)
                (substitute* "R/zzz.R"
                  ;; Only create the AnnotationHub object when
                  ;; NIX_BUILD_TOP is empty.
                  (("ah <- suppressMessages\\(AnnotationHub\\(\\)\\)" m)
                   (string-append
                    "if (Sys.getenv(\"NIX_BUILD_TOP\") == \"\") { " m " };"))
                  ;; When NIX_BUILD_TOP is set, point dbfile at the copy
                  ;; placed in the output above; otherwise keep the original
                  ;; assignment.
                  (("dbfile <- ah.*" m)
                   (string-append
                    "if (Sys.getenv(\"NIX_BUILD_TOP\") != \"\") { dbfile <- \""
                    file "\";} else { " m " }\n")))))))))
    (propagated-inputs
     (list r-annotationdbi r-annotationhub r-biocfilecache r-dbi))
    ;; The "HPO.sqlite" label is looked up by `this-package-native-input' in
    ;; the phase above, so it must stay in sync with it.
    (native-inputs
     `(("r-knitr" ,r-knitr)
       ("HPO.sqlite"
        ,(origin
           (method url-fetch)
           (uri "https://annotationhub.bioconductor.org/fetch/118333")
           (file-name "HPO.sqlite")
           (sha256
            (base32 "1wwdwf27iil0p41183qgygh2ifphhmlljjkgjm2h8sr25qycf0md"))))))
    (home-page "https://bioconductor.org/packages/HPO.db")
    (synopsis
     "Annotation maps describing the entire Human Phenotype Ontology")
    (description
     "Human Phenotype Ontology (HPO) was developed to create a consistent
description of gene products with disease perspectives, and is essential for
supporting functional genomics in disease context.  Accurate disease
descriptions can discover new relationships between genes and disease, and new
functions for previously uncharacterized genes and alleles.")
    (license license:artistic2.0)))

;; Data-only package for the 2020 release of the JASPAR database.
(define-public r-jaspar2020
  (package
    (name "r-jaspar2020")
    (version "0.99.10")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "JASPAR2020"
                                     version
                                     'annotation))
              (sha256
               (base32
                "0nrp63z7q2ivv5h87f7inpp2qll2dfgj4227l4rbnzii38a2vfdr"))))
    (properties
     '((upstream-name . "JASPAR2020")))
    (build-system r-build-system)
    (native-inputs
     (list r-knitr))
    (home-page
     "https://jaspar.elixir.no/")
    (synopsis
     "Data package for JASPAR database (version 2020)")
    (description
     "Data package for JASPAR2020.  To explore these databases,
utilize the TFBSTools package (version 1.23.1 or higher).")
    (license license:gpl2)))

;; Minor allele frequency data package (gnomAD 3.1.2, GRCh38).
(define-public r-mafh5-gnomad-v3-1-2-grch38
  (package
    (name "r-mafh5-gnomad-v3-1-2-grch38")
    (version "3.15.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MafH5.gnomAD.v3.1.2.GRCh38"
                                     version
                                     'annotation))
              (sha256
               (base32
                "1q9hlva814sjfz8vm9bzw7xzppbcfy5qq7nnz9w742yr59cjb6mp"))))
    (properties
     '((upstream-name . "MafH5.gnomAD.v3.1.2.GRCh38")))
    (build-system r-build-system)
    (propagated-inputs
     (list r-bsgenome r-genomeinfodb r-genomicranges r-genomicscores
           r-hdf5array r-iranges r-rhdf5 r-s4vectors))
    (home-page
     "https://bioconductor.org/packages/MafH5.gnomAD.v3.1.2.GRCh38")
    (synopsis "Minor allele frequency data from gnomAD version 3.1.2 for GRCh38")
    (description "This package is designed to store minor allele frequency data.
It retrieves this data from the Genome Aggregation Database
(@code{gnomAD} version 3.1.2) for the human genome version GRCh38.")
    (license license:artistic2.0)))

;; Annotation package for the Mouse Phenotype Ontology.  The SQLite database
;; is supplied as the "MPO.sqlite" native input and copied into the output
;; during the build; "R/zzz.R" is patched to use that copy whenever the
;; NIX_BUILD_TOP environment variable is non-empty.
(define-public r-mpo-db
  (package
    (name "r-mpo-db")
    (version "0.99.7")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MPO.db" version
                              'annotation))
       (sha256
        (base32 "0x1rcikg189akbd71yh0p02482km9hry6i69s2srdf5mlgqficvl"))))
    (properties `((upstream-name . "MPO.db")))
    (build-system r-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'avoid-internet-access
            ;; The phase takes no information from its keyword arguments;
            ;; everything it needs is spliced in through the gexp.
            (lambda _
              (let* ((cache (string-append #$output "/share/MPO.db/cache"))
                     ;; "118299" matches the final component of the
                     ;; AnnotationHub fetch URL of the "MPO.sqlite" input.
                     (file (string-append cache "/118299")))
                (mkdir-p cache)
                (copy-file #$(this-package-native-input "MPO.sqlite") file)
                (substitute* "R/zzz.R"
                  ;; Only create the AnnotationHub object when
                  ;; NIX_BUILD_TOP is empty.
                  (("ah <- suppressMessages\\(AnnotationHub\\(\\)\\)" m)
                   (string-append
                    "if (Sys.getenv(\"NIX_BUILD_TOP\") == \"\") { " m " };"))
                  ;; When NIX_BUILD_TOP is set, point dbfile at the copy
                  ;; placed in the output above; otherwise keep the original
                  ;; assignment.
                  (("dbfile <- ah.*" m)
                   (string-append
                    "if (Sys.getenv(\"NIX_BUILD_TOP\") != \"\") { dbfile <- \""
                    file "\";} else { " m " }\n")))))))))
    (propagated-inputs
     (list r-annotationdbi r-annotationhub r-biocfilecache r-dbi))
    ;; The "MPO.sqlite" label is looked up by `this-package-native-input' in
    ;; the phase above, so it must stay in sync with it.
    (native-inputs
     `(("r-knitr" ,r-knitr)
       ("MPO.sqlite"
        ,(origin
           (method url-fetch)
           (uri "https://annotationhub.bioconductor.org/fetch/118299")
           (file-name "MPO.sqlite")
           (sha256
            (base32 "12rf5dpnjrpw55bgnbn68dni2g0p87nvs9c7mamqk0ayafs61zl0"))))))
    (home-page "https://github.com/YuLab-SMU/MPO.db")
    (synopsis "Annotation maps describing the Mouse Phenotype Ontology")
    ;; NOTE(review): the opening sentence talks about HDO.db rather than
    ;; MPO.db; presumably inherited from the upstream DESCRIPTION file --
    ;; confirm before rewording it.
    (description
     "This is the human disease ontology R package HDO.db, which provides the
semantic relationship between human diseases.  Relying on the DOSE and
@code{GOSemSim} packages, this package can carry out disease enrichment and
semantic similarity analyses.  Many biological studies are achieved through
mouse models, and a large number of data indicate the association between
genotypes and phenotypes or diseases.  The study of model organisms can be
transformed into useful knowledge about normal human biology and disease to
facilitate treatment and early screening for diseases.  Organism-specific
genotype-phenotypic associations can be applied to cross-species phenotypic
studies to clarify previously unknown phenotypic connections in other species.
Using the same principle to diseases can identify genetic associations and
even help to identify disease associations that are not obvious.")
    (license license:artistic2.0)))

;; Organism annotation package (E coli strain K12).
(define-public r-org-eck12-eg-db
  (package
    (name "r-org-eck12-eg-db")
    (version "3.19.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "org.EcK12.eg.db"
                                     version
                                     'annotation))
              (sha256
               (base32
                "1af2yrbpn58z34jq39qmmwprzsffgxbrs84dsxny4rksa8k6j70s"))))
    (properties '((upstream-name . "org.EcK12.eg.db")))
    (build-system r-build-system)
    (propagated-inputs (list r-annotationdbi))
    (home-page
     "https://bioconductor.org/packages/org.EcK12.eg.db")
    (synopsis
     "Genome wide annotation for E coli strain K12")
    (description "This package provides genome wide annotation for E coli strain K12,
primarily based on mapping using Entrez Gene identifiers.  Entrez Gene is
National Center for Biotechnology Information (NCBI)’s database for
gene-specific information.  Entrez Gene maintains records from genomes which
have been completely sequenced, which have an active research community to
submit gene-specific information, or which are scheduled for intense sequence
analysis.")
    (license license:artistic2.0)))

;; Organism annotation package (Bovine).
(define-public r-org-bt-eg-db
  (package
    (name "r-org-bt-eg-db")
    (version "3.19.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "org.Bt.eg.db" version 'annotation))
              (sha256
               (base32 "0rbbjv47fadc6jgpqrimsd91f35fw6sw8d805vf61nx5xhbl3lpc"))))
    (properties '((upstream-name . "org.Bt.eg.db")))
    (build-system r-build-system)
    (propagated-inputs (list r-annotationdbi))
    (home-page
     "https://bioconductor.org/packages/org.Bt.eg.db")
    (synopsis
     "Genome wide annotation for Bovine")
    (description "This package provides genome wide annotations for Bovine, primarily
based on mapping using Entrez Gene identifiers.")
    (license license:artistic2.0)))

;; Organism annotation package (Yeast).
(define-public r-org-sc-sgd-db
  (package
    (name "r-org-sc-sgd-db")
    (version "3.19.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "org.Sc.sgd.db"
                                     version
                                     'annotation))
              (sha256
               (base32
                "1sbjwyf0ibivdhhxsaljgqa4lqp3hcf8gcrlcm2vd78bfm5cm7bg"))))
    (properties
     '((upstream-name . "org.Sc.sgd.db")))
    (build-system r-build-system)
    (propagated-inputs
     (list r-annotationdbi))
    (home-page
     "https://bioconductor.org/packages/org.Sc.sgd.db")
    (synopsis
     "Genome wide annotation for Yeast")
    (description "This package provides genome wide annotation for Yeast, primarily based
on mapping using ORF identifiers from @acronym{SGD, Saccharomyces Genome
Database}.")
    (license license:artistic2.0)))

;; Platform design information package for Affymetrix Mapping50K_Xba240.
(define-public r-pd-mapping50k-xba240
  (package
    (name "r-pd-mapping50k-xba240")
    (version "3.12.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "pd.mapping50k.xba240"
                                     version
                                     'annotation))
              (sha256
               (base32
                "1a1f3lh5ywhyjawdbj2fzban85c8jz70lfcv3pagd5piincjwxq8"))))
    (properties
     '((upstream-name . "pd.mapping50k.xba240")))
    (build-system r-build-system)
    (propagated-inputs
     (list r-biostrings r-dbi r-iranges r-oligo
           r-oligoclasses r-rsqlite r-s4vectors))
    (home-page
     "https://bioconductor.org/packages/pd.mapping50k.xba240")
    (synopsis
     "Platform design info for Affymetrix Mapping50K_Xba240")
    (description
     "This package provides platform design info for Affymetrix
Mapping50K_Xba240 (pd.mapping50k.xba240).")
    (license license:artistic2.0)))

;; Annotation maps for the REACTOME database.
(define-public r-reactome-db
  (package
    (name "r-reactome-db")
    (version "1.88.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "reactome.db"
                                     version
                                     'annotation))
              (sha256
               (base32 "1fwbf53vklhvqa6n6pijb8xqyv2pap1hskkirnm5c42wmd6g4ql7"))))
    (properties
     '((upstream-name . "reactome.db")))
    (build-system r-build-system)
    (propagated-inputs (list r-annotationdbi))
    (home-page
     "https://bioconductor.org/packages/reactome.db/")
    (synopsis
     "Annotation maps for reactome")
    (description "This package provides a set of annotation maps for the REACTOME
database, assembled using data from REACTOME.")
    (license license:cc-by4.0)))

(define-public r-bsgenome-btaurus-ucsc-bostau8
  (package
    (name "r-bsgenome-btaurus-ucsc-bostau8")
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BSgenome.Btaurus.UCSC.bosTau8"
                                     version 'annotation))
              (sha256
               (base32
                "16wjy1aw9nvx03r7w8yh5w7sw3pn8i9nczd0n0728l6nnyqxlsz6"))))
    (properties
     `((upstream-name . "BSgenome.Btaurus.UCSC.bosTau8")))
    (build-system r-build-system)
    (propagated-inputs
     (list r-bsgenome))
    (home-page
     "https://www.bioconductor.org/packages/BSgenome.Btaurus.UCSC.bosTau8/")
    (synopsis "Full genome sequences for Bos taurus (UC