diff options
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- | gnu/packages/bioinformatics.scm | 918 |
1 files changed, 233 insertions, 685 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index d37d93047e..e008ca16fb 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -20,6 +20,7 @@ ;;; Copyright © 2020 Pierre Langlois <pierre.langlois@gmx.com> ;;; Copyright © 2020 Bonface Munyoki Kilyungi <bonfacemunyoki@gmail.com> ;;; Copyright © 2021 Tim Howes <timhowes@lavabit.com> +;;; Copyright © 2021 Hong Li <hli@mdc-berlin.de> ;;; ;;; This file is part of GNU Guix. ;;; @@ -52,6 +53,7 @@ #:use-module (guix build-system ocaml) #:use-module (guix build-system perl) #:use-module (guix build-system python) + #:use-module (guix build-system qt) #:use-module (guix build-system r) #:use-module (guix build-system ruby) #:use-module (guix build-system scons) @@ -125,6 +127,7 @@ #:use-module (gnu packages python-science) #:use-module (gnu packages python-web) #:use-module (gnu packages python-xyz) + #:use-module (gnu packages qt) #:use-module (gnu packages rdf) #:use-module (gnu packages readline) #:use-module (gnu packages ruby) @@ -2692,8 +2695,7 @@ trees (phylogenies) and characters.") (define-public python2-dendropy (let ((base (package-with-python2 python-dendropy))) - (package - (inherit base) + (package/inherit base (arguments `(#:phases (modify-phases %standard-phases @@ -3762,15 +3764,17 @@ particular, reads spanning multiple exons.") (define-public hisat2 (package (name "hisat2") - (version "2.0.5") + (version "2.2.1") (source (origin - (method url-fetch) - (uri (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2" - "/downloads/hisat2-" version "-source.zip")) + (method git-fetch) + (uri (git-reference + (url "https://github.com/DaehwanKimLab/hisat2/") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) (sha256 (base32 - "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g")))) + "0lmzdhzjkvxw7n5w40pbv5fgzd4cz0f9pxczswn3d4cr0k10k754")))) (build-system gnu-build-system) (arguments `(#:tests? #f ; no check target @@ -3783,9 +3787,12 @@ particular, reads spanning multiple exons.") (add-after 'unpack 'make-deterministic (lambda _ (substitute* "Makefile" - (("`date`") "0")) - #t)) + (("`date`") "0")))) (delete 'configure) + (add-before 'build 'build-manual + (lambda _ + (mkdir-p "doc") + (invoke "make" "doc"))) (replace 'install (lambda* (#:key outputs #:allow-other-keys) (let* ((out (assoc-ref outputs "out")) @@ -3796,13 +3803,13 @@ particular, reads spanning multiple exons.") (find-files "." "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")) (mkdir-p doc) - (install-file "doc/manual.inc.html" doc)) - #t))))) + (install-file "doc/manual.inc.html" doc))))))) (native-inputs - `(("unzip" ,unzip) ; needed for archive from ftp - ("perl" ,perl) + `(("perl" ,perl) ("pandoc" ,pandoc))) ; for documentation - (home-page "https://ccb.jhu.edu/software/hisat2/index.shtml") + (inputs + `(("python" ,python-wrapper))) + (home-page "https://daehwankimlab.github.io/hisat2/") (synopsis "Graph-based alignment of genomic sequencing reads") (description "HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human @@ -6534,7 +6541,7 @@ writing files into the .sra format.") `(("source" ,source) ("tar" ,tar) ("xz" ,xz))) - (home-page "http://www.seqan.de") + (home-page "https://www.seqan.de") (synopsis "Library for nucleotide sequence analysis") (description "SeqAn is a C++ library of efficient algorithms and data structures for @@ -6550,7 +6557,7 @@ bioinformatics file formats, sequence alignment, and more.") (version "1.4.2") (source (origin (method url-fetch) - (uri (string-append "http://packages.seqan.de/seqan-library/" + (uri (string-append "https://packages.seqan.de/seqan-library/" "seqan-library-" version ".tar.bz2")) (sha256 (base32 @@ -7311,387 +7318,6 @@ includes software to ") (license license:cc0)))) -(define-public r-genefilter - (package - (name "r-genefilter") - (version "1.72.1") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "genefilter" version)) - (sha256 - (base32 - "1c6h3qnjvphs977qhv5vafvsb108r0q7xhaayly6qv6adqfn94rn")))) - (build-system r-build-system) - (native-inputs - `(("gfortran" ,gfortran) - ("r-knitr" ,r-knitr))) - (propagated-inputs - `(("r-annotate" ,r-annotate) - ("r-annotationdbi" ,r-annotationdbi) - ("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-survival" ,r-survival))) - (home-page "https://bioconductor.org/packages/genefilter") - (synopsis "Filter genes from high-throughput experiments") - (description - "This package provides basic functions for filtering genes from -high-throughput sequencing experiments.") - (license license:artistic2.0))) - -(define-public r-deseq2 - (package - (name "r-deseq2") - (version "1.30.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "DESeq2" version)) - (sha256 - (base32 - "0q2f9cywrcmp1p7ii8f45g4dk4hsnjflq3yqhsxgnpv9fw338qpp")))) - (properties `((upstream-name . "DESeq2"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-biocparallel" ,r-biocparallel) - ("r-genefilter" ,r-genefilter) - ("r-geneplotter" ,r-geneplotter) - ("r-genomicranges" ,r-genomicranges) - ("r-ggplot2" ,r-ggplot2) - ("r-iranges" ,r-iranges) - ("r-locfit" ,r-locfit) - ("r-rcpp" ,r-rcpp) - ("r-rcpparmadillo" ,r-rcpparmadillo) - ("r-s4vectors" ,r-s4vectors) - ("r-summarizedexperiment" ,r-summarizedexperiment))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/DESeq2") - (synopsis "Differential gene expression analysis") - (description - "This package provides functions to estimate variance-mean dependence in -count data from high-throughput nucleotide sequencing assays and test for -differential expression based on a model using the negative binomial -distribution.") - (license license:lgpl3+))) - -(define-public r-dexseq - (package - (name "r-dexseq") - (version "1.36.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "DEXSeq" version)) - (sha256 - (base32 - "0wfjb42xcr4wjy8a654b74411dky8hp6sp8xdwf0sxqgsxy106qi")))) - (properties `((upstream-name . "DEXSeq"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotationdbi" ,r-annotationdbi) - ("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-biocparallel" ,r-biocparallel) - ("r-biomart" ,r-biomart) - ("r-deseq2" ,r-deseq2) - ("r-genefilter" ,r-genefilter) - ("r-geneplotter" ,r-geneplotter) - ("r-genomicranges" ,r-genomicranges) - ("r-hwriter" ,r-hwriter) - ("r-iranges" ,r-iranges) - ("r-rcolorbrewer" ,r-rcolorbrewer) - ("r-rsamtools" ,r-rsamtools) - ("r-s4vectors" ,r-s4vectors) - ("r-statmod" ,r-statmod) - ("r-stringr" ,r-stringr) - ("r-summarizedexperiment" ,r-summarizedexperiment))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/DEXSeq") - (synopsis "Inference of differential exon usage in RNA-Seq") - (description - "This package is focused on finding differential exon usage using RNA-seq -exon counts between samples with different experimental designs. It provides -functions that allows the user to make the necessary statistical tests based -on a model that uses the negative binomial distribution to estimate the -variance between biological replicates and generalized linear models for -testing. The package also provides functions for the visualization and -exploration of the results.") - (license license:gpl3+))) - -(define-public r-annotationforge - (package - (name "r-annotationforge") - (version "1.32.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "AnnotationForge" version)) - (sha256 - (base32 - "0y3820dkvwz09wlmz9drx6gqpsr9cwppaiz40zafwfxbz65y8px7")))) - (properties - `((upstream-name . "AnnotationForge"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotationdbi" ,r-annotationdbi) - ("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-dbi" ,r-dbi) - ("r-rcurl" ,r-rcurl) - ("r-rsqlite" ,r-rsqlite) - ("r-s4vectors" ,r-s4vectors) - ("r-xml" ,r-xml))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/AnnotationForge") - (synopsis "Code for building annotation database packages") - (description - "This package provides code for generating Annotation packages and their -databases. Packages produced are intended to be used with AnnotationDbi.") - (license license:artistic2.0))) - -(define-public r-rbgl - (package - (name "r-rbgl") - (version "1.66.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "RBGL" version)) - (sha256 - (base32 - "016vyzgixb3gjpzi21rbs6ngnnqcxr77krwjjf1ldnzzj8vqrqsz")))) - (properties `((upstream-name . "RBGL"))) - (build-system r-build-system) - (propagated-inputs - `(("r-bh" ,r-bh) - ("r-graph" ,r-graph))) - (home-page "https://www.bioconductor.org/packages/RBGL") - (synopsis "Interface to the Boost graph library") - (description - "This package provides a fairly extensive and comprehensive interface to -the graph algorithms contained in the Boost library.") - (license license:artistic2.0))) - -(define-public r-gseabase - (package - (name "r-gseabase") - (version "1.52.1") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "GSEABase" version)) - (sha256 - (base32 - "0dawh1kjmf6921jm77j2s2phrq5237pjc4sdh8fkln89gf48zx6i")))) - (properties `((upstream-name . "GSEABase"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotate" ,r-annotate) - ("r-annotationdbi" ,r-annotationdbi) - ("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-graph" ,r-graph) - ("r-xml" ,r-xml))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/GSEABase") - (synopsis "Gene set enrichment data structures and methods") - (description - "This package provides classes and methods to support @dfn{Gene Set -Enrichment Analysis} (GSEA).") - (license license:artistic2.0))) - -(define-public r-category - (package - (name "r-category") - (version "2.56.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "Category" version)) - (sha256 - (base32 - "0m77wpnica0h2ia9ajdaiga4plgz1s9wls6pdnxzk7kwl8a68wkr")))) - (properties `((upstream-name . "Category"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotate" ,r-annotate) - ("r-annotationdbi" ,r-annotationdbi) - ("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-genefilter" ,r-genefilter) - ("r-graph" ,r-graph) - ("r-gseabase" ,r-gseabase) - ("r-matrix" ,r-matrix) - ("r-rbgl" ,r-rbgl) - ("r-dbi" ,r-dbi))) - (home-page "https://bioconductor.org/packages/Category") - (synopsis "Category analysis") - (description - "This package provides a collection of tools for performing category -analysis.") - (license license:artistic2.0))) - -(define-public r-gostats - (package - (name "r-gostats") - (version "2.56.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "GOstats" version)) - (sha256 - (base32 - "18q8p0fv9fl2r6zjxknfjwqxr69dlyxy6c8amzn6c6dwjq1cxk6j")))) - (properties `((upstream-name . "GOstats"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotate" ,r-annotate) - ("r-annotationdbi" ,r-annotationdbi) - ("r-annotationforge" ,r-annotationforge) - ("r-biobase" ,r-biobase) - ("r-category" ,r-category) - ("r-go-db" ,r-go-db) - ("r-graph" ,r-graph) - ("r-rgraphviz" ,r-rgraphviz) - ("r-rbgl" ,r-rbgl))) - (home-page "https://bioconductor.org/packages/GOstats") - (synopsis "Tools for manipulating GO and microarrays") - (description - "This package provides a set of tools for interacting with GO and -microarray data. A variety of basic manipulation tools for graphs, hypothesis -testing and other simple calculations.") - (license license:artistic2.0))) - -(define-public r-shortread - (package - (name "r-shortread") - (version "1.48.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "ShortRead" version)) - (sha256 - (base32 - "0w4m8d3h660mmr2ymp206r1n4aqssxmkv8yxkbr5y1swrahxzfk9")))) - (properties `((upstream-name . "ShortRead"))) - (build-system r-build-system) - (inputs - `(("zlib" ,zlib))) - (propagated-inputs - `(("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-biocparallel" ,r-biocparallel) - ("r-biostrings" ,r-biostrings) - ("r-genomeinfodb" ,r-genomeinfodb) - ("r-genomicalignments" ,r-genomicalignments) - ("r-genomicranges" ,r-genomicranges) - ("r-rhtslib" ,r-rhtslib) - ("r-hwriter" ,r-hwriter) - ("r-iranges" ,r-iranges) - ("r-lattice" ,r-lattice) - ("r-latticeextra" ,r-latticeextra) - ("r-rsamtools" ,r-rsamtools) - ("r-s4vectors" ,r-s4vectors) - ("r-xvector" ,r-xvector) - ("r-zlibbioc" ,r-zlibbioc))) - (home-page "https://bioconductor.org/packages/ShortRead") - (synopsis "FASTQ input and manipulation tools") - (description - "This package implements sampling, iteration, and input of FASTQ files. -It includes functions for filtering and trimming reads, and for generating a -quality assessment report. Data are represented as -@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of -purposes. The package also contains legacy support for early single-end, -ungapped alignment formats.") - (license license:artistic2.0))) - -(define-public r-systempiper - (package - (name "r-systempiper") - (version "1.24.3") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "systemPipeR" version)) - (sha256 - (base32 - "0ffazyl2q9plbhwlxi04s3fvnli6qj95n7bkjc21535bbi08xfki")))) - (properties `((upstream-name . "systemPipeR"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotate" ,r-annotate) - ("r-assertthat" ,r-assertthat) - ("r-batchtools" ,r-batchtools) - ("r-biostrings" ,r-biostrings) - ("r-deseq2" ,r-deseq2) - ("r-dot" ,r-dot) - ("r-edger" ,r-edger) - ("r-genomicfeatures" ,r-genomicfeatures) - ("r-genomicranges" ,r-genomicranges) - ("r-ggplot2" ,r-ggplot2) - ("r-go-db" ,r-go-db) - ("r-gostats" ,r-gostats) - ("r-iranges" ,r-iranges) - ("r-limma" ,r-limma) - ("r-magrittr" ,r-magrittr) - ("r-pheatmap" ,r-pheatmap) - ("r-rjson" ,r-rjson) - ("r-rsamtools" ,r-rsamtools) - ("r-rsvg" ,r-rsvg) - ("r-shortread" ,r-shortread) - ("r-stringr" ,r-stringr) - ("r-summarizedexperiment" ,r-summarizedexperiment) - ("r-yaml" ,r-yaml) - ("r-variantannotation" ,r-variantannotation))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://github.com/tgirke/systemPipeR") - (synopsis "Next generation sequencing workflow and reporting environment") - (description - "This R package provides tools for building and running automated -end-to-end analysis workflows for a wide range of @dfn{next generation -sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq. -Important features include a uniform workflow interface across different NGS -applications, automated report generation, and support for running both R and -command-line software, such as NGS aligners or peak/variant callers, on local -computers or compute clusters. Efficient handling of complex sample sets and -experimental designs is facilitated by a consistently implemented sample -annotation infrastructure.") - (license license:artistic2.0))) - -(define-public r-grohmm - (package - (name "r-grohmm") - (version "1.24.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "groHMM" version)) - (sha256 - (base32 - "08pap9wsaxl4jjlc1py0rc019gmi6daa0f9cr3ih1d97wybncanx")))) - (properties `((upstream-name . "groHMM"))) - (build-system r-build-system) - (propagated-inputs - `(("r-genomeinfodb" ,r-genomeinfodb) - ("r-genomicalignments" ,r-genomicalignments) - ("r-genomicranges" ,r-genomicranges) - ("r-iranges" ,r-iranges) - ("r-mass" ,r-mass) - ("r-rtracklayer" ,r-rtracklayer) - ("r-s4vectors" ,r-s4vectors))) - (home-page "https://github.com/Kraus-Lab/groHMM") - (synopsis "GRO-seq analysis pipeline") - (description - "This package provides a pipeline for the analysis of GRO-seq data.") - (license license:gpl3+))) - (define-public vsearch (package (name "vsearch") @@ -7996,286 +7622,6 @@ including VCF header and contents in RDF and JSON.") (home-page "https://github.com/vcflib/bio-vcf") (license license:expat))) -(define-public r-biocviews - (package - (name "r-biocviews") - (version "1.58.1") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "biocViews" version)) - (sha256 - (base32 - "1by2639z7n62z84dr8rj9jz12gsd1k8q42zsnxacxbwfwp6h0cl4")))) - (properties - `((upstream-name . "biocViews"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biobase" ,r-biobase) - ("r-biocmanager" ,r-biocmanager) - ("r-graph" ,r-graph) - ("r-rbgl" ,r-rbgl) - ("r-rcurl" ,r-rcurl) - ("r-xml" ,r-xml) - ("r-runit" ,r-runit))) - (home-page "https://bioconductor.org/packages/biocViews") - (synopsis "Bioconductor package categorization helper") - (description "The purpose of biocViews is to create HTML pages that -categorize packages in a Bioconductor package repository according to keywords, -also known as views, in a controlled vocabulary.") - (license license:artistic2.0))) - -(define-public r-biocstyle - (package - (name "r-biocstyle") - (version "2.18.1") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "BiocStyle" version)) - (sha256 - (base32 - "0rsxyna4dd99x42vc82mlkxx774vb9375llpakg53max1hhwkrqp")))) - (properties - `((upstream-name . "BiocStyle"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocmanager" ,r-biocmanager) - ("r-bookdown" ,r-bookdown) - ("r-knitr" ,r-knitr) - ("r-rmarkdown" ,r-rmarkdown) - ("r-yaml" ,r-yaml))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/BiocStyle") - (synopsis "Bioconductor formatting styles") - (description "This package provides standard formatting styles for -Bioconductor PDF and HTML documents. Package vignettes illustrate use and -functionality.") - (license license:artistic2.0))) - -(define-public r-bioccheck - (package - (name "r-bioccheck") - (version "1.26.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "BiocCheck" version)) - (sha256 - (base32 - "1hyncn9zqj432da95k86rm5b28nbwrvzm52jbhisifkxj1j43cib")))) - (properties - `((upstream-name . "BiocCheck"))) - (build-system r-build-system) - (arguments - '(#:phases - (modify-phases %standard-phases - ;; This package can be used by calling BiocCheck(<package>) from - ;; within R, or by running R CMD BiocCheck <package>. This phase - ;; makes sure the latter works. For this to work, the BiocCheck - ;; script must be somewhere on the PATH (not the R bin directory). - (add-after 'install 'install-bioccheck-subcommand - (lambda* (#:key outputs #:allow-other-keys) - (let* ((out (assoc-ref outputs "out")) - (dest-dir (string-append out "/bin")) - (script-dir - (string-append out "/site-library/BiocCheck/script/"))) - (mkdir-p dest-dir) - (symlink (string-append script-dir "/checkBadDeps.R") - (string-append dest-dir "/checkBadDeps.R")) - (symlink (string-append script-dir "/BiocCheck") - (string-append dest-dir "/BiocCheck"))) - #t))))) - (propagated-inputs - `(("r-codetools" ,r-codetools) - ("r-graph" ,r-graph) - ("r-httr" ,r-httr) - ("r-knitr" ,r-knitr) - ("r-optparse" ,r-optparse) - ("r-biocmanager" ,r-biocmanager) - ("r-biocviews" ,r-biocviews) - ("r-stringdist" ,r-stringdist))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/BiocCheck") - (synopsis "Executes Bioconductor-specific package checks") - (description "This package contains tools to perform additional quality -checks on R packages that are to be submitted to the Bioconductor repository.") - (license license:artistic2.0))) - -(define-public r-s4vectors - (package - (name "r-s4vectors") - (version "0.28.1") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "S4Vectors" version)) - (sha256 - (base32 - "0fhf4lsfxrim7glazh6ng46ykzaly5ggwpg170vcz4cc24prv0rh")))) - (properties - `((upstream-name . "S4Vectors"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics))) - (home-page "https://bioconductor.org/packages/S4Vectors") - (synopsis "S4 implementation of vectors and lists") - (description - "The S4Vectors package defines the @code{Vector} and @code{List} virtual -classes and a set of generic functions that extend the semantic of ordinary -vectors and lists in R. Package developers can easily implement vector-like -or list-like objects as concrete subclasses of @code{Vector} or @code{List}. -In addition, a few low-level concrete subclasses of general interest (e.g. -@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the -S4Vectors package itself.") - (license license:artistic2.0))) - -(define-public r-iranges - (package - (name "r-iranges") - (version "2.24.1") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "IRanges" version)) - (sha256 - (base32 - "01mx46a82vd3gz705pj0kk4wpxg683s8jqxchzjia3gz00b4qw52")))) - (properties - `((upstream-name . "IRanges"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics) - ("r-s4vectors" ,r-s4vectors))) - (home-page "https://bioconductor.org/packages/IRanges") - (synopsis "Infrastructure for manipulating intervals on sequences") - (description - "This package provides efficient low-level and highly reusable S4 classes -for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more -generally, data that can be organized sequentially (formally defined as -@code{Vector} objects), as well as views on these @code{Vector} objects. -Efficient list-like classes are also provided for storing big collections of -instances of the basic classes. All classes in the package use consistent -naming and share the same rich and consistent \"Vector API\" as much as -possible.") - (license license:artistic2.0))) - -(define-public r-genomeinfodbdata - (package - (name "r-genomeinfodbdata") - (version "1.2.0") - (source (origin - (method url-fetch) - ;; We cannot use bioconductor-uri here because this tarball is - ;; located under "data/annotation/" instead of "bioc/". - (uri (string-append "https://bioconductor.org/packages/release/" - "data/annotation/src/contrib/GenomeInfoDbData_" - version ".tar.gz")) - (sha256 - (base32 - "0di6nlqpsyqf693k2na65ayqldih563x3zfrczpqc5q2hl5kg35c")))) - (properties - `((upstream-name . "GenomeInfoDbData"))) - (build-system r-build-system) - (home-page "https://bioconductor.org/packages/GenomeInfoDbData") - (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb") - (description "This package contains data for mapping between NCBI taxonomy -ID and species. It is used by functions in the GenomeInfoDb package.") - (license license:artistic2.0))) - -(define-public r-genomeinfodb - (package - (name "r-genomeinfodb") - (version "1.26.2") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "GenomeInfoDb" version)) - (sha256 - (base32 - "092izc49maxjhf6m4b0qx21ad16dz7bmxy5pysp3vkyhdrfa2f7v")))) - (properties - `((upstream-name . "GenomeInfoDb"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics) - ("r-genomeinfodbdata" ,r-genomeinfodbdata) - ("r-iranges" ,r-iranges) - ("r-rcurl" ,r-rcurl) - ("r-s4vectors" ,r-s4vectors))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/GenomeInfoDb") - (synopsis "Utilities for manipulating chromosome identifiers") - (description - "This package contains data and functions that define and allow -translation between different chromosome sequence naming conventions (e.g., -\"chr1\" versus \"1\"), including a function that attempts to place sequence -names in their natural, rather than lexicographic, order.") - (license license:artistic2.0))) - -(define-public r-edger - (package - (name "r-edger") - (version "3.32.1") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "edgeR" version)) - (sha256 - (base32 - "1gaic8qf6a6sy0bmydh1xzf52w0wnq31aanpvw3a30pfsi218bcp")))) - (properties `((upstream-name . "edgeR"))) - (build-system r-build-system) - (propagated-inputs - `(("r-limma" ,r-limma) - ("r-locfit" ,r-locfit) - ("r-rcpp" ,r-rcpp) - ("r-statmod" ,r-statmod))) ;for estimateDisp - (home-page "http://bioinf.wehi.edu.au/edgeR") - (synopsis "EdgeR does empirical analysis of digital gene expression data") - (description "This package can do differential expression analysis of -RNA-seq expression profiles with biological replication. It implements a range -of statistical methodology based on the negative binomial distributions, -including empirical Bayes estimation, exact tests, generalized linear models -and quasi-likelihood tests. It be applied to differential signal analysis of -other types of genomic data that produce counts, including ChIP-seq, SAGE and -CAGE.") - (license license:gpl2+))) - -(define-public r-variantannotation - (package - (name "r-variantannotation") - (version "1.36.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "VariantAnnotation" version)) - (sha256 - (base32 - "1sl0l6v05lfglj281nszma0h5k234md7rn2pdah8vs2d4iq3kimw")))) - (properties - `((upstream-name . "VariantAnnotation"))) - (propagated-inputs - `(("r-annotationdbi" ,r-annotationdbi) - ("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-biostrings" ,r-biostrings) - ("r-bsgenome" ,r-bsgenome) - ("r-dbi" ,r-dbi) - ("r-genomeinfodb" ,r-genomeinfodb) - ("r-genomicfeatures" ,r-genomicfeatures) - ("r-genomicranges" ,r-genomicranges) - ("r-iranges" ,r-iranges) - ("r-matrixgenerics" ,r-matrixgenerics) - ("r-summarizedexperiment" ,r-summarizedexperiment) - ("r-rhtslib" ,r-rhtslib) - ("r-rsamtools" ,r-rsamtools) - ("r-rtracklayer" ,r-rtracklayer) - ("r-s4vectors" ,r-s4vectors) - ("r-xvector" ,r-xvector) - ("r-zlibbioc" ,r-zlibbioc))) - (build-system r-build-system) - (home-page "https://bioconductor.org/packages/VariantAnnotation") - (synopsis "Package for annotation of genetic variants") - (description "This R package can annotate variants, compute amino acid -coding changes and predict coding outcomes.") - (license license:artistic2.0))) - (define-public r-limma (package (name "r-limma") @@ -8551,13 +7897,13 @@ tab-delimited (tabix) files.") (define-public r-delayedarray (package (name "r-delayedarray") - (version "0.16.1") + (version "0.16.2") (source (origin (method url-fetch) (uri (bioconductor-uri "DelayedArray" version)) (sha256 (base32 - "1d75zrhha1v7dhbvjp6a4iap441l5k268w0jjxklpqywbqns7l3d")))) + "09lpj951v1afxkrnjvnhzp4qgklq23ykdwlny7k1lyfcdy9q6wm0")))) (properties `((upstream-name . "DelayedArray"))) (build-system r-build-system) @@ -8699,13 +8045,13 @@ as well as query and modify the browser state, such as the current viewport.") (define-public r-genomicfeatures (package (name "r-genomicfeatures") - (version "1.42.1") + (version "1.42.2") (source (origin (method url-fetch) (uri (bioconductor-uri "GenomicFeatures" version)) (sha256 (base32 - "17dyd9hcw6pw16y353dh55wfhxmkxka99lbsxsp9xyrhffwrxi0s")))) + "17ns5hvx5q8mrmkgb6linspwml62mi34i6al5bxlib5xi9d9f04s")))) (properties `((upstream-name . "GenomicFeatures"))) (build-system r-build-system) @@ -10954,7 +10300,7 @@ with narrow binding events such as transcription factor ChIP-seq.") (define-public trim-galore (package (name "trim-galore") - (version "0.6.1") + (version "0.6.6") (source (origin (method git-fetch) @@ -10964,7 +10310,7 @@ with narrow binding events such as transcription factor ChIP-seq.") (file-name (git-file-name name version)) (sha256 (base32 - "1y31wbxwkm9xqzr5zv1pk5q418whnmlmgmfyxxpnl12h83m2i9iv")))) + "0yrwg6325j4sb9vnplvl3jplzab0qdhp92wl480qjinpfq88j4rs")))) (build-system gnu-build-system) (arguments `(#:tests? #f ; no tests @@ -13109,6 +12455,64 @@ concatenates pieces of read sequences to generate the final unitig sequences. Thus the per-base error rate is similar to the raw input reads.") (license license:expat))) +(define-public bandage + (package + (name "bandage") + (version "0.8.1") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/rrwick/Bandage") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "1bbsn5f5x8wlspg4pbibqz6m5vin8c19nl224f3z3km0pkc97rwv")))) + (build-system qt-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (replace 'configure + (lambda _ + (invoke "qmake" "Bandage.pro"))) + (replace 'check + (lambda* (#:key tests? #:allow-other-keys) + (when tests? + (substitute* "tests/bandage_command_line_tests.sh" + (("^bandagepath=.*") + (string-append "bandagepath=" (getcwd) "/Bandage\n"))) + (with-directory-excursion "tests" + (setenv "XDG_RUNTIME_DIR" (getcwd)) + (invoke "./bandage_command_line_tests.sh"))) + #t)) + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (install-file "Bandage" (string-append out "/bin")) + #t)))))) + (inputs + `(("qtbase" ,qtbase) + ("qtsvg" ,qtsvg))) + (native-inputs + `(("imagemagick" ,imagemagick))) + (home-page "https://rrwick.github.io/Bandage/") + (synopsis + "Bioinformatics Application for Navigating De novo Assembly Graphs Easily") + (description "Bandage is a program for visualising de novo assembly graphs. +It allows users to interact with the assembly graphs made by de novo assemblers +such as Velvet, SPAdes, MEGAHIT and others. De novo assembly graphs contain not +only assembled contigs but also the connections between those contigs, which +were previously not easily accessible. Bandage visualises assembly graphs, with +connections, using graph layout algorithms. Nodes in the drawn graph, which +represent contigs, can be automatically labelled with their ID, length or depth. +Users can interact with the graph by moving, labelling and colouring nodes. +Sequence information can also be extracted directly from the graph viewer. By +displaying connections between contigs, Bandage opens up new possibilities for +analysing and improving de novo assemblies that are not possible by looking at +contigs alone.") + (license (list license:gpl2+ ; bundled ogdf + license:gpl3+)))) + (define-public r-circus (package (name "r-circus") @@ -13359,6 +12763,89 @@ create connections between analogous cells in different batches without altering the counts or PCA space.") (license license:expat))) +(define-public python-drep + (package + (name "python-drep") + (version "3.2.0") + (source + (origin + (method url-fetch) + (uri (pypi-uri "drep" version)) + (sha256 + (base32 + "08vk0x6v5c5n7afgd5pcjhsvb424absypxy22hw1cm1n9kirbi77")))) + (build-system python-build-system) + (propagated-inputs + `(("python-biopython" ,python-biopython) + ("python-matplotlib" ,python-matplotlib) + ("python-numpy" ,python-numpy) + ("python-pandas" ,python-pandas) + ("python-pytest" ,python-pytest) + ("python-scikit-learn" ,python-scikit-learn) + ("python-seaborn" ,python-seaborn) + ("python-tqdm" ,python-tqdm))) + (home-page "https://github.com/MrOlm/drep") + (synopsis "De-replication of microbial genomes assembled from multiple samples") + (description + "dRep is a Python program for rapidly comparing large numbers of genomes. +dRep can also \"de-replicate\" a genome set by identifying groups of highly +similar genomes and choosing the best representative genome for each genome +set.") + (license license:expat))) + +(define-public instrain + (package + (name "instrain") + (version "1.5.2") + (source + (origin + (method url-fetch) + (uri (pypi-uri "inStrain" version)) + (sha256 + (base32 + "0ykqlpf6yz4caihsaz3ys00cyvlr7wdj4s9a8rh56q5r8xf80ic0")))) + (build-system python-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'patch-relative-imports + (lambda _ + (substitute* "docker/run_instrain.py" + (("from s3_utils") + "from .s3_utils") + (("from job_utils") + "from .job_utils"))))))) + (inputs + `(("python-biopython" ,python-biopython) + ("python-boto3" ,python-boto3) + ("python-h5py" ,python-h5py) + ("python-lmfit" ,python-lmfit) + ("python-matplotlib" ,python-matplotlib) + ("python-networkx" ,python-networkx) + ("python-numba" ,python-numba) + ("python-numpy" ,python-numpy) + ("python-pandas" ,python-pandas) + ("python-psutil" ,python-psutil) + ("python-pysam" ,python-pysam) + ("python-scikit-learn" ,python-scikit-learn) + ("python-seaborn" ,python-seaborn) + ("python-tqdm" ,python-tqdm) + ;; drep is needed for deprecated plot utilities + ("python-drep" ,python-drep))) + (native-inputs + `(("python-pytest" ,python-pytest))) + (home-page "https://github.com/MrOlm/inStrain") + (synopsis "Calculation of strain-level metrics") + (description + "inStrain is a Python program for analysis of co-occurring genome +populations from metagenomes that allows highly accurate genome comparisons, +analysis of coverage, microdiversity, and linkage, and sensitive SNP detection +with gene localization and synonymous non-synonymous identification.") + ;; The tool itself says that the license is "MIT", but the repository + ;; contains a LICENSE file with the GPLv3. + ;; See https://github.com/MrOlm/inStrain/issues/51 + (license license:expat))) + (define-public gffcompare (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41") (revision "1")) @@ -14367,14 +13854,14 @@ is a Cython wrapper for FIt-SNE.") (define-public bbmap (package (name "bbmap") - (version "35.82") + (version "38.90") (source (origin (method url-fetch) (uri (string-append "mirror://sourceforge/bbmap/BBMap_" version ".tar.gz")) (sha256 (base32 - "1q4rfhxcb6z3gm8zg2davjz98w22lkf4hm9ikxz9kdl93pil3wkd")))) + "1wb94bcc006qq86x77z2rz0lc8m9f1kpnw6gdhjfg9bdaqf56rm3")))) (build-system ant-build-system) (arguments `(#:build-target "dist" @@ -15785,3 +15272,64 @@ biological processes. SBML is useful for models of metabolism, cell signaling, and more. It continues to be evolved and expanded by an international community.") (license license:lgpl2.1+))) + +(define-public r-signac + (let ((commit "e0512d348adeda4a3f23a2e8f56d1fe09840e03c") + (revision "1")) + (package + (name "r-signac") + (version (git-version "1.1.1" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/timoast/signac/") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1yihhrv7zs87ax61la1nb4y12lg3knraw4b20k5digbcwm8488lb")))) + (properties `((upstream-name . "Signac"))) + (build-system r-build-system) + (inputs `(("zlib" ,zlib))) + (propagated-inputs + `(("r-annotationfilter" ,r-annotationfilter) + ("r-biocgenerics" ,r-biocgenerics) + ("r-biostrings" ,r-biostrings) + ("r-biovizbase" ,r-biovizbase) + ("r-data-table" ,r-data-table) + ("r-dplyr" ,r-dplyr) + ("r-fastmatch" ,r-fastmatch) + ("r-future" ,r-future) + ("r-future-apply" ,r-future-apply) + ("r-genomeinfodb" ,r-genomeinfodb) + ("r-genomicranges" ,r-genomicranges) + ("r-ggbio" ,r-ggbio) + ("r-ggforce" ,r-ggforce) + ("r-ggplot2" ,r-ggplot2) + ("r-ggrepel" ,r-ggrepel) + ("r-ggseqlogo" ,r-ggseqlogo) + ("r-iranges" ,r-iranges) + ("r-irlba" ,r-irlba) + ("r-lsa" ,r-lsa) + ("r-matrix" ,r-matrix) + ("r-patchwork" ,r-patchwork) + ("r-pbapply" ,r-pbapply) + ("r-rcpp" ,r-rcpp) + ("r-rcpproll" ,r-rcpproll) + ("r-rsamtools" ,r-rsamtools) + ("r-s4vectors" ,r-s4vectors) + ("r-scales" ,r-scales) + ("r-seurat" ,r-seurat) + ("r-seuratobject" ,r-seuratobject) + ("r-stringi" ,r-stringi) + ("r-tidyr" ,r-tidyr))) + (home-page "https://github.com/timoast/signac/") + (synopsis "Analysis of single-cell chromatin data") + (description + "This package provides a framework for the analysis and exploration of +single-cell chromatin data. The Signac package contains functions for +quantifying single-cell chromatin data, computing per-cell quality control +metrics, dimension reduction and normalization, visualization, and DNA +sequence motif analysis.") + (license license:expat)))) |