/scripts/

;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2014-2024 Ricardo Wurmus <rekado@elephly.net>
;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins <pjotr.guix@thebird.nl>
;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
;;; Copyright © 2016, 2020, 2021 Roel Janssen <roel@gnu.org>
;;; Copyright © 2016-2024 Efraim Flashner <efraim@flashner.co.il>
;;; Copyright © 2016, 2020, 2022 Marius Bakke <marius@gnu.org>
;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
;;; Copyright © 2017, 2021, 2022, 2024 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
;;; Copyright © 2018-2023 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
;;; Copyright © 2019, 2020, 2021, 2023 Maxim Cournoyer <maxim.cournoyer@gmail.com>
;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
;;; Copyright © 2020 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
;;; Copyright © 2020 Pierre Langlois <pierre.langlois@gmx.com>
;;; Copyright © 2020 Bonface Munyoki Kilyungi <bonfacemunyoki@gmail.com>
;;; Copyright © 2021 Tim Howes <timhowes@lavabit.com>
;;; Copyright © 2021 Hong Li <hli@mdc-berlin.de>
;;; Copyright © 2021, 2022, 2023 Simon Tournier <zimon.toutoune@gmail.com>
;;; Copyright © 2021 Felix Gruber <felgru@posteo.net>
;;; Copyright © 2022, 2023, 2024 Navid Afkhami <navid.afkhami@mdc-berlin.de>
;;; Copyright © 2022 Antero Mejr <antero@mailbox.org>
;;; Copyright © 2024 Alexis Simon <alexis.simon@runbox.com>
;;; Copyright © 2024 Spencer King <spencer.king@geneoscopy.com>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

(define-module (gnu packages bioinformatics)
  #:use-module ((guix licenses) #:prefix license:)
  #:use-module (guix packages)
  #:use-module (guix gexp)
  #:use-module (guix utils)
  #:use-module (guix download)
  #:use-module (guix git-download)
  #:use-module (guix hg-download)
  #:use-module (guix build-system ant)
  #:use-module (guix build-system gnu)
  #:use-module (guix build-system cargo)
  #:use-module (guix build-system cmake)
  #:use-module (guix build-system go)
  #:use-module (guix build-system haskell)
  #:use-module (guix build-system meson)
  #:use-module (guix build-system perl)
  #:use-module (guix build-system pyproject)
  #:use-module (guix build-system python)
  #:use-module (guix build-system qt)
  #:use-module (guix build-system r)
  #:use-module (guix build-system ruby)
  #:use-module (guix build-system trivial)
  #:use-module (guix deprecation)
  #:use-module (gnu packages)
  #:use-module (gnu packages admin)
  #:use-module (gnu packages assembly)
  #:use-module (gnu packages autotools)
  #:use-module (gnu packages algebra)
  #:use-module (gnu packages base)
  #:use-module (gnu packages bash)
  #:use-module (gnu packages bison)
  #:use-module (gnu packages bioconductor)
  #:use-module (gnu packages boost)
  #:use-module (gnu packages check)
  #:use-module (gnu packages code)
  #:use-module (gnu packages cmake)
  #:use-module (gnu packages compression)
  #:use-module (gnu packages cpp)
  #:use-module (gnu packages cpio)
  #:use-module (gnu packages cran)
  #:use-module (gnu packages crates-io)
  #:use-module (gnu packages curl)
  #:use-module (gnu packages docbook)
  #:use-module (gnu packages documentation)
  #:use-module (gnu packages databases)
  #:use-module (gnu packages datastructures)
  #:use-module (gnu packages dlang)
  #:use-module (gnu packages file)
  #:use-module (gnu packages flex)
  #:use-module (gnu packages gawk)
  #:use-module (gnu packages gcc)
  #:use-module (gnu packages gd)
  #:use-module (gnu packages golang)
  #:use-module (gnu packages golang-build)
  #:use-module (gnu packages golang-check)
  #:use-module (gnu packages golang-compression)
  #:use-module (gnu packages golang-xyz)
  #:use-module (gnu packages glib)
  #:use-module (gnu packages graph)
  #:use-module (gnu packages graphics)
  #:use-module (gnu packages graphviz)
  #:use-module (gnu packages groff)
  #:use-module (gnu packages gtk)
  #:use-module (gnu packages guile)
  #:use-module (gnu packages guile-xyz)
  #:use-module (gnu packages haskell-check)
  #:use-module (gnu packages haskell-web)
  #:use-module (gnu packages haskell-xyz)
  #:use-module (gnu packages image)
  #:use-module (gnu packages image-processing)
  #:use-module (gnu packages imagemagick)
  #:use-module (gnu packages java)
  #:use-module (gnu packages java-compression)
  #:use-module (gnu packages java-xml)
  #:use-module (gnu packages jemalloc)
  #:use-module (gnu packages jupyter)
  #:use-module (gnu packages libffi)
  #:use-module (gnu packages linux)
  #:use-module (gnu packages llvm)
  #:use-module (gnu packages logging)
  #:use-module (gnu packages lsof)
  #:use-module (gnu packages machine-learning)
  #:use-module (gnu packages man)
  #:use-module (gnu packages maths)
  #:use-module (gnu packages mpi)
  #:use-module (gnu packages ncurses)
  #:use-module (gnu packages node)
  #:use-module (gnu packages ocaml)
  #:use-module (gnu packages pcre)
  #:use-module (gnu packages parallel)
  #:use-module (gnu packages pdf)
  #:use-module (gnu packages perl)
  #:use-module (gnu packages perl-check)
  #:use-module (gnu packages perl-web)
  #:use-module (gnu packages pkg-config)
  #:use-module (gnu packages popt)
  #:use-module (gnu packages protobuf)
  #:use-module (gnu packages python)
  #:use-module (gnu packages python-build)
  #:use-module (gnu packages python-check)
  #:use-module (gnu packages python-compression)
  #:use-module (gnu packages python-crypto)
  #:use-module (gnu packages python-science)
  #:use-module (gnu packages python-web)
  #:use-module (gnu packages python-xyz)
  #:use-module (gnu packages qt)
  #:use-module (gnu packages rdf)
  #:use-module (gnu packages readline)
  #:use-module (gnu packages rpc)
  #:use-module (gnu packages rsync)
  #:use-module (gnu packages ruby)
  #:use-module (gnu packages serialization)
  #:use-module (gnu packages shells)
  #:use-module (gnu packages skribilo)
  #:use-module (gnu packages sphinx)
  #:use-module (gnu packages sqlite)
  #:use-module (gnu packages statistics)
  #:use-module (gnu packages swig)
  #:use-module (gnu packages tbb)
  #:use-module (gnu packages tex)
  #:use-module (gnu packages texinfo)
  #:use-module (gnu packages textutils)
  #:use-module (gnu packages time)
  #:use-module (gnu packages tls)
  #:use-module (gnu packages uglifyjs)
  #:use-module (gnu packages video)
  #:use-module (gnu packages vim)
  #:use-module (gnu packages web)
  #:use-module (gnu packages wget)
  #:use-module (gnu packages xml)
  #:use-module (gnu packages xorg)
  #:use-module (guix platform)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-26)
  #:use-module (ice-9 match))

(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.41")
    ;; The origin is a single C source file whose name embeds the version.
    (source (origin
              (method url-fetch)
              (uri (string-append "https://www.ansikte.se/ARAGORN/Downloads/"
                                  "aragorn" version ".c"))
              (sha256
               (base32
                "0jkzx7sqiarydvz3bwaxh790fpvpkfc926dhsza1dbdhq32ir8wj"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f ; there are no tests
      #:phases
      #~(modify-phases %standard-phases
          ;; The source is one C file, so there is nothing to configure; it
          ;; is compiled and installed by hand below.
          (delete 'configure)
          (replace 'build
            (lambda _
              ;; Use the compiler for the target system rather than a
              ;; hard-coded "gcc", so that cross-compilation invokes the
              ;; cross-compiler.
              (invoke #$(cc-for-target)
                      "-O3" "-ffast-math" "-finline-functions"
                      "-o" "aragorn" #$source)))
          (replace 'install
            (lambda* (#:key inputs native-inputs #:allow-other-keys)
              (let ((bin (string-append #$output "/bin"))
                    (man (string-append #$output "/share/man/man1")))
                (install-file "aragorn" bin)
                (mkdir-p man)
                ;; The manual page is a native input.  When cross-compiling,
                ;; native inputs are passed separately as NATIVE-INPUTS; in
                ;; a native build that argument is #f and INPUTS holds
                ;; everything.
                (copy-file (assoc-ref (or native-inputs inputs) "aragorn.1")
                           (string-append man "/aragorn.1"))))))))
    ;; The manual page is distributed separately from the C source.
    (native-inputs
     `(("aragorn.1"
        ,(origin
           (method url-fetch)
           (uri "https://www.ansikte.se/ARAGORN/Downloads/aragorn.1")
           (sha256
            (base32
             "0bn9lapa6f0cl07dbn2fjrapirv9d4bk7w248w39fhb4vbczcc3f"))))))
    (home-page "https://www.ansikte.se/ARAGORN/")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure.  It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl3+)))

(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.2")
    ;; Fetched from the upstream Git repository at the "v<version>" tag.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pezmaster31/bamtools")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "14lw571vbks138i0lj66qjdbk8iwa817x2zbpzij61vv1gdgfbn5"))))
    (build-system cmake-build-system)
    (arguments
     ;; Ask CMake for shared libraries.
     (list #:configure-flags
           #~(list "-DBUILD_SHARED_LIBS=ON")))
    (native-inputs
     (list pkg-config))
    (inputs
     (list jsoncpp zlib))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))

(define-public bamutils
  (package
    (name "bamutils")
    (version "1.0.15")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/statgen/bamUtil")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1pxydf9qsr8667jh525bc2wiqn9nwk8rkg05kbyfmjs8d261fl9y"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f ;fails to link debug libraries
      #:test-target "test"
      ;; Have "make install" place the program in the output's bin directory.
      #:make-flags
      #~(list (string-append "INSTALLDIR=" #$output "/bin"))
      #:phases
      #~(modify-phases %standard-phases
          ;; Instead of running a configure script, prepare the tree: replace
          ;; the DATE assignment in src/Makefile with a fixed value
          ;; (presumably for reproducible builds), and copy the libStatGen
          ;; sources to ../libStatGen.
          (replace 'configure
            (lambda* (#:key inputs #:allow-other-keys)
              (substitute* "src/Makefile"
                (("^DATE=.*") "DATE=\"1970-01-01\"\n"))
              (copy-recursively (assoc-ref inputs "libstatgen")
                                "../libStatGen"))))))
    (inputs
     (list zlib))
    ;; Source checkout of libStatGen, fetched at the same "v<version>" tag as
    ;; bamUtil itself and copied into place by the 'configure phase above.
    (native-inputs
     `(("libstatgen"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/statgen/libStatGen/")
                 (commit (string-append "v" version))))
           (file-name (git-file-name "libstatgen" version))
           (sha256
            (base32
             "0spvbpvnpxrgj8kajpkhf1mv7kdyvj723y9zh13jykvnjh8a15j3"))))))
    (home-page "https://genome.sph.umich.edu/wiki/BamUtil")
    (synopsis "Programs for working on SAM/BAM files")
    (description "This package provides several programs that perform
operations on SAM/BAM files.  All of these programs are built into a
single executable called @code{bam}.")
    (license license:gpl3+)))

(define-public bcftools
  (package
    (name "bcftools")
    (version "1.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/samtools/bcftools/"
                                  "releases/download/"
                                  version "/bcftools-" version ".tar.bz2"))
              (sha256
               (base32
                "1jqrma16fx8kpvb3c0462dg0asvmiv5yi8myqmc5ddgwi6p8ivxp"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Delete bundled htslib.
                          (delete-file-recursively "htslib-1.14")))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list "--enable-libgsl")
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         ;; The test driver refers to /bin/bash; replace that with the
         ;; "bash" found on PATH.
         (add-before 'check 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash"))))))))
    ;; The variants that inherit from this package override this field to
    ;; select the htslib release matching their bcftools version.
    (native-inputs
     (list htslib perl))
    (inputs
     (list gsl zlib))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF.  All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but become GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))

;; Variant of bcftools at version 1.12.  Only the source and the native
;; inputs differ from the parent: the bundled htslib-1.12 directory is
;; removed and the htslib-1.12 package is used instead.
(define-public bcftools-1.12
  (package/inherit bcftools
    (version "1.12")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/samtools/bcftools/"
                                  "releases/download/"
                                  version "/bcftools-" version ".tar.bz2"))
              (sha256
               (base32
                "1x94l1hy2pi3lbz0sxlbw0g6q5z5apcrhrlcwda94ns9n4r6a3ks"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Delete bundled htslib.
                          (delete-file-recursively "htslib-1.12")))))
    (native-inputs (list htslib-1.12 perl))))

;; Variant of bcftools at version 1.10.  Only the source and the native
;; inputs differ from the parent: the bundled htslib-1.10 directory is
;; removed and the htslib-1.10 package is used instead.
(define-public bcftools-1.10
  (package/inherit bcftools
    (version "1.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/samtools/bcftools/"
                                  "releases/download/"
                                  version "/bcftools-" version ".tar.bz2"))
              (sha256
               (base32
                "10xgwfdgqb6dsmr3ndnpb77mc3a38dy8kh2c6czn6wj7jhdp4dra"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Delete bundled htslib.
                          (delete-file-recursively "htslib-1.10")))))
    (native-inputs (list htslib-1.10 perl))))

(define-public bedops
  (package
    (name "bedops")
    (version "2.4.41")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/bedops/bedops")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "046037qdxsn01ln28rbrwnc7wq4a3xahmb2k74l0w75dby5ni42l"))))
    (build-system gnu-build-system)
    (arguments
     (list
      ;; We cannot run the tests because the build system makes strange
      ;; assumptions about where executables are located.
      #:tests? #false
      #:test-target "tests"
      ;; Pass the C compiler for the target system to make.
      #:make-flags
      #~(list (string-append "CC=" #$(cc-for-target)))
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'unpack-tarballs
            (lambda _
              ;; FIXME: Bedops includes tarballs of minimally patched upstream
              ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
              ;; libraries because at least one of the libraries (zlib) is
              ;; patched to add a C++ function definition (deflateInit2cpp).
              ;; Until the Bedops developers offer a way to link against system
              ;; libraries we have to build the in-tree copies of these three
              ;; libraries.

              ;; See upstream discussion:
              ;; https://github.com/bedops/bedops/issues/124

              ;; Unpack the tarballs to benefit from shebang patching.
              (with-directory-excursion "third-party"
                (invoke "tar" "xvf" "jansson-2.6.tar.bz2")
                (invoke "tar" "xvf" "zlib-1.2.7.tar.bz2")
                (invoke "tar" "xvf" "bzip2-1.0.6.tar.bz2"))
              ;; Disable unpacking of tarballs in Makefile.
              (substitute* "system.mk/Makefile.linux"
                (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                (("\\./configure") "CONFIG_SHELL=bash ./configure"))
              ;; Make the bundled zlib's Makefile template use "bash" as its
              ;; shell.
              (substitute* "third-party/zlib-1.2.7/Makefile.in"
                (("^SHELL=.*$") "SHELL=bash\n"))))
          ;; No configure step is run at the top level.
          (delete 'configure)
          ;; Install with BINDIR pointing at the output's bin directory.
          (replace 'install
            (lambda _
              (invoke "make" "install"
                      (string-append "BINDIR=" #$output "/bin")))))))
    (native-inputs
     (list diffutils perl which))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets.  It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale.  Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))

(define-public bedtools
  (package
    (name "bedtools")
    (version "2.30.0")
    ;; Release tarball published on the upstream GitHub releases page.
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "1f2hh79l7dn147c2xyfgf5wfjvlqfw32kjfnnh2n1qy6rpzx2fik"))))
    (build-system gnu-build-system)
    (arguments
     (list #:test-target "test"
           ;; The installation prefix is passed to make as a variable.
           #:make-flags
           #~(list (string-append "prefix=" #$output))
           ;; No configure step is run.
           #:phases
           #~(modify-phases %standard-phases
               (delete 'configure))))
    (inputs
     (list samtools zlib))
    (native-inputs
     (list python-wrapper))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks.  The most widely-used tools enable
genome arithmetic: that is, set theory on the genome.  For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:expat)))

;; Later releases of bedtools produce files with more columns than
;; what Ribotaper expects.
(define-public bedtools-2.18
  ;; The package name is inherited unchanged from BEDTOOLS; only the version,
  ;; source and build recipe are specific to this variant.
  (package/inherit bedtools
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "releases/download/v" version
                           "/bedtools-" version ".tar.gz"))
       (sha256
        (base32 "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
    (arguments
     (list
      #:test-target "test"
      #:phases
      #~(modify-phases %standard-phases
          ;; Source-level adjustments to three C++ constructs in this old
          ;; release.
          (add-after 'unpack 'compatibility
            (lambda _
              (substitute* "src/utils/fileType/FileRecordTypeChecker.h"
                (("static const float PERCENTAGE")
                 "static constexpr float PERCENTAGE"))
              (substitute* "src/utils/general/DualQueue.h"
                (("template <class T, template<class T> class CompareFunc>")
                 "template <class T, template<class U> class CompareFunc>"))
              (substitute* '("src/utils/BamTools/src/api/algorithms/Sort.h"
                             "src/utils/BamTools/src/api/internal/bam/BamMultiMerger_p.h")
                (("(bool operator\\(\\).*) \\{" m pre)
                 (string-append pre " const {")))))
          (delete 'configure)
          ;; Copy every file found under "bin" into the output's bin
          ;; directory.
          (replace 'install
            (lambda _
              (let ((bin (string-append #$output "/bin/")))
                (for-each (lambda (file)
                            (install-file file bin))
                          (find-files "bin" ".*"))))))))
    (inputs
     (list samtools zlib))
    (native-inputs
     (list python-wrapper))))

(define-public bitmapperbs
  (package
    (name "bitmapperbs")
    (version "1.0.2.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/chhylp123/BitMapperBS/")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "02ksssfnvmpskld0a2016smfz5nrzm3d90v8974f3cpzywckvp8v"))
              (modules '((guix build utils)))
              ;; This package bundles a modified copy of htslib, so we cannot
              ;; unbundle it.
              (snippet
               '(begin
                  ;; These two bundled projects are replaced by the
                  ;; libdivsufsort and psascan inputs.
                  (delete-file-recursively "libdivsufsort-2.0.1")
                  (delete-file-recursively "pSAscan-0.1.0")))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #false
      ;; Build only the "bitmapperBS" target.
      #:make-flags '(list "bitmapperBS")
      ;; The build system checks for CPU features.  For this reason, we want
      ;; users to build it locally instead of using substitutes.
      #:substitutable? #false
      #:phases
      #~(modify-phases %standard-phases
          ;; Install the bundled htslib as a shared library into this
          ;; package's output and point the include and library paths at the
          ;; in-tree headers and the output's lib directory.
          (add-after 'unpack 'fix-build-system
            (lambda _
              (substitute* "Makefile"
                (("make prefix=../htslib_aim install")
                 (string-append "make prefix=" #$output " install-so"))
                (("htslib_aim/include") "htslib")
                (("htslib_aim/lib")
                 (string-append #$output "/lib")))))
          (add-after 'unpack 'patch-references-to-psascan
            (lambda* (#:key inputs #:allow-other-keys)
              ;; Replace a quoted reference to "psascan" or "./psascan" with
              ;; the absolute file name of the psascan input.  The whole
              ;; match, including the optional "./" prefix, is discarded: the
              ;; first variable that 'substitute*' binds is the complete
              ;; match, so re-inserting it would produce
              ;; "\"\"./psascan/gnu/store/..." rather than a usable path.
              ;; NOTE(review): confirm that "Makefile" is the file that
              ;; contains the quoted psascan reference.
              (substitute* "Makefile"
                (("\"(./)?psascan")
                 (string-append "\""
                                (search-input-file inputs "/bin/psascan"))))))
          (delete 'configure)
          ;; Only the executable is installed by hand.
          (replace 'install
            (lambda _
              (install-file "bitmapperBS"
                            (string-append #$output "/bin/")))))))
    (inputs
     (list libdivsufsort psascan zlib))
    (home-page "https://github.com/chhylp123/BitMapperBS/")
    (synopsis "Read aligner for whole-genome bisulfite sequencing")
    (description
     "BitMapperBS is memory-efficient aligner that is designed for
whole-genome bisulfite sequencing (WGBS) reads from directional protocol.")
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))

(define-public bustools
  (package
    (name "bustools")
    (version "0.43.2")
    ;; Fetched from the upstream Git repository at the "v<version>" tag.
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/BUStools/bustools")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "068kjlc4d528269nl5mc3j8h2c95r1v545d3fi1iw1ckg8rba0hg"))))
    (build-system cmake-build-system)
    (arguments (list #:tests? #f))          ;no test target
    (inputs (list zlib))
    (home-page "https://bustools.github.io")
    (synopsis "Tools for working with BUS files")
    (description "bustools is a program for manipulating BUS files for single
cell RNA-Seq datasets.  It can be used to error correct barcodes, collapse
UMIs, produce gene count or transcript compatibility count matrices, and is useful
for many other tasks.")
    (license license:bsd-2)))

(define-public cellsnp-lite
  ;; Last release is from November 2021 and does not contain fixes.
  (let ((commit "0885d746b0b1ea65c8ef92f8943ca7669ca9734a")
        (revision "0"))
    (package
      (name "cellsnp-lite")
      ;; The version string combines the base version with the revision and
      ;; commit bound above.
      (version (git-version "1.2.2" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/single-cell-genetics/cellsnp-lite")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1qrvqgbvw6mbhpyqvqbmvv8dmyc67bsk1041cn7ib6zmd47qm444"))))
      (build-system gnu-build-system)
      (arguments
       (list
        ;; Point the configure script at the htslib input.
        #:configure-flags
        #~(list (string-append "--with-htslib="
                               #$(this-package-input "htslib")))))
      (inputs
       (list curl
             htslib
             openssl
             zlib))
      (native-inputs
       (list autoconf))
      (home-page "https://cellsnp-lite.readthedocs.io")
      (synopsis "Pileup expressed alleles in single-cell or bulk RNA-seq data")
      (description
       "This package is designed to pileup the expressed alleles in
single-cell or bulk RNA-seq data, which can be directly used for donor
deconvolution in multiplexed single-cell RNA-seq data, particularly with other
packages, which assigns cells to donors and detects doublets as vireo, even
without genotyping reference.

This package is the C version of the deprecated cellSNP implemented in Python.
Compared to cellSNP, this package is more efficient with higher speed and less
memory usage.")
      (license license:asl2.0))))

(define-public cpat
  (package
    (name "cpat")
    (version "3.0.4")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "CPAT" version))
              (sha256
               (base32
                "0dfrwwbhv1n4nh2a903d1qfb30fgxgya89sa70aci3wzf8h2z0vd"))
              (modules '((guix build utils)))
              ;; Remove the ".eggs" directory and the "__pycache__"
              ;; directories from the source.
              (snippet
               '(for-each delete-file-recursively
                          (list ".eggs"
                                "lib/__pycache__/"
                                "lib/cpmodule/__pycache__/")))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         ;; Run the test script found in the "test" directory, unless tests
         ;; are disabled.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (with-directory-excursion "test"
                 ;; There is no test4.fa
                 (substitute* "test.sh"
                   ((".*-g test4.fa.*") ""))
                 (invoke "bash" "test.sh"))))))))
    (propagated-inputs
     (list python-numpy python-pysam))
    (inputs
     (list r-minimal))
    (home-page "https://wlcb.oit.uci.edu/cpat/")
    (synopsis "Alignment-free distinction between coding and noncoding RNA")
    (description
     "CPAT is a method to distinguish coding and noncoding RNA by using a
logistic regression model based on four pure sequence-based, linguistic
features: ORF size, ORF coverage, Ficket TESTCODE, and Hexamer usage bias.
Linguistic features based method does not require other genomes or protein
databases to perform alignment and is more robust.  Because it is
alignment-free, it runs much faster and also easier to use.")
    (license license:gpl2+)))

(define-public pbcopper
  (package
    (name "pbcopper")
    (version "2.0.0")
    ;; Fetched from the upstream Git repository at the "v<version>" tag.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/pbcopper")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "04mgmla96bsmr9gijbn3ibspry625cv4kqqxv70z4jq4qc407jy3"))))
    (build-system meson-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          ;; Strip the "-msse4.1" compiler flag from the Meson build file.
          (add-after 'unpack 'patch-meson-files
            (lambda _
              (substitute* "meson.build"
                (("-msse4.1") "")))))))
    (native-inputs
     (list googletest pkg-config))
    (inputs
     (list boost))
    (home-page "https://github.com/PacificBiosciences/pbcopper")
    (synopsis "Data structures, algorithms, and utilities for PacBio C++ applications")
    (description
     "The pbcopper library provides a suite of data structures, algorithms,
and utilities for PacBio C++ applications.")
    (license license:bsd-3)))

(define-public r-anndatar
  ;; This package is built from a specific commit rather than a release tag;
  ;; the version string below combines the base version with the revision and
  ;; commit bound here.
  (let ((commit "5c3eb7e498d0d9bf1c522ad66f4eb8ad277238b6")
        (revision "1"))
    (package
      (name "r-anndatar")
      (version (git-version "0.99.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/scverse/anndataR")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0sx87i8cb4p08ihgpgflxs0fhkr1kw6lxvky4w766rq7wqy41cgk"))))
      ;; Record the upstream spelling of the name, which differs in case from
      ;; the package name used here.
      (properties `((upstream-name . "anndataR")))
      (build-system r-build-system)
      (propagated-inputs (list r-matrix r-r6))
      (native-inputs (list r-knitr))
      (home-page "https://github.com/scverse/anndataR")
      (synopsis "AnnData interoperability in R")
      (description
       "This package aims to bring the power and flexibility of @code{AnnData}
to the R ecosystem, allowing you to effortlessly manipulate and analyze your
single-cell data.  This package lets you work with backed h5ad and zarr files,
directly access various slots (e.g. X, obs, var), or convert the data into
@code{SingleCellExperiment} and Seurat objects.")
      (license license:expat))))

(define-public r-anpan
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "286b88dcf5e9e963a595482139aade154ee1dc86")
        (revision "1"))
    (package
      (name "r-anpan")
      (version (git-version "0.3.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/biobakery/anpan")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "10nw5v69gn4pxb4g5gd8nh9r1ywd6yczapl3dpdfms0434wcmkxm"))))
      (properties '((upstream-name . "anpan")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-ape
             r-cmdstanr
             r-data-table
             r-dplyr
             r-fastglm
             r-furrr
             r-future
             r-ggdendro
             r-ggnewscale
             r-ggplot2
             r-loo
             r-mass
             r-patchwork
             r-phylogram
             r-posterior
             r-progressr
             r-purrr
             r-r-utils
             r-stringr
             r-tibble
             r-tidyselect))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/biobakery/anpan")
      (synopsis "Quantifying microbial strain-host associations")
      (description
       "The goal of anpan is to consolidate statistical methods for strain
analysis.  This includes automated filtering of metagenomic functional
profiles, testing genetic elements for association with outcomes, phylogenetic
association testing, and pathway-level random effects models.")
      (license license:expat))))

(define-public r-bedtorch
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "f5ff4f83b94f59eac660333c64e4b2f296b35cea")
        (revision "1"))
    (package
      (name "r-bedtorch")
      (version (git-version "0.1.12.12" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/haizi-zh/bedtorch/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "08l04iqf54b5995gc7rvqqd7w327fwqs7qjyhc9y5cqfj8yv4c48"))))
      (properties '((upstream-name . "bedtorch")))
      (build-system r-build-system)
      (propagated-inputs (list r-assertthat
                               r-curl
                               r-data-table
                               r-dplyr
                               r-genomeinfodb
                               r-genomicranges
                               r-purrr
                               r-r-utils
                               r-rcpp
                               r-rcurl
                               r-readr
                               r-rhtslib12
                               r-s4vectors
                               r-stringr
                               r-tidyr))
      (home-page "https://github.com/haizi-zh/bedtorch/")
      (synopsis "R package for fast BED-file manipulation")
      (description
       "The goal of bedtorch is to provide a fast BED file manipulation tool
suite native in R.")
      (license license:expat))))

(define-public r-bpcells
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "32ce67312185d3ed1046b4218dd3aaf1b35dcfda")
        (revision "1"))
    (package
      (name "r-bpcells")
      (version (git-version "0.1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/bnprks/BPCells/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0im4sqvbii326acmd1hnimyzsllnbvnh9al3dp1nla6isgi7s6cg"))))
      (properties '((upstream-name . "BPCells")))
      (build-system r-build-system)
      (arguments
       (list #:phases
             '(modify-phases %standard-phases
                ;; Replace the quoted "-march=native" string in the
                ;; "configure" script with an empty string.
                (add-after 'unpack 'do-not-tune
                  (lambda _
                    (substitute* "configure"
                      (("\"-march=native\"") "\"\"")))))))
      (inputs
       (list hdf5
             zlib))
      (propagated-inputs
       (list r-dplyr
             r-ggplot2
             r-ggrepel
             r-hexbin
             r-magrittr
             r-matrix
             r-patchwork
             r-rcolorbrewer
             r-rcpp
             r-rcppeigen
             r-rlang
             r-scales
             r-scattermore
             r-stringr
             r-tibble
             r-tidyr
             r-vctrs))
      (native-inputs
       (list pkg-config))
      (home-page "https://github.com/bnprks/BPCells/")
      (synopsis "Single cell counts matrices to PCA")
      (description
       "This is a package providing efficient operations for single cell
ATAC-seq fragments and RNA counts matrices.  It is interoperable with standard
file formats, and introduces efficient bit-packed formats that allow large
storage savings and increased read speeds.")
      (license license:gpl3))))

(define-public r-btools
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "fa21d4ca01d37ea4d98b45582453f3bf95cbc2b5")
        (revision "1"))
    (package
      (name "r-btools")
      (version (git-version "0.0.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/twbattaglia/btools")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0bca593dnxz6xdywpdi0ipli0paln2b3bfxxj0csnmj55ryrz428"))))
      (properties '((upstream-name . "btools")))
      (build-system r-build-system)
      (propagated-inputs (list r-biomformat
                               r-coin
                               r-deseq2
                               r-dplyr
                               r-genefilter
                               r-phyloseq
                               r-picante
                               r-plotly
                               r-reshape2
                               r-stringr
                               r-vegan))
      (home-page "https://github.com/twbattaglia/btools")
      (synopsis "R functions for microbial diversity analyses")
      (description
       "This package provides an assortment of R functions that is suitable
for all types of microbial diversity analyses.")
      (license license:expat))))

(define-public r-codeandroll2
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "d58e258851a5c0b430e8620d34dbeefb597c548f")
        (revision "1"))
    (package
      (name "r-codeandroll2")
      (version (git-version "2.3.6" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/vertesy/CodeAndRoll2")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0sy88mfgw6qqhpnlc5020qzr1jllkcrxfhl2lw42bkl5nb56is71"))))
      ;; The upstream name is spelled in mixed case.
      (properties '((upstream-name . "CodeAndRoll2")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-colorramps
             r-dplyr
             r-gplots
             r-gtools
             r-plyr
             r-rcolorbrewer
             r-sessioninfo
             r-sm
             r-stringendo
             r-stringr))
      (home-page "https://github.com/vertesy/CodeAndRoll2")
      (synopsis "CodeAndRoll2 for vector, matrix and list manipulations")
      (description
       "CodeAndRoll2 is a set of more than 130 productivity functions.
These functions are used by MarkdownReports, ggExpress, and SeuratUtils.")
      (license license:gpl3))))

(define-public r-conospanel
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "39e76b201a783b4e92fd615010a735a61746fbb9")
        (revision "1"))
    (package
      (name "r-conospanel")
      (version (git-version "1.0.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/kharchenkolab/conosPanel")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1zf0aj5d4iaxc3ghvjnaja5qby1avlmljzh94bpyvxbd359z9snn"))))
      ;; The upstream name is spelled in mixed case.
      (properties '((upstream-name . "conosPanel")))
      (build-system r-build-system)
      (home-page "https://github.com/kharchenkolab/conosPanel")
      (synopsis "Data for the conos package")
      (description
       "The data within this package is a panel of four samples,
each with 3000 cells.  There are two samples which are bone marrow (BM), and
two samples which are cord blood (CB).")
      (license license:gpl3))))

(define-public r-conqur
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "c7a88794efd4ecfe4d96988dceeec3b410222e48")
        (revision "1"))
    (package
      (name "r-conqur")
      (version (git-version "2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wdl2459/ConQuR")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "19a7p2l67mgjy99i5ksjxlhzaqmrnyi1vzvwnhgnx2jrr6crj7rq"))))
      ;; The upstream name is spelled in mixed case.
      (properties '((upstream-name . "ConQuR")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-ade4
             r-ape
             r-compositions
             r-cqrreg
             r-doparallel
             r-dplyr
             r-fastdummies
             r-glmnet
             r-gplots
             r-gunifrac
             r-quantreg
             r-randomforest
             r-rocr
             r-vegan))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/wdl2459/ConQuR")
      (synopsis "Batch effects removal for microbiome data")
      (description
       "This package conducts batch effects removal from a taxa read count
table by a conditional quantile regression method.  The distributional
attributes of microbiome data - zero-inflation and over-dispersion, are
simultaneously considered.")
      (license license:gpl3))))

(define-public r-ewastools
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "f7646cacd73266708479b3fea5d625054d179f95")
        (revision "1"))
    (package
      (name "r-ewastools")
      (version (git-version "1.7.2" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/hhhh5/ewastools/")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0irarlnxfnasa755adxsn67rxsy01zwhjhw18g4cag08cqiyyw41"))))
      (properties '((upstream-name . "ewastools")))
      (build-system r-build-system)
      (propagated-inputs (list r-data-table
                               r-igraph
                               r-illuminaio
                               r-mblm
                               r-quadprog))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/hhhh5/ewastools/")
      ;; NOTE(review): the synopsis reads as if a trailing noun is missing;
      ;; confirm the intended wording against upstream before changing it.
      (synopsis
       "Quality control toolset for the Illumina Infinium DNA methylation")
      (description
       "This package provides a collection of useful functions for working
with DNA methylation micro-array data.")
      (license license:unlicense))))

(define-public r-numbat
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "4ab7752e7d267a3f443756675728521a9b0a7295")
        (revision "1"))
    (package
      (name "r-numbat")
      (version (git-version "1.3.2-1" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/kharchenkolab/numbat")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0wa2cz5iy570r2a57bd74jramxayvfhmxznb0vq4vyk1ia8l5jd1"))))
      (properties `((upstream-name . "numbat")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-ape
             r-catools
             r-data-table
             r-dendextend
             r-dplyr
             r-genomicranges
             r-ggplot2
             r-ggraph
             r-ggtree
             r-glue
             r-igraph
             r-iranges
             r-logger
             r-magrittr
             r-matrix
             r-optparse
             r-paralleldist
             r-patchwork
             r-pryr
             r-purrr
             r-r-utils
             r-rcpp
             r-rcpparmadillo
             r-rhpcblasctl
             r-roptim
             r-scales
             r-scistreer
             r-stringr
             r-tibble
             r-tidygraph
             r-tidyr
             r-vcfr
             r-zoo))
      (home-page "https://github.com/kharchenkolab/numbat")
      (synopsis "Haplotype-Aware CNV Analysis from scRNA-Seq")
      ;; The description is Texinfo markup; the @enumerate block renders as a
      ;; numbered list.
      (description
       "This package provides a computational method that infers copy number
variations (CNVs) in cancer scRNA-seq data and reconstructs the tumor
phylogeny.  numbat integrates signals from gene expression, allelic ratio, and
population haplotype structures to accurately infer allele-specific CNVs in
single cells and reconstruct their lineage relationship.  numbat can be used
to:

@enumerate
@item detect allele-specific copy number variations from single-cells;
@item differentiate tumor versus normal cells in the tumor microenvironment;
@item infer the clonal architecture and evolutionary history of profiled
tumors.
@end enumerate

numbat does not require tumor/normal-paired DNA or genotype data, but operates
solely on the donor scRNA-seq data (for example, 10x Cell Ranger output).")
      (license license:expat))))

(define-public r-p2data
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "7d4c0e17d7899f9d9b08ab2bf455abe150912f4c")
        (revision "1"))
    (package
      (name "r-p2data")
      (version (git-version "1.0.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/kharchenkolab/p2data")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1hadrldldxvhqs43aqs3c88bqfgql3wcfkbll3jz7fh6z7p3x324"))))
      (properties '((upstream-name . "p2data")))
      (build-system r-build-system)
      (home-page "https://github.com/kharchenkolab/p2data")
      (synopsis "Data for pagoda2")
      (description
       "This package contains data used by pagoda2.  The data
within this package are the 3000 bone marrow cells used for vignettes.")
      (license license:gpl3))))

(define-public r-rhtslib12
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "ee186daf04876969c7f31c16a0e0fda8e7c16a30")
        (revision "1"))
    (package
      (name "r-rhtslib12")
      (version (git-version "1.23.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/haizi-zh/Rhtslib12")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0a3kkp0phi2fq6ip8p9vfj3axn7l15f2mb51a6v3ai4nlkhqqawj"))))
      ;; The upstream name is capitalized.
      (properties '((upstream-name . "Rhtslib12")))
      (build-system r-build-system)
      (propagated-inputs
       (list curl
             zlib
             r-zlibbioc))
      (native-inputs
       (list pkg-config
             r-knitr))
      (home-page "https://github.com/haizi-zh/Rhtslib12")
      (synopsis "HTSlib high-throughput sequencing library as an R package")
      (description
       "This package provides version 1.12 of the HTSlib C library for
high-throughput sequence analysis.  The package is primarily useful to
developers of other R packages who wish to make use of HTSlib.")
      (license license:lgpl2.0+))))

(define-public r-scenic
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "cedf8490a634da550cea2c831544e5f7f14467d2")
        (revision "1"))
    (package
      (name "r-scenic")
      (version (git-version "1.3.1" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/aertslab/SCENIC")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "17ai0q260hdqbvm1km1s5dw93pgz4f546ycfii57jyy9m9jka7r0"))))
      ;; The upstream name is spelled in upper case.
      (properties '((upstream-name . "SCENIC")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-aucell
             r-data-table
             r-dynamictreecut
             r-genie3
             r-ggrepel
             r-mixtools
             r-nmf
             r-rcistarget
             r-rtsne))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/aertslab/SCENIC")
      (synopsis
       "SCENIC (Single Cell rEgulatory Network Inference and Clustering)")
      (description
       "SCENIC (Single-cell regulatory network inference and
clustering) is an R package to infer Gene Regulatory Networks and cell types
from single-cell RNA-seq data.")
      ;; The license is GPLv3 as of commit
      ;; cedf8490a634da550cea2c831544e5f7f14467d2.
      (license license:gpl3))))

(define-public r-scent
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "f01f18ac30f8a9bcf85b738c6c7815017e2c8ee5")
        (revision "1"))
    (package
      (name "r-scent")
      (version (git-version "1.0.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/immunogenomics/SCENT")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "18krksy8ac7yy8hghzaxscj91c61j195yg7j60zswdq97islvfzi"))))
      ;; The upstream name is spelled in upper case.
      (properties '((upstream-name . "SCENT")))
      (build-system r-build-system)
      (inputs
       (list bedtools))
      (propagated-inputs
       (list r-boot
             r-data-table
             r-hmisc
             r-lme4
             r-mass
             r-matrix
             r-r-utils
             r-stringr))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/immunogenomics/SCENT")
      (synopsis
       "Single-Cell enhancer target gene mapping for multimodal single-cell data")
      (description
       "This package contains functions for the SCENT algorithm.
SCENT uses single-cell multimodal data and links ATAC-seq peaks
to their target genes by modeling association between chromatin
accessibility and gene expression across individual single cells.")
      (license license:expat))))

(define-public r-saige
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "c6717ba9c5a967bcf612e97566d845397b1b7167")
        (revision "1"))
    (package
      (name "r-saige")
      (version (git-version "1.3.4" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/saigegit/SAIGE")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0c3211whqazycs9ivwdz23imj45j4na2xzcfq5l989ykkgmqnjzs"))))
      (properties `((upstream-name . "SAIGE")))
      (build-system r-build-system)
      (arguments
       (list
        #:phases
        '(modify-phases %standard-phases
           (add-after 'unpack 'do-not-download-packages
             (lambda _
               ;; Pretend to be a Conda build to avoid having to install
               ;; things with pip and cget.
               (setenv "CONDA_BUILD" "1")))
           (add-after 'unpack 'link-with-openblas
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Replace the "-llapack" linker flag in src/Makevars with the
               ;; file name of libopenblas.so found among the inputs.
               (substitute* "src/Makevars"
                 (("-llapack")
                  (search-input-file inputs
                                     "/lib/libopenblas.so"))))))))
      (inputs (list openblas savvy superlu zlib zstd))
      (propagated-inputs (list r-bh
                               r-data-table
                               r-dplyr
                               r-matrix
                               r-metaskat
                               r-optparse
                               r-qlcmatrix
                               r-rcpp
                               r-rcpparmadillo
                               r-rcppeigen
                               r-rcppparallel
                               r-rhpcblasctl
                               r-rsqlite
                               r-skat
                               r-spatest))
      (home-page "https://github.com/saigegit/SAIGE")
      (synopsis "Genome-wide association tests in large-scale data sets")
      (description "SAIGE is a package for efficiently controlling for
case-control imbalance and sample relatedness in single-variant association
tests (SAIGE) and controlling for sample relatedness in region-based
association tests in large cohorts and biobanks (SAIGE-GENE+).")
      (license license:gpl2+))))

(define-public r-singlet
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "765a6c45081807a1522f0e8983e2417822a36f36")
        (revision "1"))
    (package
      (name "r-singlet")
      (version (git-version "0.99.26" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/zdebruine/singlet")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "040v8wzl9qr8ribr6qss61fz4698d14cqs8nxbc8hqwiqlpy3vs4"))))
      (properties '((upstream-name . "singlet")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-dplyr
             r-fgsea
             r-ggplot2
             r-knitr
             r-limma
             r-matrix
             r-msigdbr
             r-rcpp
             r-rcppml/devel
             r-reshape2
             r-scuttle
             r-seurat))
      ;; NOTE(review): r-knitr is also listed in propagated-inputs above.
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/zdebruine/singlet")
      (synopsis "Non-negative Matrix Factorization for single-cell analysis")
      (description
       "This is a package for fast @dfn{Non-negative Matrix
Factorization} (NMF) with automatic rank-determination for dimension reduction
of single-cell data using Seurat, RcppML nmf, SingleCellExperiments and
similar.")
      (license license:gpl2+))))

(define-public r-stacas
  (package
    (name "r-stacas")
    (version "2.2.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/carmonalab/STACAS")
             ;; The Git reference is the bare version string.
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "13i0h5i6vlbrb8ndq9gr81560z9d74b2c7m3rjfzls01irjza9hm"))))
    (properties `((upstream-name . "STACAS")))
    (build-system r-build-system)
    (propagated-inputs
     (list r-biocneighbors
           r-biocparallel
           r-ggplot2
           r-ggridges
           r-pbapply
           r-r-utils
           r-seurat))
    (home-page "https://github.com/carmonalab/STACAS")
    (synopsis "Sub-type anchoring correction for alignment in Seurat")
    (description
     "This package implements methods for batch correction and integration of
scRNA-seq datasets, based on the Seurat anchor-based integration framework.
In particular, STACAS is optimized for the integration of heterogeneous
datasets with only limited overlap between cell sub-types (e.g. TIL sets of
CD8 from tumor with CD8/CD4 T cells from lymph node), for which the default
Seurat alignment methods would tend to over-correct biological differences.
The 2.0 version of the package allows the users to incorporate explicit
information about cell-types in order to assist the integration process.")
    (license license:gpl3)))

(define-public r-stringendo
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "15594b1bba11048a812874bafec0eea1dcc8618a")
        (revision "1"))
    (package
      (name "r-stringendo")
      (version (git-version "0.6.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/vertesy/Stringendo")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "15ij4zf2j9c8m9n4bqhmxkchjh2bhddwjfxngfpwv7c5wjqyi6ir"))))
      ;; The upstream name is capitalized.
      (properties `((upstream-name . "Stringendo")))
      (build-system r-build-system)
      (propagated-inputs (list r-clipr))
      (home-page "https://github.com/vertesy/Stringendo")
      (synopsis "Stringendo is a string parsing library")
      (description
       "This package provides string parsing functionalities for generating
plotnames, filenames and paths.")
      (license license:gpl3))))

(define-public r-readwriter
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "91373c44641014a1ce8e1c3e928747608aae8f54")
        (revision "1"))
    (package
      (name "r-readwriter")
      (version (git-version "1.5.3" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/vertesy/ReadWriter")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "156kvmplrip0w1zhs9yl5r0ayjipa0blhy614l65hbsjn1lwbskr"))))
      ;; The upstream name is spelled in mixed case.
      (properties '((upstream-name . "ReadWriter")))
      (build-system r-build-system)
      (propagated-inputs (list r-gtools
                               r-openxlsx
                               r-readr
                               r-stringendo))
      (home-page "https://github.com/vertesy/ReadWriter")
      (synopsis "Functions to read and write files conveniently")
      (description
       "ReadWriter is a set of R functions to read and write files
conveniently.")
      (license license:gpl3))))

(define-public r-streamgraph
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "76f7173ec89d456ace5943a512e20b1f6810bbcb")
        (revision "1"))
    (package
      (name "r-streamgraph")
      (version (git-version "0.9.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/hrbrmstr/streamgraph")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "010rhnby5a9dg08jvlkr65b3p9iipdxi2f5m1k6j53s80p25yvig"))))
      (properties '((upstream-name . "streamgraph")))
      (build-system r-build-system)
      (propagated-inputs (list r-dplyr
                               r-htmltools
                               r-htmlwidgets
                               r-magrittr
                               r-tidyr
                               r-xts))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/hrbrmstr/streamgraph")
      (synopsis "Htmlwidget for building streamgraph visualizations")
      (description
       "A streamgraph is a type of stacked area chart.  It represents the
evolution of a numeric variable for several groups.  Areas are usually
displayed around a central axis, and edges are rounded to give a flowing
shape.  This package provides an @code{htmlwidget} for building streamgraph
visualizations.")
      (license license:expat))))

(define-public r-wasabi
  ;; Built from a pinned upstream Git commit; the version string is derived
  ;; from it by 'git-version'.
  (let ((commit "8c33cabde8d18c2657cd6e38e7cb834f87cf9846")
        (revision "1"))
    (package
      (name "r-wasabi")
      (version (git-version "1.0.1" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/COMBINE-lab/wasabi")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0rpdj6n4cnx8n2zl60dzgl638474sg49dknwi9x3qb4g56dpphfa"))))
      (properties '((upstream-name . "wasabi")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-data-table
             r-rhdf5
             r-rjson))
      (home-page "https://github.com/COMBINE-lab/wasabi")
      (synopsis "Use Sailfish and Salmon with Sleuth")
      (description
       "This package converts the output of the Sailfish and Salmon RNA-seq
quantification tools so that it can be used with the Sleuth differential
analysis package.")
      (license license:bsd-3))))

(define-public pbbam
  (package
    (name "pbbam")
    (version "2.1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/pbbam")
             ;; The Git reference is the version prefixed with "v".
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1iyazi3l7dswpfxh39k5j7ydi0ywja0579xz3r6l9kkwz2n1z6dc"))))
    (build-system meson-build-system)
    (arguments
     (list #:phases
           '(modify-phases %standard-phases
              (add-after 'unpack 'patch-tests
                (lambda* (#:key inputs #:allow-other-keys)
                  ;; Disable this test.  I tried fixing it by including
                  ;; optional_io.hpp, but there's a type error.
                  (substitute* "tests/src/meson.build"
                    (("'test_ReadGroupInfo.cpp',") ""))
                  ;; The attempted fix, kept for reference but not evaluated:
                  ;;
                  ;; (substitute* "include/pbbam/ReadGroupInfo.h"
                  ;;   (("#include <boost/optional.hpp>" m)
                  ;;    (string-append
                  ;;     m "\n#include <boost/optional/optional_io.hpp>")))
                  ;;
                  ;; Replace the hard-coded '/bin/sh' in the cram test scripts
                  ;; with the "sh" found by 'which'.
                  (substitute* '("tests/scripts/cram/_test.py"
                                 "tests/scripts/cram/_main.py")
                    (("'/bin/sh'")
                     (string-append "'" (which "sh") "'"))))))))
    ;; These libraries are listed as "Required" in the pkg-config file.
    (propagated-inputs
     (list htslib-1.14
           pbcopper
           zlib))
    (inputs
     (list boost
           samtools))
    (native-inputs
     (list googletest
           pkg-config
           python-wrapper))             ;for tests
    (home-page "https://github.com/PacificBiosciences/pbbam")
    (synopsis "Work with PacBio BAM files")
    (description
     "The pbbam software package provides components to create, query, and
edit PacBio BAM files and associated indices.  These components include a core
C++ library, bindings for additional languages, and command-line utilities.
This library is not intended to be used as a general-purpose BAM utility - all
input and output BAMs must adhere to the PacBio BAM format specification.
Non-PacBio BAMs will cause exceptions to be thrown.")
    (license license:bsd-3)))

(define-public pbgzip
  ;; Built from a pinned upstream Git commit; 'git-version' combines the
  ;; placeholder base version with REVISION and COMMIT.
  (let ((commit "2b09f97b5f20b6d83c63a5c6b408d152e3982974")
        (revision "0"))
    (package
      (name "pbgzip")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/nh13/pbgzip")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1mlmq0v96irbz71bgw5zcc43g1x32zwnxx21a5p1f1ch4cikw1yd"))))
      (build-system gnu-build-system)
      (native-inputs
       (list autoconf
             automake))
      (inputs
       (list zlib))
      (home-page "https://github.com/nh13/pbgzip")
      (synopsis "Parallel Block GZIP")
      (description
       "This package implements parallel block gzip.  For many
formats, in particular genomics data formats, data are compressed in
fixed-length blocks such that they can be easily indexed based on a (genomic)
coordinate order, since typically each block is sorted according to this order.
This allows for each block to be individually compressed (deflated), or more
importantly, decompressed (inflated), with the latter enabling random retrieval
of data in large files (gigabytes to terabytes).  @code{pbgzip} is not limited
to any particular format, but certain features are tailored to genomics data
formats when enabled.  Parallel decompression is somewhat faster, but the true
speedup comes during compression.")
      (license license:expat))))

;; PacBio's blasr_libcpp, built with Meson.  The upstream test suite is
;; disabled (see the TODO below), and two phases patch the Meson build files
;; so that HDF5 and googletest are looked up with the compiler's
;; 'find_library' instead of the upstream lookups.
(define-public blasr-libcpp
  (package
    (name "blasr-libcpp")
    (version "5.3.5")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/PacificBiosciences/blasr_libcpp")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "07cdfnfl29zf2j7fpaaqaxghq3p0wnc109razs0icwm2q6l3gycb"))))
    (build-system meson-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Prepend explicit 'find_library' calls for hdf5 and hdf5_cpp to the
         ;; 'libblasr_deps' list in meson.build, passing the hdf5 input
         ;; directory as 'dirs'.
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((hdf5 (assoc-ref inputs "hdf5")))
               (substitute* "meson.build"
                 (("libblasr_deps = \\[" m)
                  (string-append
                   m
                   ;; The backslash at the end of the line continues the
                   ;; string literal without inserting a line break, which is
                   ;; why the next line starts in the first column.
                   (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                           hdf5 hdf5)))))))
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; It doesn't find gtest_main because there's no pkg-config file
             ;; for it.  Find it another way.
             (substitute* "unittest/meson.build"
               ;; The whole 'dependency(...)' line is replaced; the
               ;; replacement first defines 'cpp' and then looks up
               ;; gtest_main in the googletest input directory.
               (("libblasr_gtest_dep = dependency\\('gtest_main'.*")
                (format #f "cpp = meson.get_compiler('cpp')
libblasr_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                        (assoc-ref inputs "googletest")))))))
       ;; TODO: unittest/libblasr_unittest cannot be linked
       ;; ld: ;; unittest/df08227@@libblasr_unittest@exe/alignment_utils_FileUtils_gtest.cpp.o:
       ;; undefined reference to symbol
       ;; '_ZN7testing8internal9DeathTest6CreateEPKcPKNS0_2REES3_iPPS1_'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;;   error adding symbols: DSO missing from command line
       #:tests? #f
       ;; Also tell Meson not to build the tests at all.
       #:configure-flags '("-Dtests=false")))
    (inputs
     (list boost hdf5 htslib pbbam zlib))
    (native-inputs
     (list googletest pkg-config))
    ;; The home page points at an archived snapshot of the upstream
    ;; repository page.
    (home-page
     (string-append "https://web.archive.org/web/20201106122415/"
                    "https://github.com/PacificBiosciences/blasr_libcpp"))
    (synopsis "Library for analyzing PacBio genomic sequences")
    (description
     "This package provides three libraries used by applications for analyzing
PacBio genomic sequences.  This library contains three sub-libraries: pbdata,
hdf and alignment.")
    (license license:bsd-3)))

(define-public blasr
  (package
    (name "blasr")
    (version "5.3.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0axyd06gn2xa0p0k76fihsbxpfxvhlb18jn6bf97c0ii58r1wc0k"))))
    (build-system meson-build-system)
    (arguments
     `(;; Tests require "cram" executable, which is not packaged.
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         ;; Prepend 'find_library' calls for hdf5 and hdf5_cpp to the
         ;; 'blasr_deps' list in meson.build, using the hdf5 input directory
         ;; as the search location.
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((hdf5 (assoc-ref inputs "hdf5")))
               (substitute* "meson.build"
                 (("blasr_deps = \\[" m)
                  (string-append
                   m
                   (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                           hdf5 hdf5))))))))))
    (native-inputs
     (list pkg-config))
    (inputs
     (list boost blasr-libcpp hdf5 pbbam zlib))
    ;; Archived snapshot of the upstream repository page.
    (home-page
     (string-append "https://web.archive.org/web/20210813124135/"
                    "https://github.com/PacificBiosciences/blasr"))
    (synopsis "PacBio long read aligner")
    (description
     "Blasr is a genomic sequence aligner for processing PacBio long reads.")
    (license license:bsd-3)))

(define-public randfold
  (package
    (name "randfold")
    (version "2.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://bioinformatics.psb.ugent.be/"
                           "supplementary_data/erbon/nov2003/downloads/"
                           "randfold-" version ".tar.gz"))
       (sha256
        (base32 "0gqixl4ncaibrxmn25d6lm2hrw4ml2fj13nrc9q1kilsxdfi91mj"))))
    (build-system gnu-build-system)
    (inputs
     (list eddylab-squid))
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          ;; The 'configure phase is skipped entirely.
          (delete 'configure)
          ;; Instead of the standard 'install phase, copy the "randfold"
          ;; executable into the output's bin directory.
          (replace 'install
            (lambda _
              (install-file "randfold"
                            (string-append #$output "/bin")))))
      #:tests? #f))                     ;no tests provided
    (home-page
     "http://bioinformatics.psb.ugent.be/supplementary_data/erbon/nov2003/")
    (synopsis "Minimum free energy of folding randomization test software")
    (description "randfold computes the probability that, for a given
sequence, the @dfn{Minimum Free Energy} (MFE) of the secondary structure is
different from MFE computed with random sequences.")
    (license license:gpl2)))

(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (inputs
     (list bash-minimal
           bedtools-2.18
           samtools-0.1
           r-minimal
           r-foreach
           r-xnomial
           r-domc
           r-multitaper
           r-seqinr))
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          ;; Wrap each of the installed scripts listed below so that
          ;; R_LIBS_SITE is set to the value it has in the build environment.
          (add-after 'install 'wrap-executables
            (lambda _
              (for-each
               (lambda (script)
                 (wrap-program (string-append #$output "/bin/" script)
                   `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
               '("create_annotations_files.bash"
                 "create_metaplots.bash"
                 "Ribotaper_ORF_find.sh"
                 "Ribotaper.sh")))))))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data.  This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))

(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/bioawk")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
    (build-system gnu-build-system)
    (arguments
     (list
      ;; Bison must generate files, before other targets can build.
      #:parallel-build? #f
      #:tests? #f                       ; There are no tests to run.
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)           ; There is no configure phase.
          ;; Install by hand: the manual page "awk.1" is installed under the
          ;; name "bioawk.1", and the executable goes to the bin directory.
          (replace 'install
            (lambda _
              (let ((bin (string-append #$output "/bin"))
                    (man (string-append #$output "/share/man/man1")))
                (mkdir-p man)
                (copy-file "awk.1" (string-append man "/bioawk.1"))
                (install-file "bioawk" bin)))))))
    (native-inputs
     (list bison))
    (inputs
     (list zlib))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names.  It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter.  When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))

(define-public python-bcbio-gff
  (package
    (name "python-bcbio-gff")
    (version "0.6.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bcbio-gff" version))
       (sha256
        (base32 "1pm1szyxabhn8jismrj9cjhf88ajgcmm39f0cgf36iagw5qakprl"))))
    (build-system pyproject-build-system)
    (native-inputs
     (list python-pytest))
    ;; Biopython and six are propagated, i.e. installed alongside this
    ;; package.
    (propagated-inputs
     (list python-biopython python-six))
    (home-page "https://github.com/chapmanb/bcbb/tree/master/gff")
    (synopsis "Read and write GFF files with Biopython integration")
    (description
     "This package lets you read and write files in Generic Feature
Format (GFF) with Biopython integration.")
    (license
     (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))

;; Hidden variant of python-bcbio-gff that propagates python-biopython-1.73
;; in place of python-biopython; everything else is inherited unchanged.
(define-public python-bcbio-gff/biopython-1.73
  (let ((base python-bcbio-gff))
    (hidden-package
     (package
       (inherit base)
       (propagated-inputs
        (modify-inputs (package-propagated-inputs base)
          (replace "python-biopython" python-biopython-1.73)))))))

;; Note: the name on PyPI is "biofluff".
(define-public python-biofluff
  (package
    (name "python-biofluff")
    (version "3.0.4")
    ;; Fetch from Git: the PyPI tarball does not contain test data.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/simonvh/fluff")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "12yvhgp72s2ygf3h07rrc852zd6q8swc41hm28mcczpsyprggxyz"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         ;; Remove the "beginarrow=False, endarrow=True," arguments from
         ;; fluff/plot.py; judging by the phase name this is needed for
         ;; compatibility with the packaged matplotlib.
         (add-after 'unpack 'matplotlib-compatibility
           (lambda _
             (substitute* "fluff/plot.py"
               (("beginarrow=False, endarrow=True,") "")))))
      #:test-flags
      ;; These tests require internet access.
      '(list "--ignore=tests/test_mirror.py"
             "-k" "not test_plots_big")))
    (native-inputs
     (list python-pytest))
    (propagated-inputs
     (list htseq
           python-matplotlib
           python-numpy
           python-palettable
           python-pybedtools
           python-pybigwig
           python-pysam
           python-scikit-learn
           python-scipy))
    (home-page "https://github.com/simonvh/fluff/")
    (synopsis "Analysis and visualization of high-throughput sequencing data")
    (description
     "Fluff is a Python package that contains several scripts to produce
pretty, publication-quality figures for next-generation sequencing
experiments.")
    (license license:expat)))

(define-public python-bulkvis
  ;; The source is pinned to a specific upstream commit; the version string
  ;; is derived from that commit and the revision counter.
  (let ((commit "00a82a90c7e748a34af896e779d27e78a2c82b5e")
        (revision "2"))
    (package
      (name "python-bulkvis")
      (version (git-version "2.0.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/LooseLab/bulkVis")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "02blai158xyyqcg0ljzkmfa6ci05m4awrl4njvp9nwfp717xq8n0"))
                (modules '((guix build utils)))
                ;; Relax the pinned dependency versions in requirements.txt
                ;; and setup.py so that the packaged versions are accepted.
                (snippet
                 '(substitute* '("requirements.txt"
                                 "setup.py")
                    (("tqdm~=4.46.1") "tqdm")
                    (("tornado~=6.0.4") "tornado")
                    (("pandas~=1.0.5") "pandas")
                    (("h5py~=2.10.0") "h5py")
                    ;; See the 'bokeh-compatibility phase below for the
                    ;; source changes that accompany the relaxed Bokeh
                    ;; requirement.
                    (("bokeh~=2.1.0") "bokeh")))))
      (build-system pyproject-build-system)
      (arguments
       (list #:tests? #f                ;There are no tests
             #:phases
             '(modify-phases %standard-phases
                ;; See https://github.com/LooseLab/bulkvis/issues/58
                ;; Import Bokeh's 'Argument' and 'entries', and convert
                ;; 'opts' to a dictionary before each
                ;; '_parser.add_argument' call when it is an 'Argument'
                ;; instance.  The captured indentation is reused as a prefix
                ;; for the inserted lines.
                (add-after 'unpack 'bokeh-compatibility
                  (lambda _
                    (substitute* "bulkvis/bulkvis.py"
                      (("import importlib" m)
                       (string-append m "
from bokeh.command.subcommand import Argument
from bokeh.util.dataclasses import entries\n"))
                      (("( *)_parser.add_argument" m indent)
                       (string-append
                        (string-join (list "if isinstance(opts, Argument):\n"
                                           "  opts = dict(entries(opts))\n")
                                     indent 'prefix)
                        m))))))))
      (propagated-inputs (list python-bokeh
                               python-dill
                               python-h5py
                               python-joblib
                               python-matplotlib
                               python-numpy
                               python-pandas
                               python-plotly
                               python-readpaf
                               python-scikit-learn
                               python-scikit-image
                               python-scipy
                               python-seaborn
                               python-tornado-6
                               python-tqdm
                               python-umap-learn))
      (native-inputs (list python-pytest))
      (home-page "https://github.com/LooseLab/bulkVis")
      ;; Upstream describes bulkVis as a viewer for Oxford Nanopore bulk
      ;; FAST5 files, not as a tool for bulk RNA-seq data.
      (synopsis "Graphical viewer for Oxford Nanopore bulk FAST5 files")
      (description
       "BulkVis is a graphical viewer for Oxford Nanopore Technologies bulk
FAST5 files.  It uses the Bokeh server to interactively display the raw
signal (squiggle) data recorded during a sequencing run.")
      (license license:expat))))

(define-public python-cell2cell
  (package
    (name "python-cell2cell")
    (version "0.6.8")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/earmingol/cell2cell")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1hwww0rcv8sc4k312n4d0jhbyix1jjqgv5djg25bw8127q5iym3s"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; We remove the dependency on statannotations because it
                  ;; will not work with the current version of seaborn.  See
                  ;; https://github.com/trevismd/statannotations/issues/122
                  (substitute* "cell2cell/plotting/factor_plot.py"
                    (("from statannotations.Annotator import Annotator")
                     "")
                    ;; Without statannotations the statistical annotation
                    ;; branch must never be taken.
                    (("if statistical_test is not None")
                     "if False"))
                  (substitute* "setup.py"
                    (("'statannotations',") "")
                    ;; We provide version 1.0.4, which should be fine.
                    (("'gseapy == 1.0.3'") "'gseapy'")
                    ;; Using matplotlib 3.5.2 leads to this bug:
                    ;; https://github.com/earmingol/cell2cell/issues/19 but we
                    ;; can't package a different minor version of matplotlib
                    ;; and limit its use to just this package.
                    (("matplotlib >= 3.2.0,<=3.5.1") ""))))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:tests? #f                  ;There are no tests
      #:phases
      '(modify-phases %standard-phases
         ;; Numba needs a writable dir to cache functions.
         (add-before 'build 'set-numba-cache-dir
           (lambda _ (setenv "NUMBA_CACHE_DIR" "/tmp"))))))
    ;; Each package is listed exactly once (python-seaborn used to appear
    ;; twice).
    (propagated-inputs
     (list python-gseapy
           python-kneed
           python-matplotlib
           python-networkx
           python-numpy
           python-openpyxl
           python-pandas
           python-scikit-learn
           python-scipy
           python-seaborn
           python-statsmodels
           python-scanpy
           python-tensorly
           python-tqdm
           python-umap-learn
           python-xlrd))
    (home-page "https://github.com/earmingol/cell2cell")
    (synopsis "Python library for cell communication analysis")
    (description
     "Cell2cell is a Python library for cell communication analysis.
This is a method to calculate, visualize and analyze communication between
cell types.  Cell2cell is suitable for single-cell RNA sequencing
(scRNA-seq) data.")
    (license license:bsd-3)))

(define-public python-cellbender
  (package
    (name "python-cellbender")
    (version "0.2.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/CellBender")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0h9d9pznffdbya631hkk7b7jwjrgx5saqssar1d42qbyvdji3hgy"))))
    (build-system pyproject-build-system)
    ;; There are no tests.
    (arguments
     (list #:tests? #f))
    (propagated-inputs
     (list python-anndata
           python-matplotlib
           python-numpy
           python-pandas
           python-pyro-ppl
           python-scikit-learn
           python-scipy
           python-sphinx
           python-sphinx-argparse
           python-sphinx-autodoc-typehints
           python-sphinx-rtd-theme
           python-sphinxcontrib-programoutput
           python-tables))
    (home-page "https://cellbender.rtfd.io/")
    (synopsis "Eliminate technical artifacts from single-cell RNA-seq data")
    (description
     "CellBender is a software package for eliminating technical artifacts
from high-throughput single-cell RNA sequencing (scRNA-seq) data.")
    (license license:bsd-3)))

(define-public python-celltypist
  (package
    (name "python-celltypist")
    (version "1.6.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Teichlab/celltypist")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0c42cx01zkxr0dk5f1d7q71qdi18v2smlc3wpvwyjlzplya7k2iy"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         ;; celltypist wants to write settings below HOME, and the sanity
         ;; check fails without one, so point HOME at a writable directory.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")))
         ;; Numba caches functions on disk and needs a writable directory
         ;; for that.
         (add-before 'build 'set-numba-cache-dir
           (lambda _
             (setenv "NUMBA_CACHE_DIR" "/tmp"))))
      #:tests? #f))                     ;there are none
    (propagated-inputs
     (list python-click
           python-leidenalg
           python-numpy
           python-openpyxl
           python-pandas
           python-scanpy
           python-scikit-learn
           python-requests))
    (home-page "https://github.com/Teichlab/celltypist")
    (synopsis "Tool for semi-automatic cell type classification")
    (description
     "CellTypist is an automated cell type annotation tool for scRNA-seq
datasets on the basis of logistic regression classifiers optimised by the
stochastic gradient descent algorithm.  CellTypist allows for cell prediction
using either built-in (with a current focus on immune sub-populations) or
custom models, in order to assist in the accurate classification of different
cell types and subtypes.")
    (license license:expat)))

(define-public python-cmseq
  (package
    (name "python-cmseq")
    (version "1.0.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "CMSeq" version))
       (sha256
        (base32 "0p6a99c299m5wi2z57dgqz52m1z3nfr8mv7kdnk2jvl2p9nql0wk"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         ;; Replace the bare 'samtools' command name in cmseq/cmseq.py with
         ;; the absolute file name of the samtools executable from the
         ;; inputs.
         (add-after 'unpack 'patch-samtools-reference
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "cmseq/cmseq.py"
               (("'samtools'")
                (string-append "'" (search-input-file inputs "/bin/samtools") "'"))))))
      #:tests? #f))                     ;there are no tests
    (inputs
     (list samtools))
    ;; Both python-bcbio-gff and Biopython are taken in their Biopython 1.73
    ;; variants.
    (propagated-inputs
     (list python-bcbio-gff/biopython-1.73
           python-biopython-1.73
           python-numpy
           python-pandas
           python-pysam
           python-scipy))
    (home-page "https://github.com/SegataLab/cmseq/")
    (synopsis "Set of utilities on sequences and BAM files")
    (description
     "CMSeq is a set of commands to provide an interface to .bam files for coverage
and sequence consensus.")
    (license license:expat)))

;; Cython wrapper around htslib.  The bundled htslib copy is removed from the
;; source, and setup.py is patched to refer to the htslib input instead.
(define-public python-cyvcf2
  (package
    (name "python-cyvcf2")
    (version "0.30.28")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/brentp/cyvcf2")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "16yhfax509zyip8kkq2b0lflx5bdq5why7d785ayrqyzzq2rxqkk"))
       (modules '((guix build utils)))
       (snippet
        ;; Delete bundled library
        '(delete-file-recursively "htslib"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (add-before 'check 'build-extensions
            (lambda _
              ;; Cython extensions have to be built before running the tests.
              (invoke "python" "setup.py" "build_ext" "--inplace")))
          ;; Patch setup.py: the 'htslib_include_dirs' assignment is replaced
          ;; with the include directory of the htslib input, and 'lib_name'
          ;; with the absolute file name of "lib/libhts.so.3" found among the
          ;; inputs.
          (add-after 'unpack 'fix-setup
            (lambda* (#:key inputs #:allow-other-keys)
              (substitute* "setup.py"
                (("^htslib_include_dirs =.*")
                 (string-append "htslib_include_dirs = [\""
                                #$(this-package-input "htslib") "/include\"]\n"))
                (("lib_name = \"libhts.so\"")
                 (string-append "lib_name = \""
                                (search-input-file inputs "lib/libhts.so.3")
                                "\"\n")))))
          ;; Set environment variables before the build; presumably upstream's
          ;; setup.py reads them to run Cython and to link against an external
          ;; htslib -- verify against setup.py when updating.
          (add-before 'build 'use-system-htslib-package
            (lambda _
              (setenv "CYTHONIZE" "1")
              (setenv "CYVCF2_HTSLIB_MODE" "EXTERNAL"))))))
    (inputs (list curl htslib libdeflate openssl zlib))
    (native-inputs (list python-cython python-pytest))
    (propagated-inputs
     (list python-click
           python-coloredlogs
           python-numpy))
    (home-page "https://github.com/brentp/cyvcf2/")
    (synopsis "Fast vcf file parsing with Cython and htslib")
    (description "Cyvcf2 is a Cython wrapper around htslib built for fast
parsing of Variant Call Format (VCF) files.")
    (license license:expat)))

(define-public python-decoupler-py
  (package
    (name "python-decoupler-py")
    (version "1.6.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/saezlab/decoupler-py")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1mqkp0i8k5hzhfnka4nc2f0phmrs0k404ynbl1lqfjzywx25y75h"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")))
         ;; Numba caches functions on disk and needs a writable directory
         ;; for that.
         (add-before 'build 'set-numba-cache-dir
           (lambda _
             (setenv "NUMBA_CACHE_DIR" "/tmp"))))
      #:test-flags
      ;; Deselect the tests below with a single "-k" expression.
      '(list "-k"
             ;; These tests require internet access
             (string-append "not test_get_resource"
                            " and not test_show_resources"
                            " and not test_get_dorothea"
                            " and not test_get_progeny"
                            " and not test_get_ksn_omnipath"
                            ;; This attempts to download things for Omnipath
                            " and not test_get_collectri"))))
    (native-inputs
     (list python-pytest))
    (propagated-inputs
     (list python-adjusttext
           python-anndata
           python-ipython
           python-matplotlib
           python-nbsphinx
           python-numba
           python-numpy
           python-numpydoc
           python-omnipath
           python-scanpy
           python-scikit-learn
           python-scipy
           python-skranger
           python-tqdm
           python-typing-extensions))
    (home-page "https://github.com/saezlab/decoupler-py")
    (synopsis
     "Framework for modeling, analyzing and interpreting single-cell RNA-seq data")
    (description
     "This package provides different statistical methods to extract
biological activities from omics data within a unified framework.")
    (license license:gpl3+)))

(define-public python-demuxem
  (package
    (name "python-demuxem")
    (version "0.1.7")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "demuxEM" version))
       (sha256
        (base32 "1bhyxqjk44bmyd26m1smapf68wyf7252kk65i27k50dd3kswgnd6"))))
    (build-system pyproject-build-system)
    (arguments
     (list #:tests? #f))                ;there are no tests
    (native-inputs
     (list python-cython python-setuptools-scm))
    (propagated-inputs
     (list python-docopt
           python-importlib-metadata
           python-numpy
           python-pandas
           python-pegasusio
           python-scikit-learn
           python-scipy
           python-seaborn))
    (home-page "https://github.com/lilab-bcb/demuxEM")
    (synopsis "Analyze cell-hashing/nucleus-hashing data")
    (description
     "This is a Python module for analyzing cell-hashing/nucleus-hashing data.
It is the demultiplexing module of Pegasus, which is used by Cumulus in the
demultiplexing step.")
    (license license:bsd-3)))

(define-public python-doubletdetection
  (package
    (name "python-doubletdetection")
    (version "4.2")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "doubletdetection" version))
              (sha256
               (base32
                "0v0a19014h4p6x8pyz1s78xn3q5w5166cysvg574z6vw79a3s9vp"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:tests? #false ;there are none
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'use-poetry-core
            (lambda _
              ;; Patch to use the core poetry API.
              (substitute* "pyproject.toml"
                (("poetry.masonry.api")
                 "poetry.core.masonry.api")))))))
    (propagated-inputs
     (list python-anndata
           python-ipywidgets
           python-leidenalg
           python-vtraag-louvain
           python-matplotlib
           python-numpy
           python-pandas
           python-phenograph
           python-scanpy
           python-scipy
           python-tqdm))
    (native-inputs
     (list python-black
           python-flake8
           python-poetry-core
           python-pytest))
    (home-page "https://github.com/JonathanShor/DoubletDetection")
    ;; Synopses are short phrases that say what the package does; they do
    ;; not start with "This is a package ...".
    (synopsis "Detect doublets in single-cell RNA-seq count matrices")
    (description
     "This package provides a method to detect and enable removal of doublets
from single-cell RNA-sequencing.")
    (license license:expat)))

;; Hclust2, built from the PyPI release; the test phase is disabled.
(define-public python-hclust2
  (package
    (name "python-hclust2")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "hclust2" version))
       (sha256
        (base32 "0v89n2g42d7jhgfs8glf06apgxx6aswp3mfisgnhm518cv8z2rwn"))))
    (build-system pyproject-build-system)
    ;; There are no tests.
    (arguments
     (list #:tests? #false))
    (propagated-inputs
     (list python-matplotlib python-numpy python-pandas python-scipy))
    (home-page "https://github.com/SegataLab/hclust2/")
    (synopsis "Plotting heat-maps for publications")
    (description
     "Hclust2 is a handy tool for plotting heat-maps with several useful options
to produce high quality figures that can be used in publications.")
    (license license:expat)))

;; htsget client library and command line tool, built from the PyPI release.
(define-public python-htsget
  (package
    (name "python-htsget")
    (version "0.2.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "htsget" version))
       (sha256
        (base32 "111q4pzkav26aa3hkgh948wqlyrq7dq6sjml9z63n3blw8s6b0c4"))))
    (build-system pyproject-build-system)
    (propagated-inputs
     (list python-humanize
           python-requests
           python-six))
    (native-inputs (list python-setuptools-scm))
    (home-page "https://pypi.org/project/htsget/")
    (synopsis "Python API and command line interface for the GA4GH htsget API")
    (description
     "This package is a client implementation of the GA4GH htsget
protocol.  It provides a simple and reliable way to retrieve genomic data from
servers supporting the protocol.")
    (license license:asl2.0)))

;; LIANA, built from the upstream Git repository at the tag named after the
;; version.  A number of tests are deselected (see the comments in
;; #:test-flags) and two test files are ignored entirely.
(define-public python-liana-py
  (package
    (name "python-liana-py")
    (version "1.1.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/saezlab/liana-py")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0f5al0v55haja91q9gd409v7q78mmp1wv9znsplsbjp6lfspjfnw"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:test-flags
      '(list "-k"
             ;; These tests require internet access.
             (string-append "not test_generate_lr_resource"
                            " and not test_get_metalinks"
                            " and not test_get_metalinks_values"
                            " and not test_describe_metalinks"
                            " and not test_generate_nondefault_lr_resource"
                            ;; Minor accuracy difference
                            " and not test_bivar_morans_perms"
                            ;; XXX unclear failure: 'coo_matrix' object is not
                            ;; subscriptable
                            " and not test_bivar_product")
             ;; These need the optional squidpy, which we don't have yet.
             "--ignore=liana/tests/test_misty.py"
             ;; These need the optional corneto.
             "--ignore=liana/tests/test_causalnet.py")
      #:phases
      '(modify-phases %standard-phases
         (add-after 'unpack 'relax-requirements
           (lambda _
             ;; Don't fail the sanity check when these optional inputs aren't
             ;; available.
             (substitute* "pyproject.toml"
               (("^pre-commit =.*") ""))))
         ;; Numba needs a writable directory to cache functions.
         (add-before 'build 'set-numba-cache-dir
           (lambda _ (setenv "NUMBA_CACHE_DIR" "/tmp"))))))
    (propagated-inputs (list python-anndata
                             python-cell2cell
                             python-decoupler-py
                             python-hypothesis
                             python-ipykernel
                             python-ipython
                             python-mudata
                             python-nbconvert
                             python-nbsphinx
                             python-numpy
                             python-numpydoc
                             python-omnipath
                             python-pandas
                             python-plotnine
                             python-pypandoc
                             python-scipy
                             python-requests
                             python-scanpy
                             python-statsmodels
                             python-tqdm
                             tzdata))
    (native-inputs
     (list python-black
           python-poetry-core
           python-pytest
           python-pytest-cov))
    (home-page "https://github.com/saezlab/liana-py")
    ;; Keep the synopsis a short noun phrase rather than a sentence.
    (synopsis "Ligand-receptor analysis framework")
    (description "This is a Ligand-Receptor inference framework.  The
framework enables the use of any LR method with any resources.")
    (license license:gpl3+)))

;; Logomaker, built from the PyPI release with the default phases.
(define-public python-logomaker
  (package
    (name "python-logomaker")
    (version "0.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "logomaker" version))
       (sha256
        (base32 "0v9z3ml1s7imk28hqyhrqjqg3sq0j29lx975d36n2ybdgld51iyq"))))
    (build-system pyproject-build-system)
    (propagated-inputs
     (list python-matplotlib
           python-numpy
           python-pandas))
    (home-page "https://logomaker.readthedocs.io")
    (synopsis "Package for making Sequence Logos")
    (description
     "Logomaker is a Python package for generating
publication-quality sequence logos.  Logomaker can generate both standard and
highly customized logos illustrating the properties of DNA, RNA, or protein
sequences.  Logos are rendered as vector graphics embedded within native
matplotlib Axes objects, making them easy to style and incorporate into
multi-panel figures.")
    (license license:expat)))

;; MAGIC, built from the upstream Git repository at tag "v<version>".  The
;; build happens inside the "python" sub-directory of the checkout.
(define-public python-magic-impute
  (package
    (name "python-magic-impute")
    (version "1.2.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/KrishnaswamyLab/MAGIC")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1yjs16vg87lcg9g16bnblg1v9sk73j6dm229lkcz0bfjlzxjhv8w"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      ;; There are no tests.
      #:tests? #f
      #:phases
      '(modify-phases %standard-phases
         ;; Enter the "python" directory right after unpacking.
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "python"))))))
    (propagated-inputs (list python-future
                             python-graphtools
                             python-matplotlib
                             python-numpy
                             python-pandas
                             python-scikit-learn
                             python-scipy
                             python-tasklogger))
    (home-page "https://github.com/KrishnaswamyLab/MAGIC")
    (synopsis "Markov affinity-based graph imputation of cells")
    (description
     "MAGIC is an interactive tool to impute missing values in
single-cell sequencing data and to restore the structure of the data.  It also
provides data pre-processing functionality such as dimensionality reduction
and gene expression visualization.")
    (license license:gpl2+)))

;; Metacells, built from the PyPI release.  Extra phases adjust
;; requirements.txt, set environment variables for the build and the tests,
;; and build the extensions in place before the regular build phase.
(define-public python-metacells
  (package
    (name "python-metacells")
    (version "0.9.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "metacells" version))
       (sha256
        (base32 "02f63nxz6b60vl6s4n9vapaysnq1w5f3x7c179rh2rr7j2k5cf1y"))))
    ;; NOTE: the `#;' datum comment below disables the `properties' field
    ;; that follows it.
    #;
    (properties '((tunable? . #t)))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          ;; The package "python-igraph" has been deprecated in favor of
          ;; just "igraph".
          (add-after 'unpack 'rename-igraph
            (lambda _
              (substitute* "requirements.txt"
                (("python-igraph") "igraph"))))
          (add-after 'unpack 'do-not-tune
            (lambda _
              ;; Without this they pass -march=native to the compiler.
              (setenv "WHEEL" "1")))
          ;; Numba needs a writable dir to cache functions.
          (add-before 'check 'set-numba-cache-dir
            (lambda _ (setenv "NUMBA_CACHE_DIR" "/tmp")))
          (add-before 'build 'build-extensions
            (lambda _
              (invoke "python" "setup.py" "build_ext" "--inplace"))))))
    (propagated-inputs (list python-anndata
                             python-cvxpy
                             python-fastcluster
                             python-importlib-metadata
                             python-numpy
                             python-pandas
                             python-psutil
                             python-igraph
                             python-pyyaml
                             python-scanpy
                             python-scipy
                             python-threadpoolctl
                             python-umap-learn))
    (native-inputs (list python-black
                         python-bumpversion
                         python-flake8
                         python-isort
                         python-mypy
                         python-mypy-extensions
                         python-pylint
                         python-pytest
                         python-pytest-cov
                         python-sphinx
                         python-sphinx-rtd-theme
                         python-tox
                         python-twine))
    ;; Point at the project page rather than the ".git" clone URL.
    (home-page "https://github.com/tanaylab/metacells")
    (synopsis "Single-cell RNA Sequencing Analysis")
    (description "The metacells package implements the improved metacell
algorithm for single-cell RNA sequencing (scRNA-seq) data analysis within the
scipy framework, and projection algorithm based on it.  The original metacell
algorithm was implemented in R.  The Python package contains various
algorithmic improvements and is scalable for larger data sets (millions of
cells).")
    (license license:expat)))

;; Parabam, built from the PyPI release.  A source snippet edits setup.py
;; before the build.
(define-public python-parabam
  (package
    (name "python-parabam")
    (version "3.0.1")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "parabam" version))
              (sha256
               (base32
                "1cy9q3gzdawi1kilycpd7waymjmrwsg8czwycfp13g301ir9xyp3"))
              (modules '((guix build utils)))
              ;; Remove the "'argparse'," entry from setup.py.
              (snippet
               '(substitute* "setup.py"
                  (("'argparse',")
                   "")))))
    (build-system pyproject-build-system)
    (propagated-inputs
     (list python-numpy
           python-pysam))
    (home-page "https://github.com/cancerit/parabam")
    (synopsis "Parallel BAM File Analysis")
    (description
     "Parabam is a tool for processing sequencing files in
parallel.  It uses Python's native multiprocessing framework to apply a user
defined rule on an input file.")
    (license license:gpl3)))

;; PDBFixer, built from the upstream Git repository at the tag named after
;; the version.  Three test selections are excluded through "-k".
(define-public python-pdbfixer
  (package
    (name "python-pdbfixer")
    (version "1.9")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/openmm/pdbfixer")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1zjhb19q5dclkwvzh8n29p31n1vzkhlmmzwqllimi89jsis1cx35"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:test-flags
      ;; These tests fail because they require internet access.
      '(list "-k" (string-append "not test_build_and_simulate.py"
                                 " and not test_cli.py"
                                 " and not test_mutate.py"))))
    (native-inputs
     (list python-pytest))
    (propagated-inputs
     (list openmm
           python-numpy))
    (home-page "https://github.com/openmm/pdbfixer")
    (synopsis "Application for fixing problems in Protein Data Bank")
    (description
     "PDBFixer is designed to rectify issues in Protein Data Bank files.
Its intuitive interface simplifies the process of resolving problems
encountered in PDB files prior to simulation tasks.")
    (license license:expat)))

;; peaks2utr, built from the PyPI release.  Tests matching
;; "test_annotation.py" are deselected.
(define-public python-peaks2utr
  (package
    (name "python-peaks2utr")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "peaks2utr" version))
       (sha256
        (base32 "1idp9cgwqxvryf4qqrc1xjsamfqn3jmr56kmjp2h1ysmckwmhw4v"))))
    (build-system pyproject-build-system)
    (arguments
     ;; These two tests fail because file names are not URLs.
     (list #:test-flags '(list "-k" "not test_annotation.py")))
    (propagated-inputs (list python-asgiref
                             python-gffutils
                             python-importlib-resources
                             macs
                             python-numpy
                             python-psutil
                             python-pybedtools
                             python-pysam
                             python-requests
                             python-tqdm
                             python-typing-extensions
                             python-zipp))
    (home-page "https://github.com/haessar/peaks2utr")
    (synopsis "Python CLI for annotating three prime UTR")
    (description "This package provides a robust, parallelized Python CLI for annotating
three prime UTR.")
    (license license:gpl3+)))

;; PegasusIO, built from the PyPI release; the test phase is disabled.
(define-public python-pegasusio
  (package
    (name "python-pegasusio")
    (version "0.7.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pegasusio" version))
       (sha256
        (base32 "0gqygspdy398vjymdy6756jmk99s7fhwav9rivdx59kpqjcdxaz9"))))
    (build-system pyproject-build-system)
    (arguments
     (list #:tests? #f))                ;there are no tests
    (native-inputs
     (list python-cython
           python-setuptools-scm))
    (propagated-inputs (list python-anndata
                             python-docopt
                             python-h5py
                             python-importlib-metadata
                             python-loompy
                             python-natsort
                             python-numpy
                             python-pandas
                             python-pillow
                             python-scipy
                             python-zarr))
    (home-page "https://github.com/lilab-bcb/pegasusio")
    (synopsis "Read or write single-cell genomics data")
    (description "Pegasusio is a Python package for reading or writing single-cell
genomics data.")
    (license license:bsd-3)))

;; PhenoGraph, built from the PyPI release.  The "phenograph/louvain"
;; directory is removed from the source and "phenograph/core.py" is patched
;; to look up the programs in the directory holding "bin/community" from the
;; inputs (the `louvain' package is listed there).
(define-public python-phenograph
  (package
    (name "python-phenograph")
    (version "1.5.7")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "PhenoGraph" version))
              (sha256
               (base32
                "0nji449mzwgp1f87iknl5fmnjdkrhkfkapxvafxdw01s0jg8zcj6"))
              (modules '((guix build utils)))
              ;; Remove bundled binaries
              (snippet
               '(delete-file-recursively "phenograph/louvain"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          ;; This test can never succeed because Q_leiden is never set to
          ;; anything other than None.
          (add-after 'unpack 'disable-leiden-test
            (lambda _
              (substitute* "tests/test_cluster.py"
                (("def test_run_leiden") "def _test_run_leiden"))))
          (add-after 'unpack 'patch-louvain
            (lambda* (#:key inputs #:allow-other-keys)
              (substitute* "phenograph/core.py"
                ;; Replace the computed path with the absolute directory of
                ;; the "community" executable.
                (("lpath = os.path.*")
                 (string-append "lpath = \""
                                (dirname (search-input-file inputs "/bin/community"))
                                "\"\n"))
                ;; Drop the "linux-" prefix from the program names.
                (("linux-(community|hierarchy|convert)" _ thing) thing)
                ;; Do not write binaries, because the unmodified "convert"
                ;; from louvain only knows how to process plain text files.
                (("with open\\(filename \\+ \".bin\", \"w\\+b\"\\) as f:")
                 "with open(filename + \".bin\", \"w+\") as f:")
                (("f.writelines\\(\\[e for t in zip\\(ij, s\\) for e in t\\]\\)")
                 "for [src, dest], weight in zip(ij, s): \
f.write(src.astype(\"str\") + ' ' + \
dest.astype(\"str\") + ' ' + \
weight.astype(\"str\") + '\\n')")))))))
    (inputs
     (list louvain))
    (propagated-inputs
     (list python-leidenalg
           python-numpy
           python-psutil
           python-scikit-learn
           python-scipy))
    (native-inputs
     (list python-pytest))
    ;; Point at the project page rather than the ".git" clone URL.
    (home-page "https://github.com/dpeerlab/PhenoGraph")
    (synopsis "Graph-based clustering for high-dimensional single-cell data")
    (description
     "PhenoGraph is a clustering method designed for high-dimensional
single-cell data.  It works by creating a graph representing phenotypic
similarities between cells and then identifying communities in this graph.")
    (license license:expat)))

;; PhyloPhlAn, built from the upstream Git repository at the tag named after
;; the version; the test phase is disabled.
(define-public python-phylophlan
  (package
    (name "python-phylophlan")
    (version "3.0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/biobakery/phylophlan")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1wz70xzxqx2sf5flmf45m15jq027dqijfaj1r51pl50w5x6dkawx"))))
    (build-system pyproject-build-system)
    (arguments (list #:tests? #f))      ;there are no tests
    (propagated-inputs
     (list python-biopython
           python-dendropy
           python-matplotlib
           python-numpy
           python-pandas
           python-seaborn))
    (home-page "https://github.com/biobakery/phylophlan")
    (synopsis
     "Phylogenetic analysis of microbial isolates and genomes from metagenomes")
    (description
     "This package is an integrated pipeline for large-scale phylogenetic
profiling of genomes and metagenomes.  PhyloPhlAn is an accurate, rapid, and
easy-to-use method for large-scale microbial genome characterization and
phylogenetic analysis at multiple levels of resolution.  This software package
can assign both genomes and @acronym{MAGs, metagenome-assembled genomes} to
@acronym{SGBs, species-level genome bins}.  PhyloPhlAn can reconstruct
strain-level phylogenies using clade-specific maximally informative
phylogenetic markers, and can also scale to very large phylogenies comprising
>17,000 microbial species.")
    (license license:expat)))

;; pybedtools, built from the PyPI release.  The phases disable a few tests,
;; delete the C/C++ files that correspond to ".pyx" sources so that they are
;; regenerated with Cython, and run the test suite from a copy under
;; "/tmp/test".
(define-public python-pybedtools
  (package
    (name "python-pybedtools")
    (version "0.9.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "18rhzk08d3rpxhi5xh6pqg64x6v5q3daw6y3v54k85v4swncjrwj"))))
    (build-system pyproject-build-system)
    (arguments
     `(#:modules ((srfi srfi-26)
                  (guix build utils)
                  (guix build python-build-system)
                  (guix build pyproject-build-system))
       ;; See https://github.com/daler/pybedtools/issues/192
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             (substitute* "pybedtools/test/test_helpers.py"
               ;; Requires internet access.
               (("def test_chromsizes")
                "def _do_not_test_chromsizes")
               ;; Broken as a result of the workaround used in the check phase
               ;; (see: https://github.com/daler/pybedtools/issues/192).
               (("def test_getting_example_beds")
                "def _do_not_test_getting_example_beds"))
             ;; This issue still occurs on python2
             (substitute* "pybedtools/test/test_issues.py"
               (("def test_issue_303")
                "def _test_issue_303"))))
         ;; Force the Cythonization of C++ files to guard against compilation
         ;; problems.
         (add-after 'unpack 'remove-cython-generated-files
           (lambda _
             ;; CYTHON-SOURCES holds the ".pyx" file names without their
             ;; four-character extension.
             (let ((cython-sources (map (cut string-drop-right <> 4)
                                        (find-files "." "\\.pyx$")))
                   (c/c++-files (find-files "." "\\.(c|cpp|cxx)$")))
               (define (strip-extension filename)
                 (string-take filename (string-index-right filename #\.)))
               (define (cythonized? c/c++-file)
                 (member (strip-extension c/c++-file) cython-sources))
               (for-each delete-file (filter cythonized? c/c++-files)))))
         (add-after 'remove-cython-generated-files 'generate-cython-extensions
           (lambda _
             (invoke "python" "setup.py" "cythonize")))
         (replace 'check
           (lambda _
             ;; The tests need to be run from elsewhere...
             (mkdir-p "/tmp/test")
             (copy-recursively "pybedtools/test" "/tmp/test")
             (with-directory-excursion "/tmp/test"
               (invoke "pytest" "-v" "--doctest-modules")))))))
    (propagated-inputs
     (list bedtools samtools python-matplotlib python-pysam
           python-pyyaml))
    (native-inputs
     (list python-numpy
           python-pandas
           python-cython
           kentutils ; for bedGraphToBigWig
           python-six
           ;; For the test suite.
           python-pytest
           python-psutil))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "This package is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    ;; pypi lists GPLv2 in the PKG-INFO and website, but was relicensed in
    ;; version 0.9.0 and the LICENSE.txt is consistent with the source code.
    ;;
    ;; pybedtools/include/gzstream.cpp and pybedtools/include/gzstream.h are
    ;; licensed lgpl2.1+
    (license (list license:expat license:lgpl2.1+))))

;; EGA download client (PyEGA3), built from the upstream Git repository at
;; tag "v<version>".  Two functional test files are ignored, several tests
;; are deselected with "-k", and the "==" pins in setup.py are relaxed to
;; ">=".
(define-public python-ega-download-client
  (package
    (name "python-ega-download-client")
    (version "5.1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/EGA-archive/ega-download-client")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0k9rfq2yyvfxs5sq9lsm8krp9ddx4s18hv85ikf3b37zv24kpwjk"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:test-flags
      '(list
        ;; These tests fail because they require internet access.
        "--ignore=tests/functional/test_download.py"
        "--ignore=tests/functional/test_htsget.py"
        "-k"
        (string-append "not test_error_5xx"
                       " and not test_error_too_many_requests"
                       ;; Something's wrong here.  On some powerful machines
                       ;; (but not on my laptop) these fail, and tests like
                       ;; test_file_is_saved_into_an_existing_directory_which_was_specified_by_the_user
                       ;; take a *very* long time to complete.
                       ;;
                       ;; It looks like "dataset_in_fire.download" takes an
                       ;; unusually long time on those machines.  We disable
                       ;; tests that fail under these conditions.
                       " and not test_download_file"
                       " and not test_output_file_is_removed_if_md5_was_invalid"
                       " and not test_post_stats_if_download_succeeded"))
      #:phases
      '(modify-phases %standard-phases
         ;; Turn every "==" in setup.py into ">=".
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               (("==") ">=")))))))
    (propagated-inputs (list python-htsget python-psutil python-requests
                             python-tqdm python-urllib3))
    (native-inputs (list python-coverage python-pytest python-pyfakefs
                         python-responses python-mock))
    (home-page "https://github.com/EGA-archive/ega-download-client")
    (synopsis "EGA download client")
    (description "PyEGA3 is a tool for viewing and downloading files from
authorized EGA datasets.  It uses the EGA data API and has several key
features:

@itemize
@item Files are transferred over secure https connections and received
  unencrypted, so no need for decryption after download.
@item Downloads resume from where they left off in the event that the
  connection is interrupted.
@item Supports file segmenting and parallelized download of segments,
  improving overall performance.
@item After download completes, file integrity is verified using checksums.
@item Implements the GA4GH-compliant htsget protocol for download of genomic
  ranges for data files with accompanying index files.
@end itemize\n")
    (license license:asl2.0)))

;; scDamAndTools, built from the upstream Git repository at tag
;; "v<version>"; the test phase is disabled.
(define-public python-scdamandtools
  (package
    (name "python-scdamandtools")
    (version "1.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/KindLab/scDamAndTools")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1mblw6cn5jqik6ky8cv7ry99z6jm1i4r71pzdfl398vrwbda65gd"))))
    (build-system pyproject-build-system)
    (arguments
     (list #:tests? #f))                ;there are none
    (propagated-inputs (list python-h5py
                             python-numpy
                             python-sortedcontainers
                             python-pandas
                             python-pysam
                             python-tqdm))
    (native-inputs (list python-cython python-pytest))
    (home-page "https://github.com/KindLab/scDamAndTools")
    (synopsis "Functions for processing raw scDam&T-seq data")
    (description
     "This is a set of functions for processing raw scDam&T-seq data.
scDam&T-seq is a method to simultaneously measure protein-DNA interactions and
transcription from single cells (Rooijers et al., 2019).  It combines a
DamID-based method to measure protein-DNA interactions and an adaptation of
CEL-Seq to measure transcription.  The starting point of the workflow is raw
sequencing data and the end results are tables of UMI-unique DamID and CEL-Seq
counts.")
    (license license:expat)))

;; SnapTools, built from the PyPI release; the test phase is disabled.
(define-public python-snaptools
  (package
    (name "python-snaptools")
    (version "1.4.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "snaptools" version))
       (sha256
        (base32
         "1s5373g5jjbshh3q39zy7dlxr7nda6ksxq9d1gw46h82c4fsmfbn"))))
    (build-system pyproject-build-system)
    (arguments (list #:tests? #false)) ;there are none
    (propagated-inputs
     (list python-future
           python-h5py
           python-louvain
           python-numpy
           python-pybedtools
           python-pysam))
    (home-page "https://github.com/r3fang/SnapTools")
    (synopsis "Tools for processing snap files")
    (description
     "@code{SnapTools} can perform the following types of operations on snap
files:

@itemize
@item index the reference genome before alignment;
@item align reads to the corresponding reference genome;
@item pre-process by converting pair-end reads into fragments, checking the
  mapping quality score, alignment and filtration;
@item create the cell-by-bin matrix.
@end itemize")
    (license license:asl2.0)))

;; Telomerecat, built from the PyPI release; the test phase is disabled.
(define-public python-telomerecat
  (package
    (name "python-telomerecat")
    (version "4.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "telomerecat" version))
       (sha256
        (base32 "16mfdqmp0j6g3h26h59334w9lqb4qihqrlzwvgznj0fiqs1rkxn2"))))
    (build-system pyproject-build-system)
    (arguments (list #:tests? #false)) ;there are none
    (propagated-inputs (list python-click python-numpy python-pandas
                             python-parabam python-pysam))
    (home-page "https://github.com/cancerit/telomerecat")
    (synopsis "Telomere computational analysis tool")
    (description "Telomerecat is a tool for estimating the average telomere
length (TL) for a paired end, whole genome sequencing (WGS) sample.

Telomerecat is adaptable, accurate and fast.  The algorithm accounts for
sequencing amplification artifacts, aneuploidy (common in cancer samples)
and noise generated by WGS.  For a high coverage WGS BAM file of around 100GB
telomerecat can produce an estimate in ~1 hour.")
    (license license:gpl3)))

;; Bioframe, built from the upstream Git repository at tag "v<version>".
;; Three tests are deselected and MPLCONFIGDIR is set before the tests run.
(define-public python-bioframe
  (package
    (name "python-bioframe")
    (version "0.6.4")
    (source (origin
              (method git-fetch)
              ;; pypi version does not contain tests and requirements.txt
              (uri (git-reference
                    (url "https://github.com/open2c/bioframe")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1m99hgxw4cb2x4qszb2lhp1isz57sdkqbmcgisnbqxqxkv4gba7v"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:test-flags
      '(list "-k"
             (string-append "not test_fetch_chromsizes"
                            " and not test_fetch_chromsizes_local_vs_ucsc"
                            " and not test_fetch_centromeres"))
      #:phases
      '(modify-phases %standard-phases
         ;; Point MPLCONFIGDIR at "/tmp" for the test run.
         (add-before 'check 'pre-check
           (lambda _
             (setenv "MPLCONFIGDIR" "/tmp"))))))
    (native-inputs (list python-biopython
                         python-hatchling
                         python-pysam
                         python-pytest
                         python-wheel))
    (propagated-inputs (list python-matplotlib
                             python-numpy
                             python-pandas
                             python-pyyaml
                             python-requests))
    (home-page "https://github.com/open2c/bioframe")
    (synopsis "Pandas utilities for tab-delimited and other genomic files")
    (description "This package is a library to enable flexible and scalable operations on
genomic interval dataframes in Python.  Bioframe enables access to a rich set
of dataframe operations.  Working in Python enables rapid visualization and
iteration of genomic analyses.  The philosophy underlying bioframe is to
enable flexible operations.  Instead of creating a function for every possible
use-case, we encourage users to compose functions to achieve their goals.")
    (license license:expat)))

;; biom-format, built from the upstream Git repository at the tag named after
;; the version.  Files ending in ".c" are deleted from the source, USE_CYTHON
;; is set for the build, and several tests are skipped by inserting a
;; "skipif" decorator in front of them.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.12")
    (source
     (origin
       (method git-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (git-reference
             (url "https://github.com/biocore/biom-format")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "06x2d8fv80jp86kd66fm3ragmxrpa2j0lzsbm337ziqjnpsdwc0f"))
       (modules '((guix build utils)))
       ;; Delete generated C files.  The regexp is anchored so that only
       ;; names ending in ".c" match, not every name that contains ".c".
       (snippet
        '(for-each delete-file (find-files "." "\\.c$")))))
    (build-system python-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         (add-after 'unpack 'use-cython
           (lambda _ (setenv "USE_CYTHON" "1")))
         (add-after 'unpack 'pandas-compatibility
           (lambda _
             (substitute* "biom/tests/test_table.py"
               (("import pandas.util.testing")
                "import pandas.testing"))))
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             ;; Each substitution below re-emits the matched "def" line (M)
             ;; preceded by a skip decorator at the same indentation (INDENT).
             (substitute* "biom/tests/test_util.py"
               (("^(.+)def test_biom_open_hdf5_no_h5py" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m)))
             (substitute* "biom/tests/test_table.py"
               (("^(.+)def test_from_hdf5_issue_731" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m))
               ;; Unclear why this one fails.  There is no backtrace.
               (("^(.+)def test_to_dataframe_is_sparse" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m))
               ;; These need skbio, but that needs biom-format.
               (("^(.+)def test_align_tree_intersect_obs" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m))
               (("^(.+)def test_align_tree_intersect_tips" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m))
               (("^(.+)def test_align_tree_sample" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m))))))))
    (propagated-inputs
     (list python-anndata
           python-click
           python-flake8
           python-future
           python-h5py
           python-numpy
           python-pandas
           ;;python-scikit-bio ;mutually recursive dependency
           python-scipy))
    (native-inputs
     (list python-cython python-pytest python-pytest-cov python-nose))
    (home-page "https://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)))

;; Pairtools, built from the upstream Git repository at tag "v<version>".
;; References to "/bin/bash" in three CLI modules are replaced with the
;; result of (which "bash"), and the check phase is replaced by a pytest
;; invocation.
(define-public python-pairtools
  (package
    (name "python-pairtools")
    (version "1.0.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/open2c/pairtools")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0xn4cg4jq3rfn42h8rfwg0k6xkvihjrv32gwldb9y0jp05lzw9cs"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the hard-coded "/bin/bash" with the bash found by `which'.
         (add-after 'unpack 'fix-references
           (lambda _
             (substitute* '("pairtools/cli/header.py"
                            "pairtools/cli/merge.py"
                            "pairtools/cli/sort.py")
               (("/bin/bash") (which "bash")))))
         ;; Run pytest from within "/tmp", and only when tests are enabled.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (with-directory-excursion "/tmp"
                 (invoke "pytest" "-v"))))))))
    (native-inputs
     (list python-cython python-pytest))
    (propagated-inputs
     (list htslib ; for bgzip, looked up in PATH
           samtools ; looked up in PATH
           lz4 ; for lz4c
           python-bioframe
           python-click
           python-numpy
           python-pandas
           python-pysam
           python-pyyaml
           python-scipy))
    (home-page "https://github.com/open2c/pairtools")
    (synopsis "Process mapped Hi-C data")
    (description "Pairtools is a simple and fast command-line framework to
process sequencing data from a Hi-C experiment.  Process pair-end sequence
alignments and perform the following operations:

@itemize
@item detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end
  sequences of Hi-C DNA molecules
@item sort @code{.pairs} files for downstream analyses
@item detect, tag and remove PCR/optical duplicates
@item generate extensive statistics of Hi-C datasets
@item select Hi-C pairs given flexibly defined criteria
@item restore @code{.sam} alignments from Hi-C pairs.
@end itemize
")
    (license license:expat)))

(define-public python-readpaf
  (package
    (name "python-readpaf")
    (version "0.0.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "readpaf" version))
       (sha256
        (base32 "15m6ffks4zwpp1ycwk6n02py6mw2yh7qr0vhpc178b91gldr97ia"))))
    (build-system pyproject-build-system)
    (arguments
     ;; There are no tests to run.
     (list #:tests? #f))
    (propagated-inputs
     (list python-pandas))
    (home-page "https://github.com/alexomics/read-paf")
    (synopsis "Minimap2 PAF file reader")
    (description
     "This is a fast parser for minimap2 PAF (Pairwise mApping Format)
files.")
    (license license:expat)))

(define-public bioperl-minimal
  (package
    (name "bioperl-minimal")
    (version "1.7.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bioperl/bioperl-live")
             ;; The commit name is "release-" followed by VERSION with
             ;; every dot replaced by a dash, e.g. "release-1-7-0".
             (commit (string-append "release-"
                                    (string-map (lambda (c)
                                                  (if (char=? c #\.)
                                                      #\- c)) version)))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0wl8yvzcls59pwwk6m8ahy87pwg6nnibzy5cldbvmcwg2x2w7783"))))
    (build-system perl-build-system)
    (arguments
     ;; Computed on the host side: for every direct input of this package,
     ;; gather the result of 'package-transitive-target-inputs', drop
     ;; duplicate entries and keep only the package names.  The resulting
     ;; list of names is spliced into the 'wrap-programs' phase below.
     (let ((transitive-inputs
            (map (compose package-name cadr)
                 (delete-duplicates
                  (concatenate
                   (map (compose package-transitive-target-inputs cadr)
                        (package-inputs this-package)))))))
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((out  (assoc-ref outputs "out"))
                      (bin  (string-append out "/bin/"))
                      ;; Colon-separated search path: this package's own
                      ;; site_perl directory first, then the directory of
                      ;; each transitive input looked up by name.
                      (path (string-join
                             (cons (string-append out "/lib/perl5/site_perl")
                                   (map (lambda (name)
                                          (assoc-ref %build-inputs name))
                                        ',transitive-inputs))
                             ":")))
                 ;; Only the files under bin/ whose name ends in ".pl" are
                 ;; wrapped; PATH is prepended to any existing PERL5LIB.
                 (for-each (lambda (file)
                             (wrap-program file
                               `("PERL5LIB" ":" prefix (,path))))
                           (find-files bin "\\.pl$")))))))))
    (inputs
     (list bash-minimal perl-module-build perl-data-stag perl-libwww perl-uri))
    (native-inputs
     (list perl-test-most))
    (home-page "https://metacpan.org/release/BioPerl")
    (synopsis "Bioinformatics toolkit")
    (description
     "BioPerl is the product of a community effort to produce Perl code which
is useful in biology.  Examples include Sequence objects, Alignment objects
and database searching objects.  These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on.  This means that the objects
provide a coordinated and extensible framework to do computational biology.")
    (license license:perl-license)))

(define-public perl-bio-db-hts
  (package
    (name "perl-bio-db-hts")
    (version "3.01")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://cpan/authors/id/A/AV/AVULLO/Bio-DB-HTS-"
                    version ".tar.gz"))
              (sha256
               (base32
                "0hjg0igfkpvh27zdkdr6pa7cqm9n6r7cwz0np74cl4wmawgvr9hj"))))
    (build-system perl-build-system)
    (native-inputs
     (list perl-module-build
           pkg-config))
    (propagated-inputs
     (list bioperl-minimal
           htslib-1.9))
    (home-page "https://metacpan.org/release/Bio-DB-HTS")
    (synopsis "Perl interface to HTS library for DNA sequencing")
    (description
     "This is a Perl interface to the HTS library for DNA
sequencing.")
    (license license:asl2.0)))

(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.80")
    (source
     (origin
       (method url-fetch)
       ;; Download from PyPI instead of biopython.org; this makes updating
       ;; the package easier.
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "0hqf3jsxn2sphcx81fx7x3i69sarpjsi70fzw98f8rw7z2d5x02j"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         ;; A home directory must be set for some of the tests.
         (add-before 'check 'set-home
           (lambda _ (setenv "HOME" "/tmp"))))))
    (propagated-inputs
     (list python-numpy))
    (home-page "https://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license
     (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))

(define-public python-biopython-1.73
  (package
    (inherit python-biopython)
    (version "1.73")
    (source
     (origin
       (method url-fetch)
       ;; Download from PyPI instead of biopython.org; this makes updating
       ;; the package easier.
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "1q55jhf76z3k6is3psis0ckbki7df26x7dikpcc3vhk1vhkwribh"))))))

(define-public python-fastalite
  (package
    (name "python-fastalite")
    (version "0.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "fastalite" version))
              (sha256
               (base32
                "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
    (build-system python-build-system)
    (arguments
     ;; The test data is not part of the distributed sources.
     (list #:tests? #f))
    (home-page "https://github.com/nhoffman/fastalite")
    (synopsis "Simplest possible FASTA parser")
    (description
     "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
    (license license:expat)))

(define-public biosoup
  (package
    (name "biosoup")
    (version "0.10.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/rvaser/biosoup")
                    ;; This commit corresponds to version 0.10.0.
                    (commit "38181f09854ff42cbd9632200a2ec9fb37a4b7b6")))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "02hvyka703zagx0nvv2yx3dkc748zc8g6qbrpya7r8kfkcl7y8hw"))))
    (build-system cmake-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         ;; Run the test executable directly when tests are enabled.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "./bin/biosoup_test")))))))
    (native-inputs
     (list googletest))
    (home-page "https://github.com/rvaser/biosoup")
    (synopsis "C++ support library for bioinformatics tools")
    (description
     "Biosoup is a C++ collection of header-only data structures
used for storage and logging in bioinformatics tools.")
    (license license:expat)))

(define-public bioparser
  (package
    (name "bioparser")
    (version "3.0.13")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/rvaser/bioparser")
                    ;; This commit corresponds to tag 3.0.13.
                    (commit "13341e6e0855c6b358ffcea6dad216e1009e1287")))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0c5p2dl8jb12ci9f427jzrmmm9cgvc1k4fxsn2ggkfsin6r1r82i"))))
    (build-system cmake-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         ;; Run the test executable directly when tests are enabled.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "./bin/bioparser_test")))))))
    (inputs
     (list biosoup))
    (propagated-inputs
     (list zlib))
    (native-inputs
     (list googletest))
    (home-page "https://github.com/rvaser/bioparser")
    (synopsis "C++ library for parsing several formats in bioinformatics")
    (description
     "Bioparser is a C++ header only parsing library for several
bioinformatics formats (FASTA/Q, MHAP/PAF/SAM), with support for zlib
compressed files.")
    (license license:expat)))

(define-public circtools
  (package
    (name "circtools")
    (version "1.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Kevinzjy/circtools")
             ;; Corresponds to tag v1.0.0
             (commit "79380de59013601021ca3b1352d6f64d2fb89646")
             ;; Also fetch the repository's git submodules.
             (recursive? #t)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0wg1s927g32k25j967kfr8l30nmr4c0p4zvy5igvy7cs6chd60lh"))))
    (build-system cargo-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Make every file of the unpacked source tree writable.
         (add-after 'unpack 'make-writable
           (lambda _
             (for-each make-file-writable (find-files "."))))
         ;; Patch the CMakeLists.txt of the vendored spoa: the find_package
         ;; calls for bioparser and biosoup use CONFIG instead of QUIET, and
         ;; every occurrence of GTest_FOUND is replaced with TRUE.
         ;; NOTE(review): presumably this makes spoa pick up the packaged
         ;; bioparser, biosoup and googletest listed in the inputs below --
         ;; confirm against vendor/spoa/CMakeLists.txt.
         (add-after 'unpack 'prepare-spoa-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "vendor/spoa/CMakeLists.txt"
               (("find_package\\(bioparser 3.0.13 QUIET\\)")
                "find_package(bioparser 3.0.13 CONFIG)")
               (("find_package\\(biosoup 0.10.0 QUIET\\)")
                "find_package(biosoup 0.10.0 CONFIG)")
               (("GTest_FOUND") "TRUE")))))
       ;; Rust crates handed to the cargo build system.
       #:cargo-inputs
       (("rust-anyhow" ,rust-anyhow-1)
        ("rust-bio" ,rust-bio-0.33)
        ("rust-chrono" ,rust-chrono-0.4)
        ("rust-docopt" ,rust-docopt-1)
        ("rust-flate2" ,rust-flate2-1)
        ("rust-indicatif" ,rust-indicatif-0.15)
        ("rust-libc" ,rust-libc-0.2)
        ("rust-serde" ,rust-serde-1)
        ("rust-seq-io" ,rust-seq-io-0.3))))
    (inputs
     (list bioparser biosoup))
    (native-inputs
     (list cmake pkg-config googletest))
    (home-page "https://github.com/Kevinzjy/circtools")
    (synopsis "Accelerating functions in CIRI toolkit")
    (description "This package provides accelerated functions for the CIRI
toolkit.  It also provides the @code{ccs} executable to scan for circular
consensus sequences.")
    (license license:expat)))

(define-public ciri-long
  (package
    (name "ciri-long")
    (version "1.0.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/bioinfo-biols/CIRI-long")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "10k88i1fcqchrrjv82rmylwvbwqfba0n51palhig9hsg71xs0dbi"))
              ;; Remove the binary that is bundled with the sources.
              (snippet '(delete-file "libs/ccs"))))
    (build-system python-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         ;; Drop the argparse requirement (needed for python2 only) and
         ;; turn exact version pins into lower bounds.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               (("'argparse[^']*',") "")
               (("==") ">="))))
         ;; Build the bundled striped Smith-Waterman shared library.
         (add-before 'build 'build-libssw
           (lambda _
             (with-directory-excursion "libs/striped_smith_waterman"
               (invoke "make" "libssw.so"))))
         (add-before 'build 'fix-reference-to-ccs
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Call the ccs executable from circtools by its absolute
             ;; file name.
             (substitute* "CIRI_long/pipeline.py"
               (("'ccs -i")
                (string-append "'"
                               (assoc-ref inputs "circtools") "/bin/ccs"
                               " -i")))
             ;; Ugly: strip the os.chmod call on lib_path from main.py.
             (substitute* "CIRI_long/main.py"
               (("os.chmod\\(lib_path.*") "")))))))
    (inputs
     (list circtools
           python-biopython
           python-bwapy
           python-levenshtein
           python-mappy
           python-numpy
           python-pandas
           python-pysam
           python-pyspoa
           python-scikit-learn
           python-scipy))
    (native-inputs
     (list python-cython
           python-nose
           python-setuptools))
    (home-page "https://ciri-cookbook.readthedocs.io/")
    (synopsis "Circular RNA identification for Nanopore sequencing")
    (description
     "CIRI-long is a package for circular RNA identification using
long-read sequencing data.")
    (license license:expat)))

(define-public qtltools
  (package
    (name "qtltools")
    (version "1.3.1")
    (source (origin
              (method url-fetch/tarbomb)
              (uri (string-append "https://qtltools.github.io/qtltools/"
                                  "binaries/QTLtools_" version
                                  "_source.tar.gz"))
              (sha256
               (base32
                "13gdry5l43abn3464fmk8qzrxgxnxah2612r66p9dzhhl92j30cd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included
       ;; Tell the Makefile where the headers and libraries of Boost,
       ;; htslib and Rmath are located.
       #:make-flags
       ,#~(list (string-append "BOOST_INC="
                               #$(this-package-input "boost") "/include")
                (string-append "BOOST_LIB="
                               #$(this-package-input "boost") "/lib")
                (string-append "HTSLD_INC="
                               #$(this-package-input "htslib") "/include")
                (string-append "HTSLD_LIB="
                               #$(this-package-input "htslib") "/lib")
                (string-append "RMATH_INC="
                               #$(this-package-input "rmath-standalone")
                               "/include")
                (string-append "RMATH_LIB="
                               #$(this-package-input "rmath-standalone")
                               "/lib"))
       #:phases
       (modify-phases %standard-phases
         ;; Refer to the shared Boost libraries instead of the static
         ;; archives, and link with openblas instead of blas.
         (add-after 'unpack 'fix-linkage
           (lambda _
             (substitute* "qtltools/Makefile"
               (("libboost_iostreams.a")
                "libboost_iostreams.so")
               (("libboost_program_options.a")
                "libboost_program_options.so")
               (("-lblas") "-lopenblas"))))
         ;; The Makefile lives in the "qtltools" sub-directory; enter it
         ;; right before building.
         (add-before 'build 'chdir
           (lambda _ (chdir "qtltools")))
         ;; Instead of running a configure script, patch the Makefile: add
         ;; libcrypto and libssl to LIB_FLAGS and rewrite a LIB_FILES line
         ;; that lists static archives to list shared objects.
         (replace 'configure
           (lambda _
             (substitute* "qtltools/Makefile"
               (("LIB_FLAGS=-lz")
                "LIB_FLAGS=-lz -lcrypto -lssl")
               (("LIB_FILES=\\$\\(RMATH_LIB\\)/libRmath.a \
\\$\\(HTSLD_LIB\\)/libhts.a \
\\$\\(BOOST_LIB\\)/libboost_iostreams.a \
\\$\\(BOOST_LIB\\)/libboost_program_options.a")
                "LIB_FILES=$(RMATH_LIB)/libRmath.so \
$(HTSLD_LIB)/libhts.so \
$(BOOST_LIB)/libboost_iostreams.so \
$(BOOST_LIB)/libboost_program_options.so"))))
         ;; Copy the QTLtools executable to the "bin" directory of the
         ;; output by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bin)
               (install-file "bin/QTLtools" bin)))))))
    (inputs
     (list curl
           gsl
           boost
           rmath-standalone
           htslib-1.3
           openssl
           openblas
           zlib))
    (home-page "https://qtltools.github.io/qtltools/")
    (synopsis "Tool set for molecular QTL discovery and analysis")
    (description "QTLtools is a tool set for molecular QTL discovery
and analysis.  It allows going from the raw genetic sequence data to
a collection of molecular @dfn{Quantitative Trait Loci} (QTLs) in a few
easy-to-perform steps.")
    (license license:gpl3+)))

(define-public bpp-core
  (package
    (name "bpp-core")
    (version "2.4.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/BioPP/bpp-core")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0ma2cl677l7s0n5sffh66cy9lxp5wycm50f121g8rx85p95vkgwv"))))
    (build-system cmake-build-system)
    (native-inputs
     ;; XXX: The build fails with more recent GCC versions:
     ;; error: ‘numeric_limits’ was not declared in this scope
     (list gcc-10))
    (home-page "https://pbil.univ-lyon1.fr/bpp-doc/bpp-core/html/index.html")
    (synopsis "C++ libraries for Bioinformatics")
    (description
     "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics.  It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
    (license license:cecill-c)))

(define-public bpp-phyl
  (package
    (name "bpp-phyl")
    (version "2.4.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BioPP/bpp-phyl")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "192zks6wyk903n06c2lbsscdhkjnfwms8p7jblsmk3lvjhdipb20"))))
    (build-system cmake-build-system)
    (inputs
     (list bpp-core
           bpp-seq))
    (home-page "https://pbil.univ-lyon1.fr/bpp-doc/bpp-phyl/html/")
    (synopsis "Bio++ phylogenetic library")
    (description
     "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics.  This
library provides phylogenetics-related modules.")
    (license license:cecill-c)))

(define-public bpp-phyl-omics
  (package
    (name "bpp-phyl-omics")
    (version "2.4.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/BioPP/bpp-phyl-omics")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "172psb8njkjwg3cd6gdy5w0mq8f0817v635yw4bk7146aggjzl1h"))))
    (build-system cmake-build-system)
    (arguments
     ;; No tests are provided.
     (list #:tests? #f))
    (inputs
     (list bpp-core bpp-phyl bpp-seq bpp-seq-omics))
    (home-page "https://github.com/BioPP/bpp-phyl-omics")
    (synopsis "Bio++ phylogenetic library genomics components")
    (description
     "This library contains the genomics components of the Bio++ phylogenetics
library.  It is part of the Bio++ project.")
    (license license:cecill)))

(define-public bpp-popgen
  (package
    (name "bpp-popgen")
    (version "2.4.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BioPP/bpp-popgen")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0bz0fhrq3dri6a0hvfc3zlvrns8mrzzlnicw5pyfa812gc1qwfvh"))))
    (build-system cmake-build-system)
    (arguments
     ;; This package has no tests.
     (list #:tests? #f))
    (inputs
     (list bpp-core
           bpp-seq))
    (home-page "https://pbil.univ-lyon1.fr/bpp-doc/bpp-popgen/html/")
    (synopsis "Bio++ population genetics library")
    (description
     "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics.  This
library provides population genetics-related modules.")
    (license license:cecill-c)))

(define-public bpp-seq
  (package
    (name "bpp-seq")
    (version "2.4.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BioPP/bpp-seq")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1mc09g8jswzsa4wgrfv59jxn15ys3q8s0227p1j838wkphlwn2qk"))))
    (build-system cmake-build-system)
    (inputs (list bpp-core))
    (home-page "https://pbil.univ-lyon1.fr/bpp-doc/bpp-seq/html/")
    (synopsis "Bio++ sequence library")
    (description
     "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics.  This
library provides sequence-related modules.")
    (license license:cecill-c)))

(define-public bpp-seq-omics
  (package
    (name "bpp-seq-omics")
    (version "2.4.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BioPP/bpp-seq-omics")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1sc2xdfnfp5a6qihplp49rgrqmj89898avfy9bqaq1g2fajppgjj"))))
    (build-system cmake-build-system)
    (inputs
     (list bpp-core
           bpp-seq))
    (home-page "https://github.com/BioPP/bpp-seq-omics")
    (synopsis "Bio++ sequence library genomics components")
    (description
     "This library contains the genomics components of the Bio++ sequence library.
It is part of the Bio++ project.")
    (license license:cecill)))

(define-public bppsuite
  (package
    (name "bppsuite")
    (version "2.4.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BioPP/bppsuite")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1wdwcgczqbc3m116vakvi0129wm3acln3cfc7ivqnalwvi6lrpds"))))
    (build-system cmake-build-system)
    (arguments
     ;; This package has no tests.
     (list #:tests? #f))
    (native-inputs
     (list groff
           man-db
           texinfo))
    (inputs
     (list bpp-core
           bpp-seq
           bpp-phyl
           bpp-popgen))
    (home-page "https://github.com/BioPP")
    (synopsis "Bioinformatics tools written with the Bio++ libraries")
    (description
     "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics.  This
package provides command line tools using the Bio++ library.")
    (license license:cecill-c)))

;; NCBI BLAST+ built from the upstream source tarball.  The origin snippet
;; removes bundled bzip2, zlib and pcre and patches sources of
;; non-determinism; the build phases rewrite hard-coded tool paths and run
;; the upstream configure script by hand.  The result is split into the
;; "out", "lib" and "include" outputs.
(define-public blast+
  (package
    (name "blast+")
    (version "2.14.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
                    version "/ncbi-blast-" version "+-src.tar.gz"))
              (sha256
               (base32
                "003mn7m4y306k7visv3in3ikfgm8m41z0jq9lyvz10iv1hdpyixz"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove bundled bzip2, zlib and pcre.
                  (delete-file-recursively "c++/src/util/compress/bzip2")
                  (delete-file-recursively "c++/src/util/compress/zlib")
                  (delete-file-recursively "c++/src/util/regexp")
                  ;; Stop the Makefile from descending into the directories
                  ;; that were just deleted.
                  (substitute* "c++/src/util/compress/Makefile.in"
                    (("bzip2 zlib api") "api"))
                  ;; Remove useless msbuild directory
                  (delete-file-recursively
                   "c++/src/build-system/project_tree_builder/msbuild")

                  ;; Build reproducibly.
                  ;; Do not record the kernel version
                  (substitute* "c++/src/build-system/configure"
                    (("kver=.*") "kver=\"\""))
                  ;; Do not generate random numbers.
                  ;; Fixed NCBI_RANDOM_VALUE_0..9 definitions are appended
                  ;; right after the NCBI_RANDOM_VALUE_MAX definition, and the
                  ;; output of "cksum" is discarded.
                  (substitute* "c++/scripts/common/impl/define_random_macros.sh"
                    (("#define NCBI_RANDOM_VALUE_MAX  0xffffffffu" m)
                     (string-append m "
#define NCBI_RANDOM_VALUE_0    2845495105u
#define NCBI_RANDOM_VALUE_1    2158634051u
#define NCBI_RANDOM_VALUE_2    4072202242u
#define NCBI_RANDOM_VALUE_3    902228395u
#define NCBI_RANDOM_VALUE_4    1353323915u
#define NCBI_RANDOM_VALUE_5    574823513u
#define NCBI_RANDOM_VALUE_6    4119501261u
#define NCBI_RANDOM_VALUE_7    2477640938u
#define NCBI_RANDOM_VALUE_8    2776595395u
#define NCBI_RANDOM_VALUE_9    270550684u
"))
                    (("cksum") "cksum >/dev/null"))))))
    (build-system gnu-build-system)
    (arguments
     (list
      ;; There are two(!) tests for this massive library, and both fail with
      ;; "unparsable timing stats".
      ;; ERR [127] --  [serial/datatool] datatool.sh     (unparsable timing stats)
      ;; ERR [127] --  [serial/datatool] datatool_xml.sh     (unparsable timing stats)
      #:tests? #f
      #:out-of-source? #t
      #:parallel-build? #f              ;not supported
      #:phases
      #~(modify-phases %standard-phases
          (add-before 'configure 'set-HOME
            ;; $HOME needs to be set at some point during the configure phase
            (lambda _ (setenv "HOME" "/tmp")))
          ;; Everything that gets built lives in the "c++" subdirectory of the
          ;; unpacked tarball.
          (add-after 'unpack 'enter-dir
            (lambda _ (chdir "c++")))
          (add-after 'enter-dir 'fix-build-system
            (lambda _
              ;; Return the replacement text for the tool CMD: a fixed-epoch
              ;; invocation for "date", the absolute file name reported by
              ;; 'which' for any other tool, or #false (after printing a
              ;; warning) when CMD cannot be found.
              (define (which* cmd)
                (cond ((string=? cmd "date")
                       ;; make call to "date" deterministic
                       "date -d @0")
                      ((which cmd)
                       => identity)
                      (else
                       (format (current-error-port)
                               "WARNING: Unable to find absolute path for ~s~%"
                               cmd)
                       #false)))

              ;; Rewrite hardcoded paths to various tools
              (substitute* (append '("src/build-system/configure.ac"
                                     "src/build-system/configure"
                                     "src/build-system/helpers/run_with_lock.c"
                                     "scripts/common/impl/if_diff.sh"
                                     "scripts/common/impl/run_with_lock.sh"
                                     "src/build-system/Makefile.configurables.real"
                                     "src/build-system/Makefile.in.top"
                                     "src/build-system/Makefile.meta.gmake=no"
                                     "src/build-system/Makefile.meta.in"
                                     "src/build-system/Makefile.meta_l"
                                     "src/build-system/Makefile.meta_p"
                                     "src/build-system/Makefile.meta_r"
                                     "src/build-system/Makefile.mk.in"
                                     "src/build-system/Makefile.requirements"
                                     "src/build-system/Makefile.rules_with_autodep.in")
                                   (find-files "scripts/common/check" "\\.sh$"))
                ;; Matches "/usr/bin/CMD" and "/bin/CMD".  When CMD cannot be
                ;; resolved the matched text is left as it was.
                (("(/usr/bin/|/bin/)([a-z][-_.a-z]*(\\+\\+)?)" all dir cmd)
                 (or (which* cmd) all)))

              ;; Point LN_S at the real "ln" and drop PATH assignments from
              ;; the configure-related files.
              (substitute* (find-files "src/build-system" "^config.*")
                (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
                (("^PATH=.*") ""))

              ;; rewrite "/var/tmp" in check script
              (substitute* "scripts/common/check/check_make_unix.sh"
                (("/var/tmp") "/tmp"))

              ;; do not reset PATH
              (substitute* (find-files "scripts/common/impl/" "\\.sh$")
                (("^ *PATH=.*") "")
                (("action=/bin/") "action=")
                (("export PATH") ":"))))
          (replace 'configure
            (lambda _
              ;; Libraries and headers are installed to the separate "lib"
              ;; and "include" outputs.
              (let ((lib     (string-append #$output:lib "/lib"))
                    (include (string-append #$output:include
                                            "/include/ncbi-tools++")))
                ;; The 'configure' script doesn't recognize things like
                ;; '--enable-fast-install'.
                (invoke "./configure.orig"
                        (string-append "--with-build-root=" (getcwd) "/build")
                        (string-append "--prefix=" #$output)
                        (string-append "--libdir=" lib)
                        (string-append "--includedir=" include)
                        (string-append "--with-bz2="
                                       #$(this-package-input "bzip2"))
                        (string-append "--with-z="
                                       #$(this-package-input "zlib"))
                        (string-append "--with-pcre="
                                       #$(this-package-input "pcre"))
                        ;; Each library is built twice by default, once
                        ;; with "-static" in its name, and again
                        ;; without.
                        "--without-static"
                        "--with-dll")))))))
    (outputs '("out"       ;  21 MB
               "lib"       ; 226 MB
               "include")) ;  33 MB
    (inputs
     (list bzip2
           lmdb
           zlib
           pcre
           perl
           python-wrapper))
    (native-inputs
     (list cpio))
    (home-page "https://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly.  It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))

;; BLESS, a Bloom-filter-based error corrector for sequencing reads.  The
;; bundled third-party sources are removed in the origin snippet, the
;; package's Makefile is driven directly (there is no configure step), and the
;; "bless" and "kmc" executables are installed by hand.
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, sparsehash, zlib, and the
                  ;; .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ;no "check" target
       #:make-flags
       ;; Link against the zlib and boost inputs instead of the bundled
       ;; copies that the snippet deleted.
       ,#~(list (string-append "ZLIB="
                               #$(this-package-input "zlib")
                               "/lib/libz.so")
                (string-append "LDFLAGS="
                               (string-join '("-lboost_filesystem"
                                              "-lboost_system"
                                              "-lboost_iostreams"
                                              "-lz"
                                              "-fopenmp"))))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-build-bundled-pigz
          ;; The bundled pigz directory no longer exists, so drop the
          ;; Makefile command that would enter and build it.
          (lambda _
            (substitute* "Makefile"
              (("cd pigz/pigz-2.3.3; make") ""))))
         (add-after 'unpack 'patch-paths-to-executables
          ;; Hard-code absolute file names for the helper programs: kmc is
          ;; installed to this package's own output by the 'install phase,
          ;; pigz comes from the pigz input.
          (lambda* (#:key inputs outputs #:allow-other-keys)
            (substitute* "parse_args.cpp"
              (("kmc_binary = .*")
               (string-append "kmc_binary = \""
                              (assoc-ref outputs "out")
                              "/bin/kmc\";"))
              (("pigz_binary = .*")
               (string-append "pigz_binary = \""
                              (assoc-ref inputs "pigz")
                              "/bin/pigz\";")))))
         (replace 'install
          ;; Copy the two executables into the output's bin directory.
          (lambda* (#:key outputs #:allow-other-keys)
            (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
              (for-each (lambda (file)
                          (install-file file bin))
                        '("bless" "kmc/bin/kmc")))))
         (delete 'configure))))
    (native-inputs
     (list perl))
    (inputs
     (list openmpi boost sparsehash pigz zlib))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized bloom
filter.  BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate.  BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))

;; Bowtie 2, fetched from the upstream bowtie2 Git repository.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BenLangmead/bowtie2")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
       (modules '((guix build utils)))
       (snippet
        ;; Pin BUILD_HOST and BUILD_TIME to constants so that the build is
        ;; deterministic.
        '(begin
           (substitute* "Makefile"
             (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
             (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:make-flags
      #~(list "allall"
              "WITH_TBB=1"
              (string-append "prefix=" #$output))
      #:phases
      #~(modify-phases %standard-phases
          ;; The configure phase is replaced by a source fix-up: the
          ;; typographic quotation marks in this header are not considered
          ;; valid, so plain ASCII double quotes are substituted for them.
          (replace 'configure
            (lambda _
              (substitute* "processor_support.h"
                (("“") "\"")
                (("”") "\""))))
          ;; Run the upstream Perl test driver against the freshly built
          ;; executables in the build directory.
          (replace 'check
            (lambda _
              (invoke "perl"
                      "scripts/test/simple_tests.pl"
                      "--bowtie2=./bowtie2"
                      "--bowtie2-build=./bowtie2-build"))))))
    (native-inputs
     (list perl perl-clone perl-test-deep perl-test-simple))
    (inputs
     `(("tbb" ,tbb-2020)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences.  It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes.  Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB.  Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (license license:gpl3+)))

;; The original Bowtie short read aligner (1.x series), built straight from
;; its Makefile.
(define-public bowtie1
  (package
    (name "bowtie1")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
                           version "/bowtie-" version "-src.zip"))
       (sha256
        (base32 "0q87nhgj9wrnbazcpvqp4594hmyh1isi3s9b2wlghvl4afm1fdg2"))
       (modules '((guix build utils)))
       (snippet
        ;; Pin BUILD_HOST and BUILD_TIME to constants so that the build is
        ;; deterministic.
        '(substitute* "Makefile"
           (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
           (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f                       ; Tests need various perl modules
      #:test-target "simple-test"
      #:make-flags
      ;; POPCNT_CAPABILITY=0 is passed first, and only when the target is not
      ;; an x86 system.
      #~(list #$@(if (target-x86?)
                     '()
                     '("POPCNT_CAPABILITY=0"))
              (string-append "CC=" #$(cc-for-target))
              (string-append "CXX=" #$(cxx-for-target))
              "all"
              (string-append "prefix=" #$output))
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure))))
    (inputs
     (list python-wrapper
           tbb
           zlib))
    (supported-systems %64bit-supported-systems)
    (home-page "https://bowtie-bio.sourceforge.net/index.shtml")
    (synopsis "Fast aligner for short nucleotide sequence reads")
    (description
     "Bowtie is a fast, memory-efficient short read aligner.  It aligns short
DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
reads per hour.  Bowtie indexes the genome with a Burrows-Wheeler index to
keep its memory footprint small: typically about 2.2 GB for the human
genome (2.9 GB for paired-end).")
    (license license:artistic2.0)))

;; TopHat, built against the packaged SeqAn and samtools instead of the
;; copies bundled in the tarball.
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32 "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
       (modules '((guix build utils)))
       (snippet
        ;; Drop the bundled copies of SeqAn and samtools.
        '(begin
           (delete-file-recursively "src/SeqAn-1.4.2")
           (delete-file-recursively "src/samtools-0.1.18")))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:parallel-build? #f              ;not supported
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'use-system-samtools
            (lambda _
              ;; Take the bundled samtools library and program out of the
              ;; build rules.
              (substitute* "src/Makefile.in"
                (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ keep) keep)
                (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
                (("SAMPROG = samtools_0\\.1\\.18") "")
                (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
                (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
              ;; Refer to the samtools executable found on PATH by its
              ;; absolute file name.
              (substitute* '("src/common.cpp"
                             "src/tophat.py")
                (("samtools_0.1.18") (which "samtools")))
              ;; Include the samtools headers from the "samtools/"
              ;; directory, for both the quoted and the bracketed forms.
              (substitute* '("src/common.h"
                             "src/bam2fastx.cpp")
                (("#include \"bam.h\"") "#include <samtools/bam.h>")
                (("#include \"sam.h\"") "#include <samtools/sam.h>"))
              (substitute* '("src/bwt_map.h"
                             "src/map2gtf.h"
                             "src/align_status.h")
                (("#include <bam.h>") "#include <samtools/bam.h>")
                (("#include <sam.h>") "#include <samtools/sam.h>"))))
          (add-after 'set-paths 'hide-default-gcc
            (lambda* (#:key inputs #:allow-other-keys)
              ;; Take the default GCC's C++ header directory off
              ;; CPLUS_INCLUDE_PATH so that it cannot conflict with the GCC 5
              ;; input.
              (let* ((default-gcc (assoc-ref inputs "gcc"))
                     (unwanted    (string-append default-gcc "/include/c++"))
                     (directories (string-split
                                   (getenv "CPLUS_INCLUDE_PATH") #\:)))
                (setenv "CPLUS_INCLUDE_PATH"
                        (string-join (delete unwanted directories) ":"))))))))
    (native-inputs
     `(("gcc@5" ,gcc-5)))               ;doesn't build with later versions
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python-2)
       ("samtools" ,samtools-0.1)
       ("seqan" ,seqan-1)
       ("zlib" ,zlib)))
    (home-page "https://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method.  It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))

;; BWA, built directly from its Makefile and installed by hand (executable,
;; static library, README and manual page).
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.18")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/lh3/bwa")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1vf3iwkzxqkzhcfz2q3qyvcv3jrvbb012qy21pfgjl8lv20ywfr1"))))
    (build-system gnu-build-system)
    (arguments
     (list #:tests? #f ;no "check" target
           ;; A variable given on the 'make' command line replaces the
           ;; Makefile's own assignment, so passing only "-fcommon" would
           ;; discard whatever optimization flags the Makefile sets.  State
           ;; them explicitly next to "-fcommon".
           ;; NOTE(review): assumes upstream's default CFLAGS are "-O2 -g"
           ;; plus warning options -- confirm against the Makefile.
           #:make-flags #~(list "CFLAGS=-O2 -g -fcommon"
                                (string-append "CC=" #$(cc-for-target)))
           #:phases
           #~(modify-phases %standard-phases
               ;; Install the build products by hand.
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let* ((out (assoc-ref outputs "out"))
                          (bin (string-append out "/bin"))
                          (lib (string-append out "/lib"))
                          (doc (string-append out "/share/doc/bwa"))
                          (man (string-append out "/share/man/man1")))
                     (install-file "bwa" bin)
                     (install-file "libbwa.a" lib)
                     (install-file "README.md" doc)
                     (install-file "bwa.1" man))))
               ;; no "configure" script
               (delete 'configure))))
    (inputs (list zlib))
    (supported-systems '("x86_64-linux" "aarch64-linux"))
    (home-page "https://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome.  It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM.  The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp.  BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate.  BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))

;; BWA-PSSM, a modification of BWA.  It inherits from the bwa package and
;; reuses its build arguments, adding one source fix-up phase.
(define-public bwa-pssm
  (package
    (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pkerpedjiev/bwa-pssm")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
    (build-system gnu-build-system)
    (arguments
     (substitute-keyword-arguments (package-arguments bwa)
       ((#:phases phases #~%standard-phases)
        #~(modify-phases #$phases
            ;; Drop the "inline" qualifier from the definition of "map" in
            ;; pssm.c.
            (add-after 'unpack 'patch-C-error
              (lambda _
                (substitute* "pssm.c"
                  (("inline int map") "int map"))))))))
    (inputs
     (list gdsl zlib perl))
    ;; https://bwa-pssm.binf.ku.dk is down, and all Web Archived copies are
    ;; blank (they actually have "display:none" for some nefarious reason).
    (home-page "https://github.com/pkerpedjiev/bwa-pssm")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM).  Like many of the
existing aligners it is fast and sensitive.  Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set.  It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))

;; bwa-meth, a Python script that drives BWA for BS-Seq reads.
(define-public bwa-meth
  (package
    (name "bwa-meth")
    (version "0.2.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/brentp/bwa-meth")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0c695lkrr0996zwkibl7324wg2vxmn6522sz30xv4a9gaf0lnbh3"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'keep-references-to-bwa
           (lambda _
             (substitute* "bwameth.py"
               ;; Invoke "bwa mem" and "bwa index" through the absolute file
               ;; name of the bwa executable found on PATH at build time.
               (("bwa (mem|index)" _ command)
                (string-append (which "bwa") " " command))
               ;; There's an ill-advised check for "samtools" on PATH.
               (("^checkX.*") "")))))))
    (inputs
     (list bwa))
    (native-inputs
     (list python-toolshed))
    (home-page "https://github.com/brentp/bwa-meth")
    (synopsis "Fast and accurate alignment of BS-Seq reads")
    (description
     "BWA-Meth works for single-end reads and for paired-end reads from the
directional protocol (most common).  It uses the method employed by
methylcoder and Bismark of in silico conversion of all C's to T's in both
reference and reads.  It recovers the original read (needed to tabulate
methylation) by attaching it as a comment which BWA appends as a tag to the
read.  It performs favorably to existing aligners gauged by number of on and
off-target reads for a capture method that targets CpG-rich regions.")
    (license license:expat)))

;; bx-python, fetched from a pinned upstream Git commit.
(define-public python-bx-python
  (package
    (name "python-bx-python")
    (version "0.9.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bxlab/bx-python")
             (commit "4f4a48d3f227ae390c1b22072867ba86e347bdef")))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1c914rw8phiw7zwzngz9i9hdciz5lq53drwdbpl2bd2sf5bj2biy"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          ;; "--doctest-cython" is an unrecognized argument, so remove it
          ;; from the pytest configuration.
          (add-after 'unpack 'disable-cython-doctests
            (lambda _
              (substitute* "pytest.ini"
                (("--doctest-cython") ""))))
          ;; The tests need the Cython extensions, which therefore have to be
          ;; compiled in place before the check phase runs.
          (add-before 'check 'build-extensions
            (lambda _
              (invoke "python" "setup.py" "build_ext" "--inplace"))))))
    (native-inputs
     (list python-cython
           python-lzo
           python-pytest))
    (inputs
     (list zlib))
    (propagated-inputs
     (list python-numpy))
    (home-page "https://github.com/bxlab/bx-python")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))

(define-public python-mofax
  ;; This is a recent commit from the "dev" branch, which is much more recent
  ;; than the latest commit from the "master" branch.
  (let ((commit "4d96f8f0a5d5251847353656f523684d66c3c47a")
        (revision "0"))
    (package
      (name "python-mofax")
      (version (git-version "0.4.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/bioFAM/mofax")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1lwrw0qyvvnyiqz1l20dhcf7dxidb80cqgvk78czvdgba87yxzqx"))
                (modules '((guix build utils)))
                ;; Prevent the pyproject-build-system from guessing that flit
                ;; should be used as a builder.  This is done by blanking
                ;; every line of pyproject.toml that starts with "#".
                (snippet '(substitute* "pyproject.toml"
                            (("^#.*") "")))))
      (build-system pyproject-build-system)
      (arguments
       (list
        ;; This test is failing due to a bug. The bug has been reported to the
        ;; developers. See https://github.com/bioFAM/mofax/issues/12 for more
        ;; info.
        #:test-flags '(list "-k" "not test_get_methods")))
      (propagated-inputs (list python-h5py
                               python-matplotlib
                               python-pandas
                               python-scipy
                               python-seaborn))
      ;; python-poetry-core is only the build backend, so it belongs with the
      ;; build-time inputs rather than being propagated to users.
      (native-inputs (list python-numpy
                           python-poetry-core
                           python-pytest))
      (home-page "https://github.com/bioFAM/mofax")
      (synopsis "Load and visualize trained MOFA+ factor models")
      (description
       "Mofax is a Python package for working with trained factor models.
It provides convenience functions to load and visualize factor models trained
with MOFA+ in Python.")
      (license license:expat))))

;; MuData, fetched from the upstream Git repository at the release tag.
(define-public python-mudata
  (package
    (name "python-mudata")
    (version "0.2.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/scverse/mudata")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "02h0k1q57589r0hdv8nwg1vk7g2ljvn5g66c47fy5gdilbm3gjws"))))
    (build-system pyproject-build-system)
    (native-inputs
     (list python-flit-core
           python-numpy
           python-pytest
           python-zarr))
    (propagated-inputs
     (list python-anndata
           python-h5py
           python-pandas))
    (home-page "https://github.com/scverse/mudata")
    (synopsis "Python package for multi-omics data analysis")
    (description
     "Mudata is a Python package for multi-omics data analysis.
It is designed to provide functionality to load, process, and store multimodal
omics data.")
    (license license:bsd-3)))

;; mofapy2, fetched from the upstream Git repository at the release tag.
(define-public python-mofapy2
  (package
    (name "python-mofapy2")
    (version "0.7.1")
    (source (origin
              ;; Fetch from Git because the PyPI tarball ships without the
              ;; tests.
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/bioFAM/mofapy2")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0ahhnqk6gjrhyq286mrd5n7mxcv8l6040ffsawbjx9maqx8wbam0"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      ;; Skip the cupy test: cupy is an optional dependency that itself has
      ;; nonfree dependencies (CUDA).
      #:test-flags
      #~(list "--ignore=mofapy2/notebooks/test_cupy.py")))
    (native-inputs
     (list python-poetry-core python-pytest))
    (propagated-inputs
     (list python-anndata
           python-h5py
           python-numpy
           python-pandas
           python-scikit-learn
           python-scipy))
    (home-page "https://biofam.github.io/MOFA2/")
    (synopsis "Multi-omics factor analysis")
    (description "MOFA is a factor analysis model that provides a general
framework for the integration of multi-omic data sets in an unsupervised
fashion.  Intuitively, MOFA can be viewed as a versatile and statistically
rigorous generalization of principal component analysis to multi-omics data.
Given several data matrices with measurements of multiple -omics data types on
the same or on overlapping sets of samples, MOFA infers an interpretable
low-dimensional representation in terms of a few latent factors.  These learnt
factors represent the driving sources of variation across data modalities,
thus facilitating the identification of cellular states or disease
subgroups.")
    (license license:lgpl3)))

;; muon, fetched from the upstream Git repository at the release tag.
(define-public python-muon
  (package
    (name "python-muon")
    (version "0.1.6")
    (source (origin
              ;; Fetch from Git because the PyPI tarball ships without the
              ;; tests.
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/scverse/muon")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1kd3flgy41dc0sc71wfnirh8vk1psxgyjxkbx1zx9yskkh6anbgw"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      ;; scipy.sparse.rand gives results that differ between scipy versions
      ;; even when a random seed is provided, so skip the test that depends
      ;; on it.
      #:test-flags
      #~(list "-k" "not test_tfidf")
      #:phases
      #~(modify-phases %standard-phases
          ;; Numba caches functions on disk and needs a writable directory
          ;; for that.
          (add-before 'build 'set-numba-cache-dir
            (lambda _
              (setenv "NUMBA_CACHE_DIR" "/tmp"))))))
    (native-inputs
     (list python-flit-core
           python-pytest
           python-pytest-flake8))
    (propagated-inputs
     (list python-anndata
           python-h5py
           python-matplotlib
           python-mofapy2
           python-mudata
           python-numba
           python-numpy
           python-pandas
           python-protobuf
           python-pybedtools
           python-pysam
           python-scanpy
           python-scikit-learn
           python-seaborn
           python-tqdm
           python-umap-learn))
    (home-page "https://github.com/scverse/muon")
    (synopsis "Multimodal omics analysis framework")
    (description "muon is a multimodal omics Python framework.")
    (license license:bsd-3)))

;; Deprecated alias: the old name "python-pyega3" now refers to
;; python-ega-download-client.
(define-public python-pyega3
  (deprecated-package "python-pyega3" python-ega-download-client))

;; Pysam, built against the packaged htslib instead of the bundled copy.
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.20.0")
    (source
     (origin
       ;; Fetch from Git because the test data is missing on PyPI.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pysam-developers/pysam")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1dq6jwwm98lm30ijdgqc5xz5ppda4nj999y6qs78mhw8x0kij8gg"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the bundled htslib.
           ;; FIXME: Unbundle samtools and bcftools.
           (delete-file-recursively "htslib")))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (add-before 'build 'set-flags
            (lambda* (#:key inputs #:allow-other-keys)
              ;; Tell the build to use an external htslib and where to find
              ;; its libraries and headers.
              (let ((htslib (assoc-ref inputs "htslib")))
                (setenv "HTSLIB_MODE" "external")
                (setenv "HTSLIB_LIBRARY_DIR" (string-append htslib "/lib"))
                (setenv "HTSLIB_INCLUDE_DIR"
                        (string-append htslib "/include")))
              (setenv "LDFLAGS" "-lncurses")
              (setenv "CFLAGS" "-D_CURSES_LIB=1")))
          (replace 'check
            (lambda* (#:key tests? #:allow-other-keys)
              (when tests?
                ;; Run from the "tests" directory, outside the top of the
                ;; source tree, so python does not import from CWD.
                (with-directory-excursion "tests"
                  (setenv "HOME" "/tmp")
                  ;; Generate the test data first.
                  (invoke "make" "-C" "pysam_data")
                  (invoke "make" "-C" "cbcf_data")
                  ;; The FileHTTP test requires network access.
                  (invoke "pytest" "-k" "not FileHTTP"))))))))
    (native-inputs
     (list python-cython
           python-pytest
           ;; The dependencies below are for tests only.
           samtools
           bcftools))
    (inputs
     (list ncurses curl zlib))
    (propagated-inputs
     (list htslib))                     ; Included from installed header files.
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format.  Pysam is a lightweight wrapper of the SAMtools C API.  It
also includes an interface for tabix.")
    (license license:expat)))

;; twobitreader, fetched from the upstream git repository at the tag that
;; matches the version string.  The test phase is disabled.
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.6")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/benjschiller/twobitreader")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
    (build-system python-build-system)
    ;; Tests are not included
    (arguments '(#:tests? #f))
    (native-inputs
     (list python-sphinx))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))

;; plastid, built from a pinned upstream commit.  The test data is a separate
;; tarball (the "test-data" native input) that gets unpacked into the source
;; tree before the tests run.
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.6.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/joshuagryphon/plastid")
                    ;; The source is referenced by commit hash rather than
                    ;; by a tag derived from VERSION.
                    (commit "d97f239d73b3a7c2eff46f71928b777431891f90")))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0iccpywlpf1ws46279z9rl0l29pil0rj0g2j5nvqq7jfbnq581cf"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:test-flags
      '(list "plastid/test"
             ;; These four failures look like errors in the test wrapper
             ;; class.
             "-k" (string-append "not test_chrom_sizes"
                                 " and not test_no_crash_if_file_not_exist"
                                 " and not test_fiveprime_variable"
                                 " and not test_fiveprime_variable_from_file"))
      #:phases
      #~(modify-phases %standard-phases
          ;; Extract the separately downloaded test data tarball into
          ;; "plastid/test".
          (add-after 'unpack 'unpack-test-data
            (lambda* (#:key inputs #:allow-other-keys)
              (invoke "tar" "-C" "plastid/test"
                      "-xf" (assoc-ref inputs "test-data"))
              ;; This one requires bowtie-build
              (delete-file "plastid/test/functional/test_crossmap.py")))
          (add-after 'unpack 'patch-for-python-3.10
            (lambda _
              ;; Some classes were moved from collections to collections.abc
              ;; in Python 3.10.
              ;; The replacement drops ", Iterable" from the existing import
              ;; line and adds a separate collections.abc import after it.
              (substitute* "plastid/readers/bigbed.pyx"
                ((", Iterable")
                 "\nfrom collections.abc import Iterable"))))
          (add-before 'check 'build-extensions
            (lambda _
              ;; Cython extensions have to be built before running the tests.
              (invoke "python3" "setup.py" "build_ext" "--inplace"))))))
    (propagated-inputs
     (list python-numpy
           python-scipy
           python-pandas
           python-pysam
           python-matplotlib
           python-biopython
           python-twobitreader
           python-termcolor))
    (inputs
     (list openssl))
    ;; The labelled input style is used here because the 'unpack-test-data
    ;; phase looks up the tarball by its "test-data" label.
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("test-data"
        ,(origin
           (method url-fetch)
           (uri "https://www.dropbox.com/s/np3wlfvp6gx8tb8/2022-05-04.plastid-test-data.tar.bz2?dl=1")
           (file-name "plastid-test-data-2022-05-04.tar.bz2")
           (sha256
            (base32 "1szsji06m2r21flnvxg84jnj5zmlk6z10c9651v9ag71nxj9rbzn"))))))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))

;; TEtranscripts/TEcount.  The scripts shell out to sort, rm, Rscript and
;; bamToBed, so those invocations are rewritten to absolute store file names,
;; and the installed executables are wrapped so that they find R packages.
(define-public tetoolkit
  (package
    (name "tetoolkit")
    (version "2.2.1b")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/mhammell-laboratory/TEtranscripts")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1m3xsydakhdan9gp9mfdz7llka5g6ak91d0mbl1cmmxq9qs6an4y"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'adjust-requirements
           (lambda _
             (substitute* "setup.py"
               ;; This defunct dependency isn't required for Python 3 (see:
               ;; https://github.com/mhammell-laboratory/TEtranscripts/issues/111).
               ((".*'argparse'.*") ""))))
         ;; Replace bare command names in the scripts and modules with the
         ;; absolute file names of the corresponding inputs.
         (add-after 'unpack 'patch-invocations
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* '("bin/TEtranscripts"
                            "bin/TEcount")
               (("'sort ")
                (string-append "'" (search-input-file inputs "bin/sort") " "))
               (("'rm -f ")
                (string-append "'" (search-input-file inputs "bin/rm") " -f "))
               (("'Rscript'")
                (string-append "'" (search-input-file inputs "bin/Rscript")
                               "'")))
             (substitute* "TEToolkit/IO/ReadInputs.py"
               (("BamToBED")
                (search-input-file inputs "bin/bamToBed")))
             (substitute* "TEToolkit/Normalization.py"
               (("\"Rscript\"")
                (string-append "\"" (search-input-file inputs "bin/Rscript")
                               "\"")))))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure the executables find R packages.
             (for-each (lambda (script)
                         (wrap-program script
                           `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                       (list (search-input-file outputs "bin/TEtranscripts")
                             (search-input-file outputs "bin/TEcount"))))))))
    (inputs
     (list bash-minimal
           coreutils
           bedtools
           python-pysam
           r-minimal
           r-deseq2))
    (home-page "https://github.com/mhammell-laboratory/TEtranscripts")
    (synopsis "Transposable elements in differential enrichment analysis")
    (description
     "This is a package for including transposable elements in differential
enrichment analysis of sequencing datasets.  TEtranscripts and TEcount take
RNA-seq (and similar data) and annotate reads to both genes and transposable
elements.  TEtranscripts then performs differential analysis using DESeq2.
Note that TEtranscripts and TEcount rely on specially curated GTF files, which
are not included due to their size.")
    (license license:gpl3+)))

;; CD-HIT, built from the upstream release tarball with plain "make"; there
;; is no configure step and no test suite.
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.8.1")
    (source (origin
              (method url-fetch)
              ;; The tarball name carries a date suffix in addition to the
              ;; version.
              (uri (string-append "https://github.com/weizhongli/cdhit"
                                  "/releases/download/V" version
                                  "/cd-hit-v" version
                                  "-2019-0228.tar.gz"))
              (sha256
               (base32
                "1phmfhgcpyfd6kj7jwzw976613lcpv1wc2pzfdfaxla062x2s5r6"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f                       ; there are no tests
      #:make-flags
      ;; Executables are copied directly to the PREFIX.
      #~(list (string-append "PREFIX=" #$output "/bin")
              ;; Support longer sequences (e.g. Pacbio sequences)
              "MAX_SEQ=60000000")
      #:phases
      '(modify-phases %standard-phases
         ;; No "configure" script
         (delete 'configure)
         ;; Remove sources of non-determinism
         ;; (the __DATE__ and __TIME__ macros that the sources embed).
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))))
         ;; The "install" target does not create the target directory.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (mkdir-p (string-append (assoc-ref outputs "out") "/bin")))))))
    (inputs
     (list perl zlib))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences.  CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))

;; Variant of cd-hit that builds the programs in the "cd-hit-auxtools"
;; subdirectory of the same source and installs them by hand.
(define-public cd-hit-auxtools
  (package
    (inherit cd-hit)
    (name "cd-hit-auxtools")
    (arguments
     (list
      #:tests? #f                       ;there are no tests
      #:phases
      #~(modify-phases %standard-phases
          ;; Build from within the "cd-hit-auxtools" subdirectory.
          (add-after 'unpack 'chdir
            (lambda _
              (chdir "cd-hit-auxtools")))
          ;; No "configure" script
          (delete 'configure)
          ;; There is no install target, so copy the three programs to the
          ;; output's bin directory.
          (replace 'install
            (lambda _
              (let ((bin (string-append #$output "/bin")))
                (for-each (lambda (program)
                            (install-file program bin))
                          (list "cd-hit-dup" "cd-hit-lap" "read-linker"))))))))
    ;; Override the inherited inputs with an empty list.
    (inputs (list))))

;; CLIPper.  The pre-compiled peaks.so shipped in the repository is removed
;; in the source snippet.  Tests are disabled; the test-related phases below
;; only take effect if #:tests? is turned back on.
(define-public clipper
  (package
    (name "clipper")
    (version "2.0.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/YeoLab/clipper")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0508rgnfjk5ar5d1mjbjyrnarv4kw9ksq0m3jw2bmgabmb5v6ikk"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete pre-compiled files.
                  (delete-file "clipper/src/peaks.so")))))
    (build-system python-build-system)
    (arguments
     ;; Tests are disabled; see the comment above the 'check phase.
     `(#:tests? #false
       #:phases
       (modify-phases %standard-phases
         ;; Insert Cython "language_level" directives for both extensions
         ;; just before the setup() call in setup.py.
         (add-after 'unpack 'use-python3-for-cython
           (lambda _
             (substitute* "setup.py"
               (("^setup")
                "\
peaks.cython_directives = {'language_level': '3'}
readsToWiggle.cython_directives = {'language_level': '3'}
setup"))))
         (add-after 'unpack 'disable-nondeterministic-test
           (lambda _
             ;; This test fails/succeeds non-deterministically.
             (substitute* "clipper/test/test_call_peak.py"
               (("test_get_FDR_cutoff_mean") "_test_get_FDR_cutoff_mean"))))
         ;; This doesn't work because "usage" is executed, and that calls
         ;; exit(8).
         (replace 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (with-directory-excursion "clipper/test"
                 (invoke "python" "-m" "unittest")))))
         ;; This is not a library
         (delete 'sanity-check))))
    (inputs
     (list htseq
           python-pybedtools
           python-cython
           python-scikit-learn
           python-matplotlib
           python-pandas
           python-pysam
           python-numpy
           python-scipy))
    (native-inputs
     (list python-setuptools-git
           python-mock ; for tests
           python-nose ; for tests
           python-pytz)) ; for tests
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))

;; CodingQuarry.  There is no configure script and no install target; the
;; install phase copies the programs, the manual and the "QuarryFiles"
;; directory into the output by hand.
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/codingquarry/CodingQuarry_v"
                    version ".tar.gz"))
              (sha256
               (base32
                "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f                       ;no "check" target
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)
          (replace 'install
            (lambda _
              (let ((share (string-append #$output "/share"))
                    (bin (string-append #$output "/bin")))
                ;; The executable and the helper script go to bin.
                (for-each (lambda (program)
                            (install-file program bin))
                          '("CodingQuarry"
                            "CufflinksGTF_to_CodingQuarryGFF3.py"))
                (install-file "INSTRUCTIONS.pdf"
                              (string-append share "/doc/codingquarry"))
                ;; This is the directory that QUARRY_PATH (see the search
                ;; path specification below) refers to.
                (copy-recursively "QuarryFiles"
                                  (string-append
                                   share "/codingquarry/QuarryFiles"))))))))
    ;; TODO: This package also needs a Python 2 variant of biopython
    (inputs (list openmpi python-2)) ;Only Python 2 is supported
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("share/codingquarry/QuarryFiles")))))
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (license license:gpl3+)))

;; Clustal Omega, built from the upstream release tarball with the default
;; gnu-build-system phases; argtable is its only declared input.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs
     (list argtable))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA.  It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))

;; CrossMap, built from the PyPI source distribution after removing the
;; compiled Python files and the ".eggs" directory that it ships.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.6.1")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "CrossMap" version))
              (sha256
               (base32
                "0hqminh5wn1p3x481jbyc7gmncp5xc196hpvki7k25vzbryhwcix"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete compiled Python files.
                  (for-each delete-file (find-files "." "\\.pyc$"))
                  ;; Also remove the ".eggs" directory from the tarball.
                  (delete-file-recursively ".eggs")))))
    (build-system python-build-system)
    (inputs
     (list python-bx-python python-numpy python-pybigwig python-pysam
           zlib))
    (native-inputs
     (list python-cython python-nose))
    (home-page "https://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies.  It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))

;; dnaio, built from the PyPI source distribution with tests disabled.
(define-public python-dnaio
  (package
    (name "python-dnaio")
    (version "0.10.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "dnaio" version))
       (sha256
        (base32
         "064xc4j8plb4fpkm8mw55715mvpvi2sxsknpjx18c2zh904salfy"))))
    (build-system pyproject-build-system)
    (arguments
     (list #:tests? #false)) ;there are none
    (native-inputs
     (list python-cython python-pytest python-setuptools-scm))
    (propagated-inputs
     (list python-xopen))
    (home-page "https://github.com/marcelm/dnaio/")
    (synopsis "Read FASTA and FASTQ files efficiently")
    (description
     "dnaio is a Python library for fast parsing of FASTQ and also FASTA
files.  The code was previously part of the cutadapt tool.")
    (license license:expat)))

;; deeptoolsintervals, built from the PyPI source distribution with the
;; default python-build-system phases; zlib is its only declared input.
(define-public python-deeptoolsintervals
  (package
    (name "python-deeptoolsintervals")
    (version "0.1.9")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "deeptoolsintervals" version))
              (sha256
               (base32
                "1xnl80nblysj6dylj4683wgrfa425rkx4dp5k65hvwdns9pw753x"))))
    (build-system python-build-system)
    (inputs
     (list zlib))
    (home-page "https://github.com/deeptools/deeptools_intervals")
    (synopsis "Create GTF-based interval trees with associated meta-data")
    (description
     "This package provides a Python module creating/accessing GTF-based
interval trees with associated meta-data.  It is primarily used by the
@code{deeptools} package.")
    (license license:expat)))

;; deepTools, built from the upstream git repository at the tag that matches
;; the version string.
(define-public python-deeptools
  (package
    (name "python-deeptools")
    (version "3.5.5")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/deeptools/deepTools")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0mgcs03amrd5157drbm6ikdg0m0szrn9xbflariz2zrrnqpsai6s"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'fix-test
            (lambda _
              ;; Make the expected program name in the version test
              ;; ".NAME-real".
              ;; NOTE(review): presumably this matches the name of the
              ;; wrapped executables that Guix installs -- confirm.
              (substitute* "deeptools/test/test_tools.py"
                (("e_ver = _p")
                 "e_ver = \".\" + _p + \"-real\""))
              ;; Use the "%(prog)s" placeholder in place of the hard-coded
              ;; program name in these two version strings.
              (substitute* "deeptools/multiBigwigSummary.py"
                (("version='multiBigwigSummary")
                 "version='%(prog)s"))
              (substitute* "deeptools/plotCoverage.py"
                (("version='plotCoverage")
                 "version='%(prog)s")))))))
    (native-inputs
     (list python-mock python-pytest))
    (propagated-inputs
     (list python-matplotlib
           python-numpy
           python-numpydoc
           python-py2bit
           python-pybigwig
           python-pysam
           python-scipy
           python-deeptoolsintervals
           python-plotly))
    (home-page "https://pypi.org/project/deepTools/")
    (synopsis "Useful tools for exploring deep sequencing data")
    (description "This package addresses the challenge of handling large amounts
of data that are now routinely generated from DNA sequencing centers.
@code{deepTools} contains useful modules to process the mapped reads data for
multiple quality checks, creating normalized coverage files in standard bedGraph
and bigWig file formats, that allow comparison between different files.  Finally,
using such normalized and standardized files, deepTools can create many
publication-ready visualizations to identify enrichments and for functional
annotations of the genome.")
    ;; The file deeptools/cm.py is licensed under the BSD license.  The
    ;; remainder of the code is licensed under the MIT license.
    (license (list license:bsd-3 license:expat))))

;; cutadapt, built from the PyPI source distribution.  One test is
;; deselected and tests/test_command.py is patched so that the environment
;; it passes along includes GUIX_PYTHONPATH.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "4.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "cutadapt" version))
              (sha256
               (base32
                "0xgsv88mrlw2b1radmd1104y7bg8hvv54ay7xfdpnjiw2jgkrha9"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:test-flags
      '(list "-k" "not test_no_read_only_comment_fasta_input")
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'fix-test
            (lambda _
              ;; SITE is this package's own site-packages directory; the
              ;; Python major+minor version is taken from the "python"
              ;; package on the host side.
              (let ((site (string-append
                           #$output "/lib/python"
                           #$(version-major+minor
                              (package-version python))
                           "/site-packages")))
                ;; Extend the env dictionary in the test with a
                ;; GUIX_PYTHONPATH entry made of the current value followed
                ;; by SITE.
                (substitute* "tests/test_command.py"
                  (("env=\\{\"LC_CTYPE\": \"C\"\\},")
                   (string-append "env={\"LC_CTYPE\": \"C\", \"GUIX_PYTHONPATH\": \""
                                  (getenv "GUIX_PYTHONPATH") ":" site
                                  "\"},")))))))))
    (inputs
     (list python-dnaio python-xopen))
    (native-inputs
     (list python-cython
           python-pytest
           python-pytest-mock
           python-pytest-timeout
           python-setuptools-scm))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))

;; LAMMPS, MPI build.  The "mpi" make target is built from the "src"
;; directory and the resulting "lmp_mpi" executable is installed by hand.
(define-public lammps
  (let ((commit "stable_2Aug2023_update2"))
    (package
      (name "lammps")
      ;; The version string is the commit reference name behind a "0."
      ;; prefix.
      (version (string-append "0." commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/lammps/lammps.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "11xagacgxgldkx34qdzyjrjvn8x3hpl0kgzhh9zh7skpq79pwycz"))))
      (build-system gnu-build-system)
      (arguments
       (list
        #:tests? #f                     ;no check target
        #:make-flags
        '(list "CC=mpicc" "mpi"
               "LMP_INC=-DLAMMPS_GZIP \
-DLAMMPS_JPEG -DLAMMPS_PNG -DLAMMPS_FFMPEG -DLAMMPS_MEMALIGN=64"
               "LIB=-gz -ljpeg -lpng -lavcodec")
        #:phases
        #~(modify-phases %standard-phases
            (add-after 'unpack 'chdir
              (lambda _
                (chdir "src")))
            ;; Instead of running a configure script, patch the makefiles:
            ;; point SHELL at the bash found in PATH and use mpicc as the
            ;; compiler.
            (replace 'configure
              (lambda _
                (let ((shell-line
                       (string-append "SHELL=" (which "bash") "\n")))
                  (substitute* "MAKE/Makefile.mpi"
                    (("SHELL =.*") shell-line)
                    (("cc ") "mpicc "))
                  (substitute* "Makefile"
                    (("SHELL =.*") shell-line)))))
            ;; Run the "yes-*" make targets before the main build.
            ;; NOTE(review): presumably these enable the corresponding
            ;; optional LAMMPS packages -- confirm against upstream docs.
            (add-after 'configure 'configure-modules
              (lambda _
                (invoke "make"
                        "yes-molecule"
                        "yes-misc"
                        "yes-granular"
                        (string-append "HDF5_PATH="
                                       #$(this-package-input "hdf5")))))
            (replace 'install
              (lambda _
                (let ((bin (string-append #$output "/bin")))
                  (mkdir-p bin)
                  (install-file "lmp_mpi" bin)))))))
      (inputs
       (list ffmpeg
             gfortran
             gzip
             hdf5
             libjpeg-turbo
             libpng
             openmpi
             python-wrapper))
      (native-inputs (list bc))
      (home-page "https://www.lammps.org/")
      (synopsis "Classical molecular dynamics simulator")
      (description "LAMMPS is a classical molecular dynamics simulator
designed to run efficiently on parallel computers.  LAMMPS has potentials for
solid-state materials (metals, semiconductors), soft matter (biomolecules,
polymers), and coarse-grained or mesoscopic systems.  It can be used to model
atoms or, more generically, as a parallel particle simulator at the atomic,
meso, or continuum scale.")
      (license license:gpl2+))))

;; Serial variant of lammps: builds the "serial" make target with gcc,
;; installs "lmp_serial" and drops the openmpi input.
(define-public lammps-serial
  (package
    (inherit lammps)
    (name "lammps-serial")
    (arguments
     (substitute-keyword-arguments (package-arguments lammps)
       ;; The inherited flags are replaced wholesale.
       ((#:make-flags flags)
        '(list "CC=gcc" "serial"
               "LMP_INC=-DLAMMPS_GZIP \
-DLAMMPS_JPEG -DLAMMPS_PNG -DLAMMPS_FFMPEG -DLAMMPS_MEMALIGN=64"
               "LIB=-gz -ljpeg -lpng -lavcodec"))
       ((#:phases phases)
        #~(modify-phases #$phases
            ;; Same makefile patching as in lammps, but applied to
            ;; Makefile.serial and with gcc as the compiler.
            (replace 'configure
              (lambda _
                (let ((shell-line
                       (string-append "SHELL=" (which "bash") "\n")))
                  (substitute* "MAKE/Makefile.serial"
                    (("SHELL =.*") shell-line)
                    (("cc ") "gcc "))
                  (substitute* "Makefile"
                    (("SHELL =.*") shell-line)))))
            (replace 'install
              (lambda _
                (let ((bin (string-append #$output "/bin")))
                  (mkdir-p bin)
                  (install-file "lmp_serial" bin))))))))
    (inputs
     (modify-inputs (package-inputs lammps)
       (delete "openmpi")))))

;; libBigWig, built with plain "make" (the configure phase is deleted).  The
;; compiler and the installation prefix are passed as make flags.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.4.4")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/dpryan79/libBigWig")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "09693dmf1scdac5pyq6qyn8b4mcipvnmc370k9a5z41z81m3dcsj"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:test-target "test"
      #:tests? #f                       ;tests require access to the web
      #:make-flags
      ;; Use the compiler for the target system instead of a hard-coded
      ;; "gcc" so that cross-compilation picks the right tool; for native
      ;; builds this is still "gcc".
      #~(list (string-append "CC=" #$(cc-for-target))
              (string-append "prefix=" #$output))
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure))))
    (inputs
     (list zlib curl))
    (native-inputs
     (list doxygen
           ;; Needed for tests.
           python-2))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))

;; LS-GKM.  The build runs "make" in the "src" directory; the configure
;; phase is deleted and the two resulting programs are installed by hand.
(define-public lsgkm
  (package
    (name "lsgkm")
    (version "0.1.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Dongwon-Lee/lsgkm.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0b3m94kndvimdfjaf1q2yhmsn7lm5s9v81c5xgfjcp6ig7mh3sa5"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #false                   ;there are no executable tests
      #:make-flags '(list "-C" "src")
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)
          ;; Copy the two programs from "src" to the output's bin directory.
          (replace 'install
            (lambda _
              (let ((bin (string-append #$output "/bin")))
                (install-file "src/gkmtrain" bin)
                (install-file "src/gkmpredict" bin)))))))
    (home-page "https://github.com/Dongwon-Lee/lsgkm")
    (synopsis "Predict regulatory DNA elements in large-scale data")
    (description "gkm-SVM, a sequence-based method for predicting regulatory
DNA elements, is a useful tool for studying gene regulatory mechanisms.
LS-GKM is an effort to improve the method.  It offers much better scalability
and provides further advanced gapped k-mer based kernel functions.  As a
result, LS-GKM achieves considerably higher accuracy than the original
gkm-SVM.")
    (license license:gpl3+)))

;; fcsparser, built from the PyPI source distribution with the default
;; pyproject-build-system phases.
(define-public python-fcsparser
  (package
    (name "python-fcsparser")
    (version "0.2.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "fcsparser" version))
       (sha256
        (base32 "1skk1k8phq9sj4ar0cnq8px89y3kcyh5zrbl6anz9wcdcyzkc16z"))))
    (build-system pyproject-build-system)
    (propagated-inputs (list python-numpy python-pandas))
    (native-inputs (list python-poetry-core python-pytest))
    (home-page "https://github.com/eyurtsev/fcsparser")
    (synopsis "Package for reading raw fcs files")
    ;; Descriptions are full sentences and end with a period.
    (description
     "This package provides a Python package for reading raw fcs files.")
    (license license:expat)))

;; pyBigWig, built from the PyPI source distribution with the bundled
;; libBigWig sources removed; setup.py is patched to link against the
;; libbigwig input instead.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.3.22")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pyBigWig" version))
              (sha256
               (base32
                "0hr25lkp26mk0fp7irdjdrdsd5lann9kyv0xq9npyyxxakvjci2x"))
              (modules '((guix build utils)))
              (snippet
               ;; Delete bundled libBigWig sources
               '(delete-file-recursively "libBigWig"))))
    (build-system pyproject-build-system)
    (arguments
     '(#:tests? #false      ;only one test exists and it needs internet access
       #:phases
       (modify-phases %standard-phases
         ;; Prepend "BigWig" to the list of libraries in setup.py.
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", ")))))))
    (propagated-inputs
     (list python-numpy))
    (inputs
     (list libbigwig zlib curl))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))

;; pyfasta, built from a pinned git commit.  The sources are patched in the
;; 'python3.10-compat phase, which replaces Python 2 constructs (implicit
;; relative imports, iteritems, cPickle, long, buffer, sys.maxint,
;; cStringIO) in the library and its tests.
(define-public python-pyfasta
  ;; The release on pypi does not contain the test data files.
  (let ((commit "c2f0611c5311f1b1466f2d56560447898b4a8b03")
        (revision "1"))
    (package
      (name "python-pyfasta")
      (version (git-version "0.5.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/brentp/pyfasta")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0a189id3fbv88gssyk6adbmz2ll1mqpmyw8vxmx3fi955gvaq9j7"))))
      (build-system pyproject-build-system)
      (arguments
       (list
        #:phases
        '(modify-phases %standard-phases
           (add-after 'unpack 'python3.10-compat
             (lambda _
               ;; Turn implicit relative imports into absolute ones and
               ;; replace dict.iteritems.
               (substitute* "pyfasta/__init__.py"
                 (("from fasta import")
                  "from pyfasta.fasta import")
                 (("from records import")
                  "from pyfasta.records import")
                 (("from split_fasta import")
                  "from pyfasta.split_fasta import")
                 (("in f.iteritems")
                  "in f.items"))
               ;; Mapping is imported from collections.abc instead of
               ;; collections.
               (substitute* "pyfasta/fasta.py"
                 (("from collections import Mapping")
                  "from collections.abc import Mapping")
                 (("from records import")
                  "from pyfasta.records import"))
               (substitute* "pyfasta/records.py"
                 (("cPickle") "pickle")
                 (("\\(int, long\\)")
                  "(int, int)")
                 ;; XXX: it's not clear if this is really correct.
                 (("buffer\\(self\\)")
                  "memoryview(bytes(str(self), encoding='utf-8'))")
                 (("sys.maxint") "sys.maxsize"))
               (substitute* "pyfasta/split_fasta.py"
                 (("from cStringIO import")
                  "from io import")
                 (("in lens.iteritems") "in lens.items"))
               ;; Adapt the tests as well; the last clause replaces the
               ;; relative "tests/data/" prefix with one that starts at the
               ;; current working directory.
               (substitute* "tests/test_all.py"
                 (("f.keys\\(\\)\\) == \\['a-extra'")
                  "list(f.keys())) == ['a-extra'")
                 (("f.iterkeys\\(\\)") "iter(f.keys())")
                 (("tests/data/" m)
                  (string-append (getcwd) "/" m))))))))
      (propagated-inputs (list python-numpy))
      (native-inputs (list python-nose))
      (home-page "https://github.com/brentp/pyfasta/")
      (synopsis "Pythonic access to fasta sequence files")
      (description
       "This library provides fast, memory-efficient, pythonic (and
command-line) access to fasta sequence files.  It stores a flattened version
of a fasta sequence file without spaces or headers and uses either a
@code{mmap} in numpy binary format or @code{fseek}/@code{fread} so the
sequence data is never read into memory.  It saves a pickle (@code{.gdx}) of
the start and stop (for @code{fseek}/@code{mmap}) locations of each header in
the fasta file for internal use.

Note that this package has been deprecated in favor of @code{pyfaidx}.")
      (license license:expat))))

;; Schema Salad, fetched from PyPI and built with the pyproject build system.
(define-public python-schema-salad
  (package
    (name "python-schema-salad")
    (version "8.5.20240102191335")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "schema-salad" version))
              (sha256
               (base32
                "035202p696i3jylb8b3nm9qcxsqby15hhqn1dl4nrz73a17p0ckx"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'set-version
            (lambda _
              ;; Replace the "use_scm_version" setting in setup.py with the
              ;; exact version of this package.
              (substitute* "setup.py"
                (("use_scm_version=True")
                 (string-append "version=\"" #$version "\"")))))
          (add-before 'check 'skip-failing-tests
            (lambda _
              ;; Return DEFINITION, the matched start of a test function,
              ;; preceded by a pytest "skip" decorator.  Every test disabled
              ;; below requires network access.
              (define (skip-test definition)
                (string-append "@pytest.mark.skip(reason="
                               "\"test requires network access\")\n"
                               definition))

              (substitute* "schema_salad/tests/test_cg.py"
                (("^def test_(load(_by_yaml_metaschema|_metaschema|_cwlschema|)|include|idmap|idmap2)\\(" definition)
                 (skip-test definition)))
              (substitute* "schema_salad/tests/test_cwl11.py"
                (("^def test_(secondaryFiles|outputBinding|yaml_tab_error)\\(" definition)
                 (skip-test definition)))
              (substitute* "schema_salad/tests/test_examples.py"
                (("^def test_bad_schemas\\(" definition)
                 (skip-test definition))))))))
    (native-inputs
     (list python-black
           python-pytest
           python-pytest-runner
           python-pytest-xdist))
    (propagated-inputs
     (list python-cachecontrol
           python-importlib-resources
           python-mistune-next
           python-mypy-extensions
           python-rdflib
           python-requests
           python-ruamel.yaml))
    (home-page "https://github.com/common-workflow-language/schema_salad")
    (synopsis "Schema Annotations for Linked Avro Data (SALAD)")
    (description
     "Salad is a schema language for describing JSON or YAML structured linked
data documents.  Salad schema describes rules for preprocessing, structural
validation, and hyperlink checking for documents described by a Salad schema.
Salad supports rich data modeling with inheritance, template specialization,
object identifiers, object references, documentation generation, code
generation, and transformation to RDF.  Salad provides a bridge between document
and record oriented data modeling and the Semantic Web.")
    (license license:asl2.0)))

;; scikit-bio, fetched from PyPI.  The tests are run through the "skbio.test"
;; module after the Cython extensions have been built in place.
(define-public python-scikit-bio
  (package
    (name "python-scikit-bio")
    (version "0.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "scikit-bio" version))
       (sha256
        (base32 "03y1n91p6m44hhxm3rpb355j6ddalydz49s94h85kbhm7iy5l40h"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:test-flags
      #~(list "-k"
              (string-append
               ;; Accuracy problem.
               "not test_fisher_alpha"
               ;; UNEXPECTED EXCEPTION: ValueError("could not convert string
               ;; to float: 'gut'")
               " and not skbio.diversity"))
      #:phases
      #~(modify-phases %standard-phases
          (add-before 'check 'build-extensions
            (lambda _
              ;; The tests need the Cython extensions, so build them first.
              (invoke "python3" "setup.py" "build_ext" "--inplace")))
          (replace 'check
            (lambda* (#:key tests? test-flags #:allow-other-keys)
              ;; Pass the test flags on to the "skbio.test" module.
              (when tests?
                (apply invoke "python3" "-m" "skbio.test" test-flags)))))))
    (native-inputs
     (list python-coverage
           python-pytest))
    (propagated-inputs
     (list python-biom-format
           python-decorator
           python-h5py
           python-hdmedians
           python-natsort
           python-numpy
           python-pandas
           python-requests
           python-scipy))
    (home-page "https://scikit-bio.org")
    (synopsis
     "Data structures, algorithms and educational resources for bioinformatics")
    (description
     "This package provides data structures, algorithms and educational
resources for bioinformatics.")
    (license license:bsd-3)))

;; Scrublet, fetched from PyPI and built with the pyproject build system.
(define-public python-scrublet
  (package
    (name "python-scrublet")
    (version "0.2.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "scrublet" version))
              (sha256
               (base32
                "0fk5pwk12yz9wpvwkl6j2l2g28f3x35b9r9n5bw6f0i9f0qgd191"))))
    (build-system pyproject-build-system)
    (arguments '(#:tests? #false)) ;there are none
    (propagated-inputs
     (list python-annoy
           python-cython
           python-matplotlib
           python-numba
           python-numpy
           python-pandas
           python-scikit-image
           python-scikit-learn
           python-scipy
           python-umap-learn))
    (home-page "https://github.com/swolock/scrublet")
    ;; The synopsis is user-visible text (shown by "guix search" and
    ;; "guix show"), so spelling matters here.
    (synopsis "Tool to identify and remove doublets in single-cell data")
    (description "This package provides a tool for identifying and removing
doublets in single-cell RNA-seq data.")
    (license license:expat)))

(define-public python-cwlformat
  (package
    (name "python-cwlformat")
    (version "2022.02.18")
    ;; Fetch from the Git repository instead of PyPI: the PyPI tarball is
    ;; missing Readme.md, which is required for the build.
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/rabix/cwl-format")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0agkz2w86k91rc9m5vx5hsqi5nm6fcmzkng6j99hjapz0r9233ql"))))
    (build-system pyproject-build-system)
    (propagated-inputs
     (list python-importlib-resources python-ruamel.yaml))
    (home-page "https://github.com/rabix/cwl-format")
    (synopsis "Prettifier for CWL code")
    (description
     "@code{python-cwlformat} is a specification and a reference
implementation for a very opinionated @acronym{CWL, Common Workflow Language}
code formatter.  It outputs CWL in a standardized YAML format.")
    (license license:asl2.0)))

;; cwl-upgrader, fetched from PyPI and built with the pyproject build system.
(define-public python-cwl-upgrader
  (package
    (name "python-cwl-upgrader")
    (version "1.2.11")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "cwl-upgrader" version))
              (sha256
               (base32
                "12j6z8nvwnzjjyypz59hwj5hmrcri2r6aknw52n9dbj6lbzbdd2p"))))
    (build-system pyproject-build-system)
    (native-inputs (list python-pytest))
    (propagated-inputs
     (list python-ruamel.yaml python-schema-salad))
    (home-page "https://github.com/common-workflow-language/cwl-upgrader")
    (synopsis "CWL document upgrader")
    (description
     "@code{python-cwl-upgrader} is a standalone upgrader for
@acronym{CWL, Common Workflow Language} documents from version draft-3, v1.0,
and v1.1 to v1.2.")
    (license license:asl2.0)))

;; cwl-utils, fetched from PyPI and built with the pyproject build system.
(define-public python-cwl-utils
  (package
    (name "python-cwl-utils")
    (version "0.32")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "cwl-utils" version))
              (sha256
               (base32
                "06wkw8d8cqm3hnz8xwnysz874gwaym36c358cr7frw5iglhvsj98"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:test-flags
      ;; Deselect the tests named below.  Joining with " and not " produces a
      ;; single pytest "-k" expression: "not A and not B and not ...".
      #~(list "-k"
              (string-join
               (list "not test_graph_split"
                     "test_load_document_with_remote_uri"
                     "test_remote_packing"
                     "test_remote_packing_github_soft_links"
                     "test_value_from_two_concatenated_expressions")
               " and not "))))
    (native-inputs
     (list python-mypy-extensions
           python-pytest
           python-pytest-mock
           python-pytest-runner))
    (inputs (list node))
    (propagated-inputs
     (list python-cwl-upgrader
           python-cwlformat
           python-packaging
           python-rdflib
           python-requests
           python-ruamel.yaml
           python-schema-salad))
    (home-page "https://github.com/common-workflow-language/cwl-utils")
    (synopsis "Python utilities for CWL")
    (description
     "@code{python-cwl-utils} provides python utilities and
autogenerated classes for loading and parsing CWL v1.0, CWL v1.1, and CWL v1.2
documents.")
    (license license:asl2.0)))

;; cwltool, built from a Git checkout with the pyproject build system.
(define-public cwltool
  (package
    (name "cwltool")
    (version "3.1.20240112164112")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/common-workflow-language/cwltool")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1fpc5kqgpbn48g5vlvy64p297x2wm3gfz8casgpk15ap593wwh33"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'loosen-version-restrictions
            (lambda _
              ;; Relax the exact version pin on prov.
              (substitute* "setup.py"
                (("== 1.5.1") "> 1.5.1"))))
          (add-after 'unpack 'set-version
            (lambda _
              ;; Replace the "use_scm_version" setting in setup.py with the
              ;; exact version of this package.
              (substitute* "setup.py"
                (("use_scm_version=True")
                 (string-append "version=\"" #$version "\"")))))
          (add-after 'unpack 'modify-tests
            (lambda _
              ;; Return DEFINITION, the matched start of a test function,
              ;; preceded by a pytest "skip" decorator.
              (define (disable definition)
                (string-append "@pytest.mark.skip(reason=\"Disabled by Guix\")\n"
                               definition))

              ;; Tries to connect to the internet.
              (for-each delete-file
                        '("tests/test_content_type.py"
                          "tests/test_udocker.py"
                          "tests/test_http_input.py"))
              (substitute* "tests/test_load_tool.py"
                (("def test_load_graph_fragment_from_packed" definition)
                 (disable definition)))
              (substitute* "tests/test_examples.py"
                (("def test_env_filtering" definition)
                 (disable definition))
                ;; Tries to use cwl-runners.
                (("def test_v1_0_arg_empty_prefix_separate_false" definition)
                 (disable definition)))
              ;; Replace the hard-coded "/bin/sh" in these test workflows with
              ;; the "sh" found on PATH.
              (substitute* '("tests/subgraph/env-tool2.cwl"
                             "tests/subgraph/env-tool2_req.cwl"
                             "tests/subgraph/env-wf2_subwf-packed.cwl"
                             "tests/subgraph/env-tool2_no_env.cwl")
                (("\"/bin/sh\"") (string-append "\"" (which "sh") "\""))))))))
    (native-inputs
     (list python-arcp
           python-humanfriendly
           python-mock
           python-pytest
           python-pytest-cov
           python-pytest-mock
           python-pytest-runner
           python-pytest-xdist))
    (inputs
     (list python-argcomplete
           python-bagit
           python-coloredlogs
           python-cwl-utils
           python-mypy-extensions
           python-prov
           python-pydot
           python-psutil
           python-rdflib
           python-requests
           python-ruamel.yaml
           python-schema-salad
           python-shellescape
           python-spython
           python-typing-extensions
           ;; Not listed as needed but still necessary:
           node))
    (home-page
     "https://github.com/common-workflow-language/common-workflow-language")
    (synopsis "Common Workflow Language reference implementation")
    (description
     "This is the reference implementation of the @acronym{CWL, Common Workflow
Language} standards.  The CWL open standards are for describing analysis
workflows and tools in a way that makes them portable and scalable across a
variety of software and hardware environments, from workstations to cluster,
cloud, and high performance computing (HPC) environments.  CWL is designed to
meet the needs of data-intensive science, such as Bioinformatics, Medical
Imaging, Astronomy, Physics, and Chemistry.  The @acronym{cwltool, CWL reference
implementation} is intended to be feature complete and to provide comprehensive
validation of CWL files as well as provide other tools related to working with
CWL descriptions.")
    (license license:asl2.0)))

(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.5.1")
    ;; Source from GitHub so that tests are included.
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/jeetsukumaran/DendroPy")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0lrfzjqzbpk1rrra9vd7z2j7q09jy9w1ss7wn2rd85i4k5y3xz8l"))))
    (build-system pyproject-build-system)
    (arguments
     (list
      #:test-flags
      ;; The names below are joined with " and not " into a single pytest
      ;; "-k" expression, so every listed test is deselected.
      #~(list "-k"
              (string-join
               (list
                ;; These tests fail because we have no "paup" executable.
                "not test_group1"
                "test_basic_split_counting_under_different_rootings"
                "test_basic_split_count_with_incorrect_weight_treatment_raises_error"
                "test_basic_split_count_with_incorrect_rootings_raises_error"

                ;; Assert error for unknown reasons
                "test_by_num_lineages")
               " and not "))
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'python-compatibility
            (lambda _
              ;; Make this test module refer to "collections.abc.Iterable"
              ;; rather than "collections.Iterable".
              (substitute* "tests/test_datamodel_taxon.py"
                (("collections.Iterable")
                 "collections.abc.Iterable")))))))
    (native-inputs
     (list python-pytest))
    (home-page "https://dendropy.org/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))

;; py2bit, fetched from PyPI and built with the python build system.
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.3.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "py2bit" version))
              (sha256
               (base32
                "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description "This package provides Python bindings for lib2bit to access
2bit files with Python.")
    (license license:expat)))

;; Delly, built from a Git checkout with "make"; there is no configure step.
(define-public delly
  (package
    (name "delly")
    (version "0.8.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dellytools/delly")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1ibnplgfzj96w8glkx17v7sld3pm402fr5ybmf3h0rlcryabxrqy"))
       (modules '((guix build utils)))
       ;; Remove the copy of htslib shipped in the source tree; htslib is
       ;; listed among the inputs.
       (snippet '(delete-file-recursively "src/htslib"))))
    (build-system gnu-build-system)
    (arguments
     (list
      ;; There are no tests to run.
      #:tests? #f
      #:make-flags
      #~(list
         ;; Allow parallel execution at run-time.
         "PARALLEL=1"
         (string-append "prefix=" #$output))
      #:phases
      #~(modify-phases %standard-phases
          ;; There is no configure phase.
          (delete 'configure)
          (add-after 'install 'install-templates
            (lambda _
              ;; Copy the "excludeTemplates" directory into the output.
              (define template-directory
                (string-append #$output "/share/delly/templates"))

              (mkdir-p template-directory)
              (copy-recursively "excludeTemplates" template-directory))))))
    (inputs
     (list boost
           bzip2
           htslib
           zlib))
    (home-page "https://github.com/dellytools/delly")
    (synopsis "Integrated structural variant prediction method")
    (description
     "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data.  It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
    (license license:gpl3+)))

;; transanno, built from a pinned Git commit with the cargo build system.
(define-public transanno
  (package
    (name "transanno")
    (version "0.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/informationsea/transanno")
             ;; Corresponds to tag v0.3.0
             (commit "df49050c92644ea12d9d5c6fae2186ca436dbca3")))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1jpn7s3cnd9ybk4lmfbhj2arhf6cmrv7jp74n7n87m3a3irkaif1"))
       ;; Overwrite liftover-rs/build.rs with a build script whose only action
       ;; is to print a Cargo directive requesting linking against lzma.
       (snippet
        '(with-output-to-file "liftover-rs/build.rs"
           (lambda _
             (format #true
                     "fn main() {~@
                        println!(\"cargo:rustc-link-lib=lzma\");~@
                        }~%"))))))
    (build-system cargo-build-system)
    (arguments
     (list
      #:install-source? #false          ;fails
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'prepare-test-files
            (lambda _
              ;; Remove the lock file, make the test preparation script use
              ;; the bash from the native inputs, then generate the test
              ;; files.
              ;; NOTE(review): the script patched here
              ;; (liftover-rs/prepare-test.sh) is not the one invoked below
              ;; (prepare-test-files.sh); presumably the latter runs the
              ;; former -- confirm.
              (delete-file "Cargo.lock")
              (substitute* "liftover-rs/prepare-test.sh"
                (("/bin/bash")
                 (string-append #$(this-package-native-input "bash")
                                "/bin/bash")))
              (invoke "bash" "prepare-test-files.sh")))
          ;; Run the install phase from the "transanno" subdirectory.
          (add-before 'install 'chdir
            (lambda _ (chdir "transanno"))))
      #:cargo-inputs
      `(("rust-anyhow" ,rust-anyhow-1)
        ("rust-autocompress" ,rust-autocompress-0.2)
        ("rust-bio" ,rust-bio-0.41)
        ("rust-clap" ,rust-clap-2)
        ("rust-csv" ,rust-csv-1)
        ("rust-flate2" ,rust-flate2-1)
        ("rust-indexmap" ,rust-indexmap-1)
        ("rust-log" ,rust-log-0.4)
        ("rust-nom" ,rust-nom-5)
        ("rust-once-cell" ,rust-once-cell-1)
        ("rust-pretty-env-logger" ,rust-pretty-env-logger-0.3)
        ("rust-regex" ,rust-regex-1)
        ("rust-thiserror" ,rust-thiserror-1)
        ("rust-serde" ,rust-serde-1)
        ("rust-serde-json" ,rust-serde-json-1))
      #:cargo-development-inputs
      `(("rust-clap" ,rust-clap-2)
        ("rust-lazy-static" ,rust-lazy-static-1))))
    ;; bash is referenced by the 'prepare-test-files' phase above.
    (native-inputs (list bash pkg-config))
    ;; Presumably xz provides the lzma library requested by the build.rs
    ;; snippet -- confirm.
    (inputs (list xz))
    (home-page "https://github.com/informationsea/transanno")
    (synopsis "LiftOver tool for new genome assemblies")
    (description "This package provides an accurate VCF/GFF3/GTF LiftOver tool
for new genome assemblies.")
    (license license:gpl3+)))

;; Tandem Repeats Finder, built from a Git checkout with the GNU build system
;; and no custom arguments.
(define-public trf
  (package
    (name "trf")
    (version "4.09.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/Benson-Genomics-Lab/TRF")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0fhwr4s1mf8nw8fr5imwjvjr42b59p97zr961ifm8xl1bajz4wpg"))))
    (build-system gnu-build-system)
    (home-page "https://github.com/Benson-Genomics-Lab/TRF")
    (synopsis "Tandem Repeats Finder: a program to analyze DNA sequences")
    ;; Sentences in descriptions are separated by two spaces, as checked by
    ;; "guix lint".
    (description "A tandem repeat in DNA is two or more adjacent, approximate
copies of a pattern of nucleotides.  Tandem Repeats Finder is a program to
locate and display tandem repeats in DNA sequences.  In order to use the
program, the user submits a sequence in FASTA format.  The output consists of
two files: a repeat table file and an alignment file.  Submitted sequences may
be of arbitrary length.  Repeats with pattern size in the range from 1 to 2000
bases are detected.")
    (license license:agpl3+)))

;; Trinity, built from a recursive Git checkout.  The configure and install
;; phases are replaced below, and the installed "Trinity" script is wrapped
;; with explicit search paths.
(define-public trinityrnaseq
  (package
    (name "trinityrnaseq")
    (version "2.13.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/trinityrnaseq/trinityrnaseq.git")
                    (commit (string-append "Trinity-v" version))
                    (recursive? #true)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1qszrxqbx4q5pavpgm4rkrh1z1v1mf7qx83vv3fnlqdmncnsf1gv"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:test-target "test"
      ;; (ice-9 match) and (srfi srfi-1) provide 'match-lambda' and
      ;; 'filter-map', which the custom 'install' phase uses.
      #:modules
      '((guix build gnu-build-system)
        (guix build utils)
        (ice-9 match)
        (srfi srfi-1))
      #:make-flags
      #~(list (string-append "CC=" #$(cc-for-target)))
      #:phases
      #~(modify-phases %standard-phases
          ;; Instead of running a configure script, set the shell variables
          ;; and patch the bamsifter Makefile.
          (replace 'configure
            (lambda _
              (setenv "SHELL" (which "sh"))
              (setenv "CONFIG_SHELL" (which "sh"))
              ;; Do not require version.h, which triggers a local build of a
              ;; vendored htslib.
              (substitute* "trinity-plugins/bamsifter/Makefile"
                (("sift_bam_max_cov.cpp htslib/version.h")
                 "sift_bam_max_cov.cpp"))))
          (add-after 'build 'build-plugins
            (lambda _
              ;; Run this in the subdirectory to avoid running the
              ;; tests right here.
              (with-directory-excursion "trinity-plugins"
                (invoke "make" "plugins"))))
          ;; The install script uses rsync, provides no overrides for the
          ;; default location at /usr/local/bin, and patching it would change
          ;; all lines that do something.
          (replace 'install
            (lambda* (#:key inputs #:allow-other-keys)
              (let ((share (string-append #$output "/share/trinity/"))
                    (bin   (string-append #$output "/bin/")))
                (mkdir-p bin)
                ;; Copy the whole build tree into the output, then delete two
                ;; CMake log files from the copy.
                ;; NOTE(review): SHARE already ends with a slash, so the file
                ;; names built with a leading "/" below contain a doubled
                ;; slash.
                (copy-recursively "." share)
                (delete-file (string-append share "/Chrysalis/build/CMakeFiles/CMakeOutput.log"))
                (delete-file (string-append share "/Inchworm/build/CMakeFiles/CMakeOutput.log"))

                ;; Wrap the Trinity script so that it runs with the R, Perl
                ;; and Python search paths of the build environment, and with
                ;; PATH set to the bundled plugin directory followed by the
                ;; "bin" directory of every input.
                (wrap-program (string-append share "Trinity")
                  `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))
                  `("PERL5LIB"    ":" = (,(getenv "PERL5LIB")))
                  `("PYTHONPATH"  ":" = (,(getenv "GUIX_PYTHONPATH")))
                  `("PATH"        ":" =
                    ,(cons (string-append share "/trinity-plugins/BIN")
                           (filter-map (match-lambda
                                         ((name . dir)
                                          (string-append dir "/bin")))
                                       inputs))))
                ;; Expose the wrapped script as bin/Trinity.
                (symlink (string-append share "Trinity")
                         (string-append bin "Trinity"))))))))
    (inputs
     (list blast+
           bowtie
           fastqc
           hisat
           htslib
           icedtea-8
           jellyfish
           kallisto
           multiqc
           perl
           perl-uri-escape
           python-numpy
           python-wrapper
           r-ape
           r-argparse
           r-biobase
           r-ctc
           r-deseq2
           r-edger
           r-fastcluster
           r-glimma
           r-goplot
           r-goseq
           r-gplots
           r-minimal
           r-qvalue
           r-rots
           r-sm
           r-tidyverse
           rsem
           salmon
           samtools
           sra-tools
           star
           zlib))
    (propagated-inputs
     (list coreutils
           gzip
           which))
    (native-inputs (list cmake))
    (home-page "https://github.com/trinityrnaseq/trinityrnaseq/wiki")
    (synopsis "Trinity RNA-Seq de novo transcriptome assembly")
    (description "Trinity assembles transcript sequences from Illumina RNA-Seq
data.  Trinity represents a novel method for the efficient and robust de novo
reconstruction of transcriptomes from RNA-seq data.  Trinity combines three
independent software modules: Inchworm, Chrysalis, and Butterfly, applied
sequentially to process large volumes of RNA-seq reads.  Trinity partitions
the sequence data into many individual de Bruijn graphs, each representing the
transcriptional complexity at a given gene or locus, and then processes each
graph independently to extract full-length splicing isoforms and to tease
apart transcripts derived from paralogous genes.")
    (license license:bsd-3)))

(define-public repeat-masker
  (package
    (name "repeat-masker")
    (version "4.1.2-p1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://www.repeatmasker.org/"
                                  "RepeatMasker/RepeatMasker-"
                                  version ".tar.gz"))
              (sha256
               (base32 "15hfdfpzmdjcx7ng7rjfid69bmvgn3z9g9r43qhjnhjhq3v4prab"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #false ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'build
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((share (string-append (assoc-ref outputs "out")
                                         "/share/RepeatMasker")))
               (mkdir-p share)
               (copy-recursively "." share)
               (with-directory-excursion share
                 (invoke "perl" "configure"
                         "--trf_prgm&q