From ff0600c5efb519925c270e9f54f43ecf096e564e Mon Sep 17 00:00:00 2001 From: Maxim Cournoyer Date: Thu, 11 Aug 2022 23:58:24 -0400 Subject: gnu: tesseract-ocr: Make the default install minimally useful. Fixes . * gnu/packages/ocr.scm (tesseract-ocr) [phases]{adjust-TESSDATA_PREFIX-macro}: New phase. {install-minimal-tessdata}: New phase. [native-inputs]: Add tesseract-ocr-tessdata-fast. [search-paths]: New field. [description]: Mention how to add support for more languages. --- gnu/packages/ocr.scm | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) (limited to 'gnu/packages') diff --git a/gnu/packages/ocr.scm b/gnu/packages/ocr.scm index e2c9f561cc..21d257ef24 100644 --- a/gnu/packages/ocr.scm +++ b/gnu/packages/ocr.scm @@ -132,6 +132,15 @@ models for the Tesseract OCR Engine.") (substitute* "configure.ac" (("AC_SUBST\\(\\[XML_CATALOG_FILES])") "")))) + (add-after 'unpack 'adjust-TESSDATA_PREFIX-macro + (lambda _ + ;; Use a deeper TESSDATA_PREFIX hierarchy so that a more + ;; specific search-path than '/share' can be specified. The + ;; build system uses CPPFLAGS for itself, so we can't simply set + ;; a make flag. + (substitute* "Makefile.am" + (("-DTESSDATA_PREFIX='\"@datadir@\"'") + "-DTESSDATA_PREFIX='\"@datadir@/tesseract-ocr\"'")))) (add-after 'build 'build-training (lambda* (#:key parallel-build? #:allow-other-keys) (define n (if parallel-build? (number->string @@ -140,7 +149,18 @@ models for the Tesseract OCR Engine.") (invoke "make" "-j" n "training"))) (add-after 'install 'install-training (lambda _ - (invoke "make" "training-install")))))) + (invoke "make" "training-install"))) + (add-after 'install 'install-minimal-tessdata + ;; tesseract-ocr cannot be used without its trained models data; + ;; install the English language as a minimal base which can be + ;; extended via TESSDATA_PREFIX. 
+ (lambda* (#:key native-inputs inputs #:allow-other-keys) + (define eng.traineddata + "/share/tesseract-ocr/tessdata/eng.traineddata") + (install-file (search-input-file (or native-inputs inputs) + eng.traineddata) + (dirname (string-append #$output + eng.traineddata)))))))) (native-inputs (list asciidoc autoconf @@ -152,13 +172,18 @@ models for the Tesseract OCR Engine.") libtool libxml2 ;for XML_CATALOG_FILES libxslt - pkg-config)) + pkg-config + tesseract-ocr-tessdata-fast)) (inputs (list cairo icu4c leptonica pango python-wrapper)) + (native-search-paths (list (search-path-specification + (variable "TESSDATA_PREFIX") + (files (list "share/tesseract-ocr/tessdata")) + (separator #f)))) ;single value (home-page "https://github.com/tesseract-ocr/tesseract") (synopsis "Optical character recognition engine") (description @@ -166,7 +191,9 @@ models for the Tesseract OCR Engine.") high accuracy. It supports many languages, output text formatting, hOCR positional information and page layout analysis. Several image formats are supported through the Leptonica library. It can also detect whether text is -monospaced or proportional.") +monospaced or proportional. Support for the English language is included by +default. To add support for more languages, the +@code{tesseract-ocr-tessdata-fast} package should be installed.") (license license:asl2.0))) (define-public gimagereader -- cgit v1.2.3 > 2021-09-20etc: Add systemd files for running ‘guix gc’ periodically...* etc/guix-gc.service.in: New file. * etc/guix-gc.timer: Likewise. * .gitignore: Ignore generated ‘guix-gc.service’. * nix/local.mk (nodist_systemdservice_DATA): Add ‘guix-gc.service’ and ‘guix-gc.timer’. (EXTRA_DIST): Add ‘guix-gc.service.in’ and ‘guix-gc.timer’. * doc/guix.texi (Binary Installation): Mention the new systemd files. Signed-off-by: Mathieu Othacehe <othacehe@gnu.org> Thiago Jung Bauermann 2021-05-10.gitignore: Ignore .mo files....* .gitignore: Ignore files matching the *.mo pattern. 
Maxim Cournoyer 2021-05-05.gitignore: Ignore .tarball-version....This file can be useful to fix a version string when experimenting with 'make release'. * .gitignore: Add a pattern to ignore .tarball-version. Maxim Cournoyer 2021-04-23.gitignore: Ignore release artifacts....Not ignoring these in the tree leads to the next generated version (.version) being suffixed with '-dirty', which confuses things. * .gitignore [/guix-*]: New pattern. [/doc/stamp-[0-9]]: Adjust to ... [/doc/stamp-*]: ... this. [/release-*]: New pattern. Maxim Cournoyer 2021-04-23.gitignore: Ignore generated .pot files....These files are automatically-extracted templates rather than source, hence shouldn't be checked in. * .gitignore: Add a glob pattern to ignore .pot files. Maxim Cournoyer 2020-10-20gitignore: Ignore generated guile binary and intermediate products....* .gitignore: Ignore guile, guile-guile-launcher.o, .deps and .dirstamp. Julien Lepiller 2020-09-17guix-install.sh: Support OpenRC....* etc/guix-install.sh (chk_init_sys): Detect OpenRC. (sys_enable_guix_daemon): Install & enable the Guix daemon on such systems. * etc/openrc/guix-daemon.in: New file. * nix/local.mk: Add a rule for it. (openrcservicedir, nodist_openrcservice_DATA): New variables. (CLEANFILES, EXTRA_DIST): Add them. * .gitignore: Ignore etc/openrc/guix-daemon. Signed-off-by: Tobias Geerinckx-Rice <me@tobias.gr> Morgan Smith 2020-07-17gitignore: Don't ignore daemon source code....Remove files that are included in the repository from .gitignore. This improves the behavior of some code-searching tools. Jakub Kądziołka 2020-06-28.gitignore: Ignore 'etc/committer.scm'....* .gitignore: Add '/etc/committer.scm'. Brice Waegeneire 2020-05-16etc: Add a systemd unit to bind-mount @storedir@ read-only....* etc/gnu-store.mount.in: New file. * nix/local.mk (nodist_systemdservice_DATA): Add it. (etc/%.mount): New rule for it. * etc/guix-install.sh (sys_enable_guix_daemon): Install it. 
* doc/guix.texi (Binary Installation): Document it. * .gitignore: Ignore changes to it. Tobias Geerinckx-Rice 2020-05-14etc: indent-code.el: Use the --quick option....This prevents Emacs from loading the autoload definitions found in its profile, which needlessly clutters the output. It also prevents Geiser (if installed) from blocking the script and asking the user to input the Scheme implementation to use. The trick for passing multiple arguments to Emacs is to use what is called a "sesquicolon" (see https://www.emacswiki.org/emacs/EmacsScripts). * etc/indent-code.el.in: Rename to... * etc/indent-code.el: ...this. Adapt the shebang to use a sesquicolon, and pass the --quick option to Emacs. Since this line is interpreted by the shell, simply use Emacs from the PATH instead of from a hard coded location. (main): New procedure, used as the entry point. * configure.ac: Remove the warning about Emacs. Emacs can now be installed any time by the user if they want to use the script. * .gitignore: No longer ignore changes to etc/indent-code.el. Maxim Cournoyer 2020-03-30.gitignore: Ignore test results in tests/services....This is a follow-up to commit d3e439e355284f136ff13eb3e6fffb4661552f3b. * .gitignore: Add /tests/services/*.trs. Marius Bakke 2020-03-11Add system start-up files for guix-daemon....* etc/init.d/guix-daemon.in: New file. * nix/local.mk (etc/init.d/guix-daemon): New rule. (nodist_sysvinitservice_DATA): Add etc/init.d/guix-daemon.in . (CLEANFILES): Add etc/init.d/guix-daemon . * .gitignore: Add etc/init.d/guix-daemon . Danny Milosavljevic