Store the crawl version in the text databases in SAVEDIR and upgrade the databases when the crawl version changes. By default crawl checks for an mtime difference on files in DATADIR to see if an upgrade is required, but Guix resets all file timestamps, so crawl would never upgrade saves. diff -ur a/source/database.cc b/source/database.cc --- a/source/database.cc 2018-08-09 21:49:26.000000000 -0400 +++ b/source/database.cc 2018-10-07 18:06:41.022445789 -0400 @@ -25,6 +25,7 @@ #include "syscalls.h" #include "threads.h" #include "unicode.h" +#include "version.h" // TextDB handles dependency checking the db vs text files, creating the // db, loading, and destroying the DB. @@ -55,6 +56,7 @@ vector<string> _input_files; DBM* _db; string timestamp; + string version; TextDB *_parent; const char* lang() { return _parent ? Options.lang_name : 0; } public: @@ -163,7 +165,7 @@ TextDB::TextDB(const char* db_name, const char* dir, vector<string> files) : _db_name(db_name), _directory(dir), _input_files(files), - _db(nullptr), timestamp(""), _parent(0), translation(0) + _db(nullptr), timestamp(""), version(""), _parent(0), translation(0) { } @@ -171,7 +173,7 @@ : _db_name(parent->_db_name), _directory(parent->_directory + Options.lang_name + "/"), _input_files(parent->_input_files), // FIXME: pointless copy - _db(nullptr), timestamp(""), _parent(parent), translation(nullptr) + _db(nullptr), timestamp(""), version(""), _parent(parent), translation(nullptr) { } @@ -186,6 +188,9 @@ return false; timestamp = _query_database(*this, "TIMESTAMP", false, false, true); + version = _query_database(*this, "VERSION", false, false, true); + if (version.empty()) + return false; if (timestamp.empty()) return false; @@ -229,6 +234,9 @@ string ts; bool no_files = true; + if (string(Version::Long) != version) + return true; + for (const string &file : _input_files) { string full_input_path = _directory + file; @@ -245,7 +253,7 @@ ts += buf; } - if (no_files && timestamp.empty()) + if (no_files && timestamp.empty() && 
version.empty()) { // No point in empty databases, although for simplicity keep ones // for disappeared translations for now. @@ -313,7 +321,10 @@ _store_text_db(full_input_path, _db); } } + + string current_version = string(Version::Long); _add_entry(_db, "TIMESTAMP", ts); + _add_entry(_db, "VERSION", current_version); dbm_close(_db); _db = 0; > 2022-08-12 gnu: tesseract-ocr: Make the default install minimally useful. Fixes <https://issues.guix.gnu.org/47536>. * gnu/packages/ocr.scm (tesseract-ocr) [phases]{adjust-TESSDATA_PREFIX-macro}: New phase. {install-minimal-tessdata}: New phase. [native-inputs]: Add tesseract-ocr-tessdata-fast. [search-paths]: New field. [description]: Mention how to add support for more languages. Maxim Cournoyer 2022-08-12 gnu: Add tesseract-ocr-tessdata-fast. * gnu/packages/ocr.scm (tesseract-ocr-tessdata-fast): New variable. Maxim Cournoyer 2022-08-12 gnu: tesseract-ocr: Update to 5.2.0. * gnu/packages/ocr.scm (tesseract-ocr): Update to 5.2.0. [inputs, native-inputs]: Move after arguments. Use new style inputs. [arguments]: Use gexps. [configure-flags]: Add --disable-static. [phases]{fix-docbook}: Replace phase with... {do-not-override-xml-catalog-files}: ... this new phase. {build-training}: Move after build phase. Enable parallel build. {trailing-install}: Move after install phase. [native-inputs]: Add libxml2. Maxim Cournoyer 2022-08-12 gnu: ocrad: Update to 0.28. * gnu/packages/ocr.scm (ocrad): Update to 0.28. Maxim Cournoyer 2022-05-31 gnu: Remove python2-zinnia. * gnu/packages/ocr.scm (python2-zinnia): Delete variable. Maxim Cournoyer 2022-02-07 gnu: gimagereader: Use G-expressions. * gnu/packages/ocr.scm (gimagereader)[arguments]<#:configure-flags>: Use G-expression. Nicolas Goaziou 2022-02-07 gnu: gimagereader: Remove input labels. * gnu/packages/ocr.scm (gimagereader)[native-inputs, inputs]: Remove labels. 
Nicolas Goaziou 2022-02-07 gnu: gimagereader: Update to 3.4.0. * gnu/packages/ocr.scm (gimagereader): Update to 3.4.0. Nicolas Goaziou