gnu: ocrodjvu: Use a Python 3 compatible fork.

* gnu/packages/djvu.scm (ocrodjvu)[source]: Update URL to Python 3 fork.
[inputs]: Use new style.  Remove python2-subprocess32.  Replace all other
Python 2 inputs by their Python 3 equivalent.  Add python-future and
python-regex.
[arguments]: Use gexps, and adjust accordingly, using this-package-input and
search-input-file.
This commit is contained in:
Maxim Cournoyer 2022-04-29 17:08:04 -04:00
parent 1c865e8e1c
commit 1db8080591
No known key found for this signature in database
GPG Key ID: 1260E46482E63562

View File

@ -24,6 +24,7 @@
#:use-module (guix utils)
#:use-module (guix packages)
#:use-module (guix download)
#:use-module (guix gexp)
#:use-module (guix git-download)
#:use-module (guix build-system gnu)
#:use-module (guix build-system python)
@ -398,85 +399,88 @@ and background layers of images, which can then be encoded into a DjVu file.")
(license license:gpl2)))
(define-public ocrodjvu
(package
(name "ocrodjvu")
(version "0.12")
(source
(origin
(method url-fetch)
(uri (string-append
"https://github.com/jwilk/ocrodjvu/releases/download/" version
"/ocrodjvu-" version ".tar.xz"))
(sha256
(base32 "09w9rqr7z2jd5kwp178zz2yrsc82mxs7gksipg92znxzgzhmw2ng"))))
(build-system gnu-build-system)
(native-inputs
(list libxml2 python2-nose python2-pillow))
(inputs
`(("djvulibre" ,djvulibre)
("ocrad" ,ocrad)
("python" ,python-2)
("python2-djvulibre" ,python2-djvulibre)
("python2-html5lib" ,python2-html5lib)
("python2-lxml" ,python2-lxml)
("python2-pyicu" ,python2-pyicu)
("python2-subprocess32" ,python2-subprocess32)
("tesseract-ocr" ,tesseract-ocr)))
(arguments
`(#:modules ((guix build gnu-build-system)
((guix build python-build-system) #:prefix python:)
(guix build utils))
#:imported-modules (,@%gnu-build-system-modules
(guix build python-build-system))
#:test-target "test"
#:phases
(modify-phases %standard-phases
(delete 'configure)
;; Phase run before 'check: patch the test sources with substitute* so that
;; tests known not to work in the build environment are neutralised.
(add-before 'check 'disable-failing-test
(lambda _
(substitute* "tests/test_ipc.py"
;; test_wait_signal gets stuck forever
(("yield self\\._test_signal, name")
"return True")
;; test_path fails to find a file it should have created
(("path = os\\.getenv\\('PATH'\\)\\.split\\(':'\\)")
"return True"))
;; Disable tests with tesseract. They can't work without
;; the language files that must be downloaded by the final user
;; as they are not packaged in Guix.
(substitute* "tests/ocrodjvu/test.py"
(("engines = stdout\\.getvalue\\(\\)\\.splitlines\\(\\)")
"engines = ['ocrad']"))
;; NOTE(review): the matched text assigns a tuple of engine names, while
;; the replacement assigns a plain string; if the test iterates over
;; `engines' it would see single characters.  Confirm whether a trailing
;; comma ("engines = 'ocrad',") was intended.
(substitute* "tests/ocrodjvu/test_integration.py"
(("engines = 'tesseract', 'cuneiform', 'gocr', 'ocrad'")
"engines = 'ocrad'"))))
;; Replace the default 'install phase: invoke "make install" with an empty
;; DESTDIR and PREFIX set to the "out" output directory.
(replace 'install
(lambda* (#:key outputs #:allow-other-keys)
(let ((out (assoc-ref outputs "out")))
(invoke "make"
"DESTDIR="
(string-append "PREFIX=" out)
"install"))))
;; After 'install, run the 'wrap phase looked up in python:%standard-phases
;; (the python-build-system phases imported under the "python:" prefix).
(add-after 'install 'wrap-python
(assoc-ref python:%standard-phases 'wrap))
(add-after 'wrap-python 'wrap-path
(lambda* (#:key inputs outputs #:allow-other-keys)
(let ((out (assoc-ref outputs "out"))
(djvulibre (assoc-ref inputs "djvulibre"))
(ocrad (assoc-ref inputs "ocrad"))
(tesseract (assoc-ref inputs "tesseract-ocr")))
(for-each (lambda (file)
(wrap-program (string-append out "/bin/" file)
`("PATH" ":" prefix
(,(string-append djvulibre "/bin:"
ocrad "/bin:"
tesseract "/bin")))))
'("djvu2hocr"
"hocr2djvused"
"ocrodjvu"))))))))
(synopsis "Program to perform OCR on DjVu files")
(description
"@code{ocrodjvu} is a wrapper for OCR systems, that allows you to perform
(let ((revision "0")
(commit "0dd3364462fc77d5674b4457fcc8230835323c30"))
(package
(name "ocrodjvu")
(version (git-version "0.12" revision commit))
(source (origin
(method git-fetch)
(uri (git-reference
;; Use the following fork repository, as upstream
;; doesn't seem too concerned with Python 3
;; compatibility.
(url "https://github.com/rmast/ocrodjvu")
(commit commit)))
(file-name (git-file-name name version))
(sha256
(base32
"0x64hg9ysrk8sismxb4jgk0sq7r9j90v2i9765xhmxpiy6f0lpni"))))
(build-system gnu-build-system)
(native-inputs
(list libxml2 python-nose python-pillow))
(inputs
(list djvulibre
ocrad
python-djvulibre
python-future
python-html5lib
python-lxml
python-pyicu
python-regex
python-wrapper
tesseract-ocr))
(arguments
(list
#:modules '((guix build gnu-build-system)
((guix build python-build-system) #:prefix python:)
(guix build utils))
#:imported-modules `(,@%gnu-build-system-modules
(guix build python-build-system))
#:test-target "test"
#:phases
#~(modify-phases %standard-phases
(delete 'configure)
;; Phase run before 'check: patch the test sources with substitute* so that
;; tests known not to work in the build environment are neutralised.
(add-before 'check 'disable-failing-test
(lambda _
(substitute* "tests/test_ipc.py"
;; test_wait_signal gets stuck forever
(("yield self\\._test_signal, name")
"return True")
;; test_path fails to find a file it should have created
(("path = os\\.getenv\\('PATH'\\)\\.split\\(':'\\)")
"return True"))
;; Disable tests with tesseract. They can't work without
;; the language files that must be downloaded by the final user
;; as they are not packaged in Guix.
(substitute* "tests/ocrodjvu/test.py"
(("engines = stdout\\.getvalue\\(\\)\\.splitlines\\(\\)")
"engines = ['ocrad']"))
;; NOTE(review): the matched text assigns a tuple of engine names, while
;; the replacement assigns a plain string; if the test iterates over
;; `engines' it would see single characters.  Confirm whether a trailing
;; comma ("engines = 'ocrad',") was intended.
(substitute* "tests/ocrodjvu/test_integration.py"
(("engines = 'tesseract', 'cuneiform', 'gocr', 'ocrad'")
"engines = 'ocrad'"))))
;; Replace the default 'install phase: invoke "make install" with an empty
;; DESTDIR and PREFIX set to the package output, spliced in via #$output.
(replace 'install
(lambda _
(invoke "make" "install"
"DESTDIR=" (string-append "PREFIX=" #$output))))
;; After 'install, run the 'wrap phase looked up in python:%standard-phases
;; (the python-build-system phases imported under the "python:" prefix).
(add-after 'install 'wrap-python
(assoc-ref python:%standard-phases 'wrap))
(add-after 'wrap-python 'wrap-path
(lambda* (#:key outputs #:allow-other-keys)
(for-each (lambda (file)
(wrap-program (search-input-file outputs file)
`("PATH" ":" prefix
(,(string-append
#$(this-package-input "djvulibre") "/bin:"
#$(this-package-input "ocrad") "/bin:"
#$(this-package-input "tesseract-ocr")
"/bin")))))
'("bin/djvu2hocr"
"bin/hocr2djvused"
"bin/ocrodjvu")))))))
(synopsis "Program to perform OCR on DjVu files")
(description
"@code{ocrodjvu} is a wrapper for OCR systems, that allows you to perform
OCR on DjVu files.")
(home-page "https://jwilk.net/software/ocrodjvu")
(license license:gpl2)))
(home-page "https://jwilk.net/software/ocrodjvu")
(license license:gpl2))))