diff --git a/pkgs/development/python-modules/hocr-tools/default.nix b/pkgs/development/python-modules/hocr-tools/default.nix
new file mode 100644
index 000000000000..d5492b3b6414
--- /dev/null
+++ b/pkgs/development/python-modules/hocr-tools/default.nix
@@ -0,0 +1,31 @@
+{ buildPythonPackage
+, fetchFromGitHub
+, lxml
+, pillow
+, reportlab
+, stdenv
+}:
+
+buildPythonPackage rec {
+  pname = "hocr-tools";
+  version = "1.3.0";
+
+  src = fetchFromGitHub {
+    owner = "tmbdev";
+    repo = pname;
+    rev = "v${version}";
+    sha256 = "14f9hkp7pr677085w8iidwd0la9cjzy3pyj3rdg9b03nz9pc0w6p";
+  };
+
+  # hocr-tools uses a test framework that requires internet access
+  doCheck = false;
+
+  propagatedBuildInputs = [ pillow lxml reportlab ];
+
+  meta = with stdenv.lib; {
+    description = "Tools for manipulating and evaluating the hOCR format for representing multi-lingual OCR results by embedding them into HTML";
+    homepage = "https://github.com/tmbdev/hocr-tools";
+    license = licenses.asl20;
+    maintainers = [ maintainers.kiwi ];
+  };
+}
diff --git a/pkgs/development/python-modules/pikepdf/default.nix b/pkgs/development/python-modules/pikepdf/default.nix
new file mode 100644
index 000000000000..c2662915b12d
--- /dev/null
+++ b/pkgs/development/python-modules/pikepdf/default.nix
@@ -0,0 +1,74 @@
+{ attrs
+, buildPythonPackage
+, defusedxml
+, fetchPypi
+, hypothesis
+, isPy3k
+, lxml
+, pillow
+, pybind11
+, pytest
+, pytest-helpers-namespace
+, pytest-timeout
+, pytest_xdist
+, pytestrunner
+, python-xmp-toolkit
+, python3
+, qpdf
+, setuptools-scm-git-archive
+, setuptools_scm
+, stdenv
+}:
+
+buildPythonPackage rec {
+  pname = "pikepdf";
+  version = "1.1.0";
+  disabled = !isPy3k;
+
+  src = fetchPypi {
+    inherit pname version;
+    sha256 = "14b36r6h3088z2sxp2pqvm171js53hz53mwm1g52iadignjnp0my";
+  };
+
+  buildInputs = [
+    pybind11
+    qpdf
+  ];
+
+  nativeBuildInputs = [
+    setuptools-scm-git-archive
+    setuptools_scm
+  ];
+
+  checkInputs = [
+    attrs
+    hypothesis
+    pillow
+    pytest
+    pytest-helpers-namespace
+    pytest-timeout
+    pytest_xdist
+    pytestrunner
+    python-xmp-toolkit
+  ];
+
+  propagatedBuildInputs = [ defusedxml lxml ];
+
+  # Rewrite the pytest version range pinned in the test requirements
+  postPatch = ''
+    substituteInPlace requirements/test.txt \
+      --replace "pytest >= 3.6.0, < 4.1.0" "pytest >= 4.2.1, < 5"
+  '';
+
+  preBuild = ''
+    HOME=$TMPDIR
+  '';
+
+  meta = with stdenv.lib; {
+    homepage = "https://github.com/pikepdf/pikepdf";
+    description = "Read and write PDFs with Python, powered by qpdf";
+    license = licenses.mpl20;
+    maintainers = [ maintainers.kiwi ];
+  };
+}
+
diff --git a/pkgs/development/python-modules/pytest-helpers-namespace/default.nix b/pkgs/development/python-modules/pytest-helpers-namespace/default.nix
new file mode 100644
index 000000000000..a4e0cb809aec
--- /dev/null
+++ b/pkgs/development/python-modules/pytest-helpers-namespace/default.nix
@@ -0,0 +1,35 @@
+{ buildPythonPackage
+, fetchFromGitHub
+, pytest
+, stdenv
+}:
+
+buildPythonPackage rec {
+  pname = "pytest-helpers-namespace";
+  version = "2019.1.8";
+
+  src = fetchFromGitHub {
+    owner = "saltstack";
+    repo = pname;
+    rev = "v${version}";
+    sha256 = "0z9f25d2wpf3lnqzmmnrlvl5b1f7kqwjjf4nzs9x2bpf91s5zny1";
+  };
+
+  buildInputs = [ pytest ];
+
+  checkInputs = [ pytest ];
+
+  checkPhase = ''
+    pytest
+  '';
+
+  # The tests fail with newest pytest. They passed with pytest_3, which no longer exists
+  doCheck = false;
+
+  meta = with stdenv.lib; {
+    homepage = "https://github.com/saltstack/pytest-helpers-namespace";
+    description = "PyTest Helpers Namespace";
+    license = licenses.asl20;
+    maintainers = [ maintainers.kiwi ];
+  };
+}
diff --git a/pkgs/development/python-modules/python-xmp-toolkit/default.nix b/pkgs/development/python-modules/python-xmp-toolkit/default.nix
new file mode 100644
index 000000000000..93fa36fa2fcf
--- /dev/null
+++ b/pkgs/development/python-modules/python-xmp-toolkit/default.nix
@@ -0,0 +1,45 @@
+{ buildPythonPackage
+, exempi
+, fetchFromGitHub
+, mock
+, pythonOlder
+, pytz
+, stdenv
+}:
+
+buildPythonPackage rec {
+  pname = "python-xmp-toolkit";
+  version = "2.0.2";
+
+  # PyPI has version 2.0.1; the tests fail
+  # There are commits for a 2.0.2 release that was never published
+  # Not to GitHub, not to PyPI
+  # This is the latest commit from Jun 29, 2017 (as of Mar 13, 2019)
+  # It includes the commits for the unreleased version 2.0.2 and more
+  # Tests pass with this version
+  src = fetchFromGitHub {
+    owner = "python-xmp-toolkit";
+    repo = "python-xmp-toolkit";
+    rev = "5692bdf8dac3581a0d5fb3c5aeb29be0ab6a54fc";
+    sha256 = "16bylcm183ilzp7mrpdzw0pzp6csv9v5v247914qsv2abg0hgl5y";
+  };
+
+  buildInputs = [ exempi ];
+
+  checkInputs = stdenv.lib.optionals (pythonOlder "3.3") [ mock ];
+
+  propagatedBuildInputs = [ pytz ];
+
+  # Hard-code the exempi library path in place of the find_library lookup
+  postPatch = ''
+    substituteInPlace libxmp/exempi.py \
+      --replace "ctypes.util.find_library('exempi')" "'${exempi}/lib/libexempi${stdenv.hostPlatform.extensions.sharedLibrary}'"
+  '';
+
+  meta = with stdenv.lib; {
+    homepage = "https://github.com/python-xmp-toolkit/python-xmp-toolkit";
+    description = "Python XMP Toolkit for working with metadata";
+    license = licenses.bsd3;
+    maintainers = [ maintainers.kiwi ];
+  };
+}
diff --git a/pkgs/development/python-modules/ruffus/default.nix b/pkgs/development/python-modules/ruffus/default.nix
new file mode 100644
index 000000000000..1b3b271fb61f
--- /dev/null
+++ b/pkgs/development/python-modules/ruffus/default.nix
@@ -0,0 +1,54 @@
+{ gevent
+, buildPythonPackage
+, fetchFromGitHub
+, hostname
+, pytest
+, python
+, stdenv
+}:
+
+buildPythonPackage rec {
+  pname = "ruffus";
+  version = "2.8.1";
+
+  src = fetchFromGitHub {
+    owner = "cgat-developers";
+    repo = pname;
+    rev = "v${version}";
+    sha256 = "1gyabqafq4s2sy0prh3k1m8859shzjmfxr7fimx10liflvki96a9";
+  };
+
+  propagatedBuildInputs = [ gevent ];
+
+  # Point the test Makefile and scripts at the Nix-provided shell, pytest and interpreter
+  postPatch = ''
+    sed -i -e 's|/bin/bash|${stdenv.shell}|' ruffus/test/Makefile
+    sed -i -e 's|\tpytest|\t${pytest}/bin/pytest|' ruffus/test/Makefile
+    sed -i -e 's|\tpython|\t${python.interpreter}|' ruffus/test/Makefile
+    sed -i -e 's|/usr/bin/env bash|${stdenv.shell}|' ruffus/test/run_all_unit_tests.cmd
+    sed -i -e 's|python3|${python.interpreter}|' ruffus/test/run_all_unit_tests3.cmd
+    sed -i -e 's|python %s|${python.interpreter} %s|' ruffus/test/test_drmaa_wrapper_run_job_locally.py
+  '';
+
+  makefile = "ruffus/test/Makefile";
+
+  checkInputs = [
+    gevent
+    hostname
+    pytest
+  ];
+
+  checkPhase = ''
+    export HOME=$TMPDIR
+    cd ruffus/test
+    make all PYTEST_OPTIONS="-q --disable-warnings"
+  '';
+
+  meta = with stdenv.lib; {
+    description = "Light-weight Python Computational Pipeline Management";
+    homepage = "http://www.ruffus.org.uk";
+    license = licenses.mit;
+    maintainers = [ maintainers.kiwi ];
+  };
+}
+
diff --git a/pkgs/tools/text/ocrmypdf/default.nix b/pkgs/tools/text/ocrmypdf/default.nix
new file mode 100644
index 000000000000..514f3f675399
--- /dev/null
+++ b/pkgs/tools/text/ocrmypdf/default.nix
@@ -0,0 +1,107 @@
+{ fetchFromGitHub
+, ghostscript
+, img2pdf
+, jbig2enc
+, leptonica
+, pngquant
+, python3
+, python3Packages
+, qpdf
+, stdenv
+, tesseract4
+, unpaper
+}:
+
+let
+  inherit (python3Packages) buildPythonApplication;
+
+  runtimeDeps = with python3Packages; [
+    ghostscript
+    jbig2enc
+    leptonica
+    pngquant
+    qpdf
+    tesseract4
+    unpaper
+    pillow
+  ];
+
+in buildPythonApplication rec {
+  pname = "ocrmypdf";
+  version = "8.2.3";
+  disabled = !python3Packages.isPy3k;
+
+  src = fetchFromGitHub {
+    owner = "jbarlow83";
+    repo = "OCRmyPDF";
+    rev = "v${version}";
+    sha256 = "1ldlyhxkav34y9d7g2kx3d4p26c2b82vnwi0ywnfynb16sav36d5";
+  };
+
+  nativeBuildInputs = with python3Packages; [
+    pytestrunner
+    setuptools
+    setuptools-scm-git-archive
+    setuptools_scm
+  ];
+
+  propagatedBuildInputs = with python3Packages; [
+    cffi
+    chardet
+    img2pdf
+    pdfminer
+    pikepdf
+    reportlab
+    ruffus
+  ];
+
+  checkInputs = with python3Packages; [
+    hocr-tools
+    pypdf2
+    pytest
+    pytest-helpers-namespace
+    pytest_xdist
+    pytestcov
+    pytestrunner
+    python-xmp-toolkit
+    setuptools
+  ] ++ runtimeDeps;
+
+  # Load leptonica from its store path in place of the find_library lookup
+  postPatch = ''
+    substituteInPlace src/ocrmypdf/leptonica.py \
+      --replace "ffi.dlopen(find_library('lept'))" \
+      'ffi.dlopen("${stdenv.lib.makeLibraryPath [leptonica]}/liblept${stdenv.hostPlatform.extensions.sharedLibrary}")'
+  '';
+
+  # runtimeDeps is otherwise only referenced by the (disabled) check phase,
+  # so expose those tools on the wrapped program's PATH
+  makeWrapperArgs = [ "--prefix PATH : ${stdenv.lib.makeBinPath runtimeDeps}" ];
+
+  # The tests take potentially 20+ minutes, depending on machine
+  doCheck = false;
+
+  # These tests fail and it might be upstream problem... or packaging. :)
+  # development is happening on macos and the pinned test versions are
+  # significantly newer than nixpkgs has. Program still works...
+  # (to the extent I've used it) -- Kiwi
+  checkPhase = ''
+    export HOME=$TMPDIR
+    pytest -k 'not test_force_ocr_on_pdf_with_no_images \
+    and not test_tesseract_crash \
+    and not test_tesseract_crash_autorotate \
+    and not test_ghostscript_pdfa_failure \
+    and not test_gs_render_failure \
+    and not test_gs_raster_failure \
+    and not test_bad_utf8 \
+    and not test_old_unpaper'
+  '';
+
+  meta = with stdenv.lib; {
+    homepage = "https://github.com/jbarlow83/OCRmyPDF";
+    description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
+    license = licenses.gpl3;
+    platforms = platforms.linux;
+    maintainers = [ maintainers.kiwi ];
+  };
+}
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index a6fb0ceaf18a..c2df042eae22 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -1704,6 +1704,8 @@ in
 
   hid-listen = callPackage ../tools/misc/hid-listen { };
 
+  hocr-tools = with python3Packages; toPythonApplication hocr-tools;
+
   home-manager = callPackage ../tools/package-management/home-manager {};
 
   hostsblock = callPackage ../tools/misc/hostsblock { };
@@ -1828,6 +1830,8 @@ in
 
   nyx = callPackage ../tools/networking/nyx { };
 
+  ocrmypdf = callPackage ../tools/text/ocrmypdf { };
+
   onboard = callPackage ../applications/misc/onboard { };
 
   xkbd = callPackage ../applications/misc/xkbd { };
diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix
index 39e5723756da..3096c6067d57 100644
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@@ -643,6 +643,8 @@ in {
 
   hdmedians = callPackage ../development/python-modules/hdmedians { };
 
+  hocr-tools = callPackage ../development/python-modules/hocr-tools { };
+
   holoviews = callPackage ../development/python-modules/holoviews { };
 
   hoomd-blue = toPythonModule (callPackage ../development/python-modules/hoomd-blue {
@@ -1952,6 +1954,8 @@ in {
     hypothesis = self.hypothesis.override { doCheck = false; };
   };
 
+  pytest-helpers-namespace = callPackage ../development/python-modules/pytest-helpers-namespace { };
+
   pytest-httpbin = callPackage ../development/python-modules/pytest-httpbin { };
 
   pytest-asyncio = callPackage ../development/python-modules/pytest-asyncio { };
@@ -4143,6 +4147,8 @@ in {
 
   pika-pool = callPackage ../development/python-modules/pika-pool { };
 
+  pikepdf = callPackage ../development/python-modules/pikepdf { };
+
   kmapper = callPackage ../development/python-modules/kmapper { };
 
   kmsxx = (callPackage ../development/libraries/kmsxx {
@@ -5083,6 +5089,8 @@ in {
 
   ruamel_yaml = callPackage ../development/python-modules/ruamel_yaml { };
 
+  ruffus = callPackage ../development/python-modules/ruffus { };
+
   runsnakerun = callPackage ../development/python-modules/runsnakerun { };
 
   pysendfile = callPackage ../development/python-modules/pysendfile { };
@@ -5669,6 +5677,8 @@ in {
 
   python-u2flib-host = callPackage ../development/python-modules/python-u2flib-host { };
 
+  python-xmp-toolkit = callPackage ../development/python-modules/python-xmp-toolkit { };
+
   pluggy = callPackage ../development/python-modules/pluggy {};
 
   xcffib = callPackage ../development/python-modules/xcffib {};