nixpkgs/pkgs/development/python-modules/pyocr/paths.patch

diff --git a/src/pyocr/cuneiform.py b/src/pyocr/cuneiform.py
index 2e5b717..35647e2 100644
--- a/src/pyocr/cuneiform.py
+++ b/src/pyocr/cuneiform.py
@@ -25,13 +25,9 @@ from . import builders
 from .error import CuneiformError
 
 
-# CHANGE THIS IF CUNEIFORM IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
-CUNEIFORM_CMD = 'cuneiform'
+CUNEIFORM_CMD = '@cuneiform@/bin/cuneiform'
 
-CUNEIFORM_DATA_POSSIBLE_PATHS = [
-    "/usr/local/share/cuneiform",
-    "/usr/share/cuneiform",
-]
+CUNEIFORM_DATA_POSSIBLE_PATHS = ['@cuneiform@/share/cuneiform']
 
 LANGUAGES_LINE_PREFIX = "Supported languages: "
 LANGUAGES_SPLIT_RE = re.compile("[^a-z]")
diff --git a/src/pyocr/libtesseract/tesseract_raw.py b/src/pyocr/libtesseract/tesseract_raw.py
index a068e73..9ebea5c 100644
--- a/src/pyocr/libtesseract/tesseract_raw.py
+++ b/src/pyocr/libtesseract/tesseract_raw.py
@@ -2,7 +2,6 @@ import ctypes
 import locale
 import logging
 import os
-import sys
 
 from ..error import TesseractError
 
@@ -10,48 +9,16 @@ from ..error import TesseractError
 logger = logging.getLogger(__name__)
 
 TESSDATA_PREFIX = os.getenv('TESSDATA_PREFIX', None)
-libnames = []
+# An unset *or empty* TESSDATA_PREFIX falls back to the packaged tessdata
+# directory; the chosen value is also written back into os.environ.
+if not TESSDATA_PREFIX:
+    TESSDATA_PREFIX = '@tesseract@/share/tessdata'
+    os.environ['TESSDATA_PREFIX'] = TESSDATA_PREFIX
 # 70 is the minimum credible dpi for tesseract and force it to compute an
 # estimate of the image dpi
 DPI_DEFAULT = 70
 
-
-if getattr(sys, 'frozen', False):  # pragma: no cover
-    # Pyinstaller integration
-    libnames += [os.path.join(sys._MEIPASS, "libtesseract-4.dll")]
-    libnames += [os.path.join(sys._MEIPASS, "libtesseract-3.dll")]
-    tessdata = os.path.join(sys._MEIPASS, "data")
-    if not os.path.exists(os.path.join(tessdata, "tessdata")):
-        logger.warning(
-            "Running from container, but no tessdata ({}) found !".format(
-                tessdata
-            )
-        )
-    else:
-        TESSDATA_PREFIX = tessdata
-
-
-if sys.platform[:3] == "win":  # pragma: no cover
-    libnames += [
-        # Jflesch> Don't they have the equivalent of LD_LIBRARY_PATH on
-        # Windows ?
-        "../vs2010/DLL_Release/libtesseract302.dll",
-        # prefer the most recent first
-        "libtesseract305.dll",
-        "libtesseract304.dll",
-        "libtesseract303.dll",
-        "libtesseract302.dll",
-        "libtesseract400.dll",  # Tesseract 4 is still in alpha stage
-        "libtesseract.dll",
-        "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-4.dll",
-        "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-3.dll",
-    ]
-else:
-    libnames += [
-        "libtesseract.so.4",
-        "libtesseract.so.3",
-    ]
-
+libnames = ["@tesseract@/lib/libtesseract.so", "@tesseract@/lib/libtesseract.dylib"]  # Linux name first, then Darwin
 
 g_libtesseract = None
 
@@ -364,12 +331,12 @@ def init(lang=None):
     try:
         if lang:
             lang = lang.encode("utf-8")
-        prefix = None
-        if TESSDATA_PREFIX:  # pragma: no cover
-            prefix = TESSDATA_PREFIX.encode("utf-8")
+
+        prefix = TESSDATA_PREFIX
+
         g_libtesseract.TessBaseAPIInit3(
             ctypes.c_void_p(handle),
-            ctypes.c_char_p(prefix),
+            ctypes.c_char_p(prefix.encode('utf-8')),
             ctypes.c_char_p(lang)
         )
         g_libtesseract.TessBaseAPISetVariable(
diff --git a/src/pyocr/tesseract.py b/src/pyocr/tesseract.py
index 7c30852..44e8446 100644
--- a/src/pyocr/tesseract.py
+++ b/src/pyocr/tesseract.py
@@ -28,8 +28,7 @@ from .builders import DigitBuilder  # backward compatibility
 from .error import TesseractError  # backward compatibility
 from .util import digits_only
 
-# CHANGE THIS IF TESSERACT IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
-TESSERACT_CMD = 'tesseract.exe' if os.name == 'nt' else 'tesseract'
+TESSERACT_CMD = '@tesseract@/bin/tesseract'
 
 TESSDATA_EXTENSION = ".traineddata"
 
diff --git a/tests/tests_cuneiform.py b/tests/tests_cuneiform.py
index 45b7f6a..95f55c6 100644
--- a/tests/tests_cuneiform.py
+++ b/tests/tests_cuneiform.py
@@ -21,7 +21,7 @@ class TestCuneiform(BaseTest):
         # XXX is it useful?
         which.return_value = True
         self.assertTrue(cuneiform.is_available())
-        which.assert_called_once_with("cuneiform")
+        which.assert_called_once_with("@cuneiform@/bin/cuneiform")
 
     @patch("subprocess.Popen")
     def test_version(self, popen):
@@ -54,7 +54,7 @@ class TestCuneiform(BaseTest):
         self.assertIn("eng", langs)
         self.assertIn("fra", langs)
         popen.assert_called_once_with(
-            ["cuneiform", "-l"],
+            ["@cuneiform@/bin/cuneiform", "-l"],
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT
         )
 
@@ -109,7 +109,7 @@ class TestCuneiformTxt(BaseTest):
         output = cuneiform.image_to_string(self.image)
         self.assertEqual(output, self._get_file_content("text").strip())
         popen.assert_called_once_with(
-            ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
+            ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT
         )
@@ -125,7 +125,7 @@ class TestCuneiformTxt(BaseTest):
                                            builder=self.builder)
         self.assertEqual(output, self._get_file_content("text").strip())
         popen.assert_called_once_with(
-            ["cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename,
+            ["@cuneiform@/bin/cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename,
              "-"],
             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT
@@ -142,7 +142,7 @@ class TestCuneiformTxt(BaseTest):
                                            builder=self.builder)
         self.assertEqual(output, self._get_file_content("text").strip())
         popen.assert_called_once_with(
-            ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
+            ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT
         )
@@ -173,7 +173,7 @@ class TestCuneiformTxt(BaseTest):
         output = cuneiform.image_to_string(image, builder=self.builder)
         self.assertEqual(output, self._get_file_content("text").strip())
         popen.assert_called_once_with(
-            ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
+            ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"],
             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT
         )
@@ -227,7 +227,7 @@ class TestCuneiformWordBox(BaseTest):
         output = cuneiform.image_to_string(self.image,
                                            builder=self.builder)
         popen.assert_called_once_with(
-            ["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
+            ["@cuneiform@/bin/cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT
         )
@@ -280,7 +280,7 @@ class TestCuneiformLineBox(BaseTest):
         output = cuneiform.image_to_string(self.image,
                                            builder=self.builder)
         popen.assert_called_once_with(
-            ["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
+            ["@cuneiform@/bin/cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"],
             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT
         )
diff --git a/tests/tests_libtesseract.py b/tests/tests_libtesseract.py
index ad7fdc9..57e7a60 100644
--- a/tests/tests_libtesseract.py
+++ b/tests/tests_libtesseract.py
@@ -165,7 +165,8 @@ class TestLibTesseractRaw(BaseTest):
             args = libtess.TessBaseAPIInit3.call_args[0]
             self.assertEqual(len(args), 3)
             self.assertEqual(args[0].value, self.handle)
-            self.assertEqual(args[1].value, None)
+            # the tessdata path is hardcoded by this patch, so it is never None
+            self.assertIsNotNone(args[1].value)
             self.assertEqual(args[2].value, lang.encode() if lang else None)
 
             self.assertEqual(
@@ -201,7 +202,8 @@ class TestLibTesseractRaw(BaseTest):
             args = libtess.TessBaseAPIInit3.call_args[0]
             self.assertEqual(len(args), 3)
             self.assertEqual(args[0].value, self.handle)
-            self.assertEqual(args[1].value, None)
+            # the tessdata path is hardcoded by this patch, so it is never None
+            self.assertIsNotNone(args[1].value)
             self.assertEqual(args[2].value, lang.encode() if lang else None)
 
             self.assertEqual(
diff --git a/tests/tests_tesseract.py b/tests/tests_tesseract.py
index 1a55567..a24d96f 100644
--- a/tests/tests_tesseract.py
+++ b/tests/tests_tesseract.py
@@ -36,7 +36,7 @@ class TestTesseract(BaseTest):
     def test_available(self, which):
         which.return_value = True
         self.assertTrue(tesseract.is_available())
-        which.assert_called_once_with("tesseract")
+        which.assert_called_once_with("@tesseract@/bin/tesseract")
 
     @patch("subprocess.Popen")
     def test_version_error(self, popen):
@@ -156,7 +156,7 @@ class TestTesseract(BaseTest):
         for lang in ("eng", "fra", "jpn", "osd"):
             self.assertIn(lang, langs)
         popen.assert_called_once_with(
-            ["tesseract", "--list-langs"],
+            ["@tesseract@/bin/tesseract", "--list-langs"],
             startupinfo=None, creationflags=0,
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT
         )
@@ -171,7 +171,7 @@ class TestTesseract(BaseTest):
         self.assertEqual(te.exception.status, 1)
         self.assertEqual("unable to get languages", te.exception.message)
         popen.assert_called_once_with(
-            ["tesseract", "--list-langs"],
+            ["@tesseract@/bin/tesseract", "--list-langs"],
             startupinfo=None, creationflags=0,
             stdout=subprocess.PIPE, stderr=subprocess.STDOUT
         )
@@ -248,7 +248,7 @@ class TestTesseract(BaseTest):
         self.assertEqual(status, 0)
         self.assertEqual(error, message)
         popen.assert_called_once_with(
-            ["tesseract", "input.bmp", "output"],
+            ["@tesseract@/bin/tesseract", "input.bmp", "output"],
             cwd=tmpdir,
             startupinfo=None,
             creationflags=0,
@@ -271,7 +271,7 @@ class TestTesseract(BaseTest):
         self.assertEqual(status, 0)
         self.assertEqual(error, message)
         popen.assert_called_with(
-            ["tesseract", "input2.bmp", "output2", "-l", "fra", "--psm", "3"],
+            ["@tesseract@/bin/tesseract", "input2.bmp", "output2", "-l", "fra", "--psm", "3"],
             cwd=tmpdir,
             startupinfo=None,
             creationflags=0,
@@ -302,7 +302,7 @@ class TestTesseract(BaseTest):
             self.assertEqual(result["angle"], 90)
             self.assertEqual(result["confidence"], 9.30)
             popen.assert_called_once_with(
-                ["tesseract", "input.bmp", "stdout", "--psm", "0"],
+                ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
                 stdin=subprocess.PIPE,
                 shell=False,
                 startupinfo=None,
@@ -338,7 +338,7 @@ class TestTesseract(BaseTest):
             self.assertEqual(result["angle"], 90)
             self.assertEqual(result["confidence"], 9.30)
             popen.assert_called_once_with(
-                ["tesseract", "input.bmp", "stdout", "--psm", "0"],
+                ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
                 stdin=subprocess.PIPE,
                 shell=False,
                 startupinfo=None,
@@ -371,7 +371,7 @@ class TestTesseract(BaseTest):
             self.assertEqual(result["angle"], 90)
             self.assertEqual(result["confidence"], 9.30)
             popen.assert_called_once_with(
-                ["tesseract", "input.bmp", "stdout",
+                ["@tesseract@/bin/tesseract", "input.bmp", "stdout",
                  "--psm", "0", "-l", "osd"],
                 stdin=subprocess.PIPE,
                 shell=False,
@@ -399,7 +399,7 @@ class TestTesseract(BaseTest):
             with self.assertRaises(tesseract.TesseractError) as te:
                 tesseract.detect_orientation(self.image)
             popen.assert_called_once_with(
-                ["tesseract", "input.bmp", "stdout", "--psm", "0"],
+                ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
                 stdin=subprocess.PIPE,
                 shell=False,
                 startupinfo=None,
@@ -433,7 +433,7 @@ class TestTesseract(BaseTest):
             with self.assertRaises(tesseract.TesseractError) as te:
                 tesseract.detect_orientation(self.image)
             popen.assert_called_once_with(
-                ["tesseract", "input.bmp", "stdout", "--psm", "0"],
+                ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
                 stdin=subprocess.PIPE,
                 shell=False,
                 startupinfo=None,
@@ -467,7 +467,7 @@ class TestTesseract(BaseTest):
             self.assertEqual(result["angle"], 90)
             self.assertEqual(result["confidence"], 9.30)
             popen.assert_called_once_with(
-                ["tesseract", "input.bmp", "stdout", "-psm", "0"],
+                ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"],
                 stdin=subprocess.PIPE,
                 shell=False,
                 startupinfo=None,
@@ -500,7 +500,7 @@ class TestTesseract(BaseTest):
             self.assertEqual(result["angle"], 90)
             self.assertEqual(result["confidence"], 9.30)
             popen.assert_called_once_with(
-                ["tesseract", "input.bmp", "stdout", "-psm", "0", "-l", "fra"],
+                ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0", "-l", "fra"],
                 stdin=subprocess.PIPE,
                 shell=False,
                 startupinfo=None,
@@ -527,7 +527,7 @@ class TestTesseract(BaseTest):
             with self.assertRaises(tesseract.TesseractError) as te:
                 tesseract.detect_orientation(self.image)
             popen.assert_called_once_with(
-                ["tesseract", "input.bmp", "stdout", "-psm", "0"],
+                ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"],
                 stdin=subprocess.PIPE,
                 shell=False,
                 startupinfo=None,
@@ -561,7 +561,7 @@ class TestTesseract(BaseTest):
             with self.assertRaises(tesseract.TesseractError) as te:
                 tesseract.detect_orientation(self.image)
             popen.assert_called_once_with(
-                ["tesseract", "input.bmp", "stdout", "-psm", "0"],
+                ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"],
                 stdin=subprocess.PIPE,
                 shell=False,
                 startupinfo=None,