diff --git a/nixos/lib/testing.nix b/nixos/lib/testing.nix index 7fad5cbc3cd9..2efe7a5b879c 100644 --- a/nixos/lib/testing.nix +++ b/nixos/lib/testing.nix @@ -93,7 +93,7 @@ rec { vms = map (m: m.config.system.build.vm) (lib.attrValues nodes); - ocrProg = tesseract.override { enableLanguages = [ "eng" ]; }; + ocrProg = tesseract; # Generate onvenience wrappers for running the test driver # interactively with the specified network, and for starting the diff --git a/pkgs/applications/graphics/tesseract/default.nix b/pkgs/applications/graphics/tesseract/default.nix index 375b09995488..1f1da9a389f2 100644 --- a/pkgs/applications/graphics/tesseract/default.nix +++ b/pkgs/applications/graphics/tesseract/default.nix @@ -1,53 +1,31 @@ -{ stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff -, enableLanguages ? null +{ stdenv, fetchFromGitHub, pkgconfig, leptonica, libpng, libtiff +, icu, pango, opencl-headers }: -with stdenv.lib; - -let - majVersion = "3.02"; - version = "${majVersion}.02"; - - mkLang = lang: sha256: let - src = fetchurl { - url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz"; - inherit sha256; - }; - in "tar xfvz ${src} -C $out/share/ --strip=1"; - - wantLang = name: const (enableLanguages == null || elem name enableLanguages); - - extraLanguages = mapAttrsToList mkLang (filterAttrs wantLang { - cat = "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9"; - rus = "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709"; - spa = "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l"; - nld = "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy"; - eng = "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461"; - slv = "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr"; - jpn = "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9"; - }); -in - stdenv.mkDerivation rec { name = "tesseract-${version}"; + version = "3.04.01"; - src = fetchurl { - url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${version}.tar.gz"; - sha256 = "0g81m9y4iydp7kgr56mlkvjdwpp3mb01q385yhdnyvra7z5kkk96"; + src = fetchFromGitHub { + owner = "tesseract-ocr"; + repo = "tesseract"; + rev = version; + sha256 = "0h1x4z1h86n2gwknd0wck6gykkp99bmm02lg4a47a698g4az6ybv"; }; - buildInputs = [ autoconf automake libtool leptonica libpng libtiff ]; + tessdata = fetchFromGitHub { + owner = "tesseract-ocr"; + repo = "tessdata"; + rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d"; + sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7"; + }; - hardeningDisable = [ "format" ]; + nativeBuildInputs = [ pkgconfig ]; + buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ]; - preConfigure = '' - ./autogen.sh - substituteInPlace "configure" \ - --replace 'LIBLEPT_HEADERSDIR="/usr/local/include /usr/include"' \ - 'LIBLEPT_HEADERSDIR=${leptonica}/include' - ''; + LIBLEPT_HEADERSDIR = "${leptonica}/include"; - postInstall = concatStringsSep "; " extraLanguages; + postInstall = "cp -Rt \"$out/share/tessdata\" \"$tessdata/\"*"; meta = { description = "OCR engine";