From 9a0a0c92c7e7a1f22d454dc641ccd4d5e55dc187 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Llu=C3=ADs=20Batlle=20i=20Rossell?=
Date: Sun, 24 Apr 2011 20:01:19 +0000
Subject: [PATCH] Adding training results files for some languages to tesseract to be able to do OCR directly.

svn path=/nixpkgs/trunk/; revision=26956
---
Reviewer notes (free-form text between the separator above and the diff is
not part of the change itself):

* `f lang sha256` calls fetchurl for `${lang}.traineddata.gz` with the given
  sha256 and evaluates to one shell command string that gunzips the download
  into `$out/share/tessdata/${lang}.traineddata`.
* `extraLanguages` is an indented string holding one such command per line
  for the languages cat, rus, spa and nld; the second hunk assigns it to
  `postInstall` of the derivation.
* NOTE(review): no `mkdir -p $out/share/tessdata` is visible in this patch, so
  the commands presumably rely on the install step having already created
  that directory -- confirm against the package's install output.
* NOTE(review): line breaks, blank context lines and indentation below were
  reconstructed from a whitespace-collapsed copy of this patch; verify the
  context lines against the target file before applying.

 .../graphics/tesseract/default.nix | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/pkgs/applications/graphics/tesseract/default.nix b/pkgs/applications/graphics/tesseract/default.nix
index 80e0514d38fb..07a160a2e486 100644
--- a/pkgs/applications/graphics/tesseract/default.nix
+++ b/pkgs/applications/graphics/tesseract/default.nix
@@ -1,5 +1,22 @@
 { stdenv, fetchurl, libtiff }:
 
+let
+  f = lang : sha256 : let
+      src = fetchurl {
+        url = "http://tesseract-ocr.googlecode.com/files/${lang}.traineddata.gz";
+        inherit sha256;
+      };
+    in
+      "gunzip -c ${src} > $out/share/tessdata/${lang}.traineddata";
+
+  extraLanguages = ''
+    ${f "cat" "1qndk8qygw9bq7nzn7kzgxkm3jhlq7jgvdqpj5id4rrcaavjvifw"}
+    ${f "rus" "0yjzks189bgcmi2vr4v0l0fla11qdrw3cb1nvpxl9mdis8qr9vcc"}
+    ${f "spa" "1q1hw3qi95q5ww3l02fbhjqacxm34cp65fkbx10wjdcg0s5p9q2x"}
+    ${f "nld" "0cbqfhl2rwb1mg4y1140nw2vhhcilc0nk7bfbnxw6bzj1y5n49i8"}
+  '';
+in
+
 stdenv.mkDerivation {
   name = "tesseract-3.0.0";
 
@@ -10,6 +27,8 @@ stdenv.mkDerivation {
 
   buildInputs = [ libtiff ];
 
+  postInstall = extraLanguages;
+
   meta = {
     description = "OCR engine";
     homepage = http://code.google.com/p/tesseract-ocr/;