about summary refs log tree commit diff stats
path: root/compare.py
diff options
context:
space:
mode:
author Guillaume Horel <guillaume.horel@serenitascapital.com> 2014-02-28 17:31:19 -0500
committer Guillaume Horel <guillaume.horel@serenitascapital.com> 2014-02-28 17:31:54 -0500
commit 6e694d555e1004da58ec3425d33043b2f1b5f715 (patch)
tree 01fc9f907401f50cf6c869c7e1c057287b2dc405 /compare.py
parent aaa42a8efcd53576ced9bf2311e84d8ff2a5c8cf (diff)
download ocr-layer-curation-6e694d555e1004da58ec3425d33043b2f1b5f715.tar.gz
update with the new functions
Diffstat (limited to 'compare.py')
-rw-r--r-- compare.py 13
1 files changed, 5 insertions, 8 deletions
diff --git a/compare.py b/compare.py
index 5f2352b..492dd53 100644
--- a/compare.py
+++ b/compare.py
@@ -1,21 +1,18 @@
# -*- coding: utf-8 -*-
from wikisource import get_page
-from parsedjvutext import page_sexp, parse_page_sexp
+from parsedjvutext import parse_book
import string_utils as su
-import pdb
wikibook = "Bloy - Le Sang du pauvre, Stock, 1932.djvu".replace(" ", "_")
#wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu".replace(" ", "_")
n = 88
-ocrpage = parse_page_sexp(wikibook, n)
-l1, c1 = ocrpage['words'], ocrpage["coords"]
+ocrpage = parse_book(wikibook, n)
+l1, c1 = zip(*ocrpage[0])
+l1 = list(l1)
+c1 = list(c1)
l2 = get_page(wikibook, n)
-print len(l2.split())
l3 = su.simplify(l2)
C = su.align(l3.split(), l1, c1)
-#pdb.set_trace()
-#sexp = page_sexp(wikibook, n)
-#su.alignment_to_sexp(C[1], sexp, l2.split())
su.print_alignment(l2.split(), l1, c1, C[1])