aboutsummaryrefslogtreecommitdiffstats
path: root/compare.py
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2013-08-17 18:25:04 +0200
committerThibaut Horel <thibaut.horel@gmail.com>2013-08-17 18:25:04 +0200
commitf633ea807ef9a83f84f41767e71fad71656a4439 (patch)
tree5dca000d4892c4f23c61f46cd90603ea0ae7091e /compare.py
parentb5a00a5e914da988dcd81c6d276f7bb22a46aa20 (diff)
downloadocr-layer-curation-f633ea807ef9a83f84f41767e71fad71656a4439.tar.gz
Take line jumps into account when grouping words
Diffstat (limited to 'compare.py')
-rw-r--r--compare.py8
1 files changed, 4 insertions, 4 deletions
diff --git a/compare.py b/compare.py
index a458924..7305517 100644
--- a/compare.py
+++ b/compare.py
@@ -7,10 +7,10 @@ import string_utils as su
# wikibook = "Bloy - Le Sang du pauvre, Stock, 1932.djvu".replace(" ", "_")
wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu".replace(" ", "_")
-n = 79
+n = 88
ocrpage = parse_page_sexp(wikibook, n)
-l1 = ocrpage['words']
+l1, c1 = ocrpage['words'], ocrpage["coords"]
l2 = get_page(wikibook, n)
l3 = su.simplify(l2)
-C = su.align(l2.split(), l1)
-su.print_alignment(l3.split(), l1, C[1])
+C = su.align(l3.split(), l1, c1)
+su.print_alignment(l2.split(), l1, c1, C[1])