about | summary | refs | log | tree | commit | diff | stats
path: root/compare.py
diff options
context:
space:
mode:
author Thibaut Horel <thibaut.horel@gmail.com> 2013-08-17 14:59:36 +0200
committer Thibaut Horel <thibaut.horel@gmail.com> 2013-08-17 14:59:36 +0200
commit b5a00a5e914da988dcd81c6d276f7bb22a46aa20 (patch)
tree 366237b3f5f64acb02a9089e0635eca3ddb7300e /compare.py
parent 241e0f3f7cf72f1d771ed0b4651542168b16329e (diff)
download ocr-layer-curation-b5a00a5e914da988dcd81c6d276f7bb22a46aa20.tar.gz
Some tweaks
Diffstat (limited to 'compare.py')
-rw-r--r-- compare.py | 11
1 files changed, 6 insertions, 5 deletions
diff --git a/compare.py b/compare.py
index c37cde9..a458924 100644
--- a/compare.py
+++ b/compare.py
@@ -4,12 +4,13 @@ from wikisource import get_page
from parsedjvutext import parse_page_sexp
import string_utils as su
-wikibook = "Bloy - Le Sang du pauvre, Stock, 1932.djvu".replace(" ", "_")
-# wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu".replace(" ", "_")
+# wikibook = "Bloy - Le Sang du pauvre, Stock, 1932.djvu".replace(" ", "_")
+wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu".replace(" ", "_")
n = 79
ocrpage = parse_page_sexp(wikibook, n)
l1 = ocrpage['words']
-l2 = get_page(wikibook, n).replace(u"’", u"'").split()
-C = su.align(l2, l1)
-su.print_alignment(l2, l1, C[1])
+l2 = get_page(wikibook, n)
+l3 = su.simplify(l2)
+C = su.align(l2.split(), l1)
+su.print_alignment(l3.split(), l1, C[1])