diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2013-08-17 18:25:04 +0200 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2013-08-17 18:25:04 +0200 |
| commit | f633ea807ef9a83f84f41767e71fad71656a4439 (patch) | |
| tree | 5dca000d4892c4f23c61f46cd90603ea0ae7091e /compare.py | |
| parent | b5a00a5e914da988dcd81c6d276f7bb22a46aa20 (diff) | |
| download | ocr-layer-curation-f633ea807ef9a83f84f41767e71fad71656a4439.tar.gz | |
Take line jumps into account when grouping words
Diffstat (limited to 'compare.py')
| -rw-r--r-- | compare.py | 8 |
1 files changed, 4 insertions, 4 deletions
```diff
@@ -7,10 +7,10 @@ import string_utils as su
 # wikibook = "Bloy - Le Sang du pauvre, Stock, 1932.djvu".replace(" ", "_")
 wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu".replace(" ", "_")
-n = 79
+n = 88
 ocrpage = parse_page_sexp(wikibook, n)
-l1 = ocrpage['words']
+l1, c1 = ocrpage['words'], ocrpage["coords"]
 l2 = get_page(wikibook, n)
 l3 = su.simplify(l2)
-C = su.align(l2.split(), l1)
-su.print_alignment(l3.split(), l1, C[1])
+C = su.align(l3.split(), l1, c1)
+su.print_alignment(l2.split(), l1, c1, C[1])
```
