diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2014-09-07 16:01:05 -0400 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2014-09-07 16:01:05 -0400 |
| commit | cd51d7e0d763ed669a2a45555d64d81e3b2478a1 (patch) | |
| tree | 52d27202e5971f1972c2a3e3430913b93f55b9dd /web/utils.py | |
| parent | d28394833d54a68f5ca13d2edaa261128f6c5170 (diff) | |
| parent | 6283b6582960544dc02e438e739775e3239b802c (diff) | |
| download | ocr-layer-curation-cd51d7e0d763ed669a2a45555d64d81e3b2478a1.tar.gz | |
Merge branch 'master' of horel.org:thibaut/ocr-layer-curation
Conflicts:
web/static/css/style.css
web/templates/index.html
web/utils.py
Diffstat (limited to 'web/utils.py')
| -rw-r--r-- | web/utils.py | 10 |
1 files changed, 5 insertions, 5 deletions
```diff
diff --git a/web/utils.py b/web/utils.py
index 1947f8b..e6f4309 100644
--- a/web/utils.py
+++ b/web/utils.py
@@ -10,11 +10,11 @@ def gen_html(book, page_number):
     d = du.parse_page(page)
     elem, corrected_text = get_page2(open("test.txt").read())
     if d:
-        words, coords = zip(*d)
-        C = su.align(corrected_text.split(), list(words), list(coords))
-        coords = [coords[e[0]] for e in C[1]]
-        coords_html = du.convert_to_htmlcoord(coords, page.size[1])
-        return (list(enumerate(coords_html)), str(elem))
+        orig_words, orig_coords = zip(*d)
+        C = su.align(corrected_words, list(orig_words), list(orig_coords))
+        corr_words = corrected_text.split()
+        orig_coords_html = du.convert_to_htmlcoord(orig_coords, page.size[1])
+        return orig_coords_html, orig_words, corr_words, C[1]
 
 if __name__ == "__main__":
     gen_html(*sys.argv[1:3])
```
