From d28394833d54a68f5ca13d2edaa261128f6c5170 Mon Sep 17 00:00:00 2001 From: Thibaut Horel Date: Sun, 7 Sep 2014 15:55:27 -0400 Subject: Compute alignment on the final (html formatted) text --- web/static/css/style.css | 18 ++++++++++++++++++ web/templates/index.html | 5 ++--- web/templates/layout.html | 2 +- web/utils.py | 15 +++++++-------- 4 files changed, 28 insertions(+), 12 deletions(-) (limited to 'web') diff --git a/web/static/css/style.css b/web/static/css/style.css index e42975d..ec73fd0 100644 --- a/web/static/css/style.css +++ b/web/static/css/style.css @@ -10,3 +10,21 @@ span:hover { float: left; margin-right: 1em; } + +#text { + width: 460px; + float: left; +} + +.pagetext p { + text-align: justify; + -moz-hyphens: auto; + margin: 0; + text-indent: 1.5em; +} + +.pagetext { + padding: 3.5em 2em; + font-size: 18px; + line-height: 180%; +} diff --git a/web/templates/index.html b/web/templates/index.html index b3b5fe0..dc286aa 100644 --- a/web/templates/index.html +++ b/web/templates/index.html @@ -5,10 +5,9 @@ {% for id, coords in areas %} - {% end %} + {% end %}
- {% for id, word in words %} - {{word}} {% end %} + {% raw words %}
{% end %} diff --git a/web/templates/layout.html b/web/templates/layout.html index ff4077d..2fef4d3 100644 --- a/web/templates/layout.html +++ b/web/templates/layout.html @@ -1,5 +1,5 @@ - + diff --git a/web/utils.py b/web/utils.py index 8522841..1947f8b 100644 --- a/web/utils.py +++ b/web/utils.py @@ -1,21 +1,20 @@ import djvu_utils as du import sys import string_utils as su -from wikisource import get_page +from wikisource import get_page2 + def gen_html(book, page_number): doc = du.get_document("../" + book) - page = doc.pages[int(page_number)-1] + page = doc.pages[int(page_number) - 1] d = du.parse_page(page) - corrected_text = get_page(book, int(page_number)) - corrected_words = su.simplify(corrected_text).split() + elem, corrected_text = get_page2(open("test.txt").read()) if d: words, coords = zip(*d) - C = su.align(corrected_words, list(words), list(coords)) - r = su.alignment_to_sexp(corrected_text.split(), words, coords, C[1]) - corrected_words, coords = zip(*r) + C = su.align(corrected_text.split(), list(words), list(coords)) + coords = [coords[e[0]] for e in C[1]] coords_html = du.convert_to_htmlcoord(coords, page.size[1]) - return (list(enumerate(coords_html)), list(enumerate(corrected_words))) + return (list(enumerate(coords_html)), str(elem)) if __name__ == "__main__": gen_html(*sys.argv[1:3]) -- cgit v1.2.3-70-g09d2