diff options
| -rw-r--r-- | parsedjvutext.py | 14 | ||||
| -rw-r--r-- | web/utils.py | 9 |
2 files changed, 10 insertions, 13 deletions
diff --git a/parsedjvutext.py b/parsedjvutext.py
index a13421a..4211de1 100644
--- a/parsedjvutext.py
+++ b/parsedjvutext.py
@@ -9,23 +9,25 @@ import collections
 
 def parse_page(page, html=False):
     s, page_size = page.text.sexpr, page.size[1]
-    def aux(s):
+    def aux(s, html):
         if type(s) is djvu.sexpr.ListExpression:
             if len(s) == 0:
                 pass
             if str(s[0].value) == "word":
-                coords = [s[i].value for i in xrange(1, 5)]
                 if html:
-                    coords[1] = page_size - coords[1]
-                    coords[3] = page_size - coords[3]
+                    coords = (s[1].value, page_size - s[2].value,
+                              s[3].value, page_size - s[4].value)
+                    coords = ",".join(map(str,coords))
+                else:
+                    coords = [s[i].value for i in xrange(1, 5)]
                 word = s[5].value
                 yield (word, coords)
             else:
-                for c in chain.from_iterable(aux(child) for child in s[5:]):
+                for c in chain.from_iterable(aux(child, html) for child in s[5:]):
                     yield c
         else:
             pass
-    return aux(s)
+    return aux(s, html)
 
 
 def parse_book(djvubook, page=None, html=False):
diff --git a/web/utils.py b/web/utils.py
index 5cc53cf..7a6bf1a 100644
--- a/web/utils.py
+++ b/web/utils.py
@@ -3,16 +3,11 @@ import sys
 
 
 def gen_html(book, page_number):
-    book = "../Villiers_de_L\'Isle-Adam_-_Tribulat_Bonhomet,_1908.djvu"
+    book = "../Bloy_-_Le_Sang_du_pauvre,_Stock,_1932.djvu"
     d = parse_book(book, page=int(page_number), html=True)
     words, coords = d[0]
 
-    def get_areas():
-        for i, coord in enumerate(coords):
-            coord_str = ",".join(map(str, coord))
-            yield i, coord_str
-
-    return list(get_areas()), list(enumerate(words))
+    return (list(enumerate(coords)), list(enumerate(words)))
 
 if __name__ == "__main__":
     gen_html(*sys.argv[1:3])
