From 6d386d892ffde28d051cf5ba066391c8834dc3c6 Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Thu, 27 Feb 2014 12:03:28 -0500
Subject: Simplify parse_book a bit, also making it more natural to use

---
 parsedjvutext.py | 22 ++++++++++------------
 web/utils.py     |  2 +-
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/parsedjvutext.py b/parsedjvutext.py
index 11be498..598151f 100644
--- a/parsedjvutext.py
+++ b/parsedjvutext.py
@@ -65,19 +65,17 @@ def parse_book(djvubook, page=None, html=False):
         toparse = [document.pages[page - 1]]
     else:
         toparse = document.pages
-    words = [[]] * len(toparse)
-    coords = [[]] * len(toparse)
-    page_size = None
 
-    for i, page in enumerate(toparse):
-        if page.text.sexpr:
-            if html:
-                page_size = page.size[1]
-            gen = parse_page_sexp(page.text.sexpr, page_size)
-            word_coords = zip(*gen)
-            words[i] = word_coords[0]
-            coords[i] = word_coords[1]
-    return {"words": words, "coords": coords}
+    def gen_pages():
+        page_size = None
+        for i, page in enumerate(toparse):
+            if page.text.sexpr:
+                if html:
+                    page_size = page.size[1]
+                gen = parse_page_sexp(page.text.sexpr, page_size)
+                yield zip(*gen)
+
+    return list(gen_pages())
 
 if __name__ == "__main__":
     book = parse_book(sys.argv[1])
diff --git a/web/utils.py b/web/utils.py
index 583cd1c..5cc53cf 100644
--- a/web/utils.py
+++ b/web/utils.py
@@ -5,7 +5,7 @@ import sys
 def gen_html(book, page_number):
     book = "../Villiers_de_L\'Isle-Adam_-_Tribulat_Bonhomet,_1908.djvu"
     d = parse_book(book, page=int(page_number), html=True)
-    coords, words = d["coords"][0], d["words"][0]
+    words, coords = d[0]
 
     def get_areas():
         for i, coord in enumerate(coords):
-- 
cgit v1.2.3-70-g09d2