diff options
| -rw-r--r-- | djvu_utils.py (renamed from parsedjvutext.py) | 28 | ||||
| l--------- | web/djvu_utils.py | 1 | ||||
| l--------- | web/parsedjvutext.py | 1 |
3 files changed, 14 insertions, 16 deletions
diff --git a/parsedjvutext.py b/djvu_utils.py
index 6bd9950..21692a1 100644
--- a/parsedjvutext.py
+++ b/djvu_utils.py
@@ -6,28 +6,27 @@ from itertools import chain
 import collections
 from PIL import Image
 
-def parse_page(page, html=False):
-    s, page_size = page.text.sexpr, page.size[1]
+def parse_page(page):
+    s = page.text.sexpr
 
-    def aux(s, html):
+    def aux(s):
         if type(s) is djvu.sexpr.ListExpression:
             if len(s) == 0:
                 pass
             if str(s[0].value) == "word":
-                if html:
-                    coords = (s[1].value, page_size - s[4].value,
-                              s[3].value, page_size - s[2].value)
-                    coords = ",".join(map(str,coords))
-                else:
-                    coords = [s[i].value for i in xrange(1, 5)]
+                coords = [s[i].value for i in xrange(1, 5)]
                 word = s[5].value
                 yield (word.decode("utf-8"), coords)
             else:
-                for c in chain.from_iterable(aux(child, html) for child in s[5:]):
+                for c in chain.from_iterable(aux(child) for child in s[5:]):
                     yield c
         else:
             pass
-    return aux(s, html) if s else None
+    return aux(s) if s else None
+
+def convert_to_htmlcoord(coords, page_size):
+    return [",".join(map(str, [c[0], page_size - c[3],
+                               c[2], page_size - c[1]])) for c in coords]
 
 def get_document(djvufile):
     c = Context()
@@ -35,11 +34,10 @@ def get_document(djvufile):
     document.decoding_job.wait()
     return document
 
-def parse_book(djvubook, page=None, html=False):
+def parse_book(djvubook, page=None):
     """
     returns the list of words and coordinates from a djvu book.
     if page is None, returns the whole book.
-    if html is True, coordinates are computed from the bottom of the page
     """
     document = get_document(djvubook)
 
@@ -50,7 +48,7 @@ def parse_book(djvubook, page=None, html=False):
     else:
         toparse = document.pages
 
-    return [parse_page(page, html=html) for page in toparse]
+    return [parse_page(page) for page in toparse]
 
 def image_from_book(djvubook, page):
     document = get_document(djvubook)
@@ -66,4 +64,4 @@ def image_from_book(djvubook, page):
 if __name__ == "__main__":
     book = parse_book(sys.argv[1], page=[10,11], html=True)
     im = image_from_book(sys.argv[1], 11)
-    im.save("test.webp")
+    im.save("test.jpeg")
diff --git a/web/djvu_utils.py b/web/djvu_utils.py
new file mode 120000
index 0000000..0742170
--- /dev/null
+++ b/web/djvu_utils.py
@@ -0,0 +1 @@
+../djvu_utils.py
\ No newline at end of file
diff --git a/web/parsedjvutext.py b/web/parsedjvutext.py
deleted file mode 120000
index e07ccf8..0000000
--- a/web/parsedjvutext.py
+++ /dev/null
@@ -1 +0,0 @@
-../parsedjvutext.py
\ No newline at end of file |
