diff options
| -rw-r--r-- | parsedjvutext.py | 29 | ||||
| -rw-r--r-- | web/utils.py | 5 |
2 files changed, 26 insertions, 8 deletions
diff --git a/parsedjvutext.py b/parsedjvutext.py index 301b449..3183d7e 100644 --- a/parsedjvutext.py +++ b/parsedjvutext.py @@ -4,6 +4,7 @@ import djvu from djvu.decode import Context from itertools import chain import collections +from PIL import Image def parse_page(page, html=False): s, page_size = page.text.sexpr, page.size[1] @@ -26,8 +27,13 @@ def parse_page(page, html=False): yield c else: pass - return aux(s, html) + return aux(s, html) if s else None +def get_document(djvufile): + c = Context() + document = c.new_document(djvu.decode.FileURI(djvufile)) + document.decoding_job.wait() + return document def parse_book(djvubook, page=None, html=False): """ @@ -35,9 +41,8 @@ def parse_book(djvubook, page=None, html=False): if page is None, returns the whole book. if html is True, coordinates are computed from the bottom of the page """ - c = Context() - document = c.new_document(djvu.decode.FileURI(djvubook)) - document.decoding_job.wait() + document = get_document(djvubook) + if type(page) is int: toparse = [document.pages[page - 1]] elif isinstance(page, collections.Iterable): @@ -45,8 +50,20 @@ def parse_book(djvubook, page=None, html=False): else: toparse = document.pages - return list(zip(*parse_page(page, html=html)) for page in toparse - if page.text.sexpr) + return [parse_page(page, html=html) for page in toparse] + +def image_from_book(djvubook, page): + document = get_document(djvubook) + mode = djvu.decode.RENDER_COLOR + djvu_pixel_format = djvu.decode.PixelFormatRgb() + page = document.pages[page-1] + page_job = page.decode(wait=True) + width, height = page_job.size + rect = (0, 0, width, height) + buf = page_job.render(mode, rect, rect, djvu_pixel_format) + return Image.frombuffer("RGB", (width, height), buf, 'raw', 'RGB', 0, 1) if __name__ == "__main__": book = parse_book(sys.argv[1], page=[10,11], html=True) + im = image_from_book(sys.argv[1], 11) + im.save("test.webp") diff --git a/web/utils.py b/web/utils.py index 7a6bf1a..3db0296 100644 --- a/web/utils.py +++ b/web/utils.py @@ -3,9 +3,10 @@ import sys def gen_html(book, page_number): - book = "../Bloy_-_Le_Sang_du_pauvre,_Stock,_1932.djvu" + book = "../Villiers_de_L'Isle-Adam_-_Tribulat_Bonhomet,_1908.djvu" d = parse_book(book, page=int(page_number), html=True) - words, coords = d[0] + if d[0]: + words, coords = zip(*d[0]) return (list(enumerate(coords)), list(enumerate(words))) |
