diff options
| author | Guillaume Horel <guillaume.horel@gmail.com> | 2014-02-27 23:40:21 -0500 |
|---|---|---|
| committer | Guillaume Horel <guillaume.horel@gmail.com> | 2014-02-27 23:40:21 -0500 |
| commit | 5b47c960ff350556fa4bc02e62ad1d2e62695765 (patch) | |
| tree | 7dadbb34a686d0f82267b732d452e9ea4dae9fa3 /parsedjvutext.py | |
| parent | 3e343111fc525f093f8bd98707000eb44f08a97f (diff) | |
| download | ocr-layer-curation-5b47c960ff350556fa4bc02e62ad1d2e62695765.tar.gz | |
add function to return image from a book
Diffstat (limited to 'parsedjvutext.py')
| -rw-r--r-- | parsedjvutext.py | 29 |
1 files changed, 23 insertions, 6 deletions
diff --git a/parsedjvutext.py b/parsedjvutext.py index 301b449..3183d7e 100644 --- a/parsedjvutext.py +++ b/parsedjvutext.py @@ -4,6 +4,7 @@ import djvu from djvu.decode import Context from itertools import chain import collections +from PIL import Image def parse_page(page, html=False): s, page_size = page.text.sexpr, page.size[1] @@ -26,8 +27,13 @@ def parse_page(page, html=False): yield c else: pass - return aux(s, html) + return aux(s, html) if s else None +def get_document(djvufile): + c = Context() + document = c.new_document(djvu.decode.FileURI(djvufile)) + document.decoding_job.wait() + return document def parse_book(djvubook, page=None, html=False): """ @@ -35,9 +41,8 @@ def parse_book(djvubook, page=None, html=False): if page is None, returns the whole book. if html is True, coordinates are computed from the bottom of the page """ - c = Context() - document = c.new_document(djvu.decode.FileURI(djvubook)) - document.decoding_job.wait() + document = get_document(djvubook) + if type(page) is int: toparse = [document.pages[page - 1]] elif isinstance(page, collections.Iterable): @@ -45,8 +50,20 @@ def parse_book(djvubook, page=None, html=False): else: toparse = document.pages - return list(zip(*parse_page(page, html=html)) for page in toparse - if page.text.sexpr) + return [parse_page(page, html=html) for page in toparse] + +def image_from_book(djvubook, page): + document = get_document(djvubook) + mode = djvu.decode.RENDER_COLOR + djvu_pixel_format = djvu.decode.PixelFormatRgb() + page = document.pages[page-1] + page_job = page.decode(wait=True) + width, height = page_job.size + rect = (0, 0, width, height) + buf = page_job.render(mode, rect, rect, djvu_pixel_format) + return Image.frombuffer("RGB", (width, height), buf, 'raw', 'RGB', 0, 1) if __name__ == "__main__": book = parse_book(sys.argv[1], page=[10,11], html=True) + im = image_from_book(sys.argv[1], 11) + im.save("test.webp") |
