about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Guillaume Horel <guillaume.horel@gmail.com> 2014-02-27 23:40:21 -0500
committer Guillaume Horel <guillaume.horel@gmail.com> 2014-02-27 23:40:21 -0500
commit 5b47c960ff350556fa4bc02e62ad1d2e62695765 (patch)
tree 7dadbb34a686d0f82267b732d452e9ea4dae9fa3
parent 3e343111fc525f093f8bd98707000eb44f08a97f (diff)
download ocr-layer-curation-5b47c960ff350556fa4bc02e62ad1d2e62695765.tar.gz
add function to return image from a book
-rw-r--r-- parsedjvutext.py 29
-rw-r--r-- web/utils.py 5
2 files changed, 26 insertions, 8 deletions
diff --git a/parsedjvutext.py b/parsedjvutext.py
index 301b449..3183d7e 100644
--- a/parsedjvutext.py
+++ b/parsedjvutext.py
@@ -4,6 +4,7 @@ import djvu
from djvu.decode import Context
from itertools import chain
import collections
+from PIL import Image
def parse_page(page, html=False):
s, page_size = page.text.sexpr, page.size[1]
@@ -26,8 +27,13 @@ def parse_page(page, html=False):
yield c
else:
pass
- return aux(s, html)
+ return aux(s, html) if s else None
+def get_document(djvufile):
+ c = Context()
+ document = c.new_document(djvu.decode.FileURI(djvufile))
+ document.decoding_job.wait()
+ return document
def parse_book(djvubook, page=None, html=False):
"""
@@ -35,9 +41,8 @@ def parse_book(djvubook, page=None, html=False):
if page is None, returns the whole book.
if html is True, coordinates are computed from the bottom of the page
"""
- c = Context()
- document = c.new_document(djvu.decode.FileURI(djvubook))
- document.decoding_job.wait()
+ document = get_document(djvubook)
+
if type(page) is int:
toparse = [document.pages[page - 1]]
elif isinstance(page, collections.Iterable):
@@ -45,8 +50,20 @@ def parse_book(djvubook, page=None, html=False):
else:
toparse = document.pages
- return list(zip(*parse_page(page, html=html)) for page in toparse
- if page.text.sexpr)
+ return [parse_page(page, html=html) for page in toparse]
+
+def image_from_book(djvubook, page):
+ document = get_document(djvubook)
+ mode = djvu.decode.RENDER_COLOR
+ djvu_pixel_format = djvu.decode.PixelFormatRgb()
+ page = document.pages[page-1]
+ page_job = page.decode(wait=True)
+ width, height = page_job.size
+ rect = (0, 0, width, height)
+ buf = page_job.render(mode, rect, rect, djvu_pixel_format)
+ return Image.frombuffer("RGB", (width, height), buf, 'raw', 'RGB', 0, 1)
if __name__ == "__main__":
book = parse_book(sys.argv[1], page=[10,11], html=True)
+ im = image_from_book(sys.argv[1], 11)
+ im.save("test.webp")
diff --git a/web/utils.py b/web/utils.py
index 7a6bf1a..3db0296 100644
--- a/web/utils.py
+++ b/web/utils.py
@@ -3,9 +3,10 @@ import sys
def gen_html(book, page_number):
- book = "../Bloy_-_Le_Sang_du_pauvre,_Stock,_1932.djvu"
+ book = "../Villiers_de_L'Isle-Adam_-_Tribulat_Bonhomet,_1908.djvu"
d = parse_book(book, page=int(page_number), html=True)
- words, coords = d[0]
+ if d[0]:
+ words, coords = zip(*d[0])
return (list(enumerate(coords)), list(enumerate(words)))