From d344cfc46b65763f9e06b3f09c428573614e8fbd Mon Sep 17 00:00:00 2001
From: Guillaume Horel
Date: Mon, 8 Sep 2014 22:18:02 -0400
Subject: get rid of convert_to_htmlcoord for now

---
 utils/djvu_utils.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'utils')

diff --git a/utils/djvu_utils.py b/utils/djvu_utils.py
index 21692a1..a8d40d3 100644
--- a/utils/djvu_utils.py
+++ b/utils/djvu_utils.py
@@ -9,24 +9,22 @@ from PIL import Image
 
 def parse_page(page):
     s = page.text.sexpr
-    def aux(s):
+    def aux(s, page_size):
         if type(s) is djvu.sexpr.ListExpression:
             if len(s) == 0:
                 pass
             if str(s[0].value) == "word":
-                coords = [s[i].value for i in xrange(1, 5)]
+                c = [s[i].value for i in xrange(1, 5)]
+                coords = ",".join(map(str, [c[0], page_size -c[3],
+                                            c[2], page_size - c[1]]))
                 word = s[5].value
                 yield (word.decode("utf-8"), coords)
             else:
-                for c in chain.from_iterable(aux(child) for child in s[5:]):
+                for c in chain.from_iterable(aux(child, page_size) for child in s[5:]):
                     yield c
         else:
             pass
-    return aux(s) if s else None
-
-def convert_to_htmlcoord(coords, page_size):
-    return [",".join(map(str, [c[0], page_size - c[3],
-                               c[2], page_size - c[1]])) for c in coords]
+    return aux(s, page.size[1]) if s else None
 
 def get_document(djvufile):
     c = Context()
-- 
cgit v1.2.3-70-g09d2