aboutsummaryrefslogtreecommitdiffstats
path: root/parsedjvutext.py
diff options
context:
space:
mode:
authorGuillaume Horel <guillaume.horel@serenitascapital.com>2014-02-27 13:58:22 -0500
committerGuillaume Horel <guillaume.horel@serenitascapital.com>2014-02-27 13:58:22 -0500
commit7d51499ee70c2795ba897981e7ce4f69ce61dd28 (patch)
tree8581dce985d102a305b555435dc9ee25ae36b44c /parsedjvutext.py
parentf96752448a537bd6a3378a83ab0e8476653ec59c (diff)
downloadocr-layer-curation-7d51499ee70c2795ba897981e7ce4f69ce61dd28.tar.gz
generate more useful html coordinates
Diffstat (limited to 'parsedjvutext.py')
-rw-r--r--parsedjvutext.py14
1 files changed, 8 insertions, 6 deletions
diff --git a/parsedjvutext.py b/parsedjvutext.py
index a13421a..4211de1 100644
--- a/parsedjvutext.py
+++ b/parsedjvutext.py
@@ -9,23 +9,25 @@ import collections
def parse_page(page, html=False):
s, page_size = page.text.sexpr, page.size[1]
- def aux(s):
+ def aux(s, html):
if type(s) is djvu.sexpr.ListExpression:
if len(s) == 0:
pass
if str(s[0].value) == "word":
- coords = [s[i].value for i in xrange(1, 5)]
if html:
- coords[1] = page_size - coords[1]
- coords[3] = page_size - coords[3]
+ coords = (s[1].value, page_size - s[2].value,
+ s[3].value, page_size - s[4].value)
+ coords = ",".join(map(str,coords))
+ else:
+ coords = [s[i].value for i in xrange(1, 5)]
word = s[5].value
yield (word, coords)
else:
- for c in chain.from_iterable(aux(child) for child in s[5:]):
+ for c in chain.from_iterable(aux(child, html) for child in s[5:]):
yield c
else:
pass
- return aux(s)
+ return aux(s, html)
def parse_book(djvubook, page=None, html=False):