about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  parsedjvutext.py  14
-rw-r--r--  web/utils.py  9
2 files changed, 10 insertions, 13 deletions
diff --git a/parsedjvutext.py b/parsedjvutext.py
index a13421a..4211de1 100644
--- a/parsedjvutext.py
+++ b/parsedjvutext.py
@@ -9,23 +9,25 @@ import collections
def parse_page(page, html=False):
s, page_size = page.text.sexpr, page.size[1]
- def aux(s):
+ def aux(s, html):
if type(s) is djvu.sexpr.ListExpression:
if len(s) == 0:
pass
if str(s[0].value) == "word":
- coords = [s[i].value for i in xrange(1, 5)]
if html:
- coords[1] = page_size - coords[1]
- coords[3] = page_size - coords[3]
+ coords = (s[1].value, page_size - s[2].value,
+ s[3].value, page_size - s[4].value)
+ coords = ",".join(map(str,coords))
+ else:
+ coords = [s[i].value for i in xrange(1, 5)]
word = s[5].value
yield (word, coords)
else:
- for c in chain.from_iterable(aux(child) for child in s[5:]):
+ for c in chain.from_iterable(aux(child, html) for child in s[5:]):
yield c
else:
pass
- return aux(s)
+ return aux(s, html)
def parse_book(djvubook, page=None, html=False):
diff --git a/web/utils.py b/web/utils.py
index 5cc53cf..7a6bf1a 100644
--- a/web/utils.py
+++ b/web/utils.py
@@ -3,16 +3,11 @@ import sys
def gen_html(book, page_number):
- book = "../Villiers_de_L\'Isle-Adam_-_Tribulat_Bonhomet,_1908.djvu"
+ book = "../Bloy_-_Le_Sang_du_pauvre,_Stock,_1932.djvu"
d = parse_book(book, page=int(page_number), html=True)
words, coords = d[0]
- def get_areas():
- for i, coord in enumerate(coords):
- coord_str = ",".join(map(str, coord))
- yield i, coord_str
-
- return list(get_areas()), list(enumerate(words))
+ return (list(enumerate(coords)), list(enumerate(words)))
if __name__ == "__main__":
gen_html(*sys.argv[1:3])