Get rid of convert_to_htmlcoord for now; parse_page does the coordinate conversion inline

author: Guillaume Horel <guillaume.horel@gmail.com> 2014-09-08 22:18:02 -0400
committer: Guillaume Horel <guillaume.horel@gmail.com> 2014-09-08 22:18:02 -0400
commit: d344cfc46b65763f9e06b3f09c428573614e8fbd (patch)
tree: 1e023945c31b284c81560e60337f592a30093138 /utils
parent: 2e1b9a4df2724b2ac61f39dcb3d9cbdf3a0ee306 (diff)
download: ocr-layer-curation-d344cfc46b65763f9e06b3f09c428573614e8fbd.tar.gz
1 file changed, 6 insertions, 8 deletions
diff --git a/utils/djvu_utils.py b/utils/djvu_utils.py
index 21692a1..a8d40d3 100644
--- a/utils/djvu_utils.py
+++ b/utils/djvu_utils.py
@@ -9,24 +9,22 @@ from PIL import Image
 def parse_page(page):
     s = page.text.sexpr
 
-    def aux(s):
+    def aux(s, page_size):
         if type(s) is djvu.sexpr.ListExpression:
             if len(s) == 0:
                 pass
             if str(s[0].value) == "word":
-                coords = [s[i].value for i in xrange(1, 5)]
+                c = [s[i].value for i in xrange(1, 5)]
+                coords = ",".join(map(str, [c[0], page_size -c[3],
+                                            c[2], page_size - c[1]]))
                 word = s[5].value
                 yield (word.decode("utf-8"), coords)
             else:
-                for c in chain.from_iterable(aux(child) for child in s[5:]):
+                for c in chain.from_iterable(aux(child, page_size) for child in s[5:]):
                     yield c
         else:
             pass
-    return aux(s) if s else None
-
-def convert_to_htmlcoord(coords, page_size):
-    return [",".join(map(str, [c[0], page_size - c[3],
-                               c[2], page_size - c[1]])) for c in coords]
+    return aux(s, page.size[1]) if s else None
 
 def get_document(djvufile):
     c = Context()
author	Guillaume Horel <guillaume.horel@gmail.com>	2014-09-08 22:18:02 -0400
committer	Guillaume Horel <guillaume.horel@gmail.com>	2014-09-08 22:18:02 -0400
commit	d344cfc46b65763f9e06b3f09c428573614e8fbd (patch)
tree	1e023945c31b284c81560e60337f592a30093138 /utils
parent	2e1b9a4df2724b2ac61f39dcb3d9cbdf3a0ee306 (diff)
download	ocr-layer-curation-d344cfc46b65763f9e06b3f09c428573614e8fbd.tar.gz