aboutsummaryrefslogtreecommitdiffstats
path: root/web/utils.py
diff options
context:
space:
mode:
author: Thibaut Horel <thibaut.horel@gmail.com> 2014-02-27 11:24:22 -0500
committer: Thibaut Horel <thibaut.horel@gmail.com> 2014-02-27 11:24:22 -0500
commit: 473ef7e26fc8d2c6b26e66b80d50e49c18fa24f8 (patch)
tree: a33e209a563490affe14a7b962b6774a008eaa64 /web/utils.py
parent: c40ad3ecef221e3f3d6a8633687c896603d493a0 (diff)
download: ocr-layer-curation-473ef7e26fc8d2c6b26e66b80d50e49c18fa24f8.tar.gz
Basic tornado app displaying a page image and associated text side by side
Diffstat (limited to 'web/utils.py')
-rw-r--r-- web/utils.py 19
1 files changed, 19 insertions, 0 deletions
diff --git a/web/utils.py b/web/utils.py
new file mode 100644
index 0000000..72d05dd
--- /dev/null
+++ b/web/utils.py
@@ -0,0 +1,19 @@
+from parsedjvutext import parse_page_sexp
+import sys
+
+
+def gen_html(book, page_number):
+ book = "../Villiers_de_L\'Isle-Adam_-_Tribulat_Bonhomet,_1908.djvu"
+ d = parse_page_sexp(book, page_number)
+ coords, words = d["coords"], d["words"]
+
+ def get_areas():
+ for i, coord in enumerate(coords):
+ coord[1], coord[3] = 2764 - coord[3], 2764 - coord[1]
+ coord_str = ",".join(map(str, coord))
+ yield i, coord_str
+
+ return list(get_areas()), list(enumerate(words))
+
+if __name__ == "__main__":
+ gen_html(*sys.argv[1:3])