diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2014-02-27 11:24:22 -0500 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2014-02-27 11:24:22 -0500 |
| commit | 473ef7e26fc8d2c6b26e66b80d50e49c18fa24f8 (patch) | |
| tree | a33e209a563490affe14a7b962b6774a008eaa64 /web/utils.py | |
| parent | c40ad3ecef221e3f3d6a8633687c896603d493a0 (diff) | |
| download | ocr-layer-curation-473ef7e26fc8d2c6b26e66b80d50e49c18fa24f8.tar.gz | |
Basic tornado app displaying a page image and associated text side by side
Diffstat (limited to 'web/utils.py')
| -rw-r--r-- | web/utils.py | 19 |
1 files changed, 19 insertions, 0 deletions
"""Helpers for web/utils.py: turn the parsed text layer of a page into
word areas and indexed words for display next to the page image."""
import sys

from parsedjvutext import parse_page_sexp


def gen_html(book, page_number, page_height=2764):
    """Return the word areas and the indexed words of one page.

    Args:
        book: Currently ignored -- see the NOTE(review) in the body.
        page_number: Page to parse; forwarded unchanged to
            ``parse_page_sexp``.
        page_height: Value from which the second and fourth coordinates
            are subtracted. Defaults to 2764, the constant this function
            used to hard-code (presumably the page image height in
            pixels -- TODO confirm).

    Returns:
        A pair ``(areas, words)``. ``areas`` is a list of
        ``(index, coord_str)`` tuples, where ``coord_str`` is the flipped
        coordinates joined with commas. ``words`` is
        ``list(enumerate(words))`` for the words returned by the parser.
    """
    # NOTE(review): the ``book`` argument is overwritten with a fixed path,
    # so every caller gets this document. Kept as-is to preserve behavior;
    # drop this line once callers pass a real path.
    book = "../Villiers_de_L\'Isle-Adam_-_Tribulat_Bonhomet,_1908.djvu"
    page = parse_page_sexp(book, page_number)
    coords, words = page["coords"], page["words"]

    areas = []
    for index, coord in enumerate(coords):
        # Flip a copy: the original rewrote the parser's lists in place,
        # which would flip them again if that data were ever reused.
        flipped = list(coord)
        # Entries 1 and 3 swap places while being mirrored, which keeps
        # their relative order after the flip.
        flipped[1], flipped[3] = page_height - coord[3], page_height - coord[1]
        areas.append((index, ",".join(map(str, flipped))))

    return areas, list(enumerate(words))


if __name__ == "__main__":
    # NOTE(review): command-line arguments arrive as strings and the result
    # is discarded; confirm parse_page_sexp accepts a string page number.
    gen_html(*sys.argv[1:3])
