blob: 72d05dd3fa36a9a81aa376bcfacd6b5931da38f2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
from parsedjvutext import parse_page_sexp
import sys
def gen_html(book, page_number, page_height=2764):
    """Build area-coordinate strings and indexed words for one page.

    Despite its name, this function returns data structures, not HTML markup.

    Args:
        book: Path of the document handed to ``parse_page_sexp``. A falsy
            value (``None`` or ``""``) falls back to the path that used to
            be hard-coded in this function.
        page_number: Page identifier, forwarded unchanged to
            ``parse_page_sexp``.
        page_height: Value from which the second and fourth entries of each
            coordinate box are subtracted. Defaults to 2764, the constant
            previously hard-coded here.
            NOTE(review): presumably the page height in pixels, used to flip
            the y axis from a bottom-left origin to a top-left origin as
            HTML image maps expect -- confirm against the parser output.

    Returns:
        A pair ``(areas, indexed_words)`` where ``areas`` is a list of
        ``(index, "a,b,c,d")`` tuples holding the transformed coordinates
        joined by commas, and ``indexed_words`` is ``list(enumerate(words))``.
    """
    # Legacy default kept so callers that relied on the old hard-coded
    # document still work when they pass no meaningful path.
    default_book = "../Villiers_de_L'Isle-Adam_-_Tribulat_Bonhomet,_1908.djvu"
    page = parse_page_sexp(book or default_book, page_number)
    coords, words = page["coords"], page["words"]

    areas = []
    for index, coord in enumerate(coords):
        # Work on a copy: mutating the parser's own lists would flip the
        # box a second time if the same parsed data were ever reused.
        flipped = list(coord)
        # The two entries are swapped as well as mirrored, so the smaller
        # value stays in position 1 after the transform.
        flipped[1], flipped[3] = page_height - coord[3], page_height - coord[1]
        areas.append((index, ",".join(map(str, flipped))))
    return areas, list(enumerate(words))
if __name__ == "__main__":
    # Forward at most the first two command-line arguments positionally
    # (book, then page number); the returned value is not used here.
    cli_args = sys.argv[1:3]
    gen_html(*cli_args)
|