diff options
| -rw-r--r-- | web/main.py | 4 | ||||
| -rw-r--r-- | web/settings.py | 3 | ||||
| -rw-r--r-- | web/utils.py | 21 |
3 files changed, 19 insertions, 9 deletions
diff --git a/web/main.py b/web/main.py
index 582d33c..206eb86 100644
--- a/web/main.py
+++ b/web/main.py
@@ -3,7 +3,7 @@ from tornado.web import RequestHandler, Application
 import tornado.ioloop
 from settings import settings
 import utils
-from parsedjvutext import image_from_book
+from djvu_utils import image_from_book
 import io
 
 class MainHandler(RequestHandler):
@@ -16,7 +16,7 @@ class MainHandler(RequestHandler):
 
 class ImageHandler(RequestHandler):
     def get(self, page_number):
-        im = image_from_book(self.settings["book"], int(page_number))
+        im = image_from_book("../" + self.settings["book"], int(page_number))
         self.set_header('Content-Type', 'image/jpg')
         img_buff = io.BytesIO()
         im.save(img_buff, format="JPEG")
diff --git a/web/settings.py b/web/settings.py
index 98c490b..5a8c9aa 100644
--- a/web/settings.py
+++ b/web/settings.py
@@ -4,5 +4,6 @@ settings = {
     "static_path": "static",
     "cookie_secret": "toto",
     "login_url": "/login",
-    "book": "../Bloy_-_Le_Sang_du_pauvre,_Stock,_1932.djvu"
+    #"book": "Bloy_-_Le_Sang_du_pauvre,_Stock,_1932.djvu"
+    "book": "Villiers_de_L'Isle-Adam_-_Tribulat_Bonhomet,_1908.djvu"
 }
diff --git a/web/utils.py b/web/utils.py
index bb9a4fe..8522841 100644
--- a/web/utils.py
+++ b/web/utils.py
@@ -1,12 +1,21 @@
-from parsedjvutext import parse_book
+import djvu_utils as du
 import sys
-
+import string_utils as su
+from wikisource import get_page
 
 def gen_html(book, page_number):
-    d = parse_book(book, page=int(page_number), html=True)
-    if d[0]:
-        words, coords = zip(*d[0])
-        return (list(enumerate(coords)), list(enumerate(words)))
+    doc = du.get_document("../" + book)
+    page = doc.pages[int(page_number)-1]
+    d = du.parse_page(page)
+    corrected_text = get_page(book, int(page_number))
+    corrected_words = su.simplify(corrected_text).split()
+    if d:
+        words, coords = zip(*d)
+        C = su.align(corrected_words, list(words), list(coords))
+        r = su.alignment_to_sexp(corrected_text.split(), words, coords, C[1])
+        corrected_words, coords = zip(*r)
+        coords_html = du.convert_to_htmlcoord(coords, page.size[1])
+        return (list(enumerate(coords_html)), list(enumerate(corrected_words)))
 
 if __name__ == "__main__":
     gen_html(*sys.argv[1:3])
