Reorganize the code

Hopefully I did it right. We now have two packages: one for the server (web) and one for the actual library (utils).
author: Guillaume Horel <guillaume.horel@gmail.com> 2014-09-07 18:21:37 -0400
committer: Guillaume Horel <guillaume.horel@gmail.com> 2014-09-07 18:24:08 -0400
commit: 0e8b0c88a4d3009cbbea695f606e49faef27f373 (patch)
tree: 85a14a7aef3ee36e73544382c6fdec8aa6bd375c
parent: 74604d7b8ae98b125f1c800da753f8ab67474eb5 (diff)
download: ocr-layer-curation-0e8b0c88a4d3009cbbea695f606e49faef27f373.tar.gz
9 files changed, 29 insertions, 37 deletions
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/utils/__init__.py
diff --git a/djvu_utils.py b/utils/djvu_utils.py
index 21692a1..21692a1 100644
--- a/djvu_utils.py
+++ b/utils/djvu_utils.py
diff --git a/string_utils.py b/utils/string_utils.py
index b6c8ce0..b6c8ce0 100644
--- a/string_utils.py
+++ b/utils/string_utils.py
diff --git a/wikisource.py b/utils/wikisource.py
index af72d34..589c88e 100644
--- a/wikisource.py
+++ b/utils/wikisource.py
@@ -4,7 +4,8 @@ import sys
 from bs4 import BeautifulSoup, NavigableString
 from itertools import takewhile, count
 from types import SliceType
-from string_utils import align
+import string_utils as su
+import djvu_utils as du
 
 URL = "http://fr.wikisource.org/w/index.php"
 
@@ -71,10 +72,22 @@ def get_pages(title, begin=1, end=None):
         return takewhile(lambda x: x is not None,
                          (get_page(title, i) for i in count(begin)))
 
+def gen_html(book, page_number):
+    doc = du.get_document(book)
+    page = doc.pages[int(page_number)-1]
+    d = du.parse_page(page)
+    corrected_text = get_page(book, int(page_number))
+    corrected_words = su.simplify(corrected_text).split()
+    if d:
+        orig_words, orig_coords = zip(*d)
+        C = su.align(corrected_words, list(orig_words), list(orig_coords))
+        corr_words = corrected_text.split()
+        orig_coords_html = du.convert_to_htmlcoord(orig_coords, page.size[1])
+    return orig_coords_html, orig_words, corr_words, C[1]
 
 if __name__ == "__main__":
     b = BeautifulSoup("<a>asd</a>")
     c = HtmlText(b)
     print type(c[0])
-    print align(c, [u"asd"], None)
+    print su.align(c, [u"asd"], None)
     print c[0:1]
diff --git a/web/__init__.py b/web/__init__.py
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/web/__init__.py
@@ -0,0 +1,2 @@
+
+
diff --git a/web/djvu_utils.py b/web/djvu_utils.py
deleted file mode 120000
index 0742170..0000000
--- a/web/djvu_utils.py
+++ /dev/null
@@ -1 +0,0 @@
-../djvu_utils.py
-\ No newline at end of file
diff --git a/web/main.py b/web/server.py
index a6826c1..1e67ad4 100644
--- a/web/main.py
+++ b/web/server.py
@@ -2,22 +2,22 @@ import tornado.httpserver
 from tornado.web import RequestHandler, Application
 import tornado.ioloop
 from settings import settings
-import utils
-from djvu_utils import image_from_book
+from utils.djvu_utils import image_from_book
+from utils.wikisource import gen_html
 import io
 
 class MainHandler(RequestHandler):
 
     def get(self, page_number):
         orig_coords, orig_words, corr_words, align = \
-            utils.gen_html(self.settings["book"], page_number)
+            gen_html(self.settings["book"], page_number)
         self.render("index.html", page_number=page_number, orig_coords=orig_coords,
                     orig_words=orig_words, corr_words=corr_words, align=align)
 
 class ImageHandler(RequestHandler):
 
     def get(self, page_number):
-        im = image_from_book("../" + self.settings["book"], int(page_number))
+        im = image_from_book(self.settings["book"], int(page_number))
         self.set_header('Content-Type', 'image/jpg')
         img_buff = io.BytesIO()
         im.save(img_buff, format="JPEG")
@@ -25,12 +25,10 @@ class ImageHandler(RequestHandler):
         self.write(img_buff.read())
         self.finish()
 
-application = Application([
-    (r'/(\d+)/?', MainHandler),
-    (r'/(\d+)\.jpg/?', ImageHandler)]
-    , **settings)
-
-if __name__ == '__main__':
+def run():
+    application = Application([
+        (r'/(\d+)/?', MainHandler),
+        (r'/(\d+)\.jpg/?', ImageHandler)], **settings)
     http_server = tornado.httpserver.HTTPServer(application)
     http_server.listen(8888)
     print "Listening on 8888"
diff --git a/web/settings.py b/web/settings.py
index 5a8c9aa..32693b8 100644
--- a/web/settings.py
+++ b/web/settings.py
@@ -1,9 +1,9 @@
 settings = {
     "debug": True,
-    "template_path": "templates",
-    "static_path": "static",
+    "template_path": "web/templates",
+    "static_path": "web/static",
     "cookie_secret": "toto",
     "login_url": "/login",
-    #"book": "Bloy_-_Le_Sang_du_pauvre,_Stock,_1932.djvu"
-    "book": "Villiers_de_L'Isle-Adam_-_Tribulat_Bonhomet,_1908.djvu"
+    "book": "Bloy_-_Le_Sang_du_pauvre,_Stock,_1932.djvu"
+    #"book": "Villiers_de_L'Isle-Adam_-_Tribulat_Bonhomet,_1908.djvu"
 }
diff --git a/web/utils.py b/web/utils.py
deleted file mode 100644
index 7e20858..0000000
--- a/web/utils.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import djvu_utils as du
-import sys
-import string_utils as su
-from wikisource import get_page
-
-def gen_html(book, page_number):
-    doc = du.get_document("../" + book)
-    page = doc.pages[int(page_number)-1]
-    d = du.parse_page(page)
-    corrected_text = get_page(book, int(page_number))
-    corrected_words = su.simplify(corrected_text).split()
-    if d:
-        orig_words, orig_coords = zip(*d)
-        C = su.align(corrected_words, list(orig_words), list(orig_coords))
-        corr_words = corrected_text.split()
-        orig_coords_html = du.convert_to_htmlcoord(orig_coords, page.size[1])
-    return orig_coords_html, orig_words, corr_words, C[1]
-
-if __name__ == "__main__":
-    gen_html(*sys.argv[1:3])
author	Guillaume Horel <guillaume.horel@gmail.com>	2014-09-07 18:21:37 -0400
committer	Guillaume Horel <guillaume.horel@gmail.com>	2014-09-07 18:24:08 -0400
commit	0e8b0c88a4d3009cbbea695f606e49faef27f373 (patch)
tree	85a14a7aef3ee36e73544382c6fdec8aa6bd375c
parent	74604d7b8ae98b125f1c800da753f8ab67474eb5 (diff)
download	ocr-layer-curation-0e8b0c88a4d3009cbbea695f606e49faef27f373.tar.gz