From 424aa29f600bc17c8391a3802206385962648519 Mon Sep 17 00:00:00 2001
From: Guillaume Horel <guillaume.horel@gmail.com>
Date: Sat, 1 Mar 2014 15:24:31 -0500
Subject: reorganize djvu_utils a bit

---
 djvu_utils.py        | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
 parsedjvutext.py     | 69 ----------------------------------------------------
 web/djvu_utils.py    |  1 +
 web/parsedjvutext.py |  1 -
 4 files changed, 68 insertions(+), 70 deletions(-)
 create mode 100644 djvu_utils.py
 delete mode 100644 parsedjvutext.py
 create mode 120000 web/djvu_utils.py
 delete mode 120000 web/parsedjvutext.py

diff --git a/djvu_utils.py b/djvu_utils.py
new file mode 100644
index 0000000..21692a1
--- /dev/null
+++ b/djvu_utils.py
@@ -0,0 +1,67 @@
+import sys
+from bs4 import BeautifulSoup
+import djvu
+from djvu.decode import Context
+from itertools import chain
+import collections
+from PIL import Image
+
+def parse_page(page):
+    """Return a generator of (word, coords) pairs for page, or None if its text is empty."""
+    s = page.text.sexpr
+
+    def aux(s):
+        # only non-empty list expressions carry a tag in s[0]; skip everything else
+        if type(s) is not djvu.sexpr.ListExpression or len(s) == 0:
+            return
+        if str(s[0].value) == "word":
+            coords = [s[i].value for i in xrange(1, 5)]
+            word = s[5].value
+            yield (word.decode("utf-8"), coords)
+        else:
+            # descend into the children, which follow the tag and four coordinates
+            for c in chain.from_iterable(aux(child) for child in s[5:]):
+                yield c
+    return aux(s) if s else None
+
+def convert_to_htmlcoord(coords, page_size):
+    """Format each box as "b0,H-b3,b2,H-b1" (H = page_size), one string per box."""
+    return [",".join(map(str, (b[0], page_size - b[3], b[2], page_size - b[1]))) for b in coords]
+
+def get_document(djvufile):  # open the djvu file at path djvufile and return the document
+    c = Context()  # a new decoding context is created on every call
+    document = c.new_document(djvu.decode.FileURI(djvufile))
+    document.decoding_job.wait()  # wait on the document's decoding job before returning it
+    return document
+
+def parse_book(djvubook, page=None):
+    """
+    Return a list holding one parse_page() result per selected page of a djvu book.
+    page may be an int n (selects document.pages[n - 1]) or an iterable of such ints;
+    any other value, including the default None, selects the whole book.
+    """
+    document = get_document(djvubook)
+    pages = document.pages
+    if type(page) is int:
+        selected = [pages[page - 1]]
+    elif isinstance(page, collections.Iterable):
+        selected = [pages[number - 1] for number in page]
+    else:
+        selected = pages
+    return [parse_page(one_page) for one_page in selected]
+
+def image_from_book(djvubook, page):
+    """Render document.pages[page - 1] of djvubook in colour and return it as an RGB PIL image."""
+    document = get_document(djvubook)
+    target = document.pages[page - 1]
+    job = target.decode(wait=True)
+    width, height = job.size
+    bounds = (0, 0, width, height)
+    pixels = job.render(djvu.decode.RENDER_COLOR, bounds, bounds,
+                        djvu.decode.PixelFormatRgb())
+    return Image.frombuffer("RGB", (width, height), pixels, 'raw', 'RGB', 0, -1)
+
+if __name__ == "__main__":  # manual check: parse pages 10 and 11, render page 11 to a file
+    book = parse_book(sys.argv[1], page=[10,11])  # parse_book() no longer takes html=
+    im = image_from_book(sys.argv[1], 11)
+    im.save("test.jpeg")
diff --git a/parsedjvutext.py b/parsedjvutext.py
deleted file mode 100644
index 6bd9950..0000000
--- a/parsedjvutext.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import sys
-from bs4 import BeautifulSoup
-import djvu
-from djvu.decode import Context
-from itertools import chain
-import collections
-from PIL import Image
-
-def parse_page(page, html=False):
-    s, page_size = page.text.sexpr, page.size[1]
-
-    def aux(s, html):
-        if type(s) is djvu.sexpr.ListExpression:
-            if len(s) == 0:
-                pass
-            if str(s[0].value) == "word":
-                if html:
-                    coords = (s[1].value, page_size - s[4].value,
-                              s[3].value, page_size - s[2].value)
-                    coords = ",".join(map(str,coords))
-                else:
-                    coords = [s[i].value for i in xrange(1, 5)]
-                word = s[5].value
-                yield (word.decode("utf-8"), coords)
-            else:
-                for c in chain.from_iterable(aux(child, html) for child in s[5:]):
-                    yield c
-        else:
-            pass
-    return aux(s, html) if s else None
-
-def get_document(djvufile):
-    c = Context()
-    document = c.new_document(djvu.decode.FileURI(djvufile))
-    document.decoding_job.wait()
-    return document
-
-def parse_book(djvubook, page=None, html=False):
-    """
-    returns the list of words and coordinates from a djvu book.
-    if page is None, returns the whole book.
-    if html is True, coordinates are computed from the bottom of the page
-    """
-    document = get_document(djvubook)
-
-    if type(page) is int:
-        toparse = [document.pages[page - 1]]
-    elif isinstance(page, collections.Iterable):
-        toparse = [document.pages[p - 1] for p in page]
-    else:
-        toparse = document.pages
-
-    return [parse_page(page, html=html) for page in toparse]
-
-def image_from_book(djvubook, page):
-    document = get_document(djvubook)
-    mode = djvu.decode.RENDER_COLOR
-    djvu_pixel_format = djvu.decode.PixelFormatRgb()
-    page = document.pages[page-1]
-    page_job = page.decode(wait=True)
-    width, height = page_job.size
-    rect = (0, 0, width, height)
-    buf = page_job.render(mode, rect, rect, djvu_pixel_format)
-    return Image.frombuffer("RGB", (width, height), buf, 'raw', 'RGB', 0, -1)
-
-if __name__ == "__main__":
-    book = parse_book(sys.argv[1], page=[10,11], html=True)
-    im = image_from_book(sys.argv[1], 11)
-    im.save("test.webp")
diff --git a/web/djvu_utils.py b/web/djvu_utils.py
new file mode 120000
index 0000000..0742170
--- /dev/null
+++ b/web/djvu_utils.py
@@ -0,0 +1 @@
+../djvu_utils.py
\ No newline at end of file
diff --git a/web/parsedjvutext.py b/web/parsedjvutext.py
deleted file mode 120000
index e07ccf8..0000000
--- a/web/parsedjvutext.py
+++ /dev/null
@@ -1 +0,0 @@
-../parsedjvutext.py
\ No newline at end of file
-- 
cgit v1.3.1