import sys
from bs4 import BeautifulSoup
import djvu
from djvu.decode import Context
from itertools import chain
import collections
from PIL import Image

def parse_page(page):
    """Extract the words of a page's text layer together with their boxes.

    Walks the s-expression found at ``page.text.sexpr``. For every list
    whose head symbol is ``word`` it produces a ``(word, coords)`` pair,
    where ``coords`` is the comma-joined string ``"x0,H-y1,x1,H-y0"``.
    ``x0, y0, x1, y1`` are the values stored at positions 1-4 of the list
    and ``H`` is ``page.size[1]`` (presumably the page height, so the
    subtraction flips the vertical axis -- confirm against djvulibre docs).
    Any other list is descended into through its elements from index 5 on.

    Returns a generator of ``(word, coords)`` pairs, or ``None`` when the
    page's text s-expression is falsy (e.g. there is no text layer).
    """
    sexpr = page.text.sexpr

    def aux(node, page_size):
        # Atoms (symbols, numbers, strings) cannot be zones.
        if not isinstance(node, djvu.sexpr.ListExpression):
            return
        # An empty list has no head symbol; indexing it would raise.
        if len(node) == 0:
            return
        if str(node[0].value) == "word":
            x0, y0, x1, y1 = [node[i].value for i in range(1, 5)]
            coords = ",".join(map(str, [x0, page_size - y1,
                                        x1, page_size - y0]))
            word = node[5].value
            # Byte strings (Python 2 bindings) are decoded; text strings
            # (Python 3 bindings) are passed through untouched.
            if isinstance(word, bytes):
                word = word.decode("utf-8")
            yield (word, coords)
        else:
            for child in node[5:]:
                for item in aux(child, page_size):
                    yield item

    return aux(sexpr, page.size[1]) if sexpr else None

def get_document(djvufile):
    """Open the DjVu file at path *djvufile* and return the document.

    A fresh decoding ``Context`` is created for every call, and the
    function blocks on the document's decoding job before returning.
    """
    context = Context()
    location = djvu.decode.FileURI(djvufile)
    opened = context.new_document(location)
    # Block until the document's decoding job reports completion.
    opened.decoding_job.wait()
    return opened

def parse_book(djvubook, page=None):
    """Parse the text layer of one, several or all pages of a DjVu book.

    ``djvubook`` is the path of the DjVu file. ``page`` selects what to
    parse, using 1-based page numbers:

    * an ``int`` -- that single page;
    * an iterable of ints -- those pages, in the given order;
    * anything else (including the default ``None``) -- the whole book.

    Returns a list with one entry per selected page; each entry is whatever
    ``parse_page`` returns for that page (a generator of ``(word, coords)``
    pairs, or ``None`` when the page has no text).
    """
    # ``collections.Iterable`` was removed in Python 3.10; fall back to the
    # old location only on interpreters that lack ``collections.abc``.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    document = get_document(djvubook)

    if isinstance(page, int):
        toparse = [document.pages[page - 1]]
    elif isinstance(page, Iterable):
        toparse = [document.pages[number - 1] for number in page]
    else:
        toparse = document.pages

    # Use a distinct loop name so the ``page`` argument is not shadowed.
    return [parse_page(current) for current in toparse]

def image_from_book(djvubook, page):
    """Render one page of a DjVu file into a PIL ``Image``.

    ``djvubook`` is the path of the file and ``page`` a 1-based page
    number. The page is decoded synchronously and rendered in colour over
    its full extent into an RGB pixel buffer, which is then wrapped in an
    ``"RGB"`` image of the same dimensions.
    """
    book = get_document(djvubook)
    job = book.pages[page - 1].decode(wait=True)
    width, height = job.size
    # The same rectangle serves as both page area and render area.
    whole_page = (0, 0, width, height)
    pixels = job.render(
        djvu.decode.RENDER_COLOR,
        whole_page,
        whole_page,
        djvu.decode.PixelFormatRgb(),
    )
    # Trailing raw-decoder arguments are stride 0 and orientation -1
    # (see the Pillow raw decoder documentation for their meaning).
    return Image.frombuffer("RGB", (width, height), pixels, 'raw', 'RGB', 0, -1)

if __name__ == "__main__":
    # Demo entry point: parse pages 10 and 11 of the DjVu file given as the
    # first command-line argument, then render page 11 to "test.jpeg".
    if len(sys.argv) < 2:
        sys.exit("usage: %s FILE.djvu" % sys.argv[0])
    # parse_book() accepts only ``page``; the former ``html=True`` keyword
    # made this call fail with TypeError before anything was parsed.
    book = parse_book(sys.argv[1], page=[10, 11])
    im = image_from_book(sys.argv[1], 11)
    im.save("test.jpeg")