aboutsummaryrefslogtreecommitdiffstats
path: root/compare.py
blob: c37cde9c9aad3bf033ddd36b60e80b8c52f62a19 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# -*- coding: utf-8 -*-

from wikisource import get_page
from parsedjvutext import parse_page_sexp
import string_utils as su

# Scratch script: fetch one page of a DjVu book both as parsed OCR output and
# as Wikisource page text, align the two word lists, and print the alignment.

# File name of the book to inspect; spaces are turned into underscores.
book_title = "Bloy - Le Sang du pauvre, Stock, 1932.djvu"
# Alternative book, kept for quick switching:
# book_title = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu"
wikibook = book_title.replace(" ", "_")

# Page number to compare.
n = 79

# OCR side: the parsed page exposes its words under the 'words' key.
ocrpage = parse_page_sexp(wikibook, n)
ocr_words = ocrpage['words']

# Wikisource side: replace typographic apostrophes with ASCII ones
# (presumably so tokens compare equal to the OCR words -- confirm), then
# split the text on whitespace.
page_text = get_page(wikibook, n)
normalized_text = page_text.replace(u"’", u"'")
wiki_words = normalized_text.split()

# Align the Wikisource words against the OCR words. Only the element at
# index 1 of align()'s result is handed to the printer; what it holds is
# defined in string_utils.
alignment = su.align(wiki_words, ocr_words)
su.print_alignment(wiki_words, ocr_words, alignment[1])