aboutsummaryrefslogtreecommitdiffstats
path: root/compare.py
blob: 4fcacd034271a908703e2d66bd10af166db6a063 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import pdb
from wikisource import get_page
from parsedjvutext import parse_book
import lcs

# The two sources for the same book: the .djvu title handed to get_page()
# and the XML file handed to parse_book().
wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu"
ocrbook = parse_book("Tribulat Bonhomet.xml")

# Index of the page under comparison. The wiki side is fetched at n + 1 --
# presumably because its pages are numbered from 1 while the parsed word
# lists are indexed from 0 (TODO confirm against wikisource/parsedjvutext).
n = 14

# l1: the n-th entry of the parsed book's 'words'.
# l2: the fetched page split on whitespace (assumes get_page returns a str).
# The tuple is evaluated left to right, so the OCR lookup happens first.
l1, l2 = ocrbook['words'][n], get_page(wikibook, n + 1).split()

# Feed both sequences to the lcs module and print the resulting diff.
C = lcs.LCS(l1, l2)
lcs.printDiff(
    C,
    l1,
    l2,
    len(l1),
    len(l2),
)