blob: 4fcacd034271a908703e2d66bd10af166db6a063 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
import pdb
from wikisource import get_page
from parsedjvutext import parse_book
import lcs
# Diff one page of the parsed XML book against the corresponding page
# returned by get_page, using the project's lcs helpers.
wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu"
ocrbook = parse_book("Tribulat Bonhomet.xml")

# n indexes the parsed book's 'words' entry; the get_page call uses n + 1
# (presumably because its page numbering is 1-based -- TODO confirm).
n = 14
l1 = ocrbook['words'][n]

# Fetch the page text and break it into whitespace-separated tokens.
wiki_text = get_page(wikibook, n + 1)
l2 = wiki_text.split()

# Build the LCS structure for the two sequences, then hand it to printDiff
# along with both sequences and their lengths.
C = lcs.LCS(l1, l2)
lcs.printDiff(C, l1, l2, len(l1), len(l2))
|