diff options
Diffstat (limited to 'compare.py')
| -rw-r--r-- | compare.py | 19 |
1 files changed, 8 insertions, 11 deletions
@@ -1,15 +1,12 @@ -import pdb from wikisource import get_page -from parsedjvutext import parse_book -import lcs +from parsedjvutext import parse_page_sexp +from string_utils import LCS, printDiff wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu" -ocrbook = "Tribulat Bonhomet.xml" -ocrbook = parse_book(ocrbook) - -n = 14 -l1 = ocrbook['words'][n] -l2 = get_page(wikibook, n+1).split() -C = lcs.LCS(l1, l2) -lcs.printDiff(C, l1, l2, len(l1), len(l2)) +n = 42 +ocrpage = parse_page_sexp(wikibook, n) +l1 = ocrpage['words'] +l2 = get_page(wikibook, n).split() +C = LCS(l1, l2) +printDiff(C, l1, l2, len(l1), len(l2)) |
