about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- compare.py 19
-rw-r--r-- string_utils.py 2
2 files changed, 10 insertions, 11 deletions
diff --git a/compare.py b/compare.py
index 4fcacd0..aeb9092 100644
--- a/compare.py
+++ b/compare.py
@@ -1,15 +1,12 @@
-import pdb
from wikisource import get_page
-from parsedjvutext import parse_book
-import lcs
+from parsedjvutext import parse_page_sexp
+from string_utils import LCS, printDiff
wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu"
-ocrbook = "Tribulat Bonhomet.xml"
-ocrbook = parse_book(ocrbook)
-
-n = 14
-l1 = ocrbook['words'][n]
-l2 = get_page(wikibook, n+1).split()
-C = lcs.LCS(l1, l2)
-lcs.printDiff(C, l1, l2, len(l1), len(l2))
+n = 42
+ocrpage = parse_page_sexp(wikibook, n)
+l1 = ocrpage['words']
+l2 = get_page(wikibook, n).split()
+C = LCS(l1, l2)
+printDiff(C, l1, l2, len(l1), len(l2))
diff --git a/string_utils.py b/string_utils.py
index 81f448f..8b7a3a3 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
def levenshtein(word1, word2):
"""Return triplet of number of (substitutions, insertions, deletions) to
transform word1 into word2.