From 80133c8d2d0a7334e8f53a11eaa48a8d7b70c1da Mon Sep 17 00:00:00 2001
From: Guillaume Horel
Date: Mon, 5 Aug 2013 00:05:59 -0400
Subject: use new functions in compare.py

---
 compare.py      | 19 ++++++++-----------
 string_utils.py |  2 ++
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/compare.py b/compare.py
index 4fcacd0..aeb9092 100644
--- a/compare.py
+++ b/compare.py
@@ -1,15 +1,12 @@
-import pdb
 from wikisource import get_page
-from parsedjvutext import parse_book
-import lcs
+from parsedjvutext import parse_page_sexp
+from string_utils import LCS, printDiff
 
 wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu"
 
-ocrbook = "Tribulat Bonhomet.xml"
-ocrbook = parse_book(ocrbook)
-
-n = 14
-l1 = ocrbook['words'][n]
-l2 = get_page(wikibook, n+1).split()
-C = lcs.LCS(l1, l2)
-lcs.printDiff(C, l1, l2, len(l1), len(l2))
+n = 42
+ocrpage = parse_page_sexp(wikibook, n)
+l1 = ocrpage['words']
+l2 = get_page(wikibook, n).split()
+C = LCS(l1, l2)
+printDiff(C, l1, l2, len(l1), len(l2))
diff --git a/string_utils.py b/string_utils.py
index 81f448f..8b7a3a3 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 def levenshtein(word1, word2):
     """Return triplet of number of (substitutions, insertions, deletions) to
     transform word1 into word2.
-- 
cgit v1.2.3-70-g09d2