From 8b9977bc8cbf4b0c2bc90eb32ec3c78c91c5395c Mon Sep 17 00:00:00 2001
From: Guillaume Horel
Date: Sat, 3 Aug 2013 17:31:08 -0400
Subject: preliminary version of compare

---
Reviewer notes (free text after the "---" separator; not part of the commit
and not part of the diff below):

  * The patch creates compare.py, a flat script with no functions and no
    __main__ guard: every statement runs at module level, in order.
  * `ocrbook` is first bound to the file name "Tribulat Bonhomet.xml" and
    then rebound to the return value of parse_book(ocrbook); from that point
    on it is indexed as ocrbook['words'][n].
  * `n = 14` is hard-coded. l1 is entry n of ocrbook['words']; l2 is the
    whitespace-split result of get_page(wikibook, n+1).
    NOTE(review): the n+1 offset presumably means get_page() counts pages
    from 1 while ocrbook['words'] is 0-indexed -- confirm against
    wikisource.get_page and parsedjvutext.parse_book.
  * C = lcs.LCS(l1, l2) is passed, together with both sequences and their
    lengths, to lcs.printDiff().
    NOTE(review): presumably C is a longest-common-subsequence table and
    printDiff() walks it to print a word-level diff -- confirm in the lcs
    module, which is not shown here.
  * `pdb` is imported but not referenced anywhere in these 15 lines.

 compare.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 compare.py

(limited to 'compare.py')

diff --git a/compare.py b/compare.py
new file mode 100644
index 0000000..4fcacd0
--- /dev/null
+++ b/compare.py
@@ -0,0 +1,15 @@
+import pdb
+from wikisource import get_page
+from parsedjvutext import parse_book
+import lcs
+
+wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu"
+ocrbook = "Tribulat Bonhomet.xml"
+
+ocrbook = parse_book(ocrbook)
+
+n = 14
+l1 = ocrbook['words'][n]
+l2 = get_page(wikibook, n+1).split()
+C = lcs.LCS(l1, l2)
+lcs.printDiff(C, l1, l2, len(l1), len(l2))
-- 
cgit v1.2.3-70-g09d2