about summary refs log tree commit diff stats
path: root/compare.py
diff options
context:
space:
mode:
author Guillaume Horel <guillaume.horel@gmail.com> 2013-08-03 17:31:08 -0400
committer Guillaume Horel <guillaume.horel@gmail.com> 2013-08-03 17:31:08 -0400
commit 8b9977bc8cbf4b0c2bc90eb32ec3c78c91c5395c (patch)
tree eff17b383bc703a63f4ce6c14532e48ff90f2c80 /compare.py
parent 277b70c538a00583485011a4aeda2b08618d1b6e (diff)
download ocr-layer-curation-8b9977bc8cbf4b0c2bc90eb32ec3c78c91c5395c.tar.gz
preliminary version of compare
Diffstat (limited to 'compare.py')
-rw-r--r-- compare.py 15
1 files changed, 15 insertions, 0 deletions
diff --git a/compare.py b/compare.py
new file mode 100644
index 0000000..4fcacd0
--- /dev/null
+++ b/compare.py
@@ -0,0 +1,15 @@
+import pdb
+from wikisource import get_page
+from parsedjvutext import parse_book
+import lcs
+
+wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu"
+ocrbook = "Tribulat Bonhomet.xml"
+
+ocrbook = parse_book(ocrbook)
+
+n = 14
+l1 = ocrbook['words'][n]
+l2 = get_page(wikibook, n+1).split()
+C = lcs.LCS(l1, l2)
+lcs.printDiff(C, l1, l2, len(l1), len(l2))