diff options
| author | Guillaume Horel <guillaume.horel@gmail.com> | 2013-08-03 17:31:08 -0400 |
|---|---|---|
| committer | Guillaume Horel <guillaume.horel@gmail.com> | 2013-08-03 17:31:08 -0400 |
| commit | 8b9977bc8cbf4b0c2bc90eb32ec3c78c91c5395c (patch) | |
| tree | eff17b383bc703a63f4ce6c14532e48ff90f2c80 /compare.py | |
| parent | 277b70c538a00583485011a4aeda2b08618d1b6e (diff) | |
| download | ocr-layer-curation-8b9977bc8cbf4b0c2bc90eb32ec3c78c91c5395c.tar.gz | |
preliminary version of compare
Diffstat (limited to 'compare.py')
| -rw-r--r-- | compare.py | 15 |
1 files changed, 15 insertions, 0 deletions
```diff
diff --git a/compare.py b/compare.py
new file mode 100644
index 0000000..4fcacd0
--- /dev/null
+++ b/compare.py
@@ -0,0 +1,15 @@
+import pdb
+from wikisource import get_page
+from parsedjvutext import parse_book
+import lcs
+
+wikibook = "Villiers de L'Isle-Adam - Tribulat Bonhomet, 1908.djvu"
+ocrbook = "Tribulat Bonhomet.xml"
+
+ocrbook = parse_book(ocrbook)
+
+n = 14
+l1 = ocrbook['words'][n]
+l2 = get_page(wikibook, n+1).split()
+C = lcs.LCS(l1, l2)
+lcs.printDiff(C, l1, l2, len(l1), len(l2))
```
