diff options
| author | Guillaume Horel <guillaume.horel@gmail.com> | 2013-12-28 10:13:29 -0500 |
|---|---|---|
| committer | Guillaume Horel <guillaume.horel@gmail.com> | 2013-12-28 10:13:29 -0500 |
| commit | 5ad22522df6b4b725fa7fdb46ff6c78d627775a5 (patch) | |
| tree | eea4db1155925a05925505c8c6437fd7635b9c96 /compare.py | |
| parent | 5dc1322e4c7e78ca98e3ad910f816ad45ac7bfd8 (diff) | |
| parent | 8dd400ab39e84afc13afba3acd15aa5f6918f03f (diff) | |
| download | ocr-layer-curation-5ad22522df6b4b725fa7fdb46ff6c78d627775a5.tar.gz | |
Merge branch 'refactor_align' of horel.org:thibaut/ocr-layer-curation into refactor_align
Diffstat (limited to 'compare.py')
| -rw-r--r-- | compare.py | 11 |
1 file changed, 11 insertions, 0 deletions
```diff
@@ -14,8 +14,19 @@ l1, c1 = ocrpage['words'], ocrpage["coords"]
 l2 = get_page(wikibook, n)
 print len(l2.split())
 l3 = su.simplify(l2)
+
+def del_cost1(w, pos):
+    return 50
+
+def del_cost2(w, pos):
+    return 1+3*len([c for c in w if c.isalnum()])
+bactrack1 = 8
+backtrack2 = 5
+
 C = su.align(l3.split(), l1, c1)
 pdb.set_trace()
+su.print_alignment(l2.split(), l1, c1, C[1])
+
 sexp = page_sexp(wikibook, n)
 su.alignment_to_sexp(C[1], sexp, l2.split())
 su.print_alignment(l2.split(), l1, c1, C[1])
```
