diff options
| author | Guillaume Horel <guillaume.horel@serenitascapital.com> | 2013-08-21 17:35:51 -0400 |
|---|---|---|
| committer | Guillaume Horel <guillaume.horel@serenitascapital.com> | 2013-08-21 17:35:51 -0400 |
| commit | 8dd400ab39e84afc13afba3acd15aa5f6918f03f (patch) | |
| tree | c886a93b53ce595ed797d5f6bbfe17ce257c2c2c /compare.py | |
| parent | d295c767717874045aab27d30759fd3ec7ed49fa (diff) | |
| download | ocr-layer-curation-8dd400ab39e84afc13afba3acd15aa5f6918f03f.tar.gz | |
begin refactoring
Diffstat (limited to 'compare.py')
| -rw-r--r-- | compare.py | 11 |
1 files changed, 11 insertions, 0 deletions
```diff
@@ -14,8 +14,19 @@ l1, c1 = ocrpage['words'], ocrpage["coords"]
 l2 = get_page(wikibook, n)
 print len(l2.split())
 l3 = su.simplify(l2)
+
+def del_cost1(w, pos):
+    return 50
+
+def del_cost2(w, pos):
+    return 1+3*len([c for c in w if c.isalnum()])
+bactrack1 = 8
+backtrack2 = 5
+
 C = su.align(l3.split(), l1, c1)
 pdb.set_trace()
+su.print_alignment(l2.split(), l1, c1, C[1])
+
 sexp = page_sexp(wikibook, n)
 su.alignment_to_sexp(C[1], sexp, l2.split())
 su.print_alignment(l2.split(), l1, c1, C[1])
```
