diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2013-08-06 01:35:42 +0200 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2013-08-06 01:35:42 +0200 |
| commit | 241e0f3f7cf72f1d771ed0b4651542168b16329e (patch) | |
| tree | d81bb152fbeda8758ec5f4dea01eafb86e532ca8 /string_utils.py | |
| parent | 5213d148a9fadbd0b7425e8c08298e9e74785d9f (diff) | |
| download | ocr-layer-curation-241e0f3f7cf72f1d771ed0b4651542168b16329e.tar.gz | |
Split words which map to two words
Diffstat (limited to 'string_utils.py')
| -rw-r--r-- | string_utils.py | 15 |
1 files changed, 11 insertions, 4 deletions
```diff
diff --git a/string_utils.py b/string_utils.py
index 3db0a47..ab6d3c1 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -140,13 +140,20 @@ def print_alignment(l1, l2, alignment):
         else:
             if type(alignment[i]) == tuple:
                 begin, end = alignment[i][0], alignment[i][-1]
-                merge = True
             else:
                 begin, end = alignment[i], alignment[i]
-                merge = False
             while prev < begin - 1:
                 prev += 1
                 print u"{0:>25} | {1}".format("", l2[prev])
-            print u"{0:>25} | {1:<25} {2}".format(word, join_words(l2[begin:end+1]),
-                                                  "(M)" if merge else "")
             prev = end
+
+            if end > begin:
+                if end == begin + 1:
+                    l, r = cut(word, l2[begin], l2[end])
+                    print u"(S) {0:>21} | {1:<25}".format(l, l2[begin])
+                    print u"(S) {0:>21} | {1:<25}".format(r, l2[end])
+                else:
+                    print u"{0:>25} | {1:<25} (M)".format(word,
+                                                          join_words(l2[begin:end+1]))
+            else:
+                print u"{0:>25} | {1:<25}".format(word, l2[begin])
```
