diff options
Diffstat (limited to 'string_utils.py')
| -rw-r--r-- | string_utils.py | 12 |
1 files changed, 8 insertions, 4 deletions
diff --git a/string_utils.py b/string_utils.py index a8a38c0..7120219 100644 --- a/string_utils.py +++ b/string_utils.py @@ -156,18 +156,22 @@ def print_alignment(l1, l2, c2, alignment): for word in l2: print u"{0:>25} | {1}".format("", word) -def alignment_to_sexp(l1, l2, c2, alignment): +def alignment_to_coord(l1, alignment): + # l1 list of corrected words + # alignment list of size len(l1) qui mappe mots dans l2 + # returns indices in l2 + r = [] prev = 0 for index, g in itertools.groupby(zip(l1, alignment), lambda x:x[1]): word = " ".join([a[0] for a in g]) if not index: - r.append([word, []]) + r.append([word, None]) else: begin, end = index[0], index[-1] if end > begin: #need to find a better way to get the box coordinates - r.append([word, c2[begin]]) + r.append([word, begin]) else: - r.append([word, c2[begin]]) + r.append([word, begin]) return r |
