about summary refs log tree commit diff stats
path: root/string_utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'string_utils.py')
-rw-r--r-- string_utils.py 12
1 files changed, 8 insertions, 4 deletions
diff --git a/string_utils.py b/string_utils.py
index a8a38c0..7120219 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -156,18 +156,22 @@ def print_alignment(l1, l2, c2, alignment):
for word in l2:
print u"{0:>25} | {1}".format("", word)
-def alignment_to_sexp(l1, l2, c2, alignment):
+def alignment_to_coord(l1, alignment):
+ # l1: list of corrected words
+ # alignment: list of size len(l1) mapping each word of l1 to its position(s) in l2
+ # returns a list of [word(s), index in l2] pairs (index is None when there is no match)
+
r = []
prev = 0
for index, g in itertools.groupby(zip(l1, alignment), lambda x:x[1]):
word = " ".join([a[0] for a in g])
if not index:
- r.append([word, []])
+ r.append([word, None])
else:
begin, end = index[0], index[-1]
if end > begin:
#need to find a better way to get the box coordinates
- r.append([word, c2[begin]])
+ r.append([word, begin])
else:
- r.append([word, c2[begin]])
+ r.append([word, begin])
return r