aboutsummaryrefslogtreecommitdiffstats
path: root/string_utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'string_utils.py')
-rw-r--r--string_utils.py33
1 files changed, 23 insertions, 10 deletions
diff --git a/string_utils.py b/string_utils.py
index a8a38c0..0588418 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -156,18 +156,31 @@ def print_alignment(l1, l2, c2, alignment):
for word in l2:
print u"{0:>25} | {1}".format("", word)
-def alignment_to_sexp(l1, l2, c2, alignment):
+def invert_align(alignment, n):
+ l = [[] for _ in range(n)]
+ for i, e in enumerate(alignment):
+ for a in e:
+ l[a].append(i)
+ return l
+
+def alignment_to_coord(l1, alignment):
+ # l1: list of corrected words
+ # alignment: list of size len(l1) that maps each word of l1 into l2
+ # returns a list of [joined words, l2 indices], one per run of equal alignment entries
+
r = []
prev = 0
for index, g in itertools.groupby(zip(l1, alignment), lambda x:x[1]):
word = " ".join([a[0] for a in g])
- if not index:
- r.append([word, []])
- else:
- begin, end = index[0], index[-1]
- if end > begin:
- #need to find a better way to get the box coordinates
- r.append([word, c2[begin]])
- else:
- r.append([word, c2[begin]])
+ r.append([word, index])
+ # if not index:
+ # r.append([word, None])
+ # else:
+
+ # begin, end = index[0], index[-1]
+ # if end > begin:
+ # #need to find a better way to get the box coordinates
+ # r.append([word, begin])
+ # else:
+ # r.append([word, begin])
return r