aboutsummaryrefslogtreecommitdiffstats
path: root/string_utils.py
diff options
context:
space:
mode:
author Thibaut Horel <thibaut.horel@gmail.com> 2014-09-07 16:01:05 -0400
committer Thibaut Horel <thibaut.horel@gmail.com> 2014-09-07 16:01:05 -0400
commit cd51d7e0d763ed669a2a45555d64d81e3b2478a1 (patch)
tree 52d27202e5971f1972c2a3e3430913b93f55b9dd /string_utils.py
parent d28394833d54a68f5ca13d2edaa261128f6c5170 (diff)
parent 6283b6582960544dc02e438e739775e3239b802c (diff)
download ocr-layer-curation-cd51d7e0d763ed669a2a45555d64d81e3b2478a1.tar.gz
Merge branch 'master' of horel.org:thibaut/ocr-layer-curation
Conflicts: web/static/css/style.css web/templates/index.html web/utils.py
Diffstat (limited to 'string_utils.py')
-rw-r--r-- string_utils.py 33
1 files changed, 23 insertions, 10 deletions
diff --git a/string_utils.py b/string_utils.py
index a8a38c0..0588418 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -156,18 +156,31 @@ def print_alignment(l1, l2, c2, alignment):
for word in l2:
print u"{0:>25} | {1}".format("", word)
-def alignment_to_sexp(l1, l2, c2, alignment):
+def invert_align(alignment, n):
+ l = [[] for _ in range(n)]
+ for i, e in enumerate(alignment):
+ for a in e:
+ l[a].append(i)
+ return l
+
+def alignment_to_coord(l1, alignment):
+ # l1 list of corrected words
+ # alignment list of size len(l1) that maps words into l2
+ # returns indices in l2
+
r = []
prev = 0
for index, g in itertools.groupby(zip(l1, alignment), lambda x:x[1]):
word = " ".join([a[0] for a in g])
- if not index:
- r.append([word, []])
- else:
- begin, end = index[0], index[-1]
- if end > begin:
- #need to find a better way to get the box coordinates
- r.append([word, c2[begin]])
- else:
- r.append([word, c2[begin]])
+ r.append([word, index])
+ # if not index:
+ # r.append([word, None])
+ # else:
+
+ # begin, end = index[0], index[-1]
+ # if end > begin:
+ # #need to find a better way to get the box coordinates
+ # r.append([word, begin])
+ # else:
+ # r.append([word, begin])
return r