aboutsummaryrefslogtreecommitdiffstats
path: root/string_utils.py
diff options
context:
space:
mode:
authorGuillaume Horel <guillaume.horel@gmail.com>2014-07-29 18:50:09 -0400
committerGuillaume Horel <guillaume.horel@gmail.com>2014-07-29 18:50:09 -0400
commit2ab5da4bfdce9f41e93a27082900da6ea72db6ed (patch)
treebdbe27d77fea5f77e0b6a8934ab01391f03a8057 /string_utils.py
parentdfcd65c8f10aa94f19fe40940565681ab9a73e44 (diff)
downloadocr-layer-curation-2ab5da4bfdce9f41e93a27082900da6ea72db6ed.tar.gz
Webapp now shows three columns
image, original text and corrected text. The highlighting is functional as well.
Diffstat (limited to 'string_utils.py')
-rw-r--r--string_utils.py12
1 files changed, 8 insertions, 4 deletions
diff --git a/string_utils.py b/string_utils.py
index a8a38c0..7120219 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -156,18 +156,22 @@ def print_alignment(l1, l2, c2, alignment):
for word in l2:
print u"{0:>25} | {1}".format("", word)
-def alignment_to_sexp(l1, l2, c2, alignment):
+def alignment_to_coord(l1, alignment):
+ # l1 list of corrected words
+ # alignment: list of size len(l1) that maps words into l2
+ # returns indices in l2
+
r = []
prev = 0
for index, g in itertools.groupby(zip(l1, alignment), lambda x:x[1]):
word = " ".join([a[0] for a in g])
if not index:
- r.append([word, []])
+ r.append([word, None])
else:
begin, end = index[0], index[-1]
if end > begin:
#need to find a better way to get the box coordinates
- r.append([word, c2[begin]])
+ r.append([word, begin])
else:
- r.append([word, c2[begin]])
+ r.append([word, begin])
return r