about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Thibaut Horel <thibaut.horel@gmail.com> 2014-09-07 16:01:05 -0400
committer: Thibaut Horel <thibaut.horel@gmail.com> 2014-09-07 16:01:05 -0400
commit: cd51d7e0d763ed669a2a45555d64d81e3b2478a1 (patch)
tree: 52d27202e5971f1972c2a3e3430913b93f55b9dd
parent: d28394833d54a68f5ca13d2edaa261128f6c5170 (diff)
parent: 6283b6582960544dc02e438e739775e3239b802c (diff)
download: ocr-layer-curation-cd51d7e0d763ed669a2a45555d64d81e3b2478a1.tar.gz
Merge branch 'master' of horel.org:thibaut/ocr-layer-curation
Conflicts: web/static/css/style.css web/templates/index.html web/utils.py
-rw-r--r-- string_utils.py 33
-rw-r--r-- web/main.py 7
-rw-r--r-- web/static/css/style.css 11
-rw-r--r-- web/static/js/main.js 26
-rw-r--r-- web/templates/index.html 31
-rw-r--r-- web/utils.py 10
6 files changed, 80 insertions, 38 deletions
diff --git a/string_utils.py b/string_utils.py
index a8a38c0..0588418 100644
--- a/string_utils.py
+++ b/string_utils.py
@@ -156,18 +156,31 @@ def print_alignment(l1, l2, c2, alignment):
for word in l2:
print u"{0:>25} | {1}".format("", word)
-def alignment_to_sexp(l1, l2, c2, alignment):
+def invert_align(alignment, n):
+ l = [[] for _ in range(n)]
+ for i, e in enumerate(alignment):
+ for a in e:
+ l[a].append(i)
+ return l
+
+def alignment_to_coord(l1, alignment):
+ # l1 list of corrected words
+ # alignment: list of size len(l1) that maps words into l2
+ # returns indices in l2
+
r = []
prev = 0
for index, g in itertools.groupby(zip(l1, alignment), lambda x:x[1]):
word = " ".join([a[0] for a in g])
- if not index:
- r.append([word, []])
- else:
- begin, end = index[0], index[-1]
- if end > begin:
- #need to find a better way to get the box coordinates
- r.append([word, c2[begin]])
- else:
- r.append([word, c2[begin]])
+ r.append([word, index])
+ # if not index:
+ # r.append([word, None])
+ # else:
+
+ # begin, end = index[0], index[-1]
+ # if end > begin:
+ # #need to find a better way to get the box coordinates
+ # r.append([word, begin])
+ # else:
+ # r.append([word, begin])
return r
diff --git a/web/main.py b/web/main.py
index 206eb86..a6826c1 100644
--- a/web/main.py
+++ b/web/main.py
@@ -9,9 +9,10 @@ import io
class MainHandler(RequestHandler):
def get(self, page_number):
- areas, words = utils.gen_html(self.settings["book"], page_number)
- self.render("index.html", page_number=page_number,
- areas=areas, words=words)
+ orig_coords, orig_words, corr_words, align = \
+ utils.gen_html(self.settings["book"], page_number)
+ self.render("index.html", page_number=page_number, orig_coords=orig_coords,
+ orig_words=orig_words, corr_words=corr_words, align=align)
class ImageHandler(RequestHandler):
diff --git a/web/static/css/style.css b/web/static/css/style.css
index ec73fd0..b293da8 100644
--- a/web/static/css/style.css
+++ b/web/static/css/style.css
@@ -27,4 +27,15 @@ span:hover {
padding: 3.5em 2em;
font-size: 18px;
line-height: 180%;
+
+#texte-non-corrige {
+ margin-top:0cm;
+ width: 10cm;
+ float: left;
+}
+
+#texte-corrige {
+ margin-top:0cm;
+ width: 10cm;
+ float: left;
}
diff --git a/web/static/js/main.js b/web/static/js/main.js
index ff09fd1..c790e34 100644
--- a/web/static/js/main.js
+++ b/web/static/js/main.js
@@ -1,19 +1,23 @@
$(document).ready(function() {
$('#page').mapster({
- mapKey: 'data-id',
- fillColor: 'ff0000',
- fillOpacity: 0.3,
- onMouseover: function (e) {
- $("#" + e.key).addClass("selected");
- },
- onMouseout: function (e) {
- $("#" + e.key).removeClass("selected");
- }
-
+ mapKey: 'data-orig',
+ fillColor: 'ff0000',
+ fillOpacity: 0.3,
+ onMouseover: function (e) {
+ $("#" + "orig-" + e.key).addClass("selected");
+ $("#" + "corr-" + e.key).addClass("selected");
+ },
+ onMouseout: function (e) {
+ $("#" + "orig-" + e.key).removeClass("selected");
+ $("#" + "corr-" + e.key).removeClass("selected");
+ }
+
}).mapster('resize', 500);
$("span").mouseenter(function() {
- $('area[data-id='+$(this).attr("id")+']').mapster("highlight");
+ $(this).attr("id").replace(/\D+/,"").split(",").map(function(e){
+ $('area[data-orig='+e+']').mapster("highlight");
+ })
});
$("span").mouseout(function() {
diff --git a/web/templates/index.html b/web/templates/index.html
index dc286aa..0a07be1 100644
--- a/web/templates/index.html
+++ b/web/templates/index.html
@@ -1,13 +1,26 @@
{% extends "layout.html" %}
{% block main %}
- <div id="image_container">
- <img id="page" src="{{page_number}}.jpg" usemap="#wordmap" />
- </div>
- <map name="wordmap">{% for id, coords in areas %}
- <area href="#" shape="rect" coords="{{coords}}" data-id="word-{{id}}" />{% end %}
- </map>
- <div id="text">
- {% raw words %}
- </div>
+<div id="image_container">
+ <img id="page" src="{{page_number}}.jpg" usemap="#wordmap" />
+</div>
+<map name="wordmap">
+ {% for i, coord in enumerate(orig_coords) %}
+ <area href="#" shape="rect" coords="{{coord}}" data-orig="{{i}}" />
+ {# data-corr="{{",".join(map(str,corr_coords_index[i]))}}" #}
+ {% end %}
+</map>
+
+<div id="texte-non-corrige">
+ <h3>Texte original</h3>
+ {% for i, word in enumerate(orig_words) %}
+ <span id="orig-{{i}}">{{word}}</span>
+ {% end %}
+</div>
+<div id="texte-corrige">
+ <h3>Texte corrigé</h3>
+ {% for i, word in enumerate(corr_words) %}
+ <span id="corr-{{",".join(map(str,align[i]))}}">{{word}}</span>
+ {% end %}
+</div>
{% end %}
diff --git a/web/utils.py b/web/utils.py
index 1947f8b..e6f4309 100644
--- a/web/utils.py
+++ b/web/utils.py
@@ -10,11 +10,11 @@ def gen_html(book, page_number):
d = du.parse_page(page)
elem, corrected_text = get_page2(open("test.txt").read())
if d:
- words, coords = zip(*d)
- C = su.align(corrected_text.split(), list(words), list(coords))
- coords = [coords[e[0]] for e in C[1]]
- coords_html = du.convert_to_htmlcoord(coords, page.size[1])
- return (list(enumerate(coords_html)), str(elem))
+ orig_words, orig_coords = zip(*d)
+ C = su.align(corrected_words, list(orig_words), list(orig_coords))
+ corr_words = corrected_text.split()
+ orig_coords_html = du.convert_to_htmlcoord(orig_coords, page.size[1])
+ return orig_coords_html, orig_words, corr_words, C[1]
if __name__ == "__main__":
gen_html(*sys.argv[1:3])