aboutsummaryrefslogtreecommitdiffstats
path: root/parsedjvutext.py
diff options
context:
space:
mode:
author: Guillaume Horel <guillaume.horel@gmail.com>, 2013-08-17 19:01:24 -0400
committer: Guillaume Horel <guillaume.horel@gmail.com>, 2013-08-17 19:01:24 -0400
commit: 1d53df7c99126679d391a1efc96b30aa3848b4d3 (patch)
tree: 2d2977bac09ef557b12faa8f8a3a3707f37ec128 /parsedjvutext.py
parent: f633ea807ef9a83f84f41767e71fad71656a4439 (diff)
download: ocr-layer-curation-1d53df7c99126679d391a1efc96b30aa3848b4d3.tar.gz
add function for converting alignment to sexp
Diffstat (limited to 'parsedjvutext.py')
-rw-r--r-- parsedjvutext.py 9
1 files changed, 6 insertions, 3 deletions
diff --git a/parsedjvutext.py b/parsedjvutext.py
index 3d4ee96..773a1d4 100644
--- a/parsedjvutext.py
+++ b/parsedjvutext.py
@@ -33,11 +33,14 @@ def parse_wordline(line):
coords = map(int, line[1:5])
return word, coords
-def parse_page_sexp(djvubook, pagenumber):
+def page_sexp(djvubook, pagenumber):
args = ["djvused", "-e", "select {0};print-txt".format(pagenumber),
djvubook]
- page = [parse_wordline(line) for line in \
- subprocess.check_output(args).split("\n") if "word" in line]
+ return subprocess.check_output(args).split("\n")
+
+def parse_page_sexp(djvubook, pagenumber):
+ page = [parse_wordline(line) for line in page_sexp(djvubook, pagenumber) \
+ if "word" in line]
return {"words": [a for a, b in page], "coords": [b for a, b in page]}
def parse_book_sexp(djvubook):