aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- parsepdftext.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/parsepdftext.py b/parsepdftext.py
index 8521d7a..d1af47e 100644
--- a/parsepdftext.py
+++ b/parsepdftext.py
@@ -11,10 +11,11 @@ def parse_book(book):
words = []
coords = []
- for i, page in enumerate(document.findall('.//{{{0}}}page'.format(ns))):
+ for page in document.findall('.//{{{0}}}page'.format(ns)):
words.append([word.text for word in page.getchildren()])
coords.append([parse_coords(word) for word in page.getchildren()])
return {"words": words, "coords": coords}
if __name__=="__main__":
book = parse_book(sys.argv[1])
+ print book['words'][14]