diff options
Diffstat (limited to 'parsepdftext.py')
| -rw-r--r-- | parsepdftext.py | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/parsepdftext.py b/parsepdftext.py
index 8521d7a..d1af47e 100644
--- a/parsepdftext.py
+++ b/parsepdftext.py
@@ -11,10 +11,11 @@ def parse_book(book):
 
     words = []
     coords = []
-    for i, page in enumerate(document.findall('.//{{{0}}}page'.format(ns))):
+    for page in document.findall('.//{{{0}}}page'.format(ns)):
         words.append([word.text for word in page.getchildren()])
         coords.append([parse_coords(word) for word in page.getchildren()])
     return {"words": words, "coords": coords}
 
 if __name__=="__main__":
     book = parse_book(sys.argv[1])
+    print book['words'][14]
