diff options
| author | Guillaume Horel <guillaume.horel@gmail.com> | 2013-08-03 13:34:56 -0400 |
|---|---|---|
| committer | Guillaume Horel <guillaume.horel@gmail.com> | 2013-08-03 13:34:56 -0400 |
| commit | 277b70c538a00583485011a4aeda2b08618d1b6e (patch) | |
| tree | 906c1d469e6aa99722f1052ba54b807380134012 | |
| parent | 41df9107d8f7ae19bbdcadee0f411e9763c6fbbc (diff) | |
| download | ocr-layer-curation-277b70c538a00583485011a4aeda2b08618d1b6e.tar.gz | |
remove unneeded enumerate
| -rw-r--r-- | parsepdftext.py | 3 |
1 files changed, 2 insertions, 1 deletions
```diff
diff --git a/parsepdftext.py b/parsepdftext.py
index 8521d7a..d1af47e 100644
--- a/parsepdftext.py
+++ b/parsepdftext.py
@@ -11,10 +11,11 @@ def parse_book(book):
 
     words = []
     coords = []
-    for i, page in enumerate(document.findall('.//{{{0}}}page'.format(ns))):
+    for page in document.findall('.//{{{0}}}page'.format(ns)):
         words.append([word.text for word in page.getchildren()])
         coords.append([parse_coords(word) for word in page.getchildren()])
     return {"words": words, "coords": coords}
 
 if __name__=="__main__":
     book = parse_book(sys.argv[1])
+    print book['words'][14]
```
