diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2014-02-27 11:29:28 -0500 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2014-02-27 11:29:28 -0500 |
| commit | 31ca595e875dd4e2cb07b2d3610b0d4b4f590abe (patch) | |
| tree | 205ec06d52ef1d2d13a94543cba170119748a945 /wikisource.py | |
| parent | b8013d90c16cf4e1225205fc309f24f7c06ea6c5 (diff) | |
| download | ocr-layer-curation-31ca595e875dd4e2cb07b2d3610b0d4b4f590abe.tar.gz | |
PEP8
Diffstat (limited to 'wikisource.py')
| -rw-r--r-- | wikisource.py | 7 |
1 files changed, 4 insertions, 3 deletions
```diff
diff --git a/wikisource.py b/wikisource.py
index 070c84a..1459468 100644
--- a/wikisource.py
+++ b/wikisource.py
@@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-
 import requests
-import lxml
 import sys
 from bs4 import BeautifulSoup
 from itertools import takewhile, count
 
 URL = "http://fr.wikisource.org/w/index.php"
 
+
 def get_page(title, page):
-    params = { "action": "render", "title": "Page:" + title + "/" + str(page) }
+    params = {"action": "render", "title": "Page:" + title + "/" + str(page)}
     r = requests.get(URL, params=params)
     if r.status_code == requests.codes.ok:
         soup = BeautifulSoup(r.text, "lxml")
@@ -16,9 +16,10 @@ def get_page(title, page):
     else:
         return None
 
+
 def get_pages(title, begin=1, end=None):
     if end:
-        return (get_page(title, i) for i in xrange(begin, end+1))
+        return (get_page(title, i) for i in xrange(begin, end + 1))
     else:
         return takewhile(lambda x: x is not None,
                          (get_page(title, i) for i in count(begin)))
```
