From d56a449c4f745ba661bc952aa36746a1ef06e7d4 Mon Sep 17 00:00:00 2001
From: Guillaume Horel
Date: Sat, 28 Dec 2013 19:00:31 -0500
Subject: clean up get_pages function

---
 wikisource.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'wikisource.py')

diff --git a/wikisource.py b/wikisource.py
index 5227721..f0d230f 100644
--- a/wikisource.py
+++ b/wikisource.py
@@ -3,22 +3,33 @@ import requests
 import lxml
 import sys
 from bs4 import BeautifulSoup
+from itertools import takewhile, count
 
 URL = "http://fr.wikisource.org/w/index.php"
 
 def get_page(title, page):
     params = { "action": "render", "title": "Page:" + title + "/" + str(page) }
     r = requests.get(URL, params=params)
-    soup = BeautifulSoup(r.text, "lxml")
-    return soup.select("div.pagetext")[0].text
+    if r.status_code == requests.codes.ok:
+        soup = BeautifulSoup(r.text, "lxml")
+        return soup.select("div.pagetext")[0].text
+    else:
+        return None
 
 def get_pages(title, begin=1, end=None):
-    if not end:
-        end = 100
-    for page in xrange(begin, end+1):
-        yield get_page(title, page)
+    if end:
+        return (get_page(title, i) for i in xrange(begin, end+1))
+    else:
+        return takewhile(lambda x: x is not None,
+                         (get_page(title, i) for i in count(begin)))
+
 
 if __name__ == "__main__":
     title = sys.argv[1]
     for page in get_pages(title):
         print page
+
+
+def f(i):
+    if i <=10:
+        return i**2
-- 
cgit v1.2.3-70-g09d2