aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--wikisource.py23
1 files changed, 17 insertions, 6 deletions
diff --git a/wikisource.py b/wikisource.py
index 5227721..f0d230f 100644
--- a/wikisource.py
+++ b/wikisource.py
@@ -3,22 +3,33 @@ import requests
import lxml
import sys
from bs4 import BeautifulSoup
+from itertools import takewhile, count
URL = "http://fr.wikisource.org/w/index.php"
def get_page(title, page):
params = { "action": "render", "title": "Page:" + title + "/" + str(page) }
r = requests.get(URL, params=params)
- soup = BeautifulSoup(r.text, "lxml")
- return soup.select("div.pagetext")[0].text
+ if r.status_code == requests.codes.ok:
+ soup = BeautifulSoup(r.text, "lxml")
+ return soup.select("div.pagetext")[0].text
+ else:
+ return None
def get_pages(title, begin=1, end=None):
- if not end:
- end = 100
- for page in xrange(begin, end+1):
- yield get_page(title, page)
+ if end:
+ return (get_page(title, i) for i in xrange(begin, end+1))
+ else:
+ return takewhile(lambda x: x is not None,
+ (get_page(title, i) for i in count(begin)))
+
if __name__ == "__main__":
title = sys.argv[1]
for page in get_pages(title):
print page
+
+
+def f(i):
+ if i <=10:
+ return i**2