about summary refs log tree commit diff stats
path: root/wikisource.py
diff options
context:
space:
mode:
Diffstat (limited to 'wikisource.py')
-rw-r--r-- wikisource.py 13
1 files changed, 8 insertions, 5 deletions
diff --git a/wikisource.py b/wikisource.py
index 2163483..7b48eb0 100644
--- a/wikisource.py
+++ b/wikisource.py
@@ -10,12 +10,15 @@ def get_page(title, page):
params = { "action": "render", "title": "Page:" + title + "/" + str(page) }
r = requests.get(URL, params=params)
soup = BeautifulSoup(r.text, "lxml")
- return soup.select("div.pagetext")[0]
+ return " ".join(soup.select("div.pagetext")[0].findAll(text=True))
-def get_book(title):
- n_pages = 10
- return [get_page(title, page) for page in xrange(1, n_pages)]
+def get_pages(title, begin=1, end=None):
+ if not end:
+ end = 10
+ for page in xrange(begin, end+1):
+ yield get_page(title, page)
if __name__ == "__main__":
title = sys.argv[1]
- print get_book(title)
+ for page in get_pages(title):
+ print page