From 331499d08be1ef32e5bb6200963ebc63500eb826 Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Sat, 3 Aug 2013 23:43:45 +0200
Subject: Fix html stripping

---
 wikisource.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'wikisource.py')

diff --git a/wikisource.py b/wikisource.py
index 7b48eb0..38bd90d 100644
--- a/wikisource.py
+++ b/wikisource.py
@@ -10,11 +10,11 @@ def get_page(title, page):
     params = { "action": "render", "title": "Page:" + title + "/" + str(page) }
     r = requests.get(URL, params=params)
     soup = BeautifulSoup(r.text, "lxml")
-    return " ".join(soup.select("div.pagetext")[0].findAll(text=True))
+    return "".join(soup.select("div.pagetext")[0].findAll(text=True))

 def get_pages(title, begin=1, end=None):
     if not end:
-        end = 10
+        end = 100

     for page in xrange(begin, end+1):
         yield get_page(title, page)
-- 
cgit v1.2.3-70-g09d2