diff options
| author | Guillaume Horel <guillaume.horel@gmail.com> | 2013-12-28 11:31:46 -0500 |
|---|---|---|
| committer | Guillaume Horel <guillaume.horel@gmail.com> | 2013-12-28 11:31:46 -0500 |
| commit | 6f1f274e260b5ba9df98d0869f5277d39588c9a7 (patch) | |
| tree | a757a224b7801ed2d6f1a38f216a756dcf8e633f | |
| parent | 5dc1322e4c7e78ca98e3ad910f816ad45ac7bfd8 (diff) | |
| download | ocr-layer-curation-6f1f274e260b5ba9df98d0869f5277d39588c9a7.tar.gz | |
fix download from wikisource
| -rw-r--r-- | wikisource.py | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/wikisource.py b/wikisource.py
index 1b279e3..5227721 100644
--- a/wikisource.py
+++ b/wikisource.py
@@ -10,7 +10,7 @@ def get_page(title, page):
     params = { "action": "render", "title": "Page:" + title + "/" + str(page) }
     r = requests.get(URL, params=params)
     soup = BeautifulSoup(r.text, "lxml")
-    return "".join(soup.select("div.pagetext")[0].findAll(text=True))
+    return soup.select("div.pagetext")[0].text
 
 def get_pages(title, begin=1, end=None):
     if not end:
```
