diff options
| -rw-r--r-- | wikisource.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/wikisource.py b/wikisource.py new file mode 100644 index 0000000..2163483 --- /dev/null +++ b/wikisource.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +import requests +import lxml +import sys +from bs4 import BeautifulSoup + +URL="http://fr.wikisource.org/w/index.php" + +def get_page(title, page): + params = { "action": "render", "title": "Page:" + title + "/" + str(page) } + r = requests.get(URL, params=params) + soup = BeautifulSoup(r.text, "lxml") + return soup.select("div.pagetext")[0] + +def get_book(title): + n_pages = 10 + return [get_page(title, page) for page in xrange(1, n_pages)] + +if __name__ == "__main__": + title = sys.argv[1] + print get_book(title) |
