aboutsummaryrefslogtreecommitdiffstats
path: root/wikisource.py
blob: f0d230f7fdb6d57129ecee7aa096ef7c0057e1b6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# -*- coding: utf-8 -*-
import requests
import lxml
import sys
from bs4 import BeautifulSoup
from itertools import takewhile, count

# Entry point of the French Wikisource MediaWiki instance. HTTPS is used
# directly so requests are not sent in cleartext and do not pay for an
# http -> https redirect on every page fetch.
URL = "https://fr.wikisource.org/w/index.php"

def get_page(title, page):
    """Fetch the text of one scanned page of a work.

    Sends a GET request to ``URL`` with ``action=render`` and a title of
    the form ``Page:<title>/<page>``, parses the response with the lxml
    parser and extracts the text of the first ``div.pagetext`` element.

    Args:
        title: Name of the work, i.e. the part that follows ``Page:``.
        page: Page number; converted with ``str()`` before being appended.

    Returns:
        The text content of the first ``div.pagetext`` element, or ``None``
        when the response status is not OK or the response contains no
        such element.
    """
    # Plain concatenation is kept on purpose: on Python 2 it promotes to
    # unicode when ``title`` is unicode, whereas ``str.format`` would not.
    params = {"action": "render", "title": "Page:" + title + "/" + str(page)}
    r = requests.get(URL, params=params)
    if r.status_code != requests.codes.ok:
        return None

    soup = BeautifulSoup(r.text, "lxml")
    matches = soup.select("div.pagetext")
    if not matches:
        # A successful response without the expected container used to
        # raise IndexError; report it like any other unavailable page.
        return None
    return matches[0].text

def get_pages(title, begin=1, end=None):
    """Lazily fetch consecutive pages of a work.

    Args:
        title: Name of the work, forwarded to ``get_page``.
        begin: Number of the first page to fetch (default 1).
        end: Number of the last page to fetch, inclusive. When ``None``,
            pages are fetched until ``get_page`` returns ``None``.

    Returns:
        An iterator over the results of ``get_page``. With an explicit
        ``end`` every page number in ``begin..end`` is requested and the
        iterator may contain ``None`` entries; without it, iteration stops
        before the first ``None``.
    """
    if end is not None:
        # ``is not None`` (rather than truthiness) keeps ``end=0`` from
        # silently turning into an unbounded crawl. takewhile/count gives
        # a lazy inclusive range that works on both Python 2 and 3.
        page_numbers = takewhile(lambda i: i <= end, count(begin))
        return (get_page(title, i) for i in page_numbers)

    return takewhile(lambda text: text is not None,
                     (get_page(title, i) for i in count(begin)))


if __name__ == "__main__":
    # Command-line entry point: the first argument is the title handed to
    # get_pages(); every page returned is printed in order.
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: %s TITLE" % sys.argv[0])
    title = sys.argv[1]
    for page in get_pages(title):
        # The parenthesised single-argument form prints the same thing as
        # the print statement on Python 2 and is a valid call on Python 3.
        print(page)


def f(i):
    """Return ``i`` squared when ``i <= 10``; otherwise return ``None``."""
    return i ** 2 if i <= 10 else None