diff options
| author | Guillaume Horel <guillaume.horel@gmail.com> | 2015-11-07 18:49:08 -0500 |
|---|---|---|
| committer | Guillaume Horel <guillaume.horel@gmail.com> | 2015-11-07 18:49:08 -0500 |
| commit | 939ea00056d5cc8817f00b8c293efa04d36bf6d5 (patch) | |
| tree | 39dc3949e43fda7f95fab01a3c94ce8cb05f3dc1 /xkcd.py | |
| parent | f8367663d358b410b821d8cd34e2ac8ad449b8d9 (diff) | |
| download | slack-939ea00056d5cc8817f00b8c293efa04d36bf6d5.tar.gz | |
add script to download all the explanations
Diffstat (limited to 'xkcd.py')
| -rw-r--r-- | xkcd.py | 21 |
1 files changed, 20 insertions, 1 deletions
@@ -1,5 +1,6 @@ import requests import bs4 +import os def get_xkcd(comicid): r = requests.get('http://www.xkcd.org/{0}/'.format(comicid)) @@ -8,5 +9,23 @@ def get_xkcd(comicid): img = soup.find("div", {'id':"comic"}).find("img") return img['title'], img['src'] +def get_explanation(comic_id): + """dowload explanation from explainxkcd + + ignore transcript for now""" + r = requests.get('http://www.explainxkcd.com/wiki/index.php/{0}'.format(comic_id)) + soup = bs4.BeautifulSoup(r.content) + firstp = soup.find('div', {'id':"content"}).find('p') + allp = [firstp]+firstp.find_next_siblings('p') + return "".join([t.text for t in allp]) + +def main(): + # last = sorted([int(f.split("_")[1]) for f in os.listdir("explanations")], + # reverse=True)[0] + # print(last) + for cid in range(1, 1601): + with open("explanations/comic_{0}".format(cid), "w") as fh: + fh.write(get_explanation(cid).encode("utf-8")) + if __name__=="__main__": - print(get_xkcd(1600)) + main() |
