aboutsummaryrefslogtreecommitdiffstats
path: root/xkcd.py
diff options
context:
space:
mode:
authorGuillaume Horel <guillaume.horel@gmail.com>2015-11-07 18:49:08 -0500
committerGuillaume Horel <guillaume.horel@gmail.com>2015-11-07 18:49:08 -0500
commit939ea00056d5cc8817f00b8c293efa04d36bf6d5 (patch)
tree39dc3949e43fda7f95fab01a3c94ce8cb05f3dc1 /xkcd.py
parentf8367663d358b410b821d8cd34e2ac8ad449b8d9 (diff)
downloadslack-939ea00056d5cc8817f00b8c293efa04d36bf6d5.tar.gz
add script to download all the explanations
Diffstat (limited to 'xkcd.py')
-rw-r--r--xkcd.py21
1 files changed, 20 insertions, 1 deletions
diff --git a/xkcd.py b/xkcd.py
index 3bbac65..97fe15e 100644
--- a/xkcd.py
+++ b/xkcd.py
@@ -1,5 +1,6 @@
import requests
import bs4
+import os
def get_xkcd(comicid):
r = requests.get('http://www.xkcd.org/{0}/'.format(comicid))
@@ -8,5 +9,23 @@ def get_xkcd(comicid):
img = soup.find("div", {'id':"comic"}).find("img")
return img['title'], img['src']
+def get_explanation(comic_id):
+    """Download the explanation text for a comic from explainxkcd.
+
+    Fetches the wiki page for ``comic_id`` and joins the text of the first
+    ``<p>`` found inside the ``div`` with id ``content`` with the text of
+    all of that paragraph's following ``<p>`` siblings.  The transcript is
+    ignored for now.
+
+    Returns the concatenated paragraph text, or an empty string when the
+    page has no ``content`` div or no paragraph inside it.
+    """
+    url = 'http://www.explainxkcd.com/wiki/index.php/{0}'.format(comic_id)
+    r = requests.get(url)
+    # Name the parser explicitly so the parse tree does not depend on which
+    # optional parsers happen to be installed on the machine.
+    soup = bs4.BeautifulSoup(r.content, "html.parser")
+    content = soup.find('div', {'id': "content"})
+    firstp = content.find('p') if content is not None else None
+    if firstp is None:
+        # Nothing to extract; avoid an AttributeError on a missing element.
+        return ""
+    allp = [firstp] + firstp.find_next_siblings('p')
+    return "".join(t.text for t in allp)
+
+def main(first=1, last=1600):
+    """Download the explanations of comics ``first`` through ``last``.
+
+    Each explanation is written UTF-8 encoded to
+    ``explanations/comic_<id>``; the directory is created when it does not
+    exist yet.  The defaults reproduce the original fixed range 1..1600.
+    """
+    # TODO: resume from the highest comic id already saved in explanations/
+    # instead of always starting over from ``first``.
+    if not os.path.isdir("explanations"):
+        os.makedirs("explanations")
+    for cid in range(first, last + 1):
+        # Fetch before opening the file so a failed download does not leave
+        # an empty file behind.
+        payload = get_explanation(cid).encode("utf-8")
+        # Binary mode: the payload is already encoded bytes, which a
+        # text-mode handle rejects on Python 3.
+        with open("explanations/comic_{0}".format(cid), "wb") as fh:
+            fh.write(payload)
+
if __name__=="__main__":
- print(get_xkcd(1600))
+ # When run as a script, download and save the explanations via main().
+ main()