add script to download all the explanations

author: Guillaume Horel <guillaume.horel@gmail.com> 2015-11-07 18:49:08 -0500
committer: Guillaume Horel <guillaume.horel@gmail.com> 2015-11-07 18:49:08 -0500
commit: 939ea00056d5cc8817f00b8c293efa04d36bf6d5 (patch)
tree: 39dc3949e43fda7f95fab01a3c94ce8cb05f3dc1 /xkcd.py
parent: f8367663d358b410b821d8cd34e2ac8ad449b8d9 (diff)
download: slack-939ea00056d5cc8817f00b8c293efa04d36bf6d5.tar.gz
1 files changed, 20 insertions, 1 deletions
diff --git a/xkcd.py b/xkcd.py
index 3bbac65..97fe15e 100644
--- a/xkcd.py
+++ b/xkcd.py
@@ -1,5 +1,6 @@
 import requests
 import bs4
+import os
 
 def get_xkcd(comicid):
     r = requests.get('http://www.xkcd.org/{0}/'.format(comicid))
@@ -8,5 +9,23 @@ def get_xkcd(comicid):
         img = soup.find("div", {'id':"comic"}).find("img")
         return img['title'], img['src']
 
+def get_explanation(comic_id):
+    """download explanation from explainxkcd
+
+    ignore transcript for now"""
+    r = requests.get('http://www.explainxkcd.com/wiki/index.php/{0}'.format(comic_id))
+    soup = bs4.BeautifulSoup(r.content)
+    firstp = soup.find('div', {'id':"content"}).find('p')
+    allp = [firstp]+firstp.find_next_siblings('p')
+    return "".join([t.text for t in allp])
+
+def main():
+    # last = sorted([int(f.split("_")[1]) for f in os.listdir("explanations")],
+    #               reverse=True)[0]
+    # print(last)
+    for cid in range(1, 1601):
+        with open("explanations/comic_{0}".format(cid), "w") as fh:
+            fh.write(get_explanation(cid).encode("utf-8"))
+
 if __name__=="__main__":
-    print(get_xkcd(1600))
+    main()
author	Guillaume Horel <guillaume.horel@gmail.com>	2015-11-07 18:49:08 -0500
committer	Guillaume Horel <guillaume.horel@gmail.com>	2015-11-07 18:49:08 -0500
commit	939ea00056d5cc8817f00b8c293efa04d36bf6d5 (patch)
tree	39dc3949e43fda7f95fab01a3c94ce8cb05f3dc1 /xkcd.py
parent	f8367663d358b410b821d8cd34e2ac8ad449b8d9 (diff)
download	slack-939ea00056d5cc8817f00b8c293efa04d36bf6d5.tar.gz