From 939ea00056d5cc8817f00b8c293efa04d36bf6d5 Mon Sep 17 00:00:00 2001
From: Guillaume Horel <guillaume.horel@gmail.com>
Date: Sat, 7 Nov 2015 18:49:08 -0500
Subject: add script to download all the explanations

---
 xkcd.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/xkcd.py b/xkcd.py
index 3bbac65..97fe15e 100644
--- a/xkcd.py
+++ b/xkcd.py
@@ -1,5 +1,6 @@
 import requests
 import bs4
+import os
 
 def get_xkcd(comicid):
     r = requests.get('http://www.xkcd.org/{0}/'.format(comicid))
@@ -8,5 +9,23 @@ def get_xkcd(comicid):
         img = soup.find("div", {'id':"comic"}).find("img")
         return img['title'], img['src']
 
+def get_explanation(comic_id):
+    """Download the explanation text of comic `comic_id` from explainxkcd.
+
+    Returns the joined text of the first <p> in div#content and its following sibling <p>s; ignore transcript for now."""
+    r = requests.get('http://www.explainxkcd.com/wiki/index.php/{0}'.format(comic_id))
+    soup = bs4.BeautifulSoup(r.content)  # NOTE(review): no parser named, bs4 picks one itself - consider passing "html.parser"
+    firstp = soup.find('div', {'id':"content"}).find('p')  # NOTE(review): AttributeError if the div is missing (find returns None)
+    allp = [firstp]+firstp.find_next_siblings('p')  # first paragraph plus every later <p> at the same level
+    return "".join([t.text for t in allp])  # paragraphs are concatenated with no separator
+
+def main():
+    """Write explanations of comics 1-1600 to explanations/comic_<id> (the directory must exist)."""
+    # TODO: resume from the highest comic id already in "explanations" (via os.listdir)
+    # instead of re-downloading everything from 1.
+    for cid in range(1, 1601):
+        with open("explanations/comic_{0}".format(cid), "wb") as fh:  # payload is encoded bytes; text mode "w" fails on Python 3
+            fh.write(get_explanation(cid).encode("utf-8"))
+
 if __name__=="__main__":
-    print(get_xkcd(1600))
+    main()
-- 
cgit v1.3.1