aboutsummaryrefslogtreecommitdiffstats
path: root/xkcd.py
diff options
context:
space:
mode:
Diffstat (limited to 'xkcd.py')
-rw-r--r-- xkcd.py 21
1 files changed, 20 insertions, 1 deletions
diff --git a/xkcd.py b/xkcd.py
index 3bbac65..97fe15e 100644
--- a/xkcd.py
+++ b/xkcd.py
@@ -1,5 +1,6 @@
import requests
import bs4
+import os
def get_xkcd(comicid):
r = requests.get('http://www.xkcd.org/{0}/'.format(comicid))
@@ -8,5 +9,23 @@ def get_xkcd(comicid):
img = soup.find("div", {'id':"comic"}).find("img")
return img['title'], img['src']
+def get_explanation(comic_id):
+    """Download the explanation text of a comic from explainxkcd.
+
+    Fetches the wiki page for ``comic_id`` and returns the concatenated
+    text of the first ``<p>`` inside the ``div#content`` element and of
+    all ``<p>`` siblings that follow it.
+
+    No special handling of the transcript section for now.
+    """
+    url = 'http://www.explainxkcd.com/wiki/index.php/{0}'.format(comic_id)
+    response = requests.get(url)
+    page = bs4.BeautifulSoup(response.content)
+    content = page.find('div', {'id':"content"})
+    lead = content.find('p')
+    # Start with the first paragraph, then append every later sibling <p>.
+    paragraphs = [lead]
+    paragraphs.extend(lead.find_next_siblings('p'))
+    return "".join(p.text for p in paragraphs)
+
+def main():
+    """Download the explanations of comics 1 through 1600.
+
+    Each explanation is fetched with ``get_explanation`` and saved,
+    UTF-8 encoded, as ``explanations/comic_<id>``. The ``explanations``
+    directory is created if it does not exist yet.
+    """
+    # NOTE: disabled code below; presumably meant to find the highest
+    # comic id already downloaded so a run could resume -- confirm.
+    # last = sorted([int(f.split("_")[1]) for f in os.listdir("explanations")],
+    #               reverse=True)[0]
+    # print(last)
+    if not os.path.isdir("explanations"):
+        os.makedirs("explanations")
+    for cid in range(1, 1601):
+        # Binary mode: the payload is already encoded to bytes, and writing
+        # bytes to a text-mode file raises TypeError on Python 3.
+        with open("explanations/comic_{0}".format(cid), "wb") as fh:
+            fh.write(get_explanation(cid).encode("utf-8"))
+
if __name__=="__main__":
- print(get_xkcd(1600))
+ # Script entry point: run the download loop defined in main().
+ main()