diff options
| -rw-r--r-- | xkcd.py | 21 |
1 files changed, 20 insertions, 1 deletions
@@ -1,5 +1,6 @@
 import requests
 import bs4
+import os
 
 def get_xkcd(comicid):
     r = requests.get('http://www.xkcd.org/{0}/'.format(comicid))
@@ -8,5 +9,23 @@ def get_xkcd(comicid):
     img = soup.find("div", {'id':"comic"}).find("img")
     return img['title'], img['src']
 
+def get_explanation(comic_id):
+    """dowload explanation from explainxkcd
+
+    ignore transcript for now"""
+    r = requests.get('http://www.explainxkcd.com/wiki/index.php/{0}'.format(comic_id))
+    soup = bs4.BeautifulSoup(r.content)
+    firstp = soup.find('div', {'id':"content"}).find('p')
+    allp = [firstp]+firstp.find_next_siblings('p')
+    return "".join([t.text for t in allp])
+
+def main():
+    # last = sorted([int(f.split("_")[1]) for f in os.listdir("explanations")],
+    # reverse=True)[0]
+    # print(last)
+    for cid in range(1, 1601):
+        with open("explanations/comic_{0}".format(cid), "w") as fh:
+            fh.write(get_explanation(cid).encode("utf-8"))
+
 if __name__=="__main__":
-    print(get_xkcd(1600))
+    main()
