blob: 97fe15e12dd04342610741a6dfda0667ae48f0cf (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
import requests
import bs4
import os
def get_xkcd(comicid):
    """Fetch the hover text and image URL of an xkcd comic.

    Requests the comic page for ``comicid`` and reads the ``<img>`` element
    nested inside ``<div id="comic">``.

    Args:
        comicid: Comic number, interpolated into the page URL.

    Returns:
        A ``(title, src)`` tuple built from the image's ``title`` and ``src``
        attributes, or ``None`` when the response status is not 200 or the
        page contains no comic image.

    Raises:
        KeyError: If the image lacks a ``title`` or ``src`` attribute.
        Exceptions raised by ``requests`` (connection errors, timeouts)
        propagate to the caller.
    """
    # A timeout keeps a stalled connection from blocking the caller forever.
    r = requests.get('http://www.xkcd.org/{0}/'.format(comicid), timeout=30)
    if r.status_code != 200:
        return None
    # Name the parser explicitly so the parse result does not depend on
    # which optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(r.content, "html.parser")
    comic = soup.find("div", {'id': "comic"})
    img = comic.find("img") if comic is not None else None
    if img is None:
        return None
    return img['title'], img['src']
def get_explanation(comic_id):
    """Download the explanation text for a comic from explainxkcd.

    Collects the first ``<p>`` found inside ``<div id="content">`` together
    with all of its following sibling ``<p>`` elements. Per the original
    note, the transcript is ignored for now (only ``<p>`` elements are
    collected).

    Args:
        comic_id: Comic number, interpolated into the wiki page URL.

    Returns:
        The text of the collected paragraphs concatenated without a
        separator, or an empty string when the response status is not 200
        or the page has no such paragraph.

    Raises:
        Exceptions raised by ``requests`` (connection errors, timeouts)
        propagate to the caller.
    """
    url = 'http://www.explainxkcd.com/wiki/index.php/{0}'.format(comic_id)
    # A timeout keeps a stalled connection from blocking the caller forever.
    r = requests.get(url, timeout=30)
    if r.status_code != 200:
        # Do not pass an error page off as an explanation.
        return ""
    # Name the parser explicitly so the parse result does not depend on
    # which optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(r.content, "html.parser")
    content = soup.find('div', {'id': "content"})
    firstp = content.find('p') if content is not None else None
    if firstp is None:
        return ""
    allp = [firstp] + firstp.find_next_siblings('p')
    return "".join(t.text for t in allp)
def main(first=1, last=1600, out_dir="explanations"):
    """Download comic explanations and store each one in its own file.

    For every comic id from ``first`` to ``last`` (inclusive) the explanation
    is fetched with ``get_explanation`` and written as UTF-8 text to
    ``<out_dir>/comic_<id>``.

    Args:
        first: First comic id to download.
        last: Last comic id to download (inclusive).
        out_dir: Directory receiving the files; created if missing.
    """
    # TODO: resume from the highest comic id already present in out_dir
    # instead of always starting at ``first``.
    os.makedirs(out_dir, exist_ok=True)
    for cid in range(first, last + 1):
        # Fetch before opening the file so a failed download does not leave
        # an empty or truncated file behind.
        text = get_explanation(cid)
        path = os.path.join(out_dir, "comic_{0}".format(cid))
        # Let the file object do the encoding: writing pre-encoded bytes to
        # a text-mode file raises TypeError on Python 3.
        with open(path, "w", encoding="utf-8") as fh:
            fh.write(text)
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|