diff options
| author | Zaran <zaran.krleza@gmail.com> | 2012-05-15 01:24:24 -0700 |
|---|---|---|
| committer | Zaran <zaran.krleza@gmail.com> | 2012-05-15 01:24:24 -0700 |
| commit | 0e37090239646349d74df0479a387896cb27349a (patch) | |
| tree | 91057641845d26f17f2bef33b0dd2af3fa68f209 | |
| parent | e903e8dba8bd7a060601020267d0728aafbbf37a (diff) | |
| download | google-0e37090239646349d74df0479a387896cb27349a.tar.gz | |
Simple access to the Google Reader API
| -rw-r--r-- | googlereader.py | 45 |
1 files changed, 45 insertions, 0 deletions
# googlereader.py -- added by: diff --git a/googlereader.py b/googlereader.py
# (new file mode 100644, index 0000000..d152b98). Shown below as the file's
# contents rather than as a "+"-prefixed patch.
"""Simple access to the Google Reader API."""

import requests
from bs4 import BeautifulSoup


class Feeder:
    """Authenticated client that streams the entries of a Google Reader feed.

    Constructing an instance logs in through ClientLogin and stores the
    resulting ``Authorization`` header in ``self.headers``; ``getFeed`` then
    sends that header with every request.
    """

    AUTH_URL = "https://www.google.com/accounts/ClientLogin"
    # NOTE(review): this endpoint is plain http, so the Authorization header
    # built in __init__ is sent unencrypted -- confirm whether an https
    # endpoint can be used instead.
    FEED_URL = "http://www.google.com/reader/atom/feed/"

    def __init__(self, email, passwd):
        """Log in and set the auth header included in each subsequent request.

        Args:
            email: account e-mail address, posted as ``Email``.
            passwd: account password, posted as ``Passwd``.

        Raises:
            ValueError: if the login response contains no ``Auth=`` line
                (for example because the credentials were rejected).
        """
        self.email = email
        self.passwd = passwd
        params = {
            "accountType": "GOOGLE",
            "Email": email,
            "Passwd": passwd,
            "service": "reader",
            "source": "thibaut",
        }
        r = requests.post(self.AUTH_URL, data=params)
        auth = self._parse_auth_token(r.text)
        if auth is None:
            # Fail with the server's own answer instead of an opaque
            # IndexError from positional parsing of the response body.
            raise ValueError(
                "ClientLogin did not return an Auth token "
                "(HTTP {}): {!r}".format(r.status_code, r.text.strip())
            )
        self.headers = {"Authorization": "GoogleLogin auth={}".format(auth)}

    @staticmethod
    def _parse_auth_token(text):
        """Return the value of the ``Auth`` line in *text*, or None if absent.

        *text* is treated as ``key=value`` lines. The line is located by its
        key rather than by position, and only the first ``=`` separates key
        from value, so a token that itself contains ``=`` is kept intact.
        """
        for line in text.splitlines():
            key, sep, value = line.partition("=")
            if sep and key.strip().lower() == "auth":
                return value.strip()
        return None

    def getFeed(self, url):
        """Generate the entries of the feed at *url*, one by one.

        It seems that Google caps the ``n`` parameter to 1000, so instead of
        asking for everything at once the feed is fetched page by page: each
        response's ``gr:continuation`` value is sent back as the ``c``
        parameter of the next request until no continuation is returned.

        Args:
            url: feed URL, appended verbatim to ``FEED_URL``.
                NOTE(review): presumably a URL containing ``?`` or ``#``
                must be percent-encoded by the caller -- confirm.

        Yields:
            The ``entry`` elements of each page, in document order.
        """
        params = None  # the first page is requested without a continuation
        while True:
            r = requests.get(self.FEED_URL + url,
                             params=params,
                             headers=self.headers)
            soup = BeautifulSoup(r.text)
            for entry in soup("entry"):
                yield entry
            continuation = soup.find("gr:continuation")
            # An empty continuation tag would make the next request identical
            # to this one and loop forever, so treat it like a missing tag.
            if continuation is None or not continuation.string:
                break
            params = {"c": continuation.string}


if __name__ == "__main__":
    import sys

    if len(sys.argv) < 3:
        sys.exit("usage: {} EMAIL PASSWORD".format(sys.argv[0]))
    feeder = Feeder(sys.argv[1], sys.argv[2])
    for entry in feeder.getFeed("http://planetkde.org/rss20.xml"):
        # Single pre-formatted argument: same output as the Python 2 print
        # statement, and still valid syntax on Python 3.
        print("%s %s" % (entry.published.string, entry.title.string))
