import sys

import requests
from bs4 import BeautifulSoup


class Feeder:
    """Minimal Google Reader client using the (long-deprecated) ClientLogin API.

    Authenticates once at construction time and then streams feed entries,
    transparently following Google Reader's pagination (continuation) token.
    """

    AUTH_URL = "https://www.google.com/accounts/ClientLogin"
    FEED_URL = "http://www.google.com/reader/atom/feed/"

    def __init__(self, email, passwd):
        """Authenticate against ClientLogin and store the auth header.

        The header in ``self.headers`` is sent with every subsequent request.

        Raises:
            requests.HTTPError: if the authentication request fails.
            ValueError: if the response contains no ``Auth=`` token.
        """
        self.email = email
        self.passwd = passwd
        params = {
            "accountType": "GOOGLE",
            "Email": email,
            "Passwd": passwd,
            "service": "reader",
            "source": "thibaut",
        }
        r = requests.post(self.AUTH_URL, data=params)
        # Fail loudly on bad credentials/HTTP errors instead of an opaque
        # IndexError while parsing the body below.
        r.raise_for_status()
        # The response body is key=value lines (SID, LSID, Auth).  Scan for
        # the Auth line instead of assuming it is always the third one.
        for line in r.text.splitlines():
            if line.startswith("Auth="):
                auth = line.partition("=")[2]
                break
        else:
            raise ValueError("no Auth token in ClientLogin response")
        self.headers = {"Authorization": "GoogleLogin auth={}".format(auth)}

    def getFeed(self, url):
        """Yield feed ``<entry>`` elements one by one.

        Google caps the ``n`` parameter at 1000, so we keep requesting pages
        with the ``gr:continuation`` token until the server stops sending one.
        """
        params = {}
        while True:
            r = requests.get(self.FEED_URL + url, params=params, headers=self.headers)
            r.raise_for_status()
            # Explicit parser: deterministic across installs and silences the
            # bs4 "no parser specified" warning.  html.parser lowercases tag
            # names, which matches the lowercase "entry"/"gr:continuation"
            # lookups used here.
            soup = BeautifulSoup(r.text, "html.parser")
            for entry in soup("entry"):
                yield entry
            continuation = soup.find("gr:continuation")
            if continuation is None:
                break
            params = {"c": continuation.string}


if __name__ == "__main__":
    feeder = Feeder(sys.argv[1], sys.argv[2])
    for entry in feeder.getFeed("http://planetkde.org/rss20.xml"):
        # .format() + print() behaves identically on Python 2.7 and 3.x,
        # unlike the original Python 2 print statement.
        print("{} {}".format(entry.published.string, entry.title.string))