# Small client for the Google Reader Atom endpoint: it authenticates through
# ClientLogin and then iterates over the entries of a feed, page by page.
from __future__ import print_function

import sys
from sys import exit

import requests
from bs4 import BeautifulSoup


class Feeder:
    """Authenticated reader of Atom feeds served under ``FEED_URL``.

    Creating an instance performs the login request; the resulting
    ``Authorization`` header is stored in ``self.headers`` and sent with
    every request made by :meth:`feed`.
    """

    AUTH_URL = "https://www.google.com/accounts/ClientLogin"
    FEED_URL = "http://www.google.com/reader/atom/feed/"

    def __init__(self, email, passwd):
        """Log in and set the auth header included in each later request.

        Args:
            email: account e-mail, posted as the ``Email`` form field.
            passwd: account password, posted as the ``Passwd`` form field.

        Raises:
            SystemExit: if the login request does not return an OK status
                or no token can be extracted from the response body.
        """
        self.email = email
        self.passwd = passwd
        params = {
            "accountType": "GOOGLE",
            "Email": self.email,
            "Passwd": self.passwd,
            "service": "reader",
            "source": "thibaut",
        }
        r = requests.post(self.AUTH_URL, data=params)
        if r.status_code != requests.codes.ok:
            exit("Authentication failed")
        token = self._extract_auth_token(r.text)
        if not token:
            # An OK status with an unusable body is still a failed login;
            # report it the same way instead of leaking an IndexError.
            exit("Authentication failed")
        self.headers = {"Authorization": "GoogleLogin auth={0}".format(token)}

    @staticmethod
    def _extract_auth_token(body):
        """Return the auth token found in a ``key=value`` response body.

        The line whose key is ``Auth`` is preferred. If there is none, the
        value of the third line is used, which is the position the token
        was historically read from. ``None`` is returned when neither is
        available.
        """
        lines = body.splitlines()
        for line in lines:
            # partition() splits on the first "=" only, so a token that
            # itself contains "=" is kept whole.
            key, sep, value = line.partition("=")
            if sep and key.strip() == "Auth":
                return value.strip()
        if len(lines) > 2:
            _, sep, value = lines[2].partition("=")
            if sep:
                return value.strip()
        return None

    def feed(self, url):
        """Generate the entries of the feed at *url*, one by one.

        Pages are fetched lazily: the next page is requested only after
        every entry of the current one has been consumed, and iteration
        stops at the first page without a ``gr:continuation`` element.

        Args:
            url: feed URL, appended verbatim to ``FEED_URL``.

        Yields:
            The parsed ``entry`` elements, in document order.
        """
        feed_url = self.FEED_URL + url
        # No query parameters on the first request; later requests carry
        # the continuation marker of the previous page as "c".
        params = None
        while True:
            r = requests.get(feed_url, params=params, headers=self.headers)
            soup = BeautifulSoup(r.text)
            for entry in soup("entry"):
                yield entry
            continuation = soup.find("gr:continuation")
            if continuation is None:
                break
            params = {"c": continuation.string}


if __name__ == "__main__":
    # Validate the arguments explicitly rather than catching IndexError
    # around the constructor, which would also hide unrelated IndexErrors
    # raised during login.
    if len(sys.argv) < 3:
        exit("Please provide email and password on the command line")
    feeder = Feeder(sys.argv[1], sys.argv[2])
    for entry in feeder.feed("http://planetkde.org/rss20.xml"):
        print(entry.published.string, entry.title.string)