from tweepy import StreamListener, OAuthHandler, Stream
from itertools import chain
from datetime import datetime
import sys
import os


class Listener(StreamListener):
    """Stream listener that records tweet authors per tracked concept.

    One output file is opened per concept at construction time.  Every
    incoming status whose hashtags include a tracked concept appends one
    space-separated line describing the tweet's author to that concept's
    file.

    Keyword Args:
        concepts: iterable of concept names (hashtag text without the
            leading "#").  Required; a missing key raises ``KeyError``.
            All remaining positional/keyword arguments are forwarded to
            ``StreamListener.__init__``.
    """

    def __init__(self, *args, **kwargs):
        # ``kwargs`` is this call's private dict, so popping is safe and
        # keeps "concepts" away from the base-class constructor.
        concepts = kwargs.pop("concepts")
        super(Listener, self).__init__(*args, **kwargs)
        date = datetime.now().replace(microsecond=0).isoformat()
        # Keys are lowercased because get_concepts() lowercases hashtags;
        # the file name keeps the concept exactly as it was supplied.
        self.fhandlers = {}
        for concept in concepts:
            key = concept.lower()
            if key in self.fhandlers:
                # Same concept in a different case: reuse the first file
                # instead of opening (and leaking) a second handle.
                continue
            self.fhandlers[key] = open(concept + "_{0}.txt".format(date), "w")

    def __del__(self, *args, **kwargs):
        """Close every output file when the listener is finalised."""
        # ``fhandlers`` may be missing if __init__ raised early.
        for fh in getattr(self, "fhandlers", {}).values():
            fh.close()

    def get_concepts(self, entities):
        """Return the set of lowercased hashtag texts in ``entities``.

        Args:
            entities: mapping with a "hashtags" key holding an iterable of
                mappings that each have a "text" key.
        """
        return set(hashtag["text"].lower() for hashtag in entities["hashtags"])

    def on_status(self, tweet):
        """Append the tweet author's record to each matching concept file.

        The record is: user id, screen name, followers count, friends
        count, verified flag and the tweet's creation time (ISO format),
        separated by single spaces.
        """
        concepts = self.get_concepts(tweet.entities)
        user = tweet.user
        output = " ".join([
            str(user.id),
            user.screen_name,
            str(user.followers_count),
            str(user.friends_count),
            str(user.verified),
            tweet.created_at.isoformat(),
        ])
        # Debug echo.  The original tried ``dict(tweet)``; a status object
        # is presumably not iterable, so echo the record line instead.
        print(output)
        for concept in concepts:
            fh = self.fhandlers.get(concept)
            if fh is not None:
                fh.write(output + "\n")
                # Flush per record so data survives an abrupt stop.
                fh.flush()


def process(filename, cred_file):
    """Stream tweets for the hashtags listed in ``filename``.

    Args:
        filename: path to a text file with one concept per line; blank
            lines are ignored.
        cred_file: path to a file whose first line holds whitespace
            separated fields.  Fields 2-3 are passed to ``OAuthHandler``
            and fields 4+ to ``set_access_token``; fields 0-1 are skipped
            (NOTE(review): presumably labels -- confirm the file format).

    Side effects:
        Changes the working directory to "data" (which must exist) so the
        listener's output files are created there, then blocks in
        ``Stream.filter``.
    """
    with open(filename) as f:
        stripped = (line.strip() for line in f)
        concepts = [concept for concept in stripped if concept]
    with open(cred_file) as f:
        credentials = f.readline().strip().split()
    os.chdir("data")
    # One "#concept" term per concept.  Flattening these strings with
    # chain.from_iterable would split them into single characters.
    track = ["#" + concept for concept in concepts]
    auth = OAuthHandler(*credentials[2:4])
    auth.set_access_token(*credentials[4:])
    listener = Listener(concepts=concepts)
    stream = Stream(auth, listener)
    stream.filter(track=track)


if __name__ == '__main__':
    # Check argv up front instead of catching IndexError around process(),
    # which would also hide unrelated IndexErrors raised while streaming.
    if len(sys.argv) < 3:
        print("{0} <concepts_file> <credentials_file>".format(sys.argv[0]))
    else:
        process(sys.argv[1], sys.argv[2])