diff options
Diffstat (limited to 'stream.py')
| -rw-r--r-- | stream.py | 62 |
1 files changed, 62 insertions, 0 deletions
"""Record who tweets about a set of concepts, using Twitter's streaming API.

Usage: stream.py <concept_file> <credentials_file>

For every concept listed in the concept file, tweets carrying it as a
hashtag (``#concept``) or a user mention (``@concept``) are tracked, and one
line describing the tweet's author is appended to a per-concept text file
inside the ``data`` directory.
"""
from tweepy import StreamListener, OAuthHandler, Stream

from itertools import chain
from datetime import datetime
import sys
import os


class Listener(StreamListener):
    """Stream listener that logs tweet authors to one file per concept.

    Accepts the same arguments as ``StreamListener`` plus a required
    ``concepts`` keyword argument: an iterable of concept names. One output
    file named ``<concept>_<ISO timestamp>.txt`` is opened in the current
    working directory for each concept.
    """

    def __init__(self, *args, **kwargs):
        # "concepts" is consumed here; the base class must not receive it.
        # A missing "concepts" raises KeyError, as before.
        concepts = kwargs.pop("concepts")
        # Set before anything that can fail so close()/__del__ are always safe.
        self.fhandlers = {}
        super(Listener, self).__init__(*args, **kwargs)
        date = datetime.now().replace(microsecond=0).isoformat()
        for concept in concepts:
            # get_concepts() lower-cases hashtags and mentions, so the lookup
            # key must be lower-case too or mixed-case concepts never match.
            key = concept.lower()
            if key in self.fhandlers:
                # Same concept listed twice (ignoring case): keep the first
                # file instead of leaking a second handle.
                continue
            self.fhandlers[key] = open(concept + "_{0}.txt".format(date), "w")

    def close(self):
        """Close every per-concept output file. Safe to call repeatedly."""
        for fh in getattr(self, "fhandlers", {}).values():
            fh.close()

    def __del__(self, *args, **kwargs):
        # Finalizer: release the files only. The extra parameters are kept
        # for signature compatibility; the interpreter never passes any.
        self.close()

    def get_concepts(self, entities):
        """Return the lower-cased hashtags and mentioned screen names.

        ``entities`` is a mapping with a ``"hashtags"`` list (items having a
        ``"text"`` key) and a ``"user_mentions"`` list (items having a
        ``"screen_name"`` key). The result is a set of strings.
        """
        hashtags = (hashtag["text"].lower()
                    for hashtag in entities["hashtags"])
        users = (user["screen_name"].lower()
                 for user in entities["user_mentions"])
        return set(chain(hashtags, users))

    def on_status(self, tweet):
        """Append the tweet author's details to each matching concept file.

        The line written is space separated: user id, screen name, followers
        count, friends count, verified flag, tweet creation time (ISO 8601).
        """
        concepts = self.get_concepts(tweet.entities)
        output = " ".join([str(tweet.user.id), tweet.user.screen_name,
                           str(tweet.user.followers_count),
                           str(tweet.user.friends_count),
                           str(tweet.user.verified),
                           tweet.created_at.isoformat()])
        for concept in concepts:
            fh = self.fhandlers.get(concept)
            if fh is not None:
                fh.write(output + "\n")
                # Flush per tweet so data survives an abrupt stop of the
                # long-running stream.
                fh.flush()


def process(filename, cred_file):
    """Track the concepts listed in ``filename`` until the stream ends.

    ``filename`` holds one concept per line; blank lines are ignored.
    ``cred_file`` holds, on its first line, six whitespace-separated fields.
    Fields 3-4 are passed to ``OAuthHandler`` and fields 5-6 to
    ``set_access_token``; the first two fields are not used here
    (NOTE(review): presumably labels/account info -- confirm the format).

    Output files are created inside the ``data`` directory, which must exist.

    Raises:
        ValueError: if the credentials line does not have exactly six fields.
    """
    with open(filename) as f:
        # Skip blank lines: an empty concept would track bare "#" and "@".
        concepts = [line.strip() for line in f if line.strip()]
    with open(cred_file) as f:
        credentials = f.readline().strip().split()
    if len(credentials) != 6:
        raise ValueError(
            "expected 6 whitespace-separated fields in {0}, found {1}".format(
                cred_file, len(credentials)))
    # Change directory only after both input files are read, so relative
    # paths given on the command line still resolve.
    os.chdir("data")
    entities = [("#" + concept, "@" + concept) for concept in concepts]
    track = list(chain.from_iterable(entities))
    auth = OAuthHandler(*credentials[2:4])
    auth.set_access_token(*credentials[4:])
    listener = Listener(concepts=concepts)
    try:
        stream = Stream(auth, listener)
        stream.filter(track=track)
    finally:
        # Do not rely on the finalizer to release the output files.
        listener.close()


if __name__ == '__main__':
    # Check the arguments up front instead of catching IndexError around the
    # whole run, which would mislabel unrelated IndexErrors as usage errors.
    if len(sys.argv) < 3:
        print("{0} <concept_file> <credentials_file>".format(sys.argv[0]))
        sys.exit(1)
    process(sys.argv[1], sys.argv[2])
