summary refs log tree commit diff stats
path: root/stream.py
diff options
context:
space:
mode:
Diffstat (limited to 'stream.py')
-rw-r--r-- stream.py 62
1 files changed, 62 insertions, 0 deletions
diff --git a/stream.py b/stream.py
new file mode 100644
index 0000000..71cf615
--- /dev/null
+++ b/stream.py
@@ -0,0 +1,62 @@
+from tweepy import StreamListener, OAuthHandler, Stream
+
+from itertools import chain
+from datetime import datetime
+import sys
+import os
+
+
+class Listener(StreamListener):
+    """Stream listener that logs tweet authors to one file per concept.
+
+    For every concept a text file named ``<concept>_<timestamp>.txt`` is
+    opened in the current working directory. Each status whose hashtags or
+    user mentions match a concept appends one line describing its author to
+    that concept's file.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Open one output file per concept.
+
+        The required ``concepts`` keyword argument (an iterable of concept
+        names) is consumed here; every other argument is forwarded to the
+        parent class. A missing ``concepts`` raises ``KeyError``.
+        """
+        # Bind the attribute first so __del__ is safe even if anything
+        # below raises before the files are opened.
+        self.fhandlers = {}
+        concepts = kwargs.pop("concepts")
+        super(Listener, self).__init__(*args, **kwargs)
+        date = datetime.now().replace(microsecond=0).isoformat()
+        for concept in concepts:
+            # get_concepts() lowercases hashtags and mentions, so the lookup
+            # key must be lowercased too or mixed-case concepts never match.
+            key = concept.lower()
+            if key in self.fhandlers:
+                # Same concept in a different case: reuse the open file
+                # instead of leaking a second handle.
+                continue
+            filename = concept + "_{0}.txt".format(date)
+            self.fhandlers[key] = open(filename, "w")
+
+    def __del__(self, *args, **kwargs):
+        """Close every output file.
+
+        The extra parameters are accepted only to keep the existing
+        signature; they are ignored.
+        """
+        # getattr guards against an instance whose __init__ never ran.
+        for fh in getattr(self, "fhandlers", {}).values():
+            fh.close()
+
+    def get_concepts(self, entities):
+        """Return the lowercased hashtags and mentioned screen names.
+
+        ``entities`` is a mapping with a ``"hashtags"`` list (items carrying
+        a ``"text"`` key) and a ``"user_mentions"`` list (items carrying a
+        ``"screen_name"`` key). The result is a set of strings.
+        """
+        hashtags = (hashtag["text"].lower()
+                    for hashtag in entities["hashtags"])
+        users = (user["screen_name"].lower()
+                 for user in entities["user_mentions"])
+        return set(chain(hashtags, users))
+
+    def on_status(self, tweet):
+        """Append the author's details to the file of each matched concept.
+
+        The line written is space-separated: user id, screen name, follower
+        count, friend count, verified flag and the tweet's creation time in
+        ISO format.
+        """
+        concepts = self.get_concepts(tweet.entities)
+        output = " ".join([str(tweet.user.id), tweet.user.screen_name,
+                           str(tweet.user.followers_count),
+                           str(tweet.user.friends_count),
+                           str(tweet.user.verified),
+                           tweet.created_at.isoformat()])
+        for concept in concepts:
+            fh = self.fhandlers.get(concept)
+            if fh is None:
+                # Hashtag or mention that is not one of the tracked concepts.
+                continue
+            fh.write(output + "\n")
+            # Flush per line so the file is current while the stream runs.
+            fh.flush()
+
+
+def process(filename, cred_file):
+    """Stream tweets about the concepts listed in ``filename``.
+
+    ``filename`` holds one concept per line; blank lines are ignored. The
+    first line of ``cred_file`` holds whitespace-separated fields: the third
+    and fourth are passed to ``OAuthHandler`` and everything from the fifth
+    onwards to ``set_access_token``. Each concept is tracked both as a
+    ``#hashtag`` and as an ``@mention``. The ``Listener`` output files are
+    created inside the ``data`` directory, which must already exist.
+
+    Raises ``ValueError`` when the credentials line has fewer than six
+    fields.
+    """
+    with open(filename) as f:
+        # An empty concept would track bare "#" and "@" and open a file
+        # named only after the timestamp, so drop blank lines.
+        concepts = [line.strip() for line in f if line.strip()]
+    with open(cred_file) as f:
+        credentials = f.readline().split()
+    if len(credentials) < 6:
+        raise ValueError("{0}: expected at least 6 whitespace-separated "
+                         "fields on the first line".format(cred_file))
+    # Both input files are read before this point so that relative paths
+    # are resolved against the directory the script was started from.
+    os.chdir("data")
+    entities = [("#" + concept, "@" + concept) for concept in concepts]
+    # Materialise the terms: a list can be iterated more than once, a chain
+    # iterator cannot.
+    track = list(chain.from_iterable(entities))
+    auth = OAuthHandler(*credentials[2:4])
+    auth.set_access_token(*credentials[4:])
+    listener = Listener(concepts=concepts)
+    stream = Stream(auth, listener)
+    stream.filter(track=track)
+
+if __name__ == '__main__':
+    # Check the argument count up front instead of catching IndexError
+    # around process(): an unrelated IndexError raised while streaming
+    # would otherwise be swallowed and reported as a usage error.
+    if len(sys.argv) < 3:
+        # Parenthesised single-argument print behaves the same on
+        # Python 2 and Python 3.
+        print("{0} <concept_file> <credentials_file>".format(sys.argv[0]))
+    else:
+        process(sys.argv[1], sys.argv[2])