1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
from tweepy import StreamListener, OAuthHandler, Stream
from itertools import chain
from datetime import datetime
import sys
import os
class Listener(StreamListener):
    """Stream listener that logs matching tweets to one file per concept.

    A tweet matches a concept when one of its hashtags or user mentions,
    compared case-insensitively, equals the concept name.
    """

    def __init__(self, *args, **kwargs):
        """Open one output file per concept in the current directory.

        The mandatory ``concepts`` keyword argument is an iterable of
        concept names; every other argument is forwarded unchanged to the
        parent initializer.  Files are named ``<concept>_<timestamp>.txt``
        where the timestamp is the local time in ISO format, truncated to
        seconds.

        Raises:
            KeyError: if ``concepts`` is not supplied.
        """
        # Defined first so __del__ stays safe even if anything below raises.
        self.fhandlers = {}
        parent_kwargs = kwargs.copy()
        concepts = parent_kwargs.pop("concepts")
        super(Listener, self).__init__(*args, **parent_kwargs)
        date = datetime.now().replace(microsecond=0).isoformat()
        for concept in concepts:
            # get_concepts() lowercases hashtags and mentions, so the lookup
            # key must be lowercased too or mixed-case concepts never match.
            key = concept.lower()
            if key not in self.fhandlers:
                self.fhandlers[key] = open(
                    concept + "_{0}.txt".format(date), "w")

    def __del__(self, *args, **kwargs):
        """Close every output file when the listener is destroyed."""
        # getattr guard: __init__ may have failed before fhandlers existed.
        for fh in getattr(self, "fhandlers", {}).values():
            fh.close()

    def get_concepts(self, entities):
        """Return the lowercased hashtags and mentioned screen names.

        Args:
            entities: mapping with a ``"hashtags"`` sequence (items holding
                a ``"text"`` key) and a ``"user_mentions"`` sequence (items
                holding a ``"screen_name"`` key).

        Returns:
            A set of lowercased strings.
        """
        hashtags = (hashtag["text"].lower()
                    for hashtag in entities["hashtags"])
        users = (user["screen_name"].lower()
                 for user in entities["user_mentions"])
        return set(chain(hashtags, users))

    def on_status(self, tweet):
        """Append one line describing *tweet* to each matching concept file.

        The line holds, separated by single spaces: user id, screen name,
        follower count, friend count, verified flag and the tweet creation
        time in ISO format.
        """
        user = tweet.user
        output = " ".join([str(user.id), user.screen_name,
                           str(user.followers_count),
                           str(user.friends_count),
                           str(user.verified),
                           tweet.created_at.isoformat()])
        for concept in self.get_concepts(tweet.entities):
            fh = self.fhandlers.get(concept)
            if fh is not None:
                fh.write(output + "\n")
                # Flush per tweet so the file is current while streaming.
                fh.flush()
def process(filename, cred_file):
    """Stream tweets that mention the concepts listed in *filename*.

    Args:
        filename: path to a text file with one concept per line.  Blank
            lines are ignored.
        cred_file: path to a credentials file.  Only its first line is
            read and split on whitespace: the first two fields are
            ignored, fields three and four are passed to ``OAuthHandler``
            and the remaining fields to ``set_access_token``.

    Side effects:
        Changes the working directory to ``data`` (which must already
        exist) so the listener creates its output files there, then
        starts the filtered stream.
    """
    with open(filename) as f:
        # A blank line would otherwise become an empty concept, creating a
        # stray "_<date>.txt" file and tracking the bare "#" and "@".
        concepts = [line.strip() for line in f if line.strip()]
    # Context manager so the credentials file is closed deterministically.
    with open(cred_file) as f:
        credentials = f.readline().strip().split()
    # Both input files are read before this, so relative paths given on the
    # command line resolve against the original working directory.
    os.chdir("data")
    # Each concept is tracked both as a hashtag and as a mention.  Built as
    # a list rather than a one-shot iterator so it can be re-read safely.
    track = list(chain.from_iterable(
        ("#" + concept, "@" + concept) for concept in concepts))
    auth = OAuthHandler(*credentials[2:4])
    auth.set_access_token(*credentials[4:])
    listener = Listener(concepts=concepts)
    stream = Stream(auth, listener)
    stream.filter(track=track)
if __name__ == '__main__':
    # Check the argument count up front instead of catching IndexError
    # around the whole run: an IndexError raised inside process() would
    # otherwise be misreported as a usage error and its traceback lost.
    if len(sys.argv) < 3:
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print("{0} <concept_file> <credentials_file>".format(sys.argv[0]))
    else:
        process(sys.argv[1], sys.argv[2])
|