summaryrefslogtreecommitdiffstats
path: root/twitter/stream.py
blob: 4fe38c46afab18b85cb65003cfd7482c4e192600 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from tweepy import StreamListener, OAuthHandler, Stream

from itertools import chain
from datetime import datetime
import sys
import os


class Listener(StreamListener):
    """Stream listener that logs tweet authors to one file per concept.

    For every concept given at construction time a text file named
    ``<concept>_<ISO timestamp>.txt`` is opened for writing in the current
    working directory.  Each status whose hashtags include one of the
    concepts gets one line of author data appended to that concept's file.
    """

    def __init__(self, *args, **kwargs):
        """Open one output file per concept.

        Args:
            *args: forwarded unchanged to ``StreamListener.__init__``.
            **kwargs: forwarded to ``StreamListener.__init__`` once the
                ``concepts`` entry has been removed.  ``concepts`` is an
                iterable of hashtag names without the leading ``#``; when
                omitted, no files are opened.
        """
        # Set before anything that can raise so __del__ always finds it.
        self.fhandlers = {}
        concepts = kwargs.pop("concepts", ())
        super(Listener, self).__init__(*args, **kwargs)
        date = datetime.now().replace(microsecond=0).isoformat()
        for concept in concepts:
            # get_concepts() lower-cases hashtags, so the lookup key must be
            # lower-case too; the file keeps the concept's original spelling.
            key = concept.lower()
            if key in self.fhandlers:
                # Same concept in a different case: reuse the first file
                # instead of leaking a second handle.
                continue
            self.fhandlers[key] = open(concept + "_{0}.txt".format(date), "w")

    def __del__(self, *args, **kwargs):
        """Close every output file when the listener is destroyed."""
        # Only clean up here: the finalizer must not re-run the parent
        # initialiser.  getattr() covers instances whose __init__ failed
        # before fhandlers was assigned.
        for fh in getattr(self, "fhandlers", {}).values():
            fh.close()

    def get_concepts(self, entities):
        """Return the set of lower-cased hashtag texts found in *entities*.

        Args:
            entities: mapping with a ``"hashtags"`` entry, itself an iterable
                of mappings that each carry a ``"text"`` key.
        """
        return {hashtag["text"].lower() for hashtag in entities["hashtags"]}

    def on_status(self, tweet):
        """Append the author's data to the file of every matching concept.

        The written line is space-separated: user id, screen name, followers
        count, friends count, verified flag and the tweet's creation time in
        ISO format.

        Args:
            tweet: status object exposing ``entities``, ``user`` and
                ``created_at``.
        """
        concepts = self.get_concepts(tweet.entities)
        user = tweet.user
        output = " ".join([str(user.id), user.screen_name,
                           str(user.followers_count),
                           str(user.friends_count),
                           str(user.verified),
                           tweet.created_at.isoformat()])
        # NOTE(review): this used to dump ``dict(tweet)``, which raises
        # TypeError unless the status object can be iterated as key/value
        # pairs, aborting the handler before anything was written.  The
        # record line is echoed instead.
        print(output)
        for concept in concepts:
            fh = self.fhandlers.get(concept)
            if fh is None:
                continue
            fh.write(output + "\n")
            # Flush after every record so the file is up to date while the
            # stream is still running.
            fh.flush()


def process(filename, cred_file):
    """Track the hashtags listed in *filename* on the Twitter stream.

    Args:
        filename: path of a text file with one concept (hashtag name without
            the leading ``#``) per line; blank lines are ignored.
        cred_file: path of a text file whose first line holds
            whitespace-separated fields.  Fields 2-3 (0-based) are passed to
            ``OAuthHandler`` and the fields from index 4 on to
            ``set_access_token`` -- presumably consumer key/secret followed
            by access token/secret; confirm against the credentials file.

    Both paths are read before the working directory changes to ``data``,
    so relative paths are resolved against the caller's directory.  Output
    files are created by ``Listener`` inside ``data``.
    """
    with open(filename) as f:
        # Drop blank lines: an empty concept would track a bare "#".
        concepts = [line.strip() for line in f if line.strip()]
    with open(cred_file) as f:
        credentials = f.readline().split()
    os.chdir("data")
    # One track term per concept.  Flattening this list with
    # chain.from_iterable would split every term into single characters.
    track = ["#" + concept for concept in concepts]
    auth = OAuthHandler(*credentials[2:4])
    auth.set_access_token(*credentials[4:])
    listener = Listener(concepts=concepts)
    stream = Stream(auth, listener)
    stream.filter(track=track)

if __name__ == '__main__':
    # Check the argument count up front rather than catching IndexError
    # around the whole run, which would also hide any IndexError raised
    # inside process() behind a misleading usage message.
    if len(sys.argv) < 3:
        print("{0} <concept_file> <credentials_file>".format(sys.argv[0]))
    else:
        process(sys.argv[1], sys.argv[2])