summaryrefslogtreecommitdiffstats
path: root/twitter/api.py
blob: b6f2863431df7b7e6951dc6f9c32e2141bafc8ca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from tweepy import API, OAuthHandler
from tweepy import cursor
from bs4 import BeautifulSoup

import os.path
import uuid
from time import time, sleep
from urllib import urlopen


class RequestHandler:
    """Fetch Twitter user data and dump it to text files under data/users.

    API calls go through a tweepy ``API`` object. ``self.state`` keeps the
    last known rate-limit figures for the "followers" and "lookup"
    endpoints so callers can ask ``ready(method)`` before issuing a request.
    """

    def __init__(self, *args):
        """Authenticate and record the current rate-limit status.

        The first two positional arguments are handed to ``OAuthHandler``;
        all remaining ones are handed to ``set_access_token``.
        """
        auth = OAuthHandler(*args[0:2])
        auth.set_access_token(*args[2:])
        self.api = API(auth)
        resources = self.api.rate_limit_status()["resources"]
        self.state = {
            "followers": resources["followers"]["/followers/ids"],
            "lookup": resources["users"]["/users/lookup"],
        }

    def _update_limits(self, method):
        """Refresh ``self.state[method]`` from the last response's headers.

        Every header whose name starts with ``x-rate-limit`` is stored as an
        int under the last dash-separated part of its name (for instance a
        header named ``x-rate-limit-remaining`` lands under ``"remaining"``).
        """
        limits = self.state[method]
        for key, value in self.api.last_response.getheaders():
            if key.startswith("x-rate-limit"):
                limits[key.split("-")[-1]] = int(value)

    def __get_followers(self, user_id):
        """Yield follower ids of ``user_id``; only one result page is read."""
        pages = cursor.Cursor(self.api.followers_ids, id=user_id).pages(1)
        for page in pages:
            for follower in page:
                yield follower

    def get_followers(self, user_id):
        """Write the follower ids of ``user_id`` to a file, one per line.

        The file is ``data/users/<user_id>.txt``. If it already exists it is
        reused as a cache and no request is made. ``user_id`` may be a
        string or an integer.

        Returns the path of the file.
        """
        # str() so that integer ids (as returned by the API) work too.
        filename = os.path.join("data", "users", str(user_id) + ".txt")
        if os.path.isfile(filename):
            return filename
        # Materialise first so a failed request does not leave an empty file
        # behind that would later be mistaken for a cached result.
        followers = list(self.__get_followers(user_id))
        with open(filename, "w") as f:
            for fid in followers:
                f.write(str(fid) + "\n")
        self._update_limits("followers")
        return filename

    def __lookup(self, users_list):
        """Yield the user objects returned by ``lookup_users``."""
        for user in self.api.lookup_users(users_list):
            yield user

    def lookup(self, users_list):
        """Look up ``users_list`` through the API and write one line per user.

        Each line holds, space separated: id, screen name, followers count,
        friends count and the verified flag. The file is named
        ``data/users/lookup-<uuid1>.txt``.

        Returns the path of the file.
        """
        uid = uuid.uuid1()
        filename = os.path.join("data", "users", "lookup-" + str(uid) + ".txt")
        users = list(self.__lookup(users_list))
        with open(filename, "w") as f:
            for user in users:
                output = " ".join([str(user.id), user.screen_name,
                                   str(user.followers_count),
                                   str(user.friends_count),
                                   str(user.verified)])
                f.write(output + "\n")
        self._update_limits("lookup")
        return filename

    def get_profile(self, user_id, username):
        """Scrape follower/following counts from the public profile page.

        Returns the tuple ``(user_id, username, followers, following)``.
        Raises if the page cannot be fetched or does not contain the
        expected ``js-mini-profile-stats`` list with exactly two entries
        after the first one.
        """
        fh = urlopen("https://twitter.com/{0}".format(username))
        try:
            soup = BeautifulSoup(fh)
        finally:
            # The original never closed the connection.
            fh.close()
        ul = soup.find("ul", class_="js-mini-profile-stats")
        following, followers = [li.strong.string
                                for li in ul.find_all("li")[1:]]
        return user_id, username, followers, following

    def short_lookup(self, users_list):
        """Scrape the profile of every ``(user_id, username)`` pair.

        Profiles that cannot be fetched or parsed are skipped (best effort).
        The successful ones are written, newline separated, to
        ``data/users/lookup-<uuid1>.txt``; each line is the space-joined
        result of ``get_profile``.

        Returns the path of the file.
        """
        uid = uuid.uuid1()
        filename = os.path.join("data", "users", "lookup-" + str(uid) + ".txt")

        def get_output():
            for user_id, username in users_list:
                try:
                    output = " ".join(map(str, self.get_profile(user_id,
                                                                username)))
                except Exception:
                    # Best effort: skip this profile. Deliberately not a bare
                    # ``except`` so Ctrl-C / SystemExit still stop the run.
                    continue
                yield output
                # Pause between successful page fetches.
                sleep(0.5)

        to_write = list(get_output())
        with open(filename, "w") as f:
            f.write("\n".join(to_write))

        return filename

    def ready(self, method):
        """Return True if a request to ``method`` may be issued now.

        ``method`` is a key of ``self.state`` ("followers" or "lookup").
        A request is allowed when calls remain in the current window or the
        recorded reset time is already in the past.
        """
        now = int(time())
        limits = self.state[method]
        return int(limits["remaining"]) > 0 or int(limits["reset"]) < now


if __name__ == "__main__":
    # Script mode: download the followers of one account, then look every
    # follower up and collect the results in data/users/<id>_followers.txt.
    #
    # The first line of api_accounts.txt is split on whitespace; fields from
    # the third one onwards are passed to RequestHandler as credentials.
    with open("api_accounts.txt") as accounts:
        credentials = accounts.readline().strip().split()
    handler = RequestHandler(*credentials[2:])

    # Rate limits can be checked before calling, e.g.:
    #     if handler.ready("lookup"):
    #         handler.lookup(["304224106"])
    #     if handler.ready("followers"):
    #         handler.get_followers("304224106")
    #
    # Account ids used so far:
    #     starbucks 30973
    #     bestbuy 17475575
    #     sears 19464428
    #     macys 50687788
    #     target 89084561
    #     gap 18462157
    #     mountain 9409552
    #     coachella 688583

    user_id = "688583"
    # Ids are looked up in batches of this size.
    # NOTE(review): presumably the per-request maximum of users/lookup;
    # confirm against the API documentation.
    batch_size = 100

    followers_file = handler.get_followers(user_id)
    print(followers_file)

    with open(followers_file) as ids:
        with open("data/users/{0}_followers.txt".format(user_id), "w") as out:

            def copy_lookup(batch):
                # Look the batch up and append the resulting lines to `out`.
                with open(handler.lookup(batch)) as looked_up:
                    for row in looked_up:
                        out.write(row)

            batch = []
            for line in ids:
                batch.append(line.strip())
                if len(batch) == batch_size:
                    copy_lookup(batch)
                    batch = []
            # The original dropped a final batch smaller than batch_size.
            if batch:
                copy_lookup(batch)