1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
from tweepy import API, OAuthHandler
from tweepy import cursor
from bs4 import BeautifulSoup
import os.path
import uuid
from time import time, sleep
from urllib import urlopen
class RequestHandler:
    """Wrap a tweepy ``API`` client and dump its results to text files.

    Follower ids and user records are written under ``data/users``.  The
    handler also keeps the latest rate-limit counters for the two endpoints
    it uses in ``self.state`` (keys ``"followers"`` and ``"lookup"``), so
    callers can ask :meth:`ready` before issuing a request.
    """

    def __init__(self, *args):
        """Authenticate and fetch the initial rate-limit status.

        Args:
            *args: Positional credentials.  The first two are passed to
                ``OAuthHandler`` and the remaining ones to
                ``set_access_token`` (presumably consumer key/secret
                followed by access token/secret -- confirm against the
                tweepy version in use).
        """
        auth = OAuthHandler(*args[0:2])
        auth.set_access_token(*args[2:])
        self.api = API(auth)

        # One rate-limit dict per endpoint; ``ready`` reads the
        # "remaining" and "reset" entries of these dicts.
        limits = self.api.rate_limit_status()
        resources = limits["resources"]
        self.state = {
            "followers": resources["followers"]["/followers/ids"],
            "lookup": resources["users"]["/users/lookup"],
        }

    def _record_rate_limits(self, method, headers):
        """Copy ``x-rate-limit-*`` response headers into ``self.state``.

        Args:
            method: Key of ``self.state`` to update ("followers" or
                "lookup").
            headers: Iterable of ``(name, value)`` header pairs.  For a
                header named ``x-rate-limit-remaining`` the entry
                ``self.state[method]["remaining"]`` is set to
                ``int(value)``; other headers are ignored.
        """
        for key, value in headers:
            if key.startswith("x-rate-limit"):
                self.state[method][key.split("-")[-1]] = int(value)

    def __get_followers(self, user_id):
        """Yield follower ids of ``user_id`` from the first cursor page."""
        # pages(1) caps the cursor at a single page of results.
        pages = cursor.Cursor(self.api.followers_ids, id=user_id).pages(1)
        for page in pages:
            for follower in page:
                yield follower

    def get_followers(self, user_id):
        """Write the follower ids of ``user_id`` to a file, one per line.

        If ``data/users/<user_id>.txt`` already exists it is reused and no
        request is made.  Otherwise the ids are fetched, written, and the
        "followers" rate-limit counters are refreshed from the response
        headers.

        Args:
            user_id: Account id; converted with ``str`` to build the
                filename, so both strings and integers are accepted.

        Returns:
            Path of the file holding the follower ids.
        """
        filename = os.path.join("data", "users", str(user_id) + ".txt")
        if os.path.isfile(filename):
            return filename

        follower_ids = list(self.__get_followers(user_id))
        with open(filename, "w") as f:
            for fid in follower_ids:
                f.write(str(fid) + "\n")

        self._record_rate_limits("followers",
                                 self.api.last_response.getheaders())
        return filename

    def __lookup(self, users_list):
        """Yield the user objects returned by ``lookup_users``."""
        for user in self.api.lookup_users(users_list):
            yield user

    def lookup(self, users_list):
        """Look up ``users_list`` through the API and dump the records.

        Each record is written as one space-separated line:
        ``id screen_name followers_count friends_count verified``.  The
        "lookup" rate-limit counters are refreshed afterwards.

        Args:
            users_list: Value passed straight to ``api.lookup_users``.

        Returns:
            Path of the freshly created ``data/users/lookup-<uuid>.txt``.
        """
        uid = uuid.uuid1()
        filename = os.path.join("data", "users",
                                "lookup-" + str(uid) + ".txt")

        users = list(self.__lookup(users_list))
        with open(filename, "w") as f:
            for user in users:
                fields = [str(user.id), user.screen_name,
                          str(user.followers_count),
                          str(user.friends_count),
                          str(user.verified)]
                f.write(" ".join(fields) + "\n")

        self._record_rate_limits("lookup",
                                 self.api.last_response.getheaders())
        return filename

    def get_profile(self, user_id, username):
        """Scrape follower/following counts from a public profile page.

        Args:
            user_id: Returned unchanged as the first tuple element.
            username: Screen name used to build the profile URL.

        Returns:
            Tuple ``(user_id, username, followers, following)`` where the
            two counts are the ``<strong>`` texts found in the page.

        Raises:
            AttributeError: If the stats list is not found in the page.
            ValueError: If the stats list does not hold exactly two items
                after its first entry.
        """
        page = urlopen("https://twitter.com/{0}".format(username))
        try:
            soup = BeautifulSoup(page)
        finally:
            # The document is parsed inside the try block, so the
            # connection can always be released here.
            page.close()

        stats = soup.find("ul", class_="js-mini-profile-stats")
        # The first <li> is skipped; the next two are unpacked as
        # following then followers.
        following, followers = [li.strong.string
                                for li in stats.find_all("li")[1:]]
        return user_id, username, followers, following

    def short_lookup(self, users_list):
        """Scrape profiles for ``users_list`` and dump them to a file.

        Works without the API: every ``(user_id, username)`` pair is
        resolved through :meth:`get_profile`.  Profiles that fail for any
        reason are skipped (best effort).  The method sleeps 0.5 seconds
        after each successful profile.

        Args:
            users_list: Iterable of ``(user_id, username)`` pairs.

        Returns:
            Path of the freshly created ``data/users/lookup-<uuid>.txt``
            holding one space-separated profile per line (no trailing
            newline).
        """
        uid = uuid.uuid1()
        filename = os.path.join("data", "users",
                                "lookup-" + str(uid) + ".txt")

        def profile_lines():
            for user_id, username in users_list:
                try:
                    profile = self.get_profile(user_id, username)
                    line = " ".join(map(str, profile))
                except Exception:
                    # Deliberately best-effort: skip profiles that cannot
                    # be fetched or parsed.  Unlike a bare ``except``,
                    # this lets KeyboardInterrupt/SystemExit propagate so
                    # a long scrape can still be interrupted.
                    continue
                yield line
                sleep(0.5)

        to_write = list(profile_lines())
        with open(filename, "w") as f:
            f.write("\n".join(to_write))
        return filename

    def ready(self, method):
        """Tell whether a request for ``method`` may be issued now.

        Args:
            method: Key of ``self.state`` ("followers" or "lookup").

        Returns:
            True when requests remain in the current window or the
            window's reset time is already in the past, False otherwise.
        """
        limits = self.state[method]
        now = int(time())
        return int(limits["remaining"]) > 0 or int(limits["reset"]) < now
if __name__ == "__main__":
    # Script entry point: fetch the follower ids of one account, then look
    # the followers up in batches and concatenate the results into
    # data/users/<id>_followers.txt.

    # Only the first line of the accounts file is used; its first two
    # whitespace-separated fields are skipped and the rest are passed on
    # as credentials (presumably the skipped fields are labels -- confirm
    # against the layout of api_accounts.txt).
    with open("api_accounts.txt") as accounts:
        credentials = accounts.readline().strip().split()
    handler = RequestHandler(*credentials[2:])

    # Rate-limit-aware usage, for reference:
    #     if handler.ready("lookup"):
    #         handler.lookup(["304224106"])
    #     if handler.ready("followers"):
    #         handler.get_followers("304224106")

    # Known account ids:
    # starbucks 30973
    # bestbuy 17475575
    # sears 19464428
    # macys 50687788
    # target 89084561
    # gap 18462157
    # mountain 9409552
    # coachella 688583
    account_id = "688583"
    print(handler.get_followers(account_id))

    # Number of ids sent to handler.lookup per call.
    batch_size = 100

    def append_lookup(batch, out):
        """Look up one batch of ids and append the result lines to out."""
        with open(handler.lookup(batch)) as looked_up:
            out.writelines(looked_up)

    ids_path = "data/users/{0}.txt".format(account_id)
    out_path = "data/users/{0}_followers.txt".format(account_id)
    with open(ids_path) as ids_file, open(out_path, "w") as out:
        batch = []
        for line in ids_file:
            follower_id = line.strip()
            if not follower_id:
                # Blank lines carry no id to look up.
                continue
            batch.append(follower_id)
            if len(batch) == batch_size:
                append_lookup(batch, out)
                batch = []
        # Flush the final, partially filled batch so the trailing ids are
        # not silently dropped.
        if batch:
            append_lookup(batch, out)
|