"""Collect Twitter follower IDs and basic profile data into text files.

Follower IDs and looked-up profiles are written under ``data/users``.
"""
from contextlib import closing
import os.path
from time import time, sleep
import uuid

try:
    from urllib import urlopen  # Python 2
except ImportError:  # fallback keeps the module importable on Python 3
    from urllib.request import urlopen

from bs4 import BeautifulSoup
from tweepy import API, OAuthHandler
from tweepy import cursor

# Number of user IDs sent to RequestHandler.lookup per call by the driver.
LOOKUP_BATCH_SIZE = 100


class RequestHandler:
    """Wrap a tweepy ``API`` object and cache its rate-limit state.

    ``self.state`` maps a method name (``"followers"`` or ``"lookup"``) to
    the rate-limit dict reported by the API for that endpoint; ``ready``
    reads the ``"remaining"`` and ``"reset"`` entries of those dicts.
    """

    def __init__(self, *args):
        """Authenticate and fetch the initial rate-limit status.

        The first two positional arguments are passed to ``OAuthHandler``;
        the remaining ones are passed to ``set_access_token``.
        """
        auth = OAuthHandler(*args[0:2])
        auth.set_access_token(*args[2:])
        self.api = API(auth)

        limits = self.api.rate_limit_status()
        resources = limits["resources"]
        self.state = {
            "followers": resources["followers"]["/followers/ids"],
            "lookup": resources["users"]["/users/lookup"],
        }

    def _update_rate_limits(self, method):
        """Copy ``x-rate-limit-*`` headers of the last response into state.

        The last dash-separated token of the header name is used as the key
        (e.g. ``x-rate-limit-remaining`` -> ``"remaining"``).
        """
        for key, value in self.api.last_response.getheaders():
            if key.startswith("x-rate-limit"):
                self.state[method][key.split("-")[-1]] = int(value)

    def __get_followers(self, user_id):
        """Yield follower IDs from the first page of ``followers_ids``."""
        pages = cursor.Cursor(self.api.followers_ids, id=user_id).pages(1)
        for page in pages:
            for follower in page:
                yield follower

    def get_followers(self, user_id):
        """Write the follower IDs of ``user_id`` to a file, one per line.

        If the target file already exists it is reused and no request is
        made. Returns the path of the file.
        """
        # str() so that integer IDs work as well as string IDs.
        filename = os.path.join("data", "users", str(user_id) + ".txt")
        if os.path.isfile(filename):
            return filename

        follower_ids = list(self.__get_followers(user_id))
        self._update_rate_limits("followers")
        with open(filename, "w") as f:
            f.writelines(str(fid) + "\n" for fid in follower_ids)
        return filename

    def __lookup(self, users_list):
        """Yield the user objects returned by ``lookup_users``."""
        for user in self.api.lookup_users(users_list):
            yield user

    def lookup(self, users_list):
        """Look up ``users_list`` through the API and write one line per user.

        Each line holds id, screen name, followers count, friends count and
        the verified flag, separated by single spaces. Returns the path of
        the newly created ``lookup-<uuid>.txt`` file.
        """
        uid = uuid.uuid1()
        filename = os.path.join("data", "users", "lookup-" + str(uid) + ".txt")

        users = list(self.__lookup(users_list))
        self._update_rate_limits("lookup")
        with open(filename, "w") as f:
            for user in users:
                fields = [
                    str(user.id),
                    user.screen_name,
                    str(user.followers_count),
                    str(user.friends_count),
                    str(user.verified),
                ]
                f.write(" ".join(fields) + "\n")
        return filename

    def get_profile(self, user_id, username):
        """Scrape follower/following counts from a public profile page.

        Returns ``(user_id, username, followers, following)``. The counts
        are the ``<strong>`` strings of the list items after the first one
        in the ``js-mini-profile-stats`` list; exactly two such items are
        expected. A page without that list raises ``AttributeError``.
        """
        url = "https://twitter.com/{0}".format(username)
        # closing() releases the HTTP response even if parsing fails.
        with closing(urlopen(url)) as fh:
            soup = BeautifulSoup(fh)
        ul = soup.find("ul", class_="js-mini-profile-stats")
        following, followers = [li.strong.string
                                for li in ul.find_all("li")[1:]]
        return user_id, username, followers, following

    def short_lookup(self, users_list):
        """Scrape profiles for ``(user_id, username)`` pairs into a file.

        Profiles that fail to download or parse are skipped (best effort).
        Returns the path of the newly created ``lookup-<uuid>.txt`` file.
        """
        uid = uuid.uuid1()
        filename = os.path.join("data", "users", "lookup-" + str(uid) + ".txt")

        def get_output():
            for user_id, username in users_list:
                try:
                    profile = self.get_profile(user_id, username)
                    output = " ".join(map(str, profile))
                except Exception:
                    # Best effort: skip profiles that cannot be scraped, but
                    # let KeyboardInterrupt / SystemExit propagate.
                    pass
                else:
                    yield output
                # Pause between page requests.
                sleep(0.5)

        to_write = list(get_output())
        with open(filename, "w") as f:
            f.write("\n".join(to_write))
        return filename

    def ready(self, method):
        """Return True if ``method`` has calls remaining or its window reset.

        ``method`` is a key of ``self.state`` (``"followers"``/``"lookup"``).
        """
        now = int(time())
        limits = self.state[method]
        return int(limits["remaining"]) > 0 or int(limits["reset"]) < now


def _append_lookup(handler, batch, out):
    """Look up ``batch`` and append the resulting lines to ``out``."""
    with open(handler.lookup(batch)) as results:
        out.writelines(results)


def main():
    """Fetch followers of one account and look them up in batches."""
    with open("api_accounts.txt") as accounts:
        credentials = accounts.readline().strip().split()
    handler = RequestHandler(*credentials[2:])

    # if handler.ready("lookup"):
    #     handler.lookup(["304224106"])
    # if handler.ready("followers"):
    #     handler.lookup("304224106")

    # Account IDs used so far:
    #   starbucks 30973
    #   bestbuy   17475575
    #   sears     19464428
    #   macys     50687788
    #   target    89084561
    #   gap       18462157
    #   mountain  9409552
    #   coachella 688583
    target_id = "688583"
    print(handler.get_followers(target_id))

    ids_path = "data/users/{0}.txt".format(target_id)
    out_path = "data/users/{0}_followers.txt".format(target_id)
    with open(ids_path) as ids, open(out_path, "w") as out:
        batch = []
        for line in ids:
            follower_id = line.strip()
            if not follower_id:
                continue
            batch.append(follower_id)
            if len(batch) == LOOKUP_BATCH_SIZE:
                _append_lookup(handler, batch, out)
                batch = []
        # Flush the final partial batch; without this, followers beyond the
        # last full batch would never be looked up.
        if batch:
            _append_lookup(handler, batch, out)


if __name__ == "__main__":
    main()