from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import (
    TimeoutException,
    ElementNotVisibleException,
    NoSuchElementException,
)
from bs4 import BeautifulSoup, Tag
import os.path


class Driver:
    """PhantomJS browser session that signs in to Twitter and scrapes followers.

    Constructing an instance starts a PhantomJS process and immediately
    submits the sign-in form on twitter.com with the given credentials.
    Follower lists are cached on disk as ``data/users/<user_id>.txt``, one
    ``"<follower_id> <screen_name>"`` pair per line.
    """

    def __init__(self, username, password):
        """Start the browser and sign in.

        Args:
            username: value typed into the ``signin-email`` field.
            password: value typed into the ``signin-password`` field.
        """
        self.driver = webdriver.PhantomJS()
        self.username = username
        self.password = password
        try:
            self.__connect()
        except Exception:
            # Do not leave an orphaned PhantomJS process behind when the
            # sign-in page cannot be driven; the error itself is re-raised.
            self.driver.quit()
            raise

    def close(self):
        """Shut down the underlying PhantomJS browser."""
        self.driver.quit()

    def __ajax_complete(self):
        """Return True when the page's ``jQuery.active`` counter is zero."""
        return self.driver.execute_script("return jQuery.active") == 0

    def __connect(self):
        """Load twitter.com, fill in the sign-in form and submit it."""
        driver = self.driver
        driver.get("http://twitter.com")
        driver.find_element_by_id("signin-email").send_keys(self.username)
        password_field = driver.find_element_by_id("signin-password")
        password_field.send_keys(self.password)
        password_field.submit()

    def __get_followers(self, username):
        """Yield ``(user_id, screen_name)`` for each follower of *username*.

        The followers page is expanded by clicking the ``timeline-end``
        footer for as long as it carries the ``has-more-items`` class, then
        the ``stream-items-id`` list is parsed. Nothing is yielded when the
        footer or the list cannot be found.
        """
        driver = self.driver
        driver.get("https://twitter.com/{0}/followers".format(username))
        try:
            footer = driver.find_element_by_class_name("timeline-end")
        except NoSuchElementException:
            return

        while True:
            try:
                # get_attribute returns None when the attribute is absent.
                css_classes = footer.get_attribute("class") or ""
                if "has-more-items" not in css_classes:
                    break
                footer.click()
                # Give the request triggered by the click up to 5 seconds
                # to finish; stop expanding the list if it does not.
                WebDriverWait(driver, 5).until(
                    lambda x: self.__ajax_complete(),
                    "Timeout waiting for ajax to return")
            except (TimeoutException, NoSuchElementException,
                    ElementNotVisibleException):
                break

        try:
            stream = driver.find_element_by_id("stream-items-id")
        except NoSuchElementException:
            return

        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries are installed.
        soup = BeautifulSoup(stream.get_attribute("outerHTML"), "html.parser")
        if soup.ol is None:
            return
        for follower in soup.ol:
            # Children also include text nodes (whitespace between items).
            if not isinstance(follower, Tag):
                continue
            div = follower.div
            if div is None:
                continue
            user_id = div.get("data-user-id")
            screen_name = div.get("data-screen-name")
            if user_id is None or screen_name is None:
                # Skip entries that do not describe a user instead of
                # aborting the whole scrape.
                continue
            yield user_id, screen_name

    def get_followers(self, user_id, username):
        """Scrape the followers of *username* into the cache and return its path.

        Args:
            user_id: string used to name the cache file
                (``data/users/<user_id>.txt``).
            username: screen name whose followers page is scraped.

        Returns:
            The path of the cache file. If the file already exists it is
            returned as-is and no scraping happens.
        """
        directory = os.path.join("data", "users")
        filename = os.path.join(directory, user_id + ".txt")
        if os.path.isfile(filename):
            return filename

        # Collect everything before opening the file so that a failed scrape
        # does not leave a truncated cache file behind.
        followers = list(self.__get_followers(username))
        if not os.path.isdir(directory):
            os.makedirs(directory)
        with open(filename, "w") as f:
            for follower_id, screen_name in followers:
                f.write(follower_id + " " + screen_name + "\n")
        return filename

    def process(self, filename):
        """Call :meth:`get_followers` for every line of *filename*.

        Each line is split on whitespace; the first two fields are used as
        ``user_id`` and ``username``. Lines with fewer than two fields
        (blank lines, for instance) are skipped.
        """
        with open(filename) as f:
            for line in f:
                values = line.split()
                if len(values) < 2:
                    continue
                self.get_followers(values[0], values[1])
if __name__ == "__main__":
    # The first line of scraping_accounts.txt is split on whitespace; its
    # first two fields are passed to Driver as username and password.
    with open("scraping_accounts.txt") as accounts:
        credentials = accounts.readline().strip().split()
    driver = Driver(*credentials[:2])
    try:
        # driver.get_followers("23302126", "flipper509")
        # NOTE(review): Driver as defined in this file has no get_profile
        # method, so this call raises AttributeError unless the method is
        # provided elsewhere -- confirm the intended entry point.
        print(driver.get_profile(100, "thibauthorel"))
    finally:
        # Always stop the PhantomJS process started by Driver.
        driver.driver.quit()