"""Scrape Twitter follower lists through a PhantomJS-driven Selenium session."""

import os.path
from time import sleep

try:
    from urllib2 import URLError  # Python 2
except ImportError:
    from urllib.error import URLError  # Python 3

from bs4 import BeautifulSoup, Tag
from selenium import webdriver
from selenium.common.exceptions import (
    ElementNotVisibleException,
    NoSuchElementException,
    TimeoutException,
)
from selenium.webdriver.support.wait import WebDriverWait


class Driver:
    """A logged-in headless browser session used to list an account's followers."""

    def __init__(self, username, password):
        """Start PhantomJS and sign in with the given credentials.

        If signing in raises, the browser is shut down before the exception
        propagates so that no PhantomJS process is left behind.
        """
        self.driver = webdriver.PhantomJS()
        self.username = username
        self.password = password
        try:
            self.__connect()
        except Exception:
            self.driver.quit()
            raise

    def close(self):
        """Shut down the underlying browser."""
        self.driver.quit()

    def __ajax_complete(self):
        """Return True when the page's jQuery reports no active requests."""
        return 0 == self.driver.execute_script("return jQuery.active")

    def __connect(self):
        """Open the Twitter front page and submit the sign-in form."""
        driver = self.driver
        driver.get("http://twitter.com")
        driver.find_element_by_id("signin-email").send_keys(self.username)
        elem = driver.find_element_by_id("signin-password")
        elem.send_keys(self.password)
        elem.submit()

    def __open_followers_page(self, username):
        """Navigate to *username*'s followers page.

        The request is tried twice, one second apart.  Returns True if one of
        the attempts succeeded and False if both raised URLError.
        """
        url = "https://twitter.com/{0}/followers".format(username)
        for attempt in range(2):
            try:
                self.driver.get(url)
            except URLError:
                if attempt == 0:
                    sleep(1)
            else:
                return True
        return False

    def __expand_timeline(self, footer):
        """Click *footer* until it no longer advertises more items.

        Stops early if the footer disappears or becomes invisible, or if the
        pending requests do not settle within five seconds of a click.
        """
        while True:
            try:
                # A missing class attribute is treated as "nothing more".
                if "has-more-items" not in (footer.get_attribute("class") or ""):
                    break
                footer.click()
                WebDriverWait(self.driver, 5).until(
                    lambda _: self.__ajax_complete(),
                    "Timeout waiting for ajax to return")
            except (TimeoutException, NoSuchElementException,
                    ElementNotVisibleException):
                break

    def __get_followers(self, username):
        """Yield ``(user_id, screen_name)`` for each follower of *username*.

        Yields nothing when the page cannot be loaded or when the expected
        elements ("timeline-end" footer, "stream-items-id" list) are absent.
        List entries without a ``div`` carrying both ``data-user-id`` and
        ``data-screen-name`` are skipped.
        """
        if not self.__open_followers_page(username):
            return

        driver = self.driver
        try:
            footer = driver.find_element_by_class_name("timeline-end")
        except NoSuchElementException:
            return

        self.__expand_timeline(footer)

        try:
            stream = driver.find_element_by_id("stream-items-id")
        except NoSuchElementException:
            return

        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(stream.get_attribute("outerHTML"), "html.parser")
        if soup.ol is None:
            return

        for follower in soup.ol:
            # Children also include bare strings (whitespace between items).
            if not isinstance(follower, Tag):
                continue
            div = follower.div
            if div is None:
                continue
            user_id = div.get("data-user-id")
            screen_name = div.get("data-screen-name")
            if user_id is None or screen_name is None:
                continue
            yield user_id, screen_name

    def get_followers(self, user_id, username):
        """Write the followers of *username* to ``data/users/<user_id>.txt``.

        *user_id* must be a string; it is only used to build the file name.
        If the file already exists it is reused and nothing is fetched.
        Each output line is ``"<follower id> <follower screen name>"``.

        Returns the path of the file.
        """
        directory = os.path.join("data", "users")
        filename = os.path.join(directory, user_id + ".txt")
        if os.path.isfile(filename):
            return filename

        # Collect everything before opening the file so that an exception
        # while scraping does not leave a truncated file that would later be
        # mistaken for a finished one.
        followers = list(self.__get_followers(username))

        if not os.path.isdir(directory):
            os.makedirs(directory)
        with open(filename, "w") as f:
            for (fid, fname) in followers:
                f.write(fid + " " + fname + "\n")
        return filename

    def process(self, filename):
        """Fetch followers for every ``<user_id> <username>`` line of *filename*.

        Only the first two whitespace-separated fields of a line are used;
        lines with fewer than two fields (e.g. blank lines) are skipped.
        """
        with open(filename) as f:
            for line in f:
                values = line.strip().split()
                if len(values) < 2:
                    continue
                self.get_followers(*values[:2])


if __name__ == "__main__":
    # First line of the accounts file: "<username> <password>".
    with open("scraping_accounts.txt") as accounts:
        credentials = accounts.readline().strip().split()
    driver = Driver(*credentials[:2])
    try:
        # Driver defines no get_profile method, so the previous call here
        # could only raise AttributeError; use the follower scrape instead.
        print(driver.get_followers("23302126", "flipper509"))
    finally:
        driver.close()