"""Celery tasks that scrape Facebook pages through one shared Selenium session.

This is a Python 2 module (it relies on ``urllib2`` and ``xrange``).

Every task in this module uses the same Chrome browser, which is started
inside an Xvfb display and logged in on first use.  Each task returns a dict
with the scraped value plus two bookkeeping keys:

* ``"for"``  -- the URL the task was asked to process;
* ``"orig"`` -- the IP address of the host that ran the task.
"""
from contextlib import closing
import socket
from time import sleep
from urllib2 import urlopen

from bs4 import BeautifulSoup, NavigableString
from celery import Celery, Task
from selenium import webdriver
from selenium.common.exceptions import (
    ElementNotVisibleException,
    NoSuchElementException,
    StaleElementReferenceException,
    WebDriverException,
)
from xvfbwrapper import Xvfb

app = Celery('tasks', broker='amqp://guest@horel.org//')
app.conf.CELERY_RESULT_BACKEND = 'rpc'
app.conf.CELERY_ENABLE_UTC = True
app.conf.CELERY_ACKS_LATE = True

# Single-slot holder for the process-wide browser; filled lazily on first use.
drivers = [None]
# Single-slot holder for the Xvfb display that hosts the browser, kept so the
# display handle stays reachable for as long as the browser is in use.
_displays = [None]
# Address of this host, reported back in every task result under "orig".
ip = socket.gethostbyname(socket.gethostname())

# Endpoint whose first line is "<username> <password>" for the login form.
_CREDENTIALS_URL = "http://horel.org:8080/"
_SCROLL_TO_BOTTOM = "window.scrollTo(0, document.body.scrollHeight);"
_RETRY_DELAY = 0.1  # seconds slept after each failed attempt


def strip(url):
    """Return ``url`` without a trailing ``/friends`` or any ``&...`` suffix."""
    suffix = "/friends"
    if url.endswith(suffix):
        return url[:-len(suffix)]
    return url.split("&")[0]


def normalize(url):
    """Split a profile URL into ``(basename, fname, getname)``.

    ``basename`` is the URL without extra query parameters, ``fname`` is the
    last component of it (the value after the last ``=`` for ``profile.php``
    URLs, the last path segment otherwise) and ``getname`` is the URL of the
    corresponding friends page.
    """
    if "profile.php" in url:
        basename = url.split("&")[0]
        fname = basename.split("=")[-1]
        getname = basename + "&sk=friends"
    else:
        basename = url.split("?")[0]
        fname = basename.split("/")[-1]
        getname = basename + "/friends"
    return basename, fname, getname


def _fetch_credentials():
    """Read the ``(username, password)`` pair from the credentials endpoint."""
    # urllib2 responses are not context managers, hence ``closing``.
    with closing(urlopen(_CREDENTIALS_URL)) as response:
        uname, passwd = response.readline().strip().split()
    return uname, passwd


def _start_logged_in_browser():
    """Start Xvfb and Chrome, submit the login form and return the driver.

    If anything fails after the display or the browser has been started, both
    are shut down before the exception is re-raised, so that a later retry
    does not pile up orphaned processes.
    """
    uname, passwd = _fetch_credentials()
    vdisplay = Xvfb()
    vdisplay.start()
    try:
        driver = webdriver.Chrome()
        try:
            driver.get("https://facebook.com")
            driver.find_element_by_id("email").send_keys(uname)
            elem = driver.find_element_by_id("pass")
            elem.send_keys(passwd)
            elem.submit()
        except Exception:
            driver.quit()
            raise
    except Exception:
        vdisplay.stop()
        raise
    _displays[0] = vdisplay
    return driver


def _get_driver():
    """Return the shared browser, creating and logging it in on first use."""
    if drivers[0] is None:
        drivers[0] = _start_logged_in_browser()
    return drivers[0]


def _find_with_retry(find, arg, attempts, ignored):
    """Call ``find(arg)`` up to ``attempts`` times.

    Exceptions listed in ``ignored`` trigger a short sleep and another
    attempt.  Returns the element, or ``None`` when every attempt failed.
    """
    for _ in xrange(attempts):
        try:
            return find(arg)
        except ignored:
            sleep(_RETRY_DELAY)
    return None


def _retry_click(element):
    """Click ``element``, retrying up to five times; return success as bool."""
    for _ in xrange(5):
        try:
            element.click()
        except (WebDriverException, StaleElementReferenceException):
            sleep(_RETRY_DELAY)
        else:
            return True
    return False


def _expand_page(driver):
    """Scroll down and click the ``_359`` element until it is gone.

    The loop stops when the element cannot be found any more or when clicking
    it keeps failing.  NOTE(review): ``_359`` is presumably the "load more"
    footer of the list -- confirm against the page markup.
    """
    while True:
        footer = _find_with_retry(
            driver.find_element_by_class_name, "_359", 5,
            (NoSuchElementException, ElementNotVisibleException))
        if footer is None:
            return
        try:
            driver.execute_script(_SCROLL_TO_BOTTOM)
            footer.click()
        except StaleElementReferenceException:
            # Must be handled before WebDriverException (its base class):
            # the element was replaced, so just look it up again.
            sleep(_RETRY_DELAY)
        except WebDriverException:
            if not _retry_click(footer):
                return


def _missing_content_result(driver, key, empty, url):
    """Build the result returned when the expected container is absent.

    The value under ``key`` is ``None`` when a ``loginbutton`` element is on
    the page and ``empty`` otherwise.  NOTE(review): a visible login button
    presumably means the session is no longer authenticated -- confirm.
    """
    try:
        driver.find_element_by_id("loginbutton")
    except NoSuchElementException:
        value = empty
    else:
        value = None
    return {key: value, "for": url, "orig": ip}


def _leading_text(tag):
    """Return the first child of ``tag`` if it is plain text, else ``""``.

    Only the first child is inspected.  The exact type is compared on purpose:
    ``isinstance`` would also accept NavigableString subclasses such as
    comments.  A missing tag (``None``) yields ``""``.
    """
    if tag is None:
        return ""
    first = next(iter(tag.children), None)
    if type(first) is NavigableString:
        return first
    return ""


class ListFollowers(Task):
    """Task returning the profile links listed on a friends page."""

    @property
    def driver(self):
        """The shared, logged-in browser (started on first access)."""
        return _get_driver()

    def run(self, url):
        """Scrape the friend links found at ``url``.

        Returns ``{"friends": value, "for": url, "orig": ip}`` where ``value``
        is the list of ``href`` values found, ``[]`` when the page could not
        be loaded or holds no list, and ``None`` when a login button is shown
        instead of the list.
        """
        try:
            # The property access stays inside the ``try`` so that a failed
            # login is reported the same way as a failed page load.
            self.driver.get(url)
        except WebDriverException:
            return {"friends": [], "for": url, "orig": ip}

        driver = self.driver
        _expand_page(driver)
        div = _find_with_retry(driver.find_element_by_class_name, "_30f", 5,
                               (NoSuchElementException,))
        if div is None:
            return _missing_content_result(driver, "friends", [], url)

        soup = BeautifulSoup(div.get_attribute("outerHTML"))
        anchors = (li.a for li in soup.findAll("li", class_="_698"))
        # Skip entries without a link instead of failing the whole task.
        friends = [a["href"] for a in anchors
                   if a is not None and a.get("href") is not None]
        return {"friends": friends, "for": url, "orig": ip}


class NumFollowers(Task):
    """Task returning the friend count displayed on a page."""

    @property
    def driver(self):
        """The shared, logged-in browser (started on first access)."""
        return _get_driver()

    def run(self, url):
        """Scrape the friend count shown at ``url``.

        Returns ``{"nfriends": value, "for": url, "orig": ip}`` where
        ``value`` is the parsed integer, the raw link text when it is not a
        number, ``0`` when the page could not be loaded or the counter is
        missing, and ``None`` when a login button is shown instead.
        """
        try:
            # See ListFollowers.run: login errors are reported like load errors.
            self.driver.get(url)
        except WebDriverException:
            return {"nfriends": 0, "for": url, "orig": ip}

        driver = self.driver
        box = _find_with_retry(
            driver.find_element_by_class_name, "_1f8g", 20,
            (NoSuchElementException, ElementNotVisibleException))
        if box is None:
            return _missing_content_result(driver, "nfriends", 0, url)

        soup = BeautifulSoup(box.get_attribute("outerHTML"))
        a = soup.find("a", class_="uiLinkSubtle")
        # ``.string`` is None when the link has no single text child.
        text = a.string if a is not None else None
        if text is None:
            return {"nfriends": 0, "for": url, "orig": ip}

        try:
            # Drop thousands separators and any non-ASCII characters.
            digits = text.replace(",", "").replace(".", "").replace(" ", "")
            n_friends = int(digits.encode("ascii", "ignore"))
        except ValueError:
            n_friends = text
        print(n_friends)
        return {"nfriends": n_friends, "for": url, "orig": ip}


class Likes(Task):
    """Task returning the names of the items listed on a "likes" page."""

    @property
    def driver(self):
        """The shared, logged-in browser (started on first access)."""
        return _get_driver()

    def run(self, url):
        """Scrape the liked items shown at ``url``.

        Returns ``{"likes": value, "for": url, "orig": ip}`` where ``value``
        is a UTF-8 encoded, tab-separated string of item names, ``[]`` when
        the page could not be loaded, ``""`` when the page holds no list, and
        ``None`` when a login button is shown instead.
        """
        try:
            # See ListFollowers.run: login errors are reported like load errors.
            self.driver.get(url)
        except WebDriverException:
            return {"likes": [], "for": url, "orig": ip}

        driver = self.driver
        _expand_page(driver)
        div = _find_with_retry(driver.find_element_by_class_name, "_30f", 5,
                               (NoSuchElementException,))
        if div is None:
            return _missing_content_result(driver, "likes", "", url)

        soup = BeautifulSoup(div.get_attribute("outerHTML"))
        likes = [_leading_text(li.find("a", class_="_gx7"))
                 for li in soup.findAll("li", class_="_5rz")]
        return {"likes": u"\t".join(likes).encode("utf8"),
                "for": url, "orig": ip}


if __name__ == "__main__":
    nf = Likes()
    with open("toto.txt", "w") as f:
        result = nf.run("https://www.facebook.com/grvgaba29"
                        + "/video_tv_show_favorite")
        # "likes" is already a tab-joined, UTF-8 encoded string; joining it
        # again would put a tab between every character.  It may also be
        # [] or None on failure, in which case an empty line is written.
        f.write((result["likes"] or "") + "\n")