"""Record friend counts for users reachable from a seed page.

The script keeps a persistent ``all_users.txt`` log of ``<user> <count>``
lines and a ``facebook/`` directory holding one file per crawled user (one
friend per line).  It runs in two phases:

1. Resume: every entry already listed in ``facebook/*`` that has no count in
   ``all_users.txt`` is queued on the ``NumFollowers`` task.
2. Seed crawl: the first 100 links found in ``seed.txt`` are expanded with the
   ``ListFollowers`` task, their friend lists are written to ``facebook/``,
   and each friend without a known count is queued on ``NumFollowers``.

NOTE(review): ``normalize`` comes from ``tasks`` and is assumed to return a
``(basename, fname, getname)`` triple, as every call site unpacks it that way;
the exact meaning of each element should be confirmed against ``tasks``.
"""
from tasks import NumFollowers, ListFollowers, normalize
from bs4 import BeautifulSoup
from celery.result import ResultSet
import os.path as op
from glob import glob

nf = NumFollowers()
lf = ListFollowers()

# Not used anywhere in this script; kept in case something external refers
# to it.
rset = ResultSet([])

# user name -> recorded count, preloaded from a previous run's log if any.
users = {}
try:
    with open("all_users.txt") as f:
        for line in f:
            values = line.split()
            # Skip blank or truncated lines instead of dying on IndexError.
            if len(values) < 2:
                continue
            users[values[0]] = int(values[1])
except IOError:
    # No log yet (first run): start with an empty table.
    pass

# Append-only log shared by add_user(); closed in the ``finally`` below.
output = open("all_users.txt", "a")


def strip(url):
    """Return *url* without a trailing ``/friends`` or without its query tail.

    If *url* ends with ``/friends`` that suffix is removed; otherwise
    everything from the first ``&`` onwards is dropped.
    """
    suffix = "/friends"
    if url.endswith(suffix):
        return url[:-len(suffix)]
    return url.split("&")[0]


def add_user(user, degree):
    """Record *degree* for *user* in memory and append it to the log file.

    The line is flushed immediately so that an interrupted run loses nothing
    that was already reported.
    """
    print("%s %s" % (user, degree))
    users[user] = degree
    output.write(user + " " + str(degree) + "\n")
    output.flush()


def call_back(tid, value):
    """Handle one finished task result delivered by ``join_native``.

    Results carrying a ``"friends"`` key are ignored; results carrying an
    ``"nfriends"`` key are recorded through :func:`add_user` under the name
    derived from ``value["for"]``.  *tid* (the task id) is unused.
    """
    if "friends" in value:
        return
    if "nfriends" in value:
        basename, fname, getname = normalize(value["for"])
        add_user(fname, value["nfriends"])


try:
    # Phase 1: queue a count lookup for every already-stored friend entry
    # that has no recorded count yet.
    todo = ResultSet([])
    for finame in glob("facebook/*"):
        with open(finame) as f:
            for line in f:
                basename, fname, getname = normalize(line.strip())
                if fname not in users:
                    print(finame)
                    todo.add(nf.delay(basename))
    todo.join_native(callback=call_back)

    # Phase 2: expand the seed page.
    with open("seed.txt") as seed_file:
        soup = BeautifulSoup(seed_file)
    links = [div.a["href"] for div in soup.findAll("div", class_="fsl")]

    for link in links[:100]:
        basename, fname, getname = normalize(link)
        if op.isfile("facebook/" + fname):
            # Already crawled on a previous run.
            continue

        value = lf.delay(getname).get()
        basename, fname, getname = normalize(strip(value["for"]))
        add_user(fname, len(value["friends"]))

        # Compute the path once: the loop below must not clobber the owner's
        # name, otherwise the path reported afterwards is the last friend's.
        friends_path = "facebook/" + fname
        todo = ResultSet([])
        with open(friends_path, "w") as f:
            for friend in value["friends"]:
                friend_base, friend_fname, friend_getname = normalize(friend)
                f.write(friend_base + "\n")
                if friend_fname not in users:
                    todo.add(nf.delay(friend_base))
        print(friends_path)
        todo.join_native(callback=call_back)
finally:
    output.close()