diff options
Diffstat (limited to 'run.py')
| -rw-r--r-- | run.py | 73 |
1 files changed, 0 insertions, 73 deletions
"""Record friend counts for users reachable from a seed page.

Reconstructed from a whitespace-collapsed diff, so block nesting was inferred
from syntax rather than from the original indentation.

The script runs in two phases:

1. For every entry already listed in a ``facebook/*`` file that has no count in
   ``all_users.txt``, queue a ``NumFollowers`` task and record its result.
2. For the first 100 links found in ``seed.txt``, fetch the friend list via a
   ``ListFollowers`` task (unless ``facebook/<name>`` already exists), write it
   to ``facebook/<name>``, and queue ``NumFollowers`` for each unknown friend.

``all_users.txt`` holds one ``<user> <count>`` pair per line and is appended to
as results arrive, so an interrupted run resumes where it stopped.
"""
from tasks import NumFollowers, ListFollowers, normalize
from bs4 import BeautifulSoup
from celery.result import ResultSet
import os.path as op
from glob import glob

nf = NumFollowers()
lf = ListFollowers()
rset = ResultSet([])  # not used below; kept so the module's names are unchanged

# user name -> recorded count, pre-loaded from any previous run.
users = {}
try:
    with open("all_users.txt") as f:
        for line in f:
            values = line.strip().split()
            if len(values) < 2:
                # Blank or truncated line (e.g. from an interrupted write).
                continue
            users[values[0]] = int(values[1])
except IOError:
    # No previous run: start with an empty table.
    pass

output = open("all_users.txt", "a")


def strip(url):
    """Return *url* without a trailing ``/friends`` or anything from the first ``&``."""
    if url.endswith("/friends"):
        return url[:-len("/friends")]
    return url.split("&")[0]


def add_user(user, degree):
    """Remember *degree* for *user* and append the pair to ``all_users.txt``."""
    # Single-argument print gives the same "user degree" text on Python 2 and 3.
    print("%s %s" % (user, degree))
    users[user] = degree
    output.write(user + " " + str(degree) + "\n")
    # Flush per record so results survive an interrupted run.
    output.flush()


def call_back(tid, value):
    """Handle one task result passed by ``ResultSet.join_native``.

    Results carrying a ``"friends"`` key are ignored; results carrying
    ``"nfriends"`` are recorded under the name derived from ``value["for"]``.
    """
    if "friends" in value:
        return

    if "nfriends" in value:
        _, fname, _ = normalize(value["for"])
        add_user(fname, value["nfriends"])


try:
    # Phase 1: fill in counts for entries of friend lists saved earlier.
    todo = ResultSet([])
    for finame in glob("facebook/*"):
        with open(finame) as f:
            for line in f:
                basename, fname, getname = normalize(line.strip())
                if fname not in users:
                    print(finame)
                    todo.add(nf.delay(basename))
    todo.join_native(callback=call_back)

    # Phase 2: expand the seed links. The handle is closed once parsed.
    with open("seed.txt") as seed:
        soup = BeautifulSoup(seed)
    links = [div.a["href"] for div in soup.findAll("div", class_="fsl")]
    for link in links[:100]:
        basename, fname, getname = normalize(link)
        if op.isfile("facebook/" + fname):
            # Friend list already saved by an earlier run.
            continue

        value = lf.delay(getname).get()
        basename, fname, getname = normalize(strip(value["for"]))
        add_user(fname, len(value["friends"]))

        friends_path = "facebook/" + fname
        todo = ResultSet([])
        with open(friends_path, "w") as f:
            for friend in value["friends"]:
                # Separate names here: reusing ``fname`` would overwrite the
                # owner's name that the progress message below relies on.
                friend_base, friend_name, _ = normalize(friend)
                f.write(friend_base + "\n")
                if friend_name not in users:
                    todo.add(nf.delay(friend_base))
        # NOTE(review): placement inferred from the collapsed source; assumed
        # to report the file just written, once per seed link.
        print(friends_path)
        todo.join_native(callback=call_back)
finally:
    # Release the append handle even if a task or parse step fails.
    output.close()
