summary refs log tree commit diff stats
path: root/run.py
diff options
context:
space:
mode:
Diffstat (limited to 'run.py')
-rw-r--r-- run.py 73
1 files changed, 73 insertions, 0 deletions
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..ffee06a
--- /dev/null
+++ b/run.py
@@ -0,0 +1,73 @@
+from tasks import NumFollowers, ListFollowers, normalize
+from bs4 import BeautifulSoup
+from celery.result import ResultSet
+import os.path as op
+from glob import glob
+
+# Task objects imported from tasks; both are invoked through .delay() below.
+nf = NumFollowers()
+lf = ListFollowers()
+rset = ResultSet([])
+
+# Reload the "<user> <degree>" records appended by earlier runs (see
+# add_user) so users that are already known are not queried again.
+users = {}
+try:
+    with open("all_users.txt") as f:
+        for line in f:
+            values = line.strip().split()
+            if len(values) < 2:
+                # Blank or truncated record (e.g. a write cut short by an
+                # interrupted run): skip it instead of aborting start-up.
+                continue
+            try:
+                degree = int(values[1])
+            except ValueError:
+                # Degree field is not an integer; ignore the damaged record.
+                continue
+            users[values[0]] = degree
+except IOError:
+    # The file could not be opened (e.g. it does not exist yet): start empty.
+    pass
+
+# Kept open in append mode for the rest of the run; add_user() writes to it.
+output = open("all_users.txt", "a")
+
+
+def strip(url):
+    """Trim a URL down to its leading part.
+
+    A trailing "/friends" is removed. Any other URL is cut at its first
+    "&", dropping that character and everything after it; a URL without
+    "&" is returned unchanged.
+    """
+    suffix = "/friends"
+    if not url.endswith(suffix):
+        # partition() returns the whole string as the head when no "&" exists.
+        return url.partition("&")[0]
+    return url[:-len(suffix)]
+
+
+def add_user(user, degree):
+    """Record *user* together with its *degree*.
+
+    The pair is echoed to stdout, stored in the module-level ``users``
+    dict, and appended to the module-level ``output`` file as a
+    "<user> <degree>" line.
+    """
+    print("%s %s" % (user, degree))
+    users[user] = degree
+    output.write(" ".join((user, str(degree))) + "\n")
+    # Flushed after every record; presumably so that an interrupted run
+    # loses as little as possible -- confirm.
+    output.flush()
+
+
+def call_back(tid, value):
+    """Handle one task result; passed as ``callback`` to ``join_native``.
+
+    Results that contain a "friends" key, or that lack an "nfriends" key,
+    are ignored. Otherwise the name obtained by normalizing ``value["for"]``
+    is recorded with ``value["nfriends"]`` via ``add_user``.
+
+    ``tid`` is accepted but not used. Always returns None.
+    """
+    if "friends" in value or "nfriends" not in value:
+        return
+
+    # Only the middle element of the normalize() triple is needed here.
+    _, name, _ = normalize(value["for"])
+    add_user(name, value["nfriends"])
+
+# Re-scan every file matched by facebook/* and queue an nf task for each
+# line whose normalized name is not yet a key of `users`; the results are
+# then handed to call_back.
+# NOTE(review): indentation was lost in this rendering of the diff, so the
+# exact nesting of the statements below must be confirmed against run.py.
+todo = ResultSet([])
+for finame in glob("facebook/*"):
+ with open(finame) as f:
+ for line in f:
+ basename, fname, getname = normalize(line.strip())
+ if fname not in users:
+ print finame
+ todo.add(nf.delay(basename))
+# Collect the queued results, passing call_back as the result callback.
+todo.join_native(callback=call_back)
+
+# Crawl from seed.txt: take the href of the <a> inside each div with class
+# "fsl", and process at most the first 100 of those links.
+# NOTE(review): indentation was lost in this rendering of the diff, so the
+# exact nesting of the statements below must be confirmed against run.py.
+# NOTE(review): the handle returned by open("seed.txt") is never closed
+# explicitly.
+soup = BeautifulSoup(open("seed.txt"))
+links = [div.a["href"] for div in soup.findAll("div", class_="fsl")]
+for link in links[:100]:
+ basename, fname, getname = normalize(link)
+ # Only links that do not already have a facebook/<fname> file are fetched.
+ if not op.isfile("facebook/" + fname):
+ result = lf.delay(getname)
+ value = result.get()
+ # Re-derive the names from the "for" field of the result (after strip())
+ # rather than from the seed link.
+ basename, fname, getname = normalize(strip(value["for"]))
+ add_user(fname, len(value["friends"]))
+ todo = ResultSet([])
+ with open("facebook/" + fname, "w") as f:
+ for friend in value["friends"]:
+ # NOTE(review): this rebinds basename/fname/getname for each friend, so
+ # after the loop fname no longer names the file opened above.
+ basename, fname, getname = normalize(friend)
+ f.write(basename + "\n")
+ if fname not in users:
+ todo.add(nf.delay(basename))
+ # NOTE(review): presumably meant to print the file just written, but
+ # fname may hold the last friend's name at this point -- confirm.
+ print ("facebook/" + fname)
+ # Collect the queued results, passing call_back as the result callback.
+ todo.join_native(callback=call_back)