summary refs log tree commit diff stats
path: root/run.py
diff options
context:
space:
mode:
author Thibaut Horel <thibaut.horel@gmail.com> 2014-10-24 12:16:51 -0400
committer Thibaut Horel <thibaut.horel@gmail.com> 2014-10-24 12:16:51 -0400
commit ece1d828d53d6123fcecb5ea8bf9b126d1728ccc (patch)
tree b669382d0e5f1234556d1aeb7fa919891510b24d /run.py
parent 7426d8ff0e7969eb1a86bdb5bec8a0c971309e2b (diff)
download fast-seeding-ece1d828d53d6123fcecb5ea8bf9b126d1728ccc.tar.gz
Add code
Diffstat (limited to 'run.py')
-rw-r--r-- run.py 73
1 files changed, 0 insertions, 73 deletions
diff --git a/run.py b/run.py
deleted file mode 100644
index ffee06a..0000000
--- a/run.py
+++ /dev/null
@@ -1,73 +0,0 @@
-from tasks import NumFollowers, ListFollowers, normalize
-from bs4 import BeautifulSoup
-from celery.result import ResultSet
-import os.path as op
-from glob import glob
-
-nf = NumFollowers()
-lf = ListFollowers()
-rset = ResultSet([])
-
-users = {}
-try:
- with open("all_users.txt") as f:
- for line in f:
- values = line.strip().split()
- users[values[0]] = int(values[1])
-except IOError:
- pass
-
-output = open("all_users.txt", "a")
-
-
-def strip(url):
- if url.endswith("/friends"):
- return url[:-8]
- else:
- return url.split("&")[0]
-
-
-def add_user(user, degree):
- print user, degree
- users[user] = degree
- output.write(user + " " + str(degree) + "\n")
- output.flush()
-
-
-def call_back(tid, value):
- if "friends" in value:
- return
-
- if "nfriends" in value:
- basename, fname, getname = normalize(value["for"])
- add_user(fname, value["nfriends"])
- return
-
-todo = ResultSet([])
-for finame in glob("facebook/*"):
- with open(finame) as f:
- for line in f:
- basename, fname, getname = normalize(line.strip())
- if fname not in users:
- print finame
- todo.add(nf.delay(basename))
-todo.join_native(callback=call_back)
-
-soup = BeautifulSoup(open("seed.txt"))
-links = [div.a["href"] for div in soup.findAll("div", class_="fsl")]
-for link in links[:100]:
- basename, fname, getname = normalize(link)
- if not op.isfile("facebook/" + fname):
- result = lf.delay(getname)
- value = result.get()
- basename, fname, getname = normalize(strip(value["for"]))
- add_user(fname, len(value["friends"]))
- todo = ResultSet([])
- with open("facebook/" + fname, "w") as f:
- for friend in value["friends"]:
- basename, fname, getname = normalize(friend)
- f.write(basename + "\n")
- if fname not in users:
- todo.add(nf.delay(basename))
- print ("facebook/" + fname)
- todo.join_native(callback=call_back)