1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
from tasks import NumFollowers, ListFollowers, normalize, strip
from bs4 import BeautifulSoup
from celery.result import ResultSet
import os.path as op
from datetime import datetime
import sys
# Task objects from the project's ``tasks`` module. Results of ``nf`` tasks
# are handled by call_back() and results of ``lf`` tasks by call_back_fd().
nf = NumFollowers()
lf = ListFollowers()

# Maps a user's normalized name to the friend count recorded for that user.
users = {}

# Reload the counts saved by earlier runs. Every line of the file named by
# sys.argv[1] has the form "<user> <count>", as written by add_user().
try:
    with open(sys.argv[1]) as f:
        for line in f:
            values = line.strip().split()
            if len(values) < 2:
                # Blank or truncated line: there is no count to read, and
                # indexing values[1] would abort the whole start-up.
                continue
            # Drop thousands separators and stray non-ASCII characters
            # before converting the count to an integer.
            count_text = values[1].replace(",", "").replace(".", "").replace(" ", "")
            users[values[0]] = int(count_text.encode("ascii", "ignore"))
except IOError:
    # The file cannot be read (for example it does not exist yet):
    # start with an empty table.
    pass

# Newly collected counts are appended to the same file that was just read.
output = open(sys.argv[1], "a")
# The "orig" field of results that came back empty is appended to bad.txt.
bad = open("bad.txt", "a")
def add_user(user, degree):
    """Record *degree* as the friend count of *user*, in memory and on disk.

    Stores the count in the module-level ``users`` dict and appends a
    ``"<user> <degree>"`` line to the ``output`` file.

    Args:
        user: Normalized user name; used as the ``users`` key.
        degree: Friend count to record for that user.
    """
    users[user] = degree
    output.write(user + " " + str(degree) + "\n")
    # Flush immediately, as is done for bad.txt: this file is re-read at
    # start-up, so lines still sitting in the buffer when the process dies
    # would have to be crawled again.
    output.flush()
def call_back(tid, value):
    """Handle the result of one finished friend-count task.

    Passed as ``callback`` to ``ResultSet.join_native``, which calls it
    once per completed task.

    Args:
        tid: Task id supplied by the caller; unused.
        value: Result dict of the task. Results without an "nfriends" key
            are only logged. Otherwise "for" identifies the user the count
            belongs to, and "orig" is written to bad.txt when the count
            is None.

    Raises:
        ValueError: If the cleaned-up count is not a valid integer.
    """
    print(datetime.now().isoformat() + " " + str(value))
    if "nfriends" not in value:
        return
    if value["nfriends"] is None:
        # No count could be obtained: remember the request and move on.
        bad.write(value["orig"] + "\n")
        bad.flush()
        return
    basename, fname, getname = normalize(value["for"])
    # Clean the count as unicode text. Calling str() on it first would raise
    # UnicodeEncodeError under Python 2 as soon as the count contains a
    # non-ASCII character (such as a no-break space used as a thousands
    # separator), before the "ignore" encoding below could drop it.
    count_text = u"{0}".format(value["nfriends"])
    for separator in (",", ".", " "):
        count_text = count_text.replace(separator, "")
    add_user(fname, int(count_text.encode("ascii", "ignore")))
# Optional refresh pass, enabled by passing "True" as the fourth argument:
# walk the friend lists already saved under facebook/ and request a friend
# count for every listed friend that is missing from ``users``.
if sys.argv[4] == "True":
    todo = ResultSet([])
    # Parse the page inside a ``with`` block so the file handle is closed
    # as soon as BeautifulSoup has read it, instead of leaking it.
    with open(sys.argv[2]) as page:
        soup = BeautifulSoup(page)
    links = [div.a["href"] for div in soup.findAll("div", class_="fsl")]
    chunk = []
    for link in links:
        basename, finame, getname = normalize(link)
        if op.isfile("facebook/" + finame):
            with open("facebook/" + finame) as f:
                for line in f:
                    basename, fname, getname = normalize(line.strip())
                    if fname not in users:
                        # Log which saved list the missing friend came from.
                        print(finame)
                        todo.add(nf.delay(basename))
    # Block until every queued count task has been handled by call_back().
    todo.join_native(callback=call_back)

# Base names of friends whose count is still unknown: filled by
# call_back_fd() and consumed by the crawl loop below.
todo = []
def call_back_fd(tid, value):
    """Handle the result of one finished friend-list task.

    Logs the result, records the size of the friend list for its owner,
    saves the list to ``facebook/<owner file name>`` (one base name per
    line) and queues every friend missing from ``users`` in ``todo``.

    Args:
        tid: Task id supplied by the caller; unused.
        value: Result dict of the task. "friends" holds the friend list
            (None when it could not be fetched), "for" identifies the
            owner and "orig" is written to bad.txt on failure.
    """
    print(datetime.now().isoformat() + " " + str(value))
    friends = value["friends"]
    if friends is None:
        # Nothing was fetched: remember the request and give up on it.
        bad.write(value["orig"] + "\n")
        bad.flush()
        return

    _, owner_file, _ = normalize(strip(value["for"]))
    add_user(owner_file, len(friends))

    with open("facebook/" + owner_file, "w") as listing:
        for friend in friends:
            friend_base, friend_file, _ = normalize(friend)
            listing.write(friend_base + "\n")
            if friend_file in users:
                continue
            # Count still unknown: have it looked up after this chunk.
            todo.append(friend_base)
def _crawl_chunk(names):
    """Fetch the friend lists of *names*, then count their unknown friends.

    Queues one ``lf`` task per name and waits for all of them; call_back_fd()
    fills the module-level ``todo`` list while handling the results. Then
    queues one ``nf`` task per entry of ``todo``, resets ``todo`` and waits
    for those tasks as well.
    """
    global todo
    list_tasks = ResultSet([])
    for name in names:
        list_tasks.add(lf.delay(name))
    list_tasks.join_native(callback=call_back_fd)

    count_tasks = ResultSet([])
    for name in todo:
        count_tasks.add(nf.delay(name))
    todo = []
    count_tasks.join_native(callback=call_back)


# Main crawl: for every profile link on the page named by sys.argv[2] whose
# friend list has not been saved under facebook/ yet, fetch the list in
# chunks of sys.argv[3] users.
with open(sys.argv[2]) as page:
    # ``with`` closes the file handle once BeautifulSoup has read the page.
    soup = BeautifulSoup(page)
links = [div.a["href"] for div in soup.findAll("div", class_="fsl")]
chunk_size = int(sys.argv[3])
chunk = []
for link in links:
    basename, fname, getname = normalize(link)
    if not op.isfile("facebook/" + fname):
        chunk.append(getname)
    if len(chunk) == chunk_size:
        _crawl_chunk(chunk)
        chunk = []

# The number of missing users is rarely a multiple of the chunk size:
# process the final, partially filled chunk instead of dropping it.
if chunk:
    _crawl_chunk(chunk)
    chunk = []
|