summaryrefslogtreecommitdiffstats
path: root/run.py
blob: ffee06a57e593bcda385597f7283046daeebce93 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from tasks import NumFollowers, ListFollowers, normalize
from bs4 import BeautifulSoup
from celery.result import ResultSet
import os.path as op
from glob import glob

# Task objects imported from the project's `tasks` module; the rest of the
# script dispatches work through their `.delay(...)` method.
nf = NumFollowers()
lf = ListFollowers()
rset = ResultSet([])  # NOTE(review): not referenced anywhere in the visible code

# Map of user name -> degree, rebuilt from the "<user> <degree>" lines that
# add_user() appends to all_users.txt.
users = {}
try:
    with open("all_users.txt") as f:
        for line in f:
            values = line.split()
            # A blank or truncated line (e.g. left by an interrupted write)
            # must not abort start-up; a skipped user is simply treated as
            # unknown and gets looked up again later.
            if len(values) < 2:
                continue
            try:
                users[values[0]] = int(values[1])
            except ValueError:
                continue
except IOError:
    # Best effort: the file cannot be read (for instance it does not exist
    # yet), so start with an empty map.
    pass

# Append-mode handle used by add_user(); it stays open for the rest of the
# script.
output = open("all_users.txt", "a")


def strip(url):
    """Return *url* reduced to its base form.

    A trailing "/friends" is removed when present; otherwise everything from
    the first "&" onwards is dropped.
    """
    suffix = "/friends"
    if not url.endswith(suffix):
        head, _, _ = url.partition("&")
        return head
    return url[:-len(suffix)]


def add_user(user, degree):
    """Record *user* with its *degree* in memory and in the on-disk log.

    The pair is echoed to stdout, stored in the module-level ``users`` dict,
    and appended to the module-level ``output`` file as "<user> <degree>".

    Args:
        user: user name; concatenated with a string below, so it must be a
            string.
        degree: value stored for the user; written via ``str(degree)``.
    """
    # Single-argument print of a pre-formatted string emits the same text
    # under Python 2 and Python 3 (the bare `print a, b` statement is a
    # SyntaxError on Python 3).
    print("%s %s" % (user, degree))
    users[user] = degree
    output.write(user + " " + str(degree) + "\n")
    # Push each record out of Python's buffer right away.
    output.flush()


def call_back(tid, value):
    """Handle one task result passed in by ``join_native``.

    Results that carry a "friends" key are ignored. Results that carry an
    "nfriends" key are recorded through add_user(), keyed by the normalized
    form of ``value["for"]``. Anything else is ignored as well.

    Args:
        tid: first callback argument (presumably the task id); unused.
        value: task result; must support ``in`` and key lookup
            (presumably a dict -- confirm against the tasks module).
    """
    if "friends" in value or "nfriends" not in value:
        return

    # Only the middle element of normalize()'s triple is needed here.
    _, fname, _ = normalize(value["for"])
    add_user(fname, value["nfriends"])

# Backlog pass: for every entry listed in an existing facebook/* file whose
# user is not yet in `users`, dispatch an `nf` task and then wait for all of
# them, recording the results through call_back().
todo = ResultSet([])
# `users` is only updated inside call_back(), i.e. after the loop below has
# finished, so remember what was already queued to avoid dispatching the same
# user once per file that mentions it.
queued = set()
for finame in glob("facebook/*"):
    with open(finame) as f:
        for line in f:
            basename, fname, getname = normalize(line.strip())
            if fname in users or fname in queued:
                continue
            queued.add(fname)
            # Progress output: the file in which the unknown user was found.
            print(finame)
            todo.add(nf.delay(basename))
todo.join_native(callback=call_back)

# Seed pass: take the links found in seed.txt (anchors inside
# <div class="fsl"> elements), and for each of the first 100 that has no
# facebook/<name> file yet, fetch its friend list, store it, and look up the
# degree of every friend that is not already known.
with open("seed.txt") as seed_file:
    # BeautifulSoup consumes the handle while constructing the tree, so the
    # file can be closed as soon as the constructor returns.
    soup = BeautifulSoup(seed_file)
links = [div.a["href"] for div in soup.findAll("div", class_="fsl")]
for link in links[:100]:
    basename, fname, getname = normalize(link)
    if op.isfile("facebook/" + fname):
        # A friends file for this link already exists; nothing to do.
        continue

    # Dispatch the `lf` task and block until its result is available.
    result = lf.delay(getname)
    value = result.get()

    # Re-derive the name from what the task reports in value["for"].
    # NOTE(review): this name may differ from the one checked above, in which
    # case the isfile() test will not recognise the file on a later run --
    # confirm against normalize().
    basename, fname, getname = normalize(strip(value["for"]))
    friends = value["friends"]
    add_user(fname, len(friends))

    friends_path = "facebook/" + fname
    todo = ResultSet([])
    with open(friends_path, "w") as f:
        for friend in friends:
            # Separate names for the friend's triple: reusing `fname` here
            # would clobber the owner's name used for the path and the log
            # line below.
            friend_base, friend_name, _ = normalize(friend)
            f.write(friend_base + "\n")
            if friend_name not in users:
                todo.add(nf.delay(friend_base))
    # Log the file that was just written.
    print(friends_path)
    todo.join_native(callback=call_back)