5 files changed, 997 insertions, 0 deletions
diff --git a/facebook_analysis/Makefile b/facebook_analysis/Makefile
new file mode 100644
index 0000000..e3df848
--- /dev/null
+++ b/facebook_analysis/Makefile
@@ -0,0 +1,3 @@
+all:
+	python2 setup.py build_ext --inplace
+	cython2 -a ads.pyx
diff --git a/facebook_analysis/ads.pyx b/facebook_analysis/ads.pyx
new file mode 100644
index 0000000..2682456
--- /dev/null
+++ b/facebook_analysis/ads.pyx
@@ -0,0 +1,403 @@
+#!python
+#cython: boundscheck=False, nonecheck=False
+cimport cython
+import random
+
+
+@cython.nonecheck(False)
+cdef int merge_opt(list l1, list l2, int t, dict degrees):
+    cdef int n1, n2, i, j, n, s
+    n1 = len(l1)
+    n2 = len(l2)
+    i = j = n = s = 0
+    while n < t:
+        if i == n1 and j == n2:
+            break
+        if i == n1:
+            s += degrees[l2[j]]
+            j += 1
+            n += 1
+            continue
+        if j == n2:
+            s += degrees[l1[i]]
+            i += 1
+            n += 1
+            continue
+
+        li, lj = l1[i], l2[j]
+        di, dj = degrees[li], degrees[lj]
+        if lj == li:
+            j += 1
+            s += di
+            i += 1
+            n += 1
+        elif di > dj:
+            s += di
+            i += 1
+            n += 1
+        else:
+            s += dj
+            j += 1
+            n += 1
+    return s
+
+@cython.nonecheck(False)
+cdef float merge_opt_p(list l1, list l2, int t, dict degrees, float p):
+    cdef int n1, n2, i, j
+    cdef float n, s
+    n1 = len(l1)
+    n2 = len(l2)
+    i = j = 0
+    n = s = 0.
+    while n < t:
+        if i == n1 and j == n2:
+            break
+        if i == n1:
+            s += degrees[l2[j]]*p
+            j += 1
+            n += p
+            continue
+        if j == n2:
+            s += degrees[l1[i]]*p
+            i += 1
+            n += p
+            continue
+
+        li, lj = l1[i], l2[j]
+        di, dj = degrees[li], degrees[lj]
+        if lj == li:
+            j += 1
+            s += di*p
+            i += 1
+            n += p
+        elif di > dj:
+            s += di*p
+            i += 1
+            n += p
+        else:
+            s += dj*p
+            j += 1
+            n += p
+    return s
+
+
+@cython.nonecheck(False)
+cdef float merge_opt_p_sample(list l1, list l2, int t, dict degrees, float p):
+    random.seed()
+    cdef int n1, n2, i, j
+    cdef float n, s
+    n1 = len(l1)
+    n2 = len(l2)
+    i = j = 0
+    n = s = 0.
+    cdef int k
+    cdef dict a
+    cdef float r
+    k = 0
+    r = 0
+    a = {}
+    for k in xrange(len(degrees)**2):
+        for d in degrees:
+            a[d] = random.random()
+        while n < t:
+            if i == n1 and j == n2:
+                break
+            if i == n1:
+                s += degrees[l2[j]]*p
+                j += 1
+                n += p
+                continue
+            if j == n2:
+                s += degrees[l1[i]]*p
+                i += 1
+                n += p
+                continue
+
+            li, lj = l1[i], l2[j]
+            di, dj = degrees[li], degrees[lj]
+            if lj == li:
+                j += 1
+                s += di*p
+                i += 1
+                n += p
+            elif di > dj:
+                s += di*p
+                i += 1
+                n += p
+            else:
+                s += dj*p
+                j += 1
+                n += p
+        r += s
+    r /= n**2
+    return r
+
+
+@cython.nonecheck(False)
+cdef float merge_opt_ps(list l1, list l2, int t, dict degrees, dict p):
+    cdef int n1, n2, i, j
+    cdef float n, s
+    n1 = len(l1)
+    n2 = len(l2)
+    i = j = 0
+    n = s = 0.
+    while n < t:
+        if i == n1 and j == n2:
+            break
+        if i == n1:
+            s += degrees[l2[j]]*p[l2[j]]
+            n += p[l2[j]]
+            j += 1
+            continue
+        if j == n2:
+            s += degrees[l1[i]]*p[l1[i]]
+            n += p[l1[i]]
+            i += 1
+            continue
+
+        li, lj = l1[i], l2[j]
+        di, dj = degrees[li], degrees[lj]
+        if lj == li:
+            j += 1
+            s += di*p[li]
+            n += p[li]
+            i += 1
+        elif di > dj:
+            s += di*p[li]
+            n += p[li]
+            i += 1
+        else:
+            s += dj*p[lj]
+            n += p[lj]
+            j += 1
+    return s
+
+
+@cython.nonecheck(False)
+cdef list merge(list l1, list l2, int t, dict degrees):
+    cdef int n1, n2, i, j, n
+    n1 = len(l1)
+    n2 = len(l2)
+    result = []
+    i = j = n = 0
+    while n < t:
+        if i == n1 and j == n2:
+            break
+        if i == n1:
+            result.append(l2[j])
+            j += 1
+            n += 1
+            continue
+        if j == n2:
+            result.append(l1[i])
+            i += 1
+            n += 1
+            continue
+
+        if l2[j] == l1[i]:
+            j += 1
+            result.append(l1[i])
+            i += 1
+            n += 1
+        elif degrees[l1[i]] > degrees[l2[j]]:
+            result.append(l1[i])
+            i += 1
+            n += 1
+        else:
+            result.append(l2[j])
+            j += 1
+            n += 1
+    return result
+
+@cython.nonecheck(False)
+cdef list merge_p(list l1, list l2, int t, dict degrees, float p):
+    cdef int n1, n2, i, j
+    cdef float n
+    n1 = len(l1)
+    n2 = len(l2)
+    result = []
+    i = j = 0
+    n = 0.
+    while n < t:
+        if i == n1 and j == n2:
+            break
+        if i == n1:
+            result.append(l2[j])
+            j += 1
+            n += p
+            continue
+        if j == n2:
+            result.append(l1[i])
+            i += 1
+            n += p
+            continue
+
+        if l2[j] == l1[i]:
+            j += 1
+            result.append(l1[i])
+            i += 1
+            n += p
+        elif degrees[l1[i]] > degrees[l2[j]]:
+            result.append(l1[i])
+            i += 1
+            n += p
+        else:
+            result.append(l2[j])
+            j += 1
+            n += p
+    return result
+
+
+@cython.nonecheck(False)
+cdef list merge_ps(list l1, list l2, int t, dict degrees, dict p):
+    cdef int n1, n2, i, j
+    cdef float n
+    n1 = len(l1)
+    n2 = len(l2)
+    result = []
+    i = j = 0
+    n = 0.
+    while n < t:
+        if i == n1 and j == n2:
+            break
+        if i == n1:
+            result.append(l2[j])
+            n += p[l2[j]]
+            j += 1
+            continue
+        if j == n2:
+            result.append(l1[i])
+            n += p[l1[i]]
+            i += 1
+            continue
+
+        if l2[j] == l1[i]:
+            j += 1
+            result.append(l1[i])
+            n += p[l1[i]]
+            i += 1
+        elif degrees[l1[i]] > degrees[l2[j]]:
+            result.append(l1[i])
+            n += p[l1[i]]
+            i += 1
+        else:
+            result.append(l2[j])
+            n += p[l2[j]]
+            j += 1
+    return result
+
+@cython.nonecheck(False)
+def fs(tuple a):
+    cdef int cur_val, best_diff, best_value, o, t, k
+    cdef list x
+    cdef dict graph
+    cdef dict degrees
+    t, k, x, graph, degrees = a
+    cdef list n
+    cur_val = 0
+    n = []  # neighbors of greedy set
+    for i in range(1, k - t):
+        best_diff = 0
+        best_user = None
+        best_value = 0
+        for user in x:
+            o = merge_opt(n, graph[user], t, degrees)
+            if o - cur_val > best_diff:
+                best_diff = o - cur_val
+                best_user = user
+                best_value = o
+        if best_user is not None:
+            x.remove(best_user)
+            cur_val = best_value
+            n = merge(n, graph[best_user], t, degrees)
+        else:
+            break
+    return cur_val
+
+@cython.nonecheck(False)
+def fs_p(tuple a):
+    cdef int t, k
+    cdef float o, p, best_value, best_diff, cur_val
+    cdef list x
+    cdef dict graph
+    cdef dict degrees
+    t, k, x, graph, degrees, p = a
+    cdef list n
+    cur_val = 0
+    n = []  # neighbors of greedy set
+    for i in range(1, k - t):
+        best_diff = 0
+        best_user = None
+        best_value = 0
+        for user in x:
+            o = merge_opt_p(n, graph[user], t, degrees, p)
+            if o - cur_val > best_diff:
+                best_diff = o - cur_val
+                best_user = user
+                best_value = o
+        if best_user is not None:
+            x.remove(best_user)
+            cur_val = best_value
+            n = merge_p(n, graph[best_user], t, degrees, p)
+        else:
+            break
+    return cur_val
+
+@cython.nonecheck(False)
+def fs_p_sample(tuple a):
+    cdef int t, k
+    cdef float o, p, best_value, best_diff, cur_val
+    cdef list x
+    cdef dict graph
+    cdef dict degrees
+    t, k, x, graph, degrees, p = a
+    cdef list n
+    cur_val = 0
+    n = []  # neighbors of greedy set
+    for i in range(1, k - t):
+        best_diff = 0
+        best_user = None
+        best_value = 0
+        for user in x:
+            o = merge_opt_p_sample(n, graph[user], t, degrees, p)
+            if o - cur_val > best_diff:
+                best_diff = o - cur_val
+                best_user = user
+                best_value = o
+        if best_user is not None:
+            x.remove(best_user)
+            cur_val = best_value
+            n = merge_p(n, graph[best_user], t, degrees, p)
+        else:
+            break
+    return cur_val
+
+@cython.nonecheck(False)
+def fs_ps(tuple a):
+    cdef int t, k
+    cdef float o, best_value, best_diff, cur_val
+    cdef list x
+    cdef dict graph
+    cdef dict degrees
+    cdef dict p
+    t, k, x, graph, degrees, p = a
+    cdef list n
+    cur_val = 0
+    n = []  # neighbors of greedy set
+    for i in range(1, k - t):
+        best_diff = 0
+        best_user = None
+        best_value = 0
+        for user in x:
+            o = merge_opt_ps(n, graph[user], t, degrees, p)
+            if o - cur_val > best_diff:
+                best_diff = o - cur_val
+                best_user = user
+                best_value = o
+        if best_user is not None:
+            x.remove(best_user)
+            cur_val = best_value
+            n = merge_ps(n, graph[best_user], t, degrees, p)
+        else:
+            break
+    return cur_val
diff --git a/facebook_analysis/analyze.py b/facebook_analysis/analyze.py
new file mode 100644
index 0000000..c5e6feb
--- /dev/null
+++ b/facebook_analysis/analyze.py
@@ -0,0 +1,340 @@
+from bs4 import BeautifulSoup
+import os.path as op
+from client.tasks import normalize
+from itertools import chain
+from random import sample
+from multiprocessing import Pool
+from ads import fs, fs_p, fs_ps, fs_p_sample
+import numpy as np
+import pulp
+import sys
+from random import seed, betavariate, normalvariate
+import matplotlib.pyplot as plt
+
+DATA_DIR = "../facebook_data"
+DATASETS = ["hbo", "nyt",  "lp", "google", "lmpt", "gp", "kiva", "coachella",
+            "peet", "gap"]
+SYNTH_DIR = "../apgl"
+SYNTH_DATASETS = ["b-a", "kk", "sw"]
+
+
+def build_graph2(dataset):
+    users_file = op.join(DATA_DIR, dataset + "_users.txt")
+    seed_file = op.join(DATA_DIR, dataset + ".txt")
+    degrees = {}
+    graph = {}
+    with open(users_file) as f:
+        for line in f:
+            values = line.strip().split()
+            degrees[values[0]] = int(values[1])
+
+    soup = BeautifulSoup(open(seed_file))
+    links = [div.a["href"] for div in soup.findAll("div", class_="fsl")]
+    for link in links:
+        basename, fname, getname = normalize(link)
+        long_name = op.join(DATA_DIR, "facebook", fname)
+        if not op.isfile(long_name):
+            continue
+        else:
+            with open(long_name) as f:
+                friends = [normalize(line.strip())[1] for line in f]
+                friends = [friend for friend in friends
+                           if (friend in degrees) and degrees[friend] > 0]
+                if len(friends) > 0:
+                    friends = list(set(friends))
+                    friends.sort(key=degrees.get, reverse=True)
+                    graph[fname] = friends
+                    degrees[fname] = len(friends)
+    print dataset, len(graph), len(list(sd_users(graph)))
+    return graph, degrees
+
+
+def build_graph1(dataset):
+    fname = op.join(SYNTH_DIR, dataset + ".txt")
+    degrees = {}
+    graph = {}
+    with open(fname) as fh:
+        for line in fh:
+            values = line.strip().split("\t")
+            node = int(values[0])
+            friends = zip(*[iter(values[1:])] * 2)
+            friends = [map(int, f) for f in friends]
+            for friend in friends:
+                degrees[friend[0]] = friend[1]
+            graph[node] = [friend[0] for friend in friends]
+            degrees[node] = len(graph[node])
+    print fname, len(graph), len(list(sd_users(graph)))
+    return graph, degrees
+
+
+def build_graph(dataset):
+    if dataset in DATASETS or dataset == "big":
+        return build_graph2(dataset)
+    else:
+        return build_graph1(dataset)
+
+
+def print_graph(dataset):
+    graph, degrees = build_graph(dataset)
+    with open(dataset + "_single_graph.txt", "w") as f:
+        for user, friends in graph.iteritems():
+            friends_deg = [(friend, str(degrees[friend]))
+                           for friend in friends]
+            f.write(user + "\t"
+                    + "\t".join("\t".join(friend) for friend in friends_deg)
+                    + "\n")
+
+
+def sd_users(graph):
+    return chain.from_iterable(graph.itervalues())
+
+
+def random(graph, degrees, n):
+    #n = int(len(graph) * ratio)
+    values = []
+    for _ in xrange(100):
+        users = sample(graph, n)
+        values.append(sum(degrees[user] for user in users))
+    return sum(values) / float(len(values))
+
+
+def random_friend(graph, degrees, n):
+    #n = int(len(graph) * ratio)
+    values = []
+    for _ in xrange(100):
+        users = sample(graph, n / 2)
+        values.append(sum(degrees[sample(graph[user], 1)[0]] + degrees[user]
+                          for user in users))
+    return sum(values) / float(len(values))
+
+
+def im(graph, degrees, n):
+    #n = int(len(graph) * ratio)
+    l = list(graph.iterkeys())
+    l.sort(key=lambda x: degrees[x], reverse=True)
+    return sum(degrees[user] for user in l[:n])
+
+
+def aps(graph, degrees, k, p=1, sampl=False):
+    x = list(set(graph.keys()))
+    #k = int(len(graph) * ratio)  # budget
+    P = Pool(5)
+    if p == 1:
+        m = P.map(fs, zip(range(1, k - 1),
+                          [k] * (k - 1),
+                          [x] * (k - 1),
+                          [graph] * (k - 1),
+                          [degrees] * (k - 1)))
+    elif type(p) is dict:
+        m = P.map(fs_ps, zip(range(1, k - 1),
+                             [k] * (k - 1),
+                             [x] * (k - 1),
+                             [graph] * (k - 1),
+                             [degrees] * (k - 1),
+                             [p] * (k - 1)))
+    elif sampl:
+        m = P.map(fs_p_sample, zip(range(1, k - 1),
+                                   [k] * (k - 1),
+                                   [x] * (k - 1),
+                                   [graph] * (k - 1),
+                                   [degrees] * (k - 1),
+                                   [p] * (k - 1)))
+    else:
+        m = P.map(fs_p, zip(range(1, k - 1),
+                            [k] * (k - 1),
+                            [x] * (k - 1),
+                            [graph] * (k - 1),
+                            [degrees] * (k - 1),
+                            [p] * (k - 1)))
+
+    P.close()
+    try:
+        m = max(m)
+    except ValueError:
+        m = 0
+    print m
+    return m
+
+
+def generate_degree(degrees, p, distr="beta"):
+    #plt.figure()
+    seed()
+    ps = {}
+    if distr == "beta":
+        beta = 5.
+        alpha = 5. * p / (1 - p)
+        sample = lambda d: betavariate(alpha, beta)
+    elif distr == "gauss":
+        sample = lambda d: normalvariate(p, 0.01)
+    elif distr == "power":
+        alpha = 1.
+        beta = (1. - p) / p
+        sample = lambda d: betavariate(alpha, beta)
+    elif distr == "deg":
+        s = sum((1. / d) for d in degrees.itervalues() if d != 0)
+        c = len(list(d for d in degrees if d != 0)) * p / s
+        sample = lambda d: c / d if d != 0 else p
+    for node, deg in degrees.iteritems():
+        s = sample(deg)
+        if s < 0.001:
+            ps[node] = 0.
+        elif s > 1.:
+            ps[node] = 1.
+        else:
+            ps[node] = s
+    #plt.hist(list(ps.itervalues()), 50)
+    #plt.savefig(distr + "_dist.pdf")
+    return ps
+
+
+def compute_performance(dataset):
+    graph, degrees = build_graph(dataset)
+    a = [int(len(graph) * i) for i in np.arange(0, 1.1, 0.1)]
+    r = (a,
+         [im(graph, degrees, k) for k in a],
+         [random(graph, degrees, k) for k in a],
+         [random_friend(graph, degrees, k) for k in a],
+         [aps(graph, degrees, k) for k in a])
+    with open(dataset + "_performance.txt", "w") as f:
+        f.write("\n".join("\t".join(map(str, k)) for k in zip(*r)))
+
+
+def compute_performance_p(dataset, distr=None):
+    graph, degrees = build_graph(dataset)
+    ps = np.arange(0.01, 0.99, 0.1)
+    a = [int(len(graph) * i) for i in np.arange(0, 1.1, 0.1)]
+    if distr is None:
+        l = [[aps(graph, degrees, k, p) for k in a]
+             for p in ps]
+    else:
+        l = [[aps(graph, degrees, k, generate_degree(degrees, p, distr))
+              for k in a]
+             for p in ps]
+    r = [a]
+    r += l
+    with open(dataset + "_performance_p_" + str(distr) + ".txt", "w") as f:
+        f.write("\n".join("\t".join(map(str, k)) for k in zip(*r)))
+
+
+def lp(graph, degrees, k):
+    reverse = {}
+    for user in sd_users(graph):
+        reverse[user] = []
+    for user in graph:
+        for friend in graph[user]:
+            reverse[friend].append(user)
+
+    prob = pulp.LpProblem("ads", pulp.LpMaximize)
+    x = pulp.LpVariable.dicts("x", graph.keys(), 0., 1.)
+    y = pulp.LpVariable.dicts("y", sd_users(graph), 0., 1.)
+    prob += pulp.lpSum([degrees[user] * x[user] for user in graph] +
+                       [degrees[user] * y[user] for user in sd_users(graph)])
+    for user in sd_users(graph):
+        prob += pulp.lpSum([x[u] for u in reverse[user]] + [-y[user]]) >= 0
+    prob += pulp.lpSum([x[u] for u in graph] + [y[u] for u in reverse]) <= k
+    prob.solve(pulp.COIN_CMD())
+    print "Status:", pulp.LpStatus[prob.status]
+    print "Value =", pulp.value(prob.objective)
+
+
+def lp_perf():
+    graph, degrees = build_graph("peet")
+    a = [int(len(graph) * i) for i in np.arange(0, 1.1, 0.1)]
+    r = (a,
+         #[aps(graph, degrees, k) for k in a],
+         [lp(graph, degrees, k) for k in a])
+    with open("lp_running_time.txt", "w") as f:
+        f.write("\n".join("\t".join(map(str, k)) for k in zip(*r)))
+
+
+def lp_time():
+    graph, degrees = build_graph("big")
+    sp = sample(graph.keys(), int(sys.argv[2]))
+    graph = {s: graph[s] for s in sp}
+    a = int(sys.argv[1])
+    print len(list(sd_users(graph))), a
+    lp(graph, degrees, a)
+
+
+def aps_time():
+    graph, degrees = build_graph("big")
+    sp = sample(graph.keys(), int(sys.argv[2]))
+    graph = {s: graph[s] for s in sp}
+    a = int(sys.argv[1])
+    print len(list(sd_users(graph))), a
+    aps(graph, degrees, a, p=0.9, sampl=True)
+
+
+def lp_time_big():
+    graph, degrees = build_graph("big")
+    graph_big = {}
+    for i in xrange(10):
+        for user in graph:
+            graph_big[user + str(i)] = graph[user]
+            degrees[user + str(i)] = degrees[user]
+    aps(graph_big, degrees, 500)
+
+
+def hbo_likes():
+    graph, degrees = build_graph("hbo")
+    like_file = op.join(DATA_DIR, "hbo_likes.txt")
+    likes = {}
+    for line in open(like_file):
+        values = line.strip().split("\t")
+        if "HBO" in values[1:]:
+            likes[values[0]] = True
+    a = [int(len(graph) * i) for i in np.arange(0, 1.1, 0.1)]
+    l = [aps(graph, degrees, k) for k in a]
+    for user in graph:
+        graph[user] = [friend for friend in graph[user]
+                       if (friend in degrees and friend in likes)]
+    r = (a,
+         [im(graph, degrees, k) for k in a],
+         [aps(graph, degrees, k) for k in a],
+         l)
+    with open("hbo_likes_performance.txt", "w") as f:
+        f.write("\n".join("\t".join(map(str, k)) for k in zip(*r)))
+
+
+def stats():
+    for dataset in ["coachella"]:
+        graph, degrees = build_graph(dataset)
+        print dataset, len(graph) * 7, len(list(sd_users(graph))) * 7,\
+            np.mean([degrees[u] for u in graph]),\
+            np.mean([degrees[u] for u in sd_users(graph)])
+    for dataset in ["nyt", "gap", "gp", "kiva"]:
+        graph, degrees = build_graph(dataset)
+        print dataset, len(graph) * 6, len(list(sd_users(graph))) * 6,\
+            np.mean([degrees[u] for u in graph]),\
+            np.mean([degrees[u] for u in sd_users(graph)])
+    for dataset in ["google"]:
+        graph, degrees = build_graph(dataset)
+        print dataset, len(graph) * 5, len(list(sd_users(graph))) * 5,\
+            np.mean([degrees[u] for u in graph]),\
+            np.mean([degrees[u] for u in sd_users(graph)])
+    for dataset in ["lp", "hbo", "lmpt"]:
+        graph, degrees = build_graph(dataset)
+        print dataset, len(graph) * 3, len(list(sd_users(graph))) * 3,\
+            np.mean([degrees[u] for u in graph]),\
+            np.mean([degrees[u] for u in sd_users(graph)])
+    for dataset in ["peet"]:
+        graph, degrees = build_graph(dataset)
+        print dataset, len(graph) * 2, len(list(sd_users(graph))) * 2,\
+            np.mean([degrees[u] for u in graph]),\
+            np.mean([degrees[u] for u in sd_users(graph)])
+
+if __name__ == "__main__":
+    #for dataset in SYNTH_DATASETS:
+    #    compute_performance(dataset)
+    compute_performance_p("coachella", "deg")
+    #compute_performance("coachella")
+    #hbo_likes()
+    #lp_perf()
+    #lp_time()
+    #aps_time()
+    #stats()
+    #lp_time_big()
+    # _, degrees = build_graph2("coachella")
+    # with open("coachella_degrees.txt", "w") as fh:
+    #     for deg in degrees.itervalues():
+    #         fh.write(str(deg) + "\n")
diff --git a/facebook_analysis/seed.py b/facebook_analysis/seed.py
new file mode 100644
index 0000000..7e2b851
--- /dev/null
+++ b/facebook_analysis/seed.py
@@ -0,0 +1,247 @@
+from analyze import sd_users, build_graph, DATASETS, SYNTH_DATASETS
+import matplotlib.pyplot as plt
+from matplotlib import rcParams, cm
+from matplotlib.colors import Normalize
+from matplotlib.pyplot import plot, legend, savefig, xlabel, ylabel,\
+    hist, title, subplot, tight_layout, ticklabel_format, xlim, ylim
+from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
+import itertools
+
+mq = lambda x: x * 4
+
+
+def plot_degree_distributions():
+    plt.figure(figsize=(7, 3))
+    graph, degrees = build_graph("kiva")
+    fd_degrees = list(degrees[user] for user in graph)
+    sd_degrees = list(degrees[user] for user in sd_users(graph))
+    n, bins, patches = plt.hist(fd_degrees, bins=50, cumulative=True,
+                                label="Initial users", normed=True,
+                                alpha=0.5, histtype="stepfilled")
+    n, bins, patches = plt.hist(sd_degrees, bins=50, cumulative=True,
+                                histtype="stepfilled", normed=True, alpha=0.5,
+                                label="Friends")
+    ylim(ymax=1.1)
+    plt.xlabel("Degree")
+    plt.ylabel("Probability")
+    plt.legend(loc="lower right")
+    plt.savefig("dist.pdf")
+
+
+def plot_all_performances():
+    plt.figure(figsize=(7, 14))
+    for i, dataset in enumerate(DATASETS):
+        values = [map(float, line.strip().split("\t"))
+                  for line in open(dataset + "_performance.txt")]
+        a, im, rd, rdf, aps = zip(*values)
+        a, im, rd, rdf, aps = [map(mq, l) for l in (a, im, rd, rdf, aps)]
+        a = np.arange(0, 1.001, 0.1)
+        ax = plt.subplot(5, 2, i + 1)
+        #ax.set_yscale("log")
+        plt.plot(a, im, label="Max deg.")
+        plt.plot(a, rd, label="Rand.")
+        plt.plot(a, rdf, label="Rand. friend")
+        plt.plot(a, aps, label="Adapt. Seeding")
+        plt.xlabel("Budget (fraction of the total number of users)")
+        plt.ylabel("Performance")
+        if dataset == "sw":
+            titl = "SmallWord"
+        if dataset == "coachella":
+            titl = "Conf. Model"
+        if dataset == "kk":
+            titl = "Kronecker"
+        if dataset == "b-a":
+            titl = "Barabasi-Albert"
+        plt.title(titl)
+        xlim(xmax=1.1)
+    plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0, 0, 1, 1.03),
+               bbox_transform=plt.gcf().transFigure)
+    plt.tight_layout()
+    plt.savefig("test2.pdf")
+
+
+def compare_performance(fn):
+    plots = {}
+    plt.figure()
+    for dataset in DATASETS:
+        values = [map(float, line.strip().split("\t"))
+                  for line in open(dataset + "_performance.txt")]
+        a, im, rd, rdf, aps = zip(*values)
+        plots[dataset] = [j * 1. / i for (j, i) in zip(aps, im)[1:]]
+    a = map(mq, a)
+    for dataset in DATASETS:
+        plt.plot(a[1:], plots[dataset], label=dataset)
+    xlim(xmax=550)
+    plt.xlabel("Budget")
+    plt.ylabel("Performance")
+    plt.legend(loc="lower right", ncol=2, fontsize="small")
+    plt.savefig(fn)
+
+
+def compare_performance2(fn):
+    plots = {}
+    plt.figure()
+    for dataset in DATASETS:
+        values = [map(float, line.strip().split("\t"))
+                  for line in open(dataset + "_performance.txt")]
+        a, im, rd, rdf, aps = zip(*values)
+        plots[dataset] = [j * 1. / i for (j, i) in zip(aps, im)[1:]]
+    a = map(mq, a)
+    a = map(int, a)
+    z = zip(*plots.itervalues())
+    means = [np.mean(w) for w in z]
+    maxi = [np.max(w) for w in z]
+    mini = [np.min(w) for w in z]
+    ind = range(len(a[1:]))
+    width = 0.35
+    plt.bar(ind, means, width, linewidth=0.1)
+    plt.errorbar([i + width / 2. for i in ind], means, [mini, maxi], elinewidth=1.2, fmt="none")
+    plt.xticks([i + width / 2. for i in ind], a[1:])
+    plt.xlim(-width, len(ind) - 1 + 2 * width)
+    plt.xlabel("Budget")
+    plt.ylabel("Relative improvement")
+    plt.savefig(fn)
+
+
+def compare_dist():
+    fd, sd = [], []
+    plt.figure(figsize=(5, 3))
+    cm = iter(rcParams["axes.color_cycle"])
+    for dataset in DATASETS:
+        graph, degrees = build_graph(dataset)
+        fd_degrees = list(degrees[user] for user in graph)
+        sd_degrees = list(degrees[user] for user in sd_users(graph))
+        fd.append(np.mean(fd_degrees))
+        sd.append(np.mean(sd_degrees))
+    ind = range(len(DATASETS))
+    width = 0.35
+    plt.bar(ind, fd, width, label="Initial users", color=next(cm))
+    plt.bar([i + width for i in ind], sd, width, label="Friends",
+            color=next(cm))
+    plt.xlim(-width, len(ind) - 1 + 3 * width)
+    plt.xticks([i + width for i in ind], DATASETS)
+    plt.ylabel("Avg. degree")
+    plt.legend()
+    plt.savefig("para.pdf")
+
+
+def plot_perf_prob():
+    plt.figure()
+    with open("peet_performance_p.txt") as f:
+        values = [map(float, line.strip().split("\t")) for line in f]
+        values = zip(*values)
+        a = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+        for i in [0, 1, 2, 3, 5, 9]:
+            plt.plot(values[0], values[i + 1], label="$p = " + str(a[i]) + "$")
+        plt.legend()
+    with open("peet_performance.txt") as f:
+        values = [map(float, line.strip().split("\t")) for line in f]
+        values = zip(*values)
+        plt.gca().set_yscale("log")
+        plt.xlabel("Budget")
+        plt.ylabel("Performance")
+        plt.plot(values[0], values[1], label="Max. degree")
+        plt.legend(loc="lower right", fontsize="small", ncol=2)
+        xlim(xmax=450)
+        plt.savefig("prob.pdf")
+
+
+def plot_hbo_likes():
+    plt.figure()
+    rcParams["font.size"] = 6
+    with open("hbo_likes_performance.txt") as f:
+        values = [map(float, line.strip().split("\t")) for line in f]
+    a, im, aps, apso = zip(*values)
+    a = np.arange(0, 1.001, 0.1)
+    plt.gca().set_yscale("log")
+    #plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+    plt.plot(a, map(mq, im), label="Max. degr.")
+    plt.plot(a, map(mq, aps), label="Adapt. seed. (rest.)")
+    plt.plot(a, map(mq, apso), label="Adapt. seed.")
+    plt.xlabel("Budget")
+    plt.ylabel("Performance")
+    xlim(xmax=1.1)
+    plt.legend(loc="lower right")
+    plt.savefig("hbo_likes.pdf")
+
+
+def plot_3d():
+    for dist in ["beta", "gauss"]:
+        fig = plt.figure()
+        with open("coachella_performance_p_" + dist + ".txt") as f:
+            values = [map(float, line.strip().split("\t")) for line in f]
+        k = np.arange(0, 1.001, 0.1)
+        ps = np.arange(0.01, 0.99, 0.1)
+        x, y = np.meshgrid(k, ps)
+        perfs = [value[1:] for value in values]
+        perfs = zip(*perfs)
+        ax = fig.add_subplot(111, projection='3d')
+        ax.plot_wireframe(x, y, perfs, linewidth=0.1)
+        ticklabel_format(style='sci', axis='z', scilimits=(0, 0))
+        xlabel("Budget (fraction of nodes)")
+        ylabel("Distribution mean")
+        ax.set_zlabel("Performance")
+        ax.invert_xaxis()
+        plt.savefig(dist + ".pdf")
+        plt.show()
+
+
+def plot_time():
+    plt.figure()
+    rcParams["font.size"] = 6
+    a1 = np.loadtxt("time_aps_100.txt")
+    a2 = np.loadtxt("time_aps_500.txt")
+    lp1 = np.loadtxt("time_lp_100.txt")
+    lp2 = np.loadtxt("time_lp_500.txt")
+    subplot(2, 2, 1)
+    plot(a1[:, 0], a1[:, 1], "-", label="Comb.")
+    plot(lp1[:, 0], lp1[:, 1], "-", label="LP")
+    xlabel("n")
+    ylabel("time (s)")
+    xlim(0, 100000)
+    legend(loc="upper left")
+    ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
+    subplot(2, 2, 2)
+    plot(a1[:, 0], a1[:, 2], "-", label="Comb.")
+    plot(lp1[:, 0], lp1[:, 2], "-", label="LP")
+    ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
+    xlabel("n")
+    ylabel("\# cycles")
+    xlim(0, 100000)
+    legend(loc="upper left")
+    subplot(2, 2, 3)
+    plot(a2[:, 0], a2[:, 1], "-", label="Comb.")
+    plot(lp2[:, 0], lp2[:, 1], "-", label="LP")
+    ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
+    xlabel("n")
+    ylabel("time (s)")
+    xlim(0, 100000)
+    legend(loc="upper left")
+    subplot(2, 2, 4)
+    plot(a2[:, 0], a2[:, 2], "-", label="Comb.")
+    plot(lp2[:, 0], lp2[:, 2], "-", label="LP")
+    ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
+    xlabel("n")
+    ylabel("\# cycles")
+    xlim(0, 100000)
+    legend(loc="upper left")
+    tight_layout(h_pad=-0.5)
+    savefig("time.pdf")
+
+
+if __name__ == "__main__":
+    SYNTH_DATASETS = ["b-a", "kk", "sw", "coachella"]
+    DATASETS = SYNTH_DATASETS
+    plot_all_performances()
+    #plot_3d()
+    #plot_hbo_likes()
+    #compare_performance()
+    #plot_perf_prob()
+    #compare_dist()
+    #plot_time()
+    #plot_degree_distributions()
+    #for style in plt.style.available:
+    #    plt.style.use(style)
+    #    compare_performance("performance_" + style + ".pdf")
+    #compare_performance2("comp4_" + ".pdf")
diff --git a/facebook_analysis/setup.py b/facebook_analysis/setup.py
new file mode 100644
index 0000000..043e734
--- /dev/null
+++ b/facebook_analysis/setup.py
@@ -0,0 +1,4 @@
+from distutils.core import setup
+from Cython.Build import cythonize
+
+setup(ext_modules=cythonize("ads.pyx"))