summaryrefslogtreecommitdiffstats
path: root/facebook_analysis
diff options
context:
space:
mode:
Diffstat (limited to 'facebook_analysis')
-rw-r--r--facebook_analysis/Makefile3
-rw-r--r--facebook_analysis/ads.pyx403
-rw-r--r--facebook_analysis/analyze.py340
-rw-r--r--facebook_analysis/seed.py247
-rw-r--r--facebook_analysis/setup.py4
5 files changed, 997 insertions, 0 deletions
diff --git a/facebook_analysis/Makefile b/facebook_analysis/Makefile
new file mode 100644
index 0000000..e3df848
--- /dev/null
+++ b/facebook_analysis/Makefile
@@ -0,0 +1,3 @@
+all:
+ python2 setup.py build_ext --inplace
+ cython2 -a ads.pyx
diff --git a/facebook_analysis/ads.pyx b/facebook_analysis/ads.pyx
new file mode 100644
index 0000000..2682456
--- /dev/null
+++ b/facebook_analysis/ads.pyx
@@ -0,0 +1,403 @@
+#!python
+#cython: boundscheck=False, nonecheck=False
+cimport cython
+import random
+
+
+@cython.nonecheck(False)
+cdef int merge_opt(list l1, list l2, int t, dict degrees):
+ cdef int n1, n2, i, j, n, s
+ n1 = len(l1)
+ n2 = len(l2)
+ i = j = n = s = 0
+ while n < t:
+ if i == n1 and j == n2:
+ break
+ if i == n1:
+ s += degrees[l2[j]]
+ j += 1
+ n += 1
+ continue
+ if j == n2:
+ s += degrees[l1[i]]
+ i += 1
+ n += 1
+ continue
+
+ li, lj = l1[i], l2[j]
+ di, dj = degrees[li], degrees[lj]
+ if lj == li:
+ j += 1
+ s += di
+ i += 1
+ n += 1
+ elif di > dj:
+ s += di
+ i += 1
+ n += 1
+ else:
+ s += dj
+ j += 1
+ n += 1
+ return s
+
+@cython.nonecheck(False)
+cdef float merge_opt_p(list l1, list l2, int t, dict degrees, float p):
+ cdef int n1, n2, i, j
+ cdef float n, s
+ n1 = len(l1)
+ n2 = len(l2)
+ i = j = 0
+ n = s = 0.
+ while n < t:
+ if i == n1 and j == n2:
+ break
+ if i == n1:
+ s += degrees[l2[j]]*p
+ j += 1
+ n += p
+ continue
+ if j == n2:
+ s += degrees[l1[i]]*p
+ i += 1
+ n += p
+ continue
+
+ li, lj = l1[i], l2[j]
+ di, dj = degrees[li], degrees[lj]
+ if lj == li:
+ j += 1
+ s += di*p
+ i += 1
+ n += p
+ elif di > dj:
+ s += di*p
+ i += 1
+ n += p
+ else:
+ s += dj*p
+ j += 1
+ n += p
+ return s
+
+
+@cython.nonecheck(False)
+cdef float merge_opt_p_sample(list l1, list l2, int t, dict degrees, float p):
+ random.seed()
+ cdef int n1, n2, i, j
+ cdef float n, s
+ n1 = len(l1)
+ n2 = len(l2)
+ i = j = 0
+ n = s = 0.
+ cdef int k
+ cdef dict a
+ cdef float r
+ k = 0
+ r = 0
+ a = {}
+ for k in xrange(len(degrees)**2):
+ for d in degrees:
+ a[d] = random.random()
+ while n < t:
+ if i == n1 and j == n2:
+ break
+ if i == n1:
+ s += degrees[l2[j]]*p
+ j += 1
+ n += p
+ continue
+ if j == n2:
+ s += degrees[l1[i]]*p
+ i += 1
+ n += p
+ continue
+
+ li, lj = l1[i], l2[j]
+ di, dj = degrees[li], degrees[lj]
+ if lj == li:
+ j += 1
+ s += di*p
+ i += 1
+ n += p
+ elif di > dj:
+ s += di*p
+ i += 1
+ n += p
+ else:
+ s += dj*p
+ j += 1
+ n += p
+ r += s
+ r /= n**2
+ return r
+
+
+@cython.nonecheck(False)
+cdef float merge_opt_ps(list l1, list l2, int t, dict degrees, dict p):
+ cdef int n1, n2, i, j
+ cdef float n, s
+ n1 = len(l1)
+ n2 = len(l2)
+ i = j = 0
+ n = s = 0.
+ while n < t:
+ if i == n1 and j == n2:
+ break
+ if i == n1:
+ s += degrees[l2[j]]*p[l2[j]]
+ n += p[l2[j]]
+ j += 1
+ continue
+ if j == n2:
+ s += degrees[l1[i]]*p[l1[i]]
+ n += p[l1[i]]
+ i += 1
+ continue
+
+ li, lj = l1[i], l2[j]
+ di, dj = degrees[li], degrees[lj]
+ if lj == li:
+ j += 1
+ s += di*p[li]
+ n += p[li]
+ i += 1
+ elif di > dj:
+ s += di*p[li]
+ n += p[li]
+ i += 1
+ else:
+ s += dj*p[lj]
+ n += p[lj]
+ j += 1
+ return s
+
+
+@cython.nonecheck(False)
+cdef list merge(list l1, list l2, int t, dict degrees):
+ cdef int n1, n2, i, j, n
+ n1 = len(l1)
+ n2 = len(l2)
+ result = []
+ i = j = n = 0
+ while n < t:
+ if i == n1 and j == n2:
+ break
+ if i == n1:
+ result.append(l2[j])
+ j += 1
+ n += 1
+ continue
+ if j == n2:
+ result.append(l1[i])
+ i += 1
+ n += 1
+ continue
+
+ if l2[j] == l1[i]:
+ j += 1
+ result.append(l1[i])
+ i += 1
+ n += 1
+ elif degrees[l1[i]] > degrees[l2[j]]:
+ result.append(l1[i])
+ i += 1
+ n += 1
+ else:
+ result.append(l2[j])
+ j += 1
+ n += 1
+ return result
+
+@cython.nonecheck(False)
+cdef list merge_p(list l1, list l2, int t, dict degrees, float p):
+ cdef int n1, n2, i, j
+ cdef float n
+ n1 = len(l1)
+ n2 = len(l2)
+ result = []
+ i = j = 0
+ n = 0.
+ while n < t:
+ if i == n1 and j == n2:
+ break
+ if i == n1:
+ result.append(l2[j])
+ j += 1
+ n += p
+ continue
+ if j == n2:
+ result.append(l1[i])
+ i += 1
+ n += p
+ continue
+
+ if l2[j] == l1[i]:
+ j += 1
+ result.append(l1[i])
+ i += 1
+ n += p
+ elif degrees[l1[i]] > degrees[l2[j]]:
+ result.append(l1[i])
+ i += 1
+ n += p
+ else:
+ result.append(l2[j])
+ j += 1
+ n += p
+ return result
+
+
+@cython.nonecheck(False)
+cdef list merge_ps(list l1, list l2, int t, dict degrees, dict p):
+ cdef int n1, n2, i, j
+ cdef float n
+ n1 = len(l1)
+ n2 = len(l2)
+ result = []
+ i = j = 0
+ n = 0.
+ while n < t:
+ if i == n1 and j == n2:
+ break
+ if i == n1:
+ result.append(l2[j])
+ n += p[l2[j]]
+ j += 1
+ continue
+ if j == n2:
+ result.append(l1[i])
+ n += p[l1[i]]
+ i += 1
+ continue
+
+ if l2[j] == l1[i]:
+ j += 1
+ result.append(l1[i])
+ n += p[l1[i]]
+ i += 1
+ elif degrees[l1[i]] > degrees[l2[j]]:
+ result.append(l1[i])
+ n += p[l1[i]]
+ i += 1
+ else:
+ result.append(l2[j])
+ n += p[l2[j]]
+ j += 1
+ return result
+
+@cython.nonecheck(False)
+def fs(tuple a):
+ cdef int cur_val, best_diff, best_value, o, t, k
+ cdef list x
+ cdef dict graph
+ cdef dict degrees
+ t, k, x, graph, degrees = a
+ cdef list n
+ cur_val = 0
+ n = [] # neighbors of greedy set
+ for i in range(1, k - t):
+ best_diff = 0
+ best_user = None
+ best_value = 0
+ for user in x:
+ o = merge_opt(n, graph[user], t, degrees)
+ if o - cur_val > best_diff:
+ best_diff = o - cur_val
+ best_user = user
+ best_value = o
+ if best_user is not None:
+ x.remove(best_user)
+ cur_val = best_value
+ n = merge(n, graph[best_user], t, degrees)
+ else:
+ break
+ return cur_val
+
+@cython.nonecheck(False)
+def fs_p(tuple a):
+ cdef int t, k
+ cdef float o, p, best_value, best_diff, cur_val
+ cdef list x
+ cdef dict graph
+ cdef dict degrees
+ t, k, x, graph, degrees, p = a
+ cdef list n
+ cur_val = 0
+ n = [] # neighbors of greedy set
+ for i in range(1, k - t):
+ best_diff = 0
+ best_user = None
+ best_value = 0
+ for user in x:
+ o = merge_opt_p(n, graph[user], t, degrees, p)
+ if o - cur_val > best_diff:
+ best_diff = o - cur_val
+ best_user = user
+ best_value = o
+ if best_user is not None:
+ x.remove(best_user)
+ cur_val = best_value
+ n = merge_p(n, graph[best_user], t, degrees, p)
+ else:
+ break
+ return cur_val
+
+@cython.nonecheck(False)
+def fs_p_sample(tuple a):
+ cdef int t, k
+ cdef float o, p, best_value, best_diff, cur_val
+ cdef list x
+ cdef dict graph
+ cdef dict degrees
+ t, k, x, graph, degrees, p = a
+ cdef list n
+ cur_val = 0
+ n = [] # neighbors of greedy set
+ for i in range(1, k - t):
+ best_diff = 0
+ best_user = None
+ best_value = 0
+ for user in x:
+ o = merge_opt_p_sample(n, graph[user], t, degrees, p)
+ if o - cur_val > best_diff:
+ best_diff = o - cur_val
+ best_user = user
+ best_value = o
+ if best_user is not None:
+ x.remove(best_user)
+ cur_val = best_value
+ n = merge_p(n, graph[best_user], t, degrees, p)
+ else:
+ break
+ return cur_val
+
+@cython.nonecheck(False)
+def fs_ps(tuple a):
+ cdef int t, k
+ cdef float o, best_value, best_diff, cur_val
+ cdef list x
+ cdef dict graph
+ cdef dict degrees
+ cdef dict p
+ t, k, x, graph, degrees, p = a
+ cdef list n
+ cur_val = 0
+ n = [] # neighbors of greedy set
+ for i in range(1, k - t):
+ best_diff = 0
+ best_user = None
+ best_value = 0
+ for user in x:
+ o = merge_opt_ps(n, graph[user], t, degrees, p)
+ if o - cur_val > best_diff:
+ best_diff = o - cur_val
+ best_user = user
+ best_value = o
+ if best_user is not None:
+ x.remove(best_user)
+ cur_val = best_value
+ n = merge_ps(n, graph[best_user], t, degrees, p)
+ else:
+ break
+ return cur_val
diff --git a/facebook_analysis/analyze.py b/facebook_analysis/analyze.py
new file mode 100644
index 0000000..c5e6feb
--- /dev/null
+++ b/facebook_analysis/analyze.py
@@ -0,0 +1,340 @@
+from bs4 import BeautifulSoup
+import os.path as op
+from client.tasks import normalize
+from itertools import chain
+from random import sample
+from multiprocessing import Pool
+from ads import fs, fs_p, fs_ps, fs_p_sample
+import numpy as np
+import pulp
+import sys
+from random import seed, betavariate, normalvariate
+import matplotlib.pyplot as plt
+
+DATA_DIR = "../facebook_data"
+DATASETS = ["hbo", "nyt", "lp", "google", "lmpt", "gp", "kiva", "coachella",
+ "peet", "gap"]
+SYNTH_DIR = "../apgl"
+SYNTH_DATASETS = ["b-a", "kk", "sw"]
+
+
+def build_graph2(dataset):
+ users_file = op.join(DATA_DIR, dataset + "_users.txt")
+ seed_file = op.join(DATA_DIR, dataset + ".txt")
+ degrees = {}
+ graph = {}
+ with open(users_file) as f:
+ for line in f:
+ values = line.strip().split()
+ degrees[values[0]] = int(values[1])
+
+ soup = BeautifulSoup(open(seed_file))
+ links = [div.a["href"] for div in soup.findAll("div", class_="fsl")]
+ for link in links:
+ basename, fname, getname = normalize(link)
+ long_name = op.join(DATA_DIR, "facebook", fname)
+ if not op.isfile(long_name):
+ continue
+ else:
+ with open(long_name) as f:
+ friends = [normalize(line.strip())[1] for line in f]
+ friends = [friend for friend in friends
+ if (friend in degrees) and degrees[friend] > 0]
+ if len(friends) > 0:
+ friends = list(set(friends))
+ friends.sort(key=degrees.get, reverse=True)
+ graph[fname] = friends
+ degrees[fname] = len(friends)
+ print dataset, len(graph), len(list(sd_users(graph)))
+ return graph, degrees
+
+
+def build_graph1(dataset):
+ fname = op.join(SYNTH_DIR, dataset + ".txt")
+ degrees = {}
+ graph = {}
+ with open(fname) as fh:
+ for line in fh:
+ values = line.strip().split("\t")
+ node = int(values[0])
+ friends = zip(*[iter(values[1:])] * 2)
+ friends = [map(int, f) for f in friends]
+ for friend in friends:
+ degrees[friend[0]] = friend[1]
+ graph[node] = [friend[0] for friend in friends]
+ degrees[node] = len(graph[node])
+ print fname, len(graph), len(list(sd_users(graph)))
+ return graph, degrees
+
+
+def build_graph(dataset):
+ if dataset in DATASETS or dataset == "big":
+ return build_graph2(dataset)
+ else:
+ return build_graph1(dataset)
+
+
+def print_graph(dataset):
+ graph, degrees = build_graph(dataset)
+ with open(dataset + "_single_graph.txt", "w") as f:
+ for user, friends in graph.iteritems():
+ friends_deg = [(friend, str(degrees[friend]))
+ for friend in friends]
+ f.write(user + "\t"
+ + "\t".join("\t".join(friend) for friend in friends_deg)
+ + "\n")
+
+
+def sd_users(graph):
+ return chain.from_iterable(graph.itervalues())
+
+
+def random(graph, degrees, n):
+ #n = int(len(graph) * ratio)
+ values = []
+ for _ in xrange(100):
+ users = sample(graph, n)
+ values.append(sum(degrees[user] for user in users))
+ return sum(values) / float(len(values))
+
+
+def random_friend(graph, degrees, n):
+ #n = int(len(graph) * ratio)
+ values = []
+ for _ in xrange(100):
+ users = sample(graph, n / 2)
+ values.append(sum(degrees[sample(graph[user], 1)[0]] + degrees[user]
+ for user in users))
+ return sum(values) / float(len(values))
+
+
+def im(graph, degrees, n):
+ #n = int(len(graph) * ratio)
+ l = list(graph.iterkeys())
+ l.sort(key=lambda x: degrees[x], reverse=True)
+ return sum(degrees[user] for user in l[:n])
+
+
+def aps(graph, degrees, k, p=1, sampl=False):
+ x = list(set(graph.keys()))
+ #k = int(len(graph) * ratio) # budget
+ P = Pool(5)
+ if p == 1:
+ m = P.map(fs, zip(range(1, k - 1),
+ [k] * (k - 1),
+ [x] * (k - 1),
+ [graph] * (k - 1),
+ [degrees] * (k - 1)))
+ elif type(p) is dict:
+ m = P.map(fs_ps, zip(range(1, k - 1),
+ [k] * (k - 1),
+ [x] * (k - 1),
+ [graph] * (k - 1),
+ [degrees] * (k - 1),
+ [p] * (k - 1)))
+ elif sampl:
+ m = P.map(fs_p_sample, zip(range(1, k - 1),
+ [k] * (k - 1),
+ [x] * (k - 1),
+ [graph] * (k - 1),
+ [degrees] * (k - 1),
+ [p] * (k - 1)))
+ else:
+ m = P.map(fs_p, zip(range(1, k - 1),
+ [k] * (k - 1),
+ [x] * (k - 1),
+ [graph] * (k - 1),
+ [degrees] * (k - 1),
+ [p] * (k - 1)))
+
+ P.close()
+ try:
+ m = max(m)
+ except ValueError:
+ m = 0
+ print m
+ return m
+
+
+def generate_degree(degrees, p, distr="beta"):
+ #plt.figure()
+ seed()
+ ps = {}
+ if distr == "beta":
+ beta = 5.
+ alpha = 5. * p / (1 - p)
+ sample = lambda d: betavariate(alpha, beta)
+ elif distr == "gauss":
+ sample = lambda d: normalvariate(p, 0.01)
+ elif distr == "power":
+ alpha = 1.
+ beta = (1. - p) / p
+ sample = lambda d: betavariate(alpha, beta)
+ elif distr == "deg":
+ s = sum((1. / d) for d in degrees.itervalues() if d != 0)
+ c = len(list(d for d in degrees if d != 0)) * p / s
+ sample = lambda d: c / d if d != 0 else p
+ for node, deg in degrees.iteritems():
+ s = sample(deg)
+ if s < 0.001:
+ ps[node] = 0.
+ elif s > 1.:
+ ps[node] = 1.
+ else:
+ ps[node] = s
+ #plt.hist(list(ps.itervalues()), 50)
+ #plt.savefig(distr + "_dist.pdf")
+ return ps
+
+
+def compute_performance(dataset):
+ graph, degrees = build_graph(dataset)
+ a = [int(len(graph) * i) for i in np.arange(0, 1.1, 0.1)]
+ r = (a,
+ [im(graph, degrees, k) for k in a],
+ [random(graph, degrees, k) for k in a],
+ [random_friend(graph, degrees, k) for k in a],
+ [aps(graph, degrees, k) for k in a])
+ with open(dataset + "_performance.txt", "w") as f:
+ f.write("\n".join("\t".join(map(str, k)) for k in zip(*r)))
+
+
+def compute_performance_p(dataset, distr=None):
+ graph, degrees = build_graph(dataset)
+ ps = np.arange(0.01, 0.99, 0.1)
+ a = [int(len(graph) * i) for i in np.arange(0, 1.1, 0.1)]
+ if distr is None:
+ l = [[aps(graph, degrees, k, p) for k in a]
+ for p in ps]
+ else:
+ l = [[aps(graph, degrees, k, generate_degree(degrees, p, distr))
+ for k in a]
+ for p in ps]
+ r = [a]
+ r += l
+ with open(dataset + "_performance_p_" + str(distr) + ".txt", "w") as f:
+ f.write("\n".join("\t".join(map(str, k)) for k in zip(*r)))
+
+
+def lp(graph, degrees, k):
+ reverse = {}
+ for user in sd_users(graph):
+ reverse[user] = []
+ for user in graph:
+ for friend in graph[user]:
+ reverse[friend].append(user)
+
+ prob = pulp.LpProblem("ads", pulp.LpMaximize)
+ x = pulp.LpVariable.dicts("x", graph.keys(), 0., 1.)
+ y = pulp.LpVariable.dicts("y", sd_users(graph), 0., 1.)
+ prob += pulp.lpSum([degrees[user] * x[user] for user in graph] +
+ [degrees[user] * y[user] for user in sd_users(graph)])
+ for user in sd_users(graph):
+ prob += pulp.lpSum([x[u] for u in reverse[user]] + [-y[user]]) >= 0
+ prob += pulp.lpSum([x[u] for u in graph] + [y[u] for u in reverse]) <= k
+ prob.solve(pulp.COIN_CMD())
+ print "Status:", pulp.LpStatus[prob.status]
+ print "Value =", pulp.value(prob.objective)
+
+
+def lp_perf():
+ graph, degrees = build_graph("peet")
+ a = [int(len(graph) * i) for i in np.arange(0, 1.1, 0.1)]
+ r = (a,
+ #[aps(graph, degrees, k) for k in a],
+ [lp(graph, degrees, k) for k in a])
+ with open("lp_running_time.txt", "w") as f:
+ f.write("\n".join("\t".join(map(str, k)) for k in zip(*r)))
+
+
+def lp_time():
+ graph, degrees = build_graph("big")
+ sp = sample(graph.keys(), int(sys.argv[2]))
+ graph = {s: graph[s] for s in sp}
+ a = int(sys.argv[1])
+ print len(list(sd_users(graph))), a
+ lp(graph, degrees, a)
+
+
+def aps_time():
+ graph, degrees = build_graph("big")
+ sp = sample(graph.keys(), int(sys.argv[2]))
+ graph = {s: graph[s] for s in sp}
+ a = int(sys.argv[1])
+ print len(list(sd_users(graph))), a
+ aps(graph, degrees, a, p=0.9, sampl=True)
+
+
+def lp_time_big():
+ graph, degrees = build_graph("big")
+ graph_big = {}
+ for i in xrange(10):
+ for user in graph:
+ graph_big[user + str(i)] = graph[user]
+ degrees[user + str(i)] = degrees[user]
+ aps(graph_big, degrees, 500)
+
+
+def hbo_likes():
+ graph, degrees = build_graph("hbo")
+ like_file = op.join(DATA_DIR, "hbo_likes.txt")
+ likes = {}
+ for line in open(like_file):
+ values = line.strip().split("\t")
+ if "HBO" in values[1:]:
+ likes[values[0]] = True
+ a = [int(len(graph) * i) for i in np.arange(0, 1.1, 0.1)]
+ l = [aps(graph, degrees, k) for k in a]
+ for user in graph:
+ graph[user] = [friend for friend in graph[user]
+ if (friend in degrees and friend in likes)]
+ r = (a,
+ [im(graph, degrees, k) for k in a],
+ [aps(graph, degrees, k) for k in a],
+ l)
+ with open("hbo_likes_performance.txt", "w") as f:
+ f.write("\n".join("\t".join(map(str, k)) for k in zip(*r)))
+
+
+def stats():
+ for dataset in ["coachella"]:
+ graph, degrees = build_graph(dataset)
+ print dataset, len(graph) * 7, len(list(sd_users(graph))) * 7,\
+ np.mean([degrees[u] for u in graph]),\
+ np.mean([degrees[u] for u in sd_users(graph)])
+ for dataset in ["nyt", "gap", "gp", "kiva"]:
+ graph, degrees = build_graph(dataset)
+ print dataset, len(graph) * 6, len(list(sd_users(graph))) * 6,\
+ np.mean([degrees[u] for u in graph]),\
+ np.mean([degrees[u] for u in sd_users(graph)])
+ for dataset in ["google"]:
+ graph, degrees = build_graph(dataset)
+ print dataset, len(graph) * 5, len(list(sd_users(graph))) * 5,\
+ np.mean([degrees[u] for u in graph]),\
+ np.mean([degrees[u] for u in sd_users(graph)])
+ for dataset in ["lp", "hbo", "lmpt"]:
+ graph, degrees = build_graph(dataset)
+ print dataset, len(graph) * 3, len(list(sd_users(graph))) * 3,\
+ np.mean([degrees[u] for u in graph]),\
+ np.mean([degrees[u] for u in sd_users(graph)])
+ for dataset in ["peet"]:
+ graph, degrees = build_graph(dataset)
+ print dataset, len(graph) * 2, len(list(sd_users(graph))) * 2,\
+ np.mean([degrees[u] for u in graph]),\
+ np.mean([degrees[u] for u in sd_users(graph)])
+
+if __name__ == "__main__":
+ #for dataset in SYNTH_DATASETS:
+ # compute_performance(dataset)
+ compute_performance_p("coachella", "deg")
+ #compute_performance("coachella")
+ #hbo_likes()
+ #lp_perf()
+ #lp_time()
+ #aps_time()
+ #stats()
+ #lp_time_big()
+ # _, degrees = build_graph2("coachella")
+ # with open("coachella_degrees.txt", "w") as fh:
+ # for deg in degrees.itervalues():
+ # fh.write(str(deg) + "\n")
diff --git a/facebook_analysis/seed.py b/facebook_analysis/seed.py
new file mode 100644
index 0000000..7e2b851
--- /dev/null
+++ b/facebook_analysis/seed.py
@@ -0,0 +1,247 @@
+from analyze import sd_users, build_graph, DATASETS, SYNTH_DATASETS
+import matplotlib.pyplot as plt
+from matplotlib import rcParams, cm
+from matplotlib.colors import Normalize
+from matplotlib.pyplot import plot, legend, savefig, xlabel, ylabel,\
+ hist, title, subplot, tight_layout, ticklabel_format, xlim, ylim
+from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
+import itertools
+
+mq = lambda x: x * 4
+
+
+def plot_degree_distributions():
+ plt.figure(figsize=(7, 3))
+ graph, degrees = build_graph("kiva")
+ fd_degrees = list(degrees[user] for user in graph)
+ sd_degrees = list(degrees[user] for user in sd_users(graph))
+ n, bins, patches = plt.hist(fd_degrees, bins=50, cumulative=True,
+ label="Initial users", normed=True,
+ alpha=0.5, histtype="stepfilled")
+ n, bins, patches = plt.hist(sd_degrees, bins=50, cumulative=True,
+ histtype="stepfilled", normed=True, alpha=0.5,
+ label="Friends")
+ ylim(ymax=1.1)
+ plt.xlabel("Degree")
+ plt.ylabel("Probability")
+ plt.legend(loc="lower right")
+ plt.savefig("dist.pdf")
+
+
+def plot_all_performances():
+ plt.figure(figsize=(7, 14))
+ for i, dataset in enumerate(DATASETS):
+ values = [map(float, line.strip().split("\t"))
+ for line in open(dataset + "_performance.txt")]
+ a, im, rd, rdf, aps = zip(*values)
+ a, im, rd, rdf, aps = [map(mq, l) for l in (a, im, rd, rdf, aps)]
+ a = np.arange(0, 1.001, 0.1)
+ ax = plt.subplot(5, 2, i + 1)
+ #ax.set_yscale("log")
+ plt.plot(a, im, label="Max deg.")
+ plt.plot(a, rd, label="Rand.")
+ plt.plot(a, rdf, label="Rand. friend")
+ plt.plot(a, aps, label="Adapt. Seeding")
+ plt.xlabel("Budget (fraction of the total number of users)")
+ plt.ylabel("Performance")
+ if dataset == "sw":
+ titl = "SmallWord"
+ if dataset == "coachella":
+ titl = "Conf. Model"
+ if dataset == "kk":
+ titl = "Kronecker"
+ if dataset == "b-a":
+ titl = "Barabasi-Albert"
+ plt.title(titl)
+ xlim(xmax=1.1)
+ plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0, 0, 1, 1.03),
+ bbox_transform=plt.gcf().transFigure)
+ plt.tight_layout()
+ plt.savefig("test2.pdf")
+
+
+def compare_performance(fn):
+ plots = {}
+ plt.figure()
+ for dataset in DATASETS:
+ values = [map(float, line.strip().split("\t"))
+ for line in open(dataset + "_performance.txt")]
+ a, im, rd, rdf, aps = zip(*values)
+ plots[dataset] = [j * 1. / i for (j, i) in zip(aps, im)[1:]]
+ a = map(mq, a)
+ for dataset in DATASETS:
+ plt.plot(a[1:], plots[dataset], label=dataset)
+ xlim(xmax=550)
+ plt.xlabel("Budget")
+ plt.ylabel("Performance")
+ plt.legend(loc="lower right", ncol=2, fontsize="small")
+ plt.savefig(fn)
+
+
+def compare_performance2(fn):
+ plots = {}
+ plt.figure()
+ for dataset in DATASETS:
+ values = [map(float, line.strip().split("\t"))
+ for line in open(dataset + "_performance.txt")]
+ a, im, rd, rdf, aps = zip(*values)
+ plots[dataset] = [j * 1. / i for (j, i) in zip(aps, im)[1:]]
+ a = map(mq, a)
+ a = map(int, a)
+ z = zip(*plots.itervalues())
+ means = [np.mean(w) for w in z]
+ maxi = [np.max(w) for w in z]
+ mini = [np.min(w) for w in z]
+ ind = range(len(a[1:]))
+ width = 0.35
+ plt.bar(ind, means, width, linewidth=0.1)
+ plt.errorbar([i + width / 2. for i in ind], means, [mini, maxi], elinewidth=1.2, fmt="none")
+ plt.xticks([i + width / 2. for i in ind], a[1:])
+ plt.xlim(-width, len(ind) - 1 + 2 * width)
+ plt.xlabel("Budget")
+ plt.ylabel("Relative improvement")
+ plt.savefig(fn)
+
+
+def compare_dist():
+ fd, sd = [], []
+ plt.figure(figsize=(5, 3))
+ cm = iter(rcParams["axes.color_cycle"])
+ for dataset in DATASETS:
+ graph, degrees = build_graph(dataset)
+ fd_degrees = list(degrees[user] for user in graph)
+ sd_degrees = list(degrees[user] for user in sd_users(graph))
+ fd.append(np.mean(fd_degrees))
+ sd.append(np.mean(sd_degrees))
+ ind = range(len(DATASETS))
+ width = 0.35
+ plt.bar(ind, fd, width, label="Initial users", color=next(cm))
+ plt.bar([i + width for i in ind], sd, width, label="Friends",
+ color=next(cm))
+ plt.xlim(-width, len(ind) - 1 + 3 * width)
+ plt.xticks([i + width for i in ind], DATASETS)
+ plt.ylabel("Avg. degree")
+ plt.legend()
+ plt.savefig("para.pdf")
+
+
+def plot_perf_prob():
+ plt.figure()
+ with open("peet_performance_p.txt") as f:
+ values = [map(float, line.strip().split("\t")) for line in f]
+ values = zip(*values)
+ a = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+ for i in [0, 1, 2, 3, 5, 9]:
+ plt.plot(values[0], values[i + 1], label="$p = " + str(a[i]) + "$")
+ plt.legend()
+ with open("peet_performance.txt") as f:
+ values = [map(float, line.strip().split("\t")) for line in f]
+ values = zip(*values)
+ plt.gca().set_yscale("log")
+ plt.xlabel("Budget")
+ plt.ylabel("Performance")
+ plt.plot(values[0], values[1], label="Max. degree")
+ plt.legend(loc="lower right", fontsize="small", ncol=2)
+ xlim(xmax=450)
+ plt.savefig("prob.pdf")
+
+
+def plot_hbo_likes():
+ plt.figure()
+ rcParams["font.size"] = 6
+ with open("hbo_likes_performance.txt") as f:
+ values = [map(float, line.strip().split("\t")) for line in f]
+ a, im, aps, apso = zip(*values)
+ a = np.arange(0, 1.001, 0.1)
+ plt.gca().set_yscale("log")
+ #plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+ plt.plot(a, map(mq, im), label="Max. degr.")
+ plt.plot(a, map(mq, aps), label="Adapt. seed. (rest.)")
+ plt.plot(a, map(mq, apso), label="Adapt. seed.")
+ plt.xlabel("Budget")
+ plt.ylabel("Performance")
+ xlim(xmax=1.1)
+ plt.legend(loc="lower right")
+ plt.savefig("hbo_likes.pdf")
+
+
+def plot_3d():
+ for dist in ["beta", "gauss"]:
+ fig = plt.figure()
+ with open("coachella_performance_p_" + dist + ".txt") as f:
+ values = [map(float, line.strip().split("\t")) for line in f]
+ k = np.arange(0, 1.001, 0.1)
+ ps = np.arange(0.01, 0.99, 0.1)
+ x, y = np.meshgrid(k, ps)
+ perfs = [value[1:] for value in values]
+ perfs = zip(*perfs)
+ ax = fig.add_subplot(111, projection='3d')
+ ax.plot_wireframe(x, y, perfs, linewidth=0.1)
+ ticklabel_format(style='sci', axis='z', scilimits=(0, 0))
+ xlabel("Budget (fraction of nodes)")
+ ylabel("Distribution mean")
+ ax.set_zlabel("Performance")
+ ax.invert_xaxis()
+ plt.savefig(dist + ".pdf")
+ plt.show()
+
+
+def plot_time():
+ plt.figure()
+ rcParams["font.size"] = 6
+ a1 = np.loadtxt("time_aps_100.txt")
+ a2 = np.loadtxt("time_aps_500.txt")
+ lp1 = np.loadtxt("time_lp_100.txt")
+ lp2 = np.loadtxt("time_lp_500.txt")
+ subplot(2, 2, 1)
+ plot(a1[:, 0], a1[:, 1], "-", label="Comb.")
+ plot(lp1[:, 0], lp1[:, 1], "-", label="LP")
+ xlabel("n")
+ ylabel("time (s)")
+ xlim(0, 100000)
+ legend(loc="upper left")
+ ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
+ subplot(2, 2, 2)
+ plot(a1[:, 0], a1[:, 2], "-", label="Comb.")
+ plot(lp1[:, 0], lp1[:, 2], "-", label="LP")
+ ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
+ xlabel("n")
+ ylabel("\# cycles")
+ xlim(0, 100000)
+ legend(loc="upper left")
+ subplot(2, 2, 3)
+ plot(a2[:, 0], a2[:, 1], "-", label="Comb.")
+ plot(lp2[:, 0], lp2[:, 1], "-", label="LP")
+ ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
+ xlabel("n")
+ ylabel("time (s)")
+ xlim(0, 100000)
+ legend(loc="upper left")
+ subplot(2, 2, 4)
+ plot(a2[:, 0], a2[:, 2], "-", label="Comb.")
+ plot(lp2[:, 0], lp2[:, 2], "-", label="LP")
+ ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
+ xlabel("n")
+ ylabel("\# cycles")
+ xlim(0, 100000)
+ legend(loc="upper left")
+ tight_layout(h_pad=-0.5)
+ savefig("time.pdf")
+
+
+if __name__ == "__main__":
+ SYNTH_DATASETS = ["b-a", "kk", "sw", "coachella"]
+ DATASETS = SYNTH_DATASETS
+ plot_all_performances()
+ #plot_3d()
+ #plot_hbo_likes()
+ #compare_performance()
+ #plot_perf_prob()
+ #compare_dist()
+ #plot_time()
+ #plot_degree_distributions()
+ #for style in plt.style.available:
+ # plt.style.use(style)
+ # compare_performance("performance_" + style + ".pdf")
+ #compare_performance2("comp4_" + ".pdf")
diff --git a/facebook_analysis/setup.py b/facebook_analysis/setup.py
new file mode 100644
index 0000000..043e734
--- /dev/null
+++ b/facebook_analysis/setup.py
@@ -0,0 +1,4 @@
+from distutils.core import setup
+from Cython.Build import cythonize
+
+setup(ext_modules=cythonize("ads.pyx"))