diff options
Diffstat (limited to 'facebook_analysis')
| -rw-r--r-- | facebook_analysis/Makefile | 3 | ||||
| -rw-r--r-- | facebook_analysis/ads.pyx | 403 | ||||
| -rw-r--r-- | facebook_analysis/analyze.py | 340 | ||||
| -rw-r--r-- | facebook_analysis/seed.py | 247 | ||||
| -rw-r--r-- | facebook_analysis/setup.py | 4 |
5 files changed, 997 insertions, 0 deletions
diff --git a/facebook_analysis/Makefile b/facebook_analysis/Makefile new file mode 100644 index 0000000..e3df848 --- /dev/null +++ b/facebook_analysis/Makefile @@ -0,0 +1,3 @@ +all: + python2 setup.py build_ext --inplace + cython2 -a ads.pyx diff --git a/facebook_analysis/ads.pyx b/facebook_analysis/ads.pyx new file mode 100644 index 0000000..2682456 --- /dev/null +++ b/facebook_analysis/ads.pyx @@ -0,0 +1,403 @@ +#!python +#cython: boundscheck=False, nonecheck=False +cimport cython +import random + + +@cython.nonecheck(False) +cdef int merge_opt(list l1, list l2, int t, dict degrees): + cdef int n1, n2, i, j, n, s + n1 = len(l1) + n2 = len(l2) + i = j = n = s = 0 + while n < t: + if i == n1 and j == n2: + break + if i == n1: + s += degrees[l2[j]] + j += 1 + n += 1 + continue + if j == n2: + s += degrees[l1[i]] + i += 1 + n += 1 + continue + + li, lj = l1[i], l2[j] + di, dj = degrees[li], degrees[lj] + if lj == li: + j += 1 + s += di + i += 1 + n += 1 + elif di > dj: + s += di + i += 1 + n += 1 + else: + s += dj + j += 1 + n += 1 + return s + +@cython.nonecheck(False) +cdef float merge_opt_p(list l1, list l2, int t, dict degrees, float p): + cdef int n1, n2, i, j + cdef float n, s + n1 = len(l1) + n2 = len(l2) + i = j = 0 + n = s = 0. + while n < t: + if i == n1 and j == n2: + break + if i == n1: + s += degrees[l2[j]]*p + j += 1 + n += p + continue + if j == n2: + s += degrees[l1[i]]*p + i += 1 + n += p + continue + + li, lj = l1[i], l2[j] + di, dj = degrees[li], degrees[lj] + if lj == li: + j += 1 + s += di*p + i += 1 + n += p + elif di > dj: + s += di*p + i += 1 + n += p + else: + s += dj*p + j += 1 + n += p + return s + + +@cython.nonecheck(False) +cdef float merge_opt_p_sample(list l1, list l2, int t, dict degrees, float p): + random.seed() + cdef int n1, n2, i, j + cdef float n, s + n1 = len(l1) + n2 = len(l2) + i = j = 0 + n = s = 0. + cdef int k + cdef dict a + cdef float r + k = 0 + r = 0 + a = {} + for k in xrange(len(degrees)**2): + for d in degrees: + a[d] = random.random() + while n < t: + if i == n1 and j == n2: + break + if i == n1: + s += degrees[l2[j]]*p + j += 1 + n += p + continue + if j == n2: + s += degrees[l1[i]]*p + i += 1 + n += p + continue + + li, lj = l1[i], l2[j] + di, dj = degrees[li], degrees[lj] + if lj == li: + j += 1 + s += di*p + i += 1 + n += p + elif di > dj: + s += di*p + i += 1 + n += p + else: + s += dj*p + j += 1 + n += p + r += s + r /= n**2 + return r + + +@cython.nonecheck(False) +cdef float merge_opt_ps(list l1, list l2, int t, dict degrees, dict p): + cdef int n1, n2, i, j + cdef float n, s + n1 = len(l1) + n2 = len(l2) + i = j = 0 + n = s = 0. + while n < t: + if i == n1 and j == n2: + break + if i == n1: + s += degrees[l2[j]]*p[l2[j]] + n += p[l2[j]] + j += 1 + continue + if j == n2: + s += degrees[l1[i]]*p[l1[i]] + n += p[l1[i]] + i += 1 + continue + + li, lj = l1[i], l2[j] + di, dj = degrees[li], degrees[lj] + if lj == li: + j += 1 + s += di*p[li] + n += p[li] + i += 1 + elif di > dj: + s += di*p[li] + n += p[li] + i += 1 + else: + s += dj*p[lj] + n += p[lj] + j += 1 + return s + + +@cython.nonecheck(False) +cdef list merge(list l1, list l2, int t, dict degrees): + cdef int n1, n2, i, j, n + n1 = len(l1) + n2 = len(l2) + result = [] + i = j = n = 0 + while n < t: + if i == n1 and j == n2: + break + if i == n1: + result.append(l2[j]) + j += 1 + n += 1 + continue + if j == n2: + result.append(l1[i]) + i += 1 + n += 1 + continue + + if l2[j] == l1[i]: + j += 1 + result.append(l1[i]) + i += 1 + n += 1 + elif degrees[l1[i]] > degrees[l2[j]]: + result.append(l1[i]) + i += 1 + n += 1 + else: + result.append(l2[j]) + j += 1 + n += 1 + return result + +@cython.nonecheck(False) +cdef list merge_p(list l1, list l2, int t, dict degrees, float p): + cdef int n1, n2, i, j + cdef float n + n1 = len(l1) + n2 = len(l2) + result = [] + i = j = 0 + n = 0. + while n < t: + if i == n1 and j == n2: + break + if i == n1: + result.append(l2[j]) + j += 1 + n += p + continue + if j == n2: + result.append(l1[i]) + i += 1 + n += p + continue + + if l2[j] == l1[i]: + j += 1 + result.append(l1[i]) + i += 1 + n += p + elif degrees[l1[i]] > degrees[l2[j]]: + result.append(l1[i]) + i += 1 + n += p + else: + result.append(l2[j]) + j += 1 + n += p + return result + + +@cython.nonecheck(False) +cdef list merge_ps(list l1, list l2, int t, dict degrees, dict p): + cdef int n1, n2, i, j + cdef float n + n1 = len(l1) + n2 = len(l2) + result = [] + i = j = 0 + n = 0. + while n < t: + if i == n1 and j == n2: + break + if i == n1: + result.append(l2[j]) + n += p[l2[j]] + j += 1 + continue + if j == n2: + result.append(l1[i]) + n += p[l1[i]] + i += 1 + continue + + if l2[j] == l1[i]: + j += 1 + result.append(l1[i]) + n += p[l1[i]] + i += 1 + elif degrees[l1[i]] > degrees[l2[j]]: + result.append(l1[i]) + n += p[l1[i]] + i += 1 + else: + result.append(l2[j]) + n += p[l2[j]] + j += 1 + return result + +@cython.nonecheck(False) +def fs(tuple a): + cdef int cur_val, best_diff, best_value, o, t, k + cdef list x + cdef dict graph + cdef dict degrees + t, k, x, graph, degrees = a + cdef list n + cur_val = 0 + n = [] # neighbors of greedy set + for i in range(1, k - t): + best_diff = 0 + best_user = None + best_value = 0 + for user in x: + o = merge_opt(n, graph[user], t, degrees) + if o - cur_val > best_diff: + best_diff = o - cur_val + best_user = user + best_value = o + if best_user is not None: + x.remove(best_user) + cur_val = best_value + n = merge(n, graph[best_user], t, degrees) + else: + break + return cur_val + +@cython.nonecheck(False) +def fs_p(tuple a): + cdef int t, k + cdef float o, p, best_value, best_diff, cur_val + cdef list x + cdef dict graph + cdef dict degrees + t, k, x, graph, degrees, p = a + cdef list n + cur_val = 0 + n = [] # neighbors of greedy set + for i in range(1, k - t): + best_diff = 0 + best_user = None + best_value = 0 + for user in x: + o = merge_opt_p(n, graph[user], t, degrees, p) + if o - cur_val > best_diff: + best_diff = o - cur_val + best_user = user + best_value = o + if best_user is not None: + x.remove(best_user) + cur_val = best_value + n = merge_p(n, graph[best_user], t, degrees, p) + else: + break + return cur_val + +@cython.nonecheck(False) +def fs_p_sample(tuple a): + cdef int t, k + cdef float o, p, best_value, best_diff, cur_val + cdef list x + cdef dict graph + cdef dict degrees + t, k, x, graph, degrees, p = a + cdef list n + cur_val = 0 + n = [] # neighbors of greedy set + for i in range(1, k - t): + best_diff = 0 + best_user = None + best_value = 0 + for user in x: + o = merge_opt_p_sample(n, graph[user], t, degrees, p) + if o - cur_val > best_diff: + best_diff = o - cur_val + best_user = user + best_value = o + if best_user is not None: + x.remove(best_user) + cur_val = best_value + n = merge_p(n, graph[best_user], t, degrees, p) + else: + break + return cur_val + +@cython.nonecheck(False) +def fs_ps(tuple a): + cdef int t, k + cdef float o, best_value, best_diff, cur_val + cdef list x + cdef dict graph + cdef dict degrees + cdef dict p + t, k, x, graph, degrees, p = a + cdef list n + cur_val = 0 + n = [] # neighbors of greedy set + for i in range(1, k - t): + best_diff = 0 + best_user = None + best_value = 0 + for user in x: + o = merge_opt_ps(n, graph[user], t, degrees, p) + if o - cur_val > best_diff: + best_diff = o - cur_val + best_user = user + best_value = o + if best_user is not None: + x.remove(best_user) + cur_val = best_value + n = merge_ps(n, graph[best_user], t, degrees, p) + else: + break + return cur_val diff --git a/facebook_analysis/analyze.py b/facebook_analysis/analyze.py new file mode 100644 index 0000000..c5e6feb --- /dev/null +++ b/facebook_analysis/analyze.py @@ -0,0 +1,340 @@ +from bs4 import BeautifulSoup +import os.path as op +from client.tasks import normalize +from itertools import chain +from random import sample +from multiprocessing import Pool +from ads import fs, fs_p, fs_ps, fs_p_sample +import numpy as np +import pulp +import sys +from random import seed, betavariate, normalvariate +import matplotlib.pyplot as plt + +DATA_DIR = "../facebook_data" +DATASETS = ["hbo", "nyt", "lp", "google", "lmpt", "gp", "kiva", "coachella", + "peet", "gap"] +SYNTH_DIR = "../apgl" +SYNTH_DATASETS = ["b-a", "kk", "sw"] + + +def build_graph2(dataset): + users_file = op.join(DATA_DIR, dataset + "_users.txt") + seed_file = op.join(DATA_DIR, dataset + ".txt") + degrees = {} + graph = {} + with open(users_file) as f: + for line in f: + values = line.strip().split() + degrees[values[0]] = int(values[1]) + + soup = BeautifulSoup(open(seed_file)) + links = [div.a["href"] for div in soup.findAll("div", class_="fsl")] + for link in links: + basename, fname, getname = normalize(link) + long_name = op.join(DATA_DIR, "facebook", fname) + if not op.isfile(long_name): + continue + else: + with open(long_name) as f: + friends = [normalize(line.strip())[1] for line in f] + friends = [friend for friend in friends + if (friend in degrees) and degrees[friend] > 0] + if len(friends) > 0: + friends = list(set(friends)) + friends.sort(key=degrees.get, reverse=True) + graph[fname] = friends + degrees[fname] = len(friends) + print dataset, len(graph), len(list(sd_users(graph))) + return graph, degrees + + +def build_graph1(dataset): + fname = op.join(SYNTH_DIR, dataset + ".txt") + degrees = {} + graph = {} + with open(fname) as fh: + for line in fh: + values = line.strip().split("\t") + node = int(values[0]) + friends = zip(*[iter(values[1:])] * 2) + friends = [map(int, f) for f in friends] + for friend in friends: + degrees[friend[0]] = friend[1] + graph[node] = [friend[0] for friend in friends] + degrees[node] = len(graph[node]) + print fname, len(graph), len(list(sd_users(graph))) + return graph, degrees + + +def build_graph(dataset): + if dataset in DATASETS or dataset == "big": + return build_graph2(dataset) + else: + return build_graph1(dataset) + + +def print_graph(dataset): + graph, degrees = build_graph(dataset) + with open(dataset + "_single_graph.txt", "w") as f: + for user, friends in graph.iteritems(): + friends_deg = [(friend, str(degrees[friend])) + for friend in friends] + f.write(user + "\t" + + "\t".join("\t".join(friend) for friend in friends_deg) + + "\n") + + +def sd_users(graph): + return chain.from_iterable(graph.itervalues()) + + +def random(graph, degrees, n): + #n = int(len(graph) * ratio) + values = [] + for _ in xrange(100): + users = sample(graph, n) + values.append(sum(degrees[user] for user in users)) + return sum(values) / float(len(values)) + + +def random_friend(graph, degrees, n): + #n = int(len(graph) * ratio) + values = [] + for _ in xrange(100): + users = sample(graph, n / 2) + values.append(sum(degrees[sample(graph[user], 1)[0]] + degrees[user] + for user in users)) + return sum(values) / float(len(values)) + + +def im(graph, degrees, n): + #n = int(len(graph) * ratio) + l = list(graph.iterkeys()) + l.sort(key=lambda x: degrees[x], reverse=True) + return sum(degrees[user] for user in l[:n]) + + +def aps(graph, degrees, k, p=1, sampl=False): + x = list(set(graph.keys())) + #k = int(len(graph) * ratio) # budget + P = Pool(5) + if p == 1: + m = P.map(fs, zip(range(1, k - 1), + [k] * (k - 1), + [x] * (k - 1), + [graph] * (k - 1), + [degrees] * (k - 1))) + elif type(p) is dict: + m = P.map(fs_ps, zip(range(1, k - 1), + [k] * (k - 1), + [x] * (k - 1), + [graph] * (k - 1), + [degrees] * (k - 1), + [p] * (k - 1))) + elif sampl: + m = P.map(fs_p_sample, zip(range(1, k - 1), + [k] * (k - 1), + [x] * (k - 1), + [graph] * (k - 1), + [degrees] * (k - 1), + [p] * (k - 1))) + else: + m = P.map(fs_p, zip(range(1, k - 1), + [k] * (k - 1), + [x] * (k - 1), + [graph] * (k - 1), + [degrees] * (k - 1), + [p] * (k - 1))) + + P.close() + try: + m = max(m) + except ValueError: + m = 0 + print m + return m + + +def generate_degree(degrees, p, distr="beta"): + #plt.figure() + seed() + ps = {} + if distr == "beta": + beta = 5. + alpha = 5. * p / (1 - p) + sample = lambda d: betavariate(alpha, beta) + elif distr == "gauss": + sample = lambda d: normalvariate(p, 0.01) + elif distr == "power": + alpha = 1. + beta = (1. - p) / p + sample = lambda d: betavariate(alpha, beta) + elif distr == "deg": + s = sum((1. / d) for d in degrees.itervalues() if d != 0) + c = len(list(d for d in degrees if d != 0)) * p / s + sample = lambda d: c / d if d != 0 else p + for node, deg in degrees.iteritems(): + s = sample(deg) + if s < 0.001: + ps[node] = 0. + elif s > 1.: + ps[node] = 1. + else: + ps[node] = s + #plt.hist(list(ps.itervalues()), 50) + #plt.savefig(distr + "_dist.pdf") + return ps + + +def compute_performance(dataset): + graph, degrees = build_graph(dataset) + a = [int(len(graph) * i) for i in np.arange(0, 1.1, 0.1)] + r = (a, + [im(graph, degrees, k) for k in a], + [random(graph, degrees, k) for k in a], + [random_friend(graph, degrees, k) for k in a], + [aps(graph, degrees, k) for k in a]) + with open(dataset + "_performance.txt", "w") as f: + f.write("\n".join("\t".join(map(str, k)) for k in zip(*r))) + + +def compute_performance_p(dataset, distr=None): + graph, degrees = build_graph(dataset) + ps = np.arange(0.01, 0.99, 0.1) + a = [int(len(graph) * i) for i in np.arange(0, 1.1, 0.1)] + if distr is None: + l = [[aps(graph, degrees, k, p) for k in a] + for p in ps] + else: + l = [[aps(graph, degrees, k, generate_degree(degrees, p, distr)) + for k in a] + for p in ps] + r = [a] + r += l + with open(dataset + "_performance_p_" + str(distr) + ".txt", "w") as f: + f.write("\n".join("\t".join(map(str, k)) for k in zip(*r))) + + +def lp(graph, degrees, k): + reverse = {} + for user in sd_users(graph): + reverse[user] = [] + for user in graph: + for friend in graph[user]: + reverse[friend].append(user) + + prob = pulp.LpProblem("ads", pulp.LpMaximize) + x = pulp.LpVariable.dicts("x", graph.keys(), 0., 1.) + y = pulp.LpVariable.dicts("y", sd_users(graph), 0., 1.) + prob += pulp.lpSum([degrees[user] * x[user] for user in graph] + + [degrees[user] * y[user] for user in sd_users(graph)]) + for user in sd_users(graph): + prob += pulp.lpSum([x[u] for u in reverse[user]] + [-y[user]]) >= 0 + prob += pulp.lpSum([x[u] for u in graph] + [y[u] for u in reverse]) <= k + prob.solve(pulp.COIN_CMD()) + print "Status:", pulp.LpStatus[prob.status] + print "Value =", pulp.value(prob.objective) + + +def lp_perf(): + graph, degrees = build_graph("peet") + a = [int(len(graph) * i) for i in np.arange(0, 1.1, 0.1)] + r = (a, + #[aps(graph, degrees, k) for k in a], + [lp(graph, degrees, k) for k in a]) + with open("lp_running_time.txt", "w") as f: + f.write("\n".join("\t".join(map(str, k)) for k in zip(*r))) + + +def lp_time(): + graph, degrees = build_graph("big") + sp = sample(graph.keys(), int(sys.argv[2])) + graph = {s: graph[s] for s in sp} + a = int(sys.argv[1]) + print len(list(sd_users(graph))), a + lp(graph, degrees, a) + + +def aps_time(): + graph, degrees = build_graph("big") + sp = sample(graph.keys(), int(sys.argv[2])) + graph = {s: graph[s] for s in sp} + a = int(sys.argv[1]) + print len(list(sd_users(graph))), a + aps(graph, degrees, a, p=0.9, sampl=True) + + +def lp_time_big(): + graph, degrees = build_graph("big") + graph_big = {} + for i in xrange(10): + for user in graph: + graph_big[user + str(i)] = graph[user] + degrees[user + str(i)] = degrees[user] + aps(graph_big, degrees, 500) + + +def hbo_likes(): + graph, degrees = build_graph("hbo") + like_file = op.join(DATA_DIR, "hbo_likes.txt") + likes = {} + for line in open(like_file): + values = line.strip().split("\t") + if "HBO" in values[1:]: + likes[values[0]] = True + a = [int(len(graph) * i) for i in np.arange(0, 1.1, 0.1)] + l = [aps(graph, degrees, k) for k in a] + for user in graph: + graph[user] = [friend for friend in graph[user] + if (friend in degrees and friend in likes)] + r = (a, + [im(graph, degrees, k) for k in a], + [aps(graph, degrees, k) for k in a], + l) + with open("hbo_likes_performance.txt", "w") as f: + f.write("\n".join("\t".join(map(str, k)) for k in zip(*r))) + + +def stats(): + for dataset in ["coachella"]: + graph, degrees = build_graph(dataset) + print dataset, len(graph) * 7, len(list(sd_users(graph))) * 7,\ + np.mean([degrees[u] for u in graph]),\ + np.mean([degrees[u] for u in sd_users(graph)]) + for dataset in ["nyt", "gap", "gp", "kiva"]: + graph, degrees = build_graph(dataset) + print dataset, len(graph) * 6, len(list(sd_users(graph))) * 6,\ + np.mean([degrees[u] for u in graph]),\ + np.mean([degrees[u] for u in sd_users(graph)]) + for dataset in ["google"]: + graph, degrees = build_graph(dataset) + print dataset, len(graph) * 5, len(list(sd_users(graph))) * 5,\ + np.mean([degrees[u] for u in graph]),\ + np.mean([degrees[u] for u in sd_users(graph)]) + for dataset in ["lp", "hbo", "lmpt"]: + graph, degrees = build_graph(dataset) + print dataset, len(graph) * 3, len(list(sd_users(graph))) * 3,\ + np.mean([degrees[u] for u in graph]),\ + np.mean([degrees[u] for u in sd_users(graph)]) + for dataset in ["peet"]: + graph, degrees = build_graph(dataset) + print dataset, len(graph) * 2, len(list(sd_users(graph))) * 2,\ + np.mean([degrees[u] for u in graph]),\ + np.mean([degrees[u] for u in sd_users(graph)]) + +if __name__ == "__main__": + #for dataset in SYNTH_DATASETS: + # compute_performance(dataset) + compute_performance_p("coachella", "deg") + #compute_performance("coachella") + #hbo_likes() + #lp_perf() + #lp_time() + #aps_time() + #stats() + #lp_time_big() + # _, degrees = build_graph2("coachella") + # with open("coachella_degrees.txt", "w") as fh: + # for deg in degrees.itervalues(): + # fh.write(str(deg) + "\n") diff --git a/facebook_analysis/seed.py b/facebook_analysis/seed.py new file mode 100644 index 0000000..7e2b851 --- /dev/null +++ b/facebook_analysis/seed.py @@ -0,0 +1,247 @@ +from analyze import sd_users, build_graph, DATASETS, SYNTH_DATASETS +import matplotlib.pyplot as plt +from matplotlib import rcParams, cm +from matplotlib.colors import Normalize +from matplotlib.pyplot import plot, legend, savefig, xlabel, ylabel,\ + hist, title, subplot, tight_layout, ticklabel_format, xlim, ylim +from mpl_toolkits.mplot3d import Axes3D +import numpy as np +import itertools + +mq = lambda x: x * 4 + + +def plot_degree_distributions(): + plt.figure(figsize=(7, 3)) + graph, degrees = build_graph("kiva") + fd_degrees = list(degrees[user] for user in graph) + sd_degrees = list(degrees[user] for user in sd_users(graph)) + n, bins, patches = plt.hist(fd_degrees, bins=50, cumulative=True, + label="Initial users", normed=True, + alpha=0.5, histtype="stepfilled") + n, bins, patches = plt.hist(sd_degrees, bins=50, cumulative=True, + histtype="stepfilled", normed=True, alpha=0.5, + label="Friends") + ylim(ymax=1.1) + plt.xlabel("Degree") + plt.ylabel("Probability") + plt.legend(loc="lower right") + plt.savefig("dist.pdf") + + +def plot_all_performances(): + plt.figure(figsize=(7, 14)) + for i, dataset in enumerate(DATASETS): + values = [map(float, line.strip().split("\t")) + for line in open(dataset + "_performance.txt")] + a, im, rd, rdf, aps = zip(*values) + a, im, rd, rdf, aps = [map(mq, l) for l in (a, im, rd, rdf, aps)] + a = np.arange(0, 1.001, 0.1) + ax = plt.subplot(5, 2, i + 1) + #ax.set_yscale("log") + plt.plot(a, im, label="Max deg.") + plt.plot(a, rd, label="Rand.") + plt.plot(a, rdf, label="Rand. friend") + plt.plot(a, aps, label="Adapt. Seeding") + plt.xlabel("Budget (fraction of the total number of users)") + plt.ylabel("Performance") + if dataset == "sw": + titl = "SmallWord" + if dataset == "coachella": + titl = "Conf. Model" + if dataset == "kk": + titl = "Kronecker" + if dataset == "b-a": + titl = "Barabasi-Albert" + plt.title(titl) + xlim(xmax=1.1) + plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0, 0, 1, 1.03), + bbox_transform=plt.gcf().transFigure) + plt.tight_layout() + plt.savefig("test2.pdf") + + +def compare_performance(fn): + plots = {} + plt.figure() + for dataset in DATASETS: + values = [map(float, line.strip().split("\t")) + for line in open(dataset + "_performance.txt")] + a, im, rd, rdf, aps = zip(*values) + plots[dataset] = [j * 1. / i for (j, i) in zip(aps, im)[1:]] + a = map(mq, a) + for dataset in DATASETS: + plt.plot(a[1:], plots[dataset], label=dataset) + xlim(xmax=550) + plt.xlabel("Budget") + plt.ylabel("Performance") + plt.legend(loc="lower right", ncol=2, fontsize="small") + plt.savefig(fn) + + +def compare_performance2(fn): + plots = {} + plt.figure() + for dataset in DATASETS: + values = [map(float, line.strip().split("\t")) + for line in open(dataset + "_performance.txt")] + a, im, rd, rdf, aps = zip(*values) + plots[dataset] = [j * 1. / i for (j, i) in zip(aps, im)[1:]] + a = map(mq, a) + a = map(int, a) + z = zip(*plots.itervalues()) + means = [np.mean(w) for w in z] + maxi = [np.max(w) for w in z] + mini = [np.min(w) for w in z] + ind = range(len(a[1:])) + width = 0.35 + plt.bar(ind, means, width, linewidth=0.1) + plt.errorbar([i + width / 2. for i in ind], means, [mini, maxi], elinewidth=1.2, fmt="none") + plt.xticks([i + width / 2. for i in ind], a[1:]) + plt.xlim(-width, len(ind) - 1 + 2 * width) + plt.xlabel("Budget") + plt.ylabel("Relative improvement") + plt.savefig(fn) + + +def compare_dist(): + fd, sd = [], [] + plt.figure(figsize=(5, 3)) + cm = iter(rcParams["axes.color_cycle"]) + for dataset in DATASETS: + graph, degrees = build_graph(dataset) + fd_degrees = list(degrees[user] for user in graph) + sd_degrees = list(degrees[user] for user in sd_users(graph)) + fd.append(np.mean(fd_degrees)) + sd.append(np.mean(sd_degrees)) + ind = range(len(DATASETS)) + width = 0.35 + plt.bar(ind, fd, width, label="Initial users", color=next(cm)) + plt.bar([i + width for i in ind], sd, width, label="Friends", + color=next(cm)) + plt.xlim(-width, len(ind) - 1 + 3 * width) + plt.xticks([i + width for i in ind], DATASETS) + plt.ylabel("Avg. degree") + plt.legend() + plt.savefig("para.pdf") + + +def plot_perf_prob(): + plt.figure() + with open("peet_performance_p.txt") as f: + values = [map(float, line.strip().split("\t")) for line in f] + values = zip(*values) + a = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + for i in [0, 1, 2, 3, 5, 9]: + plt.plot(values[0], values[i + 1], label="$p = " + str(a[i]) + "$") + plt.legend() + with open("peet_performance.txt") as f: + values = [map(float, line.strip().split("\t")) for line in f] + values = zip(*values) + plt.gca().set_yscale("log") + plt.xlabel("Budget") + plt.ylabel("Performance") + plt.plot(values[0], values[1], label="Max. degree") + plt.legend(loc="lower right", fontsize="small", ncol=2) + xlim(xmax=450) + plt.savefig("prob.pdf") + + +def plot_hbo_likes(): + plt.figure() + rcParams["font.size"] = 6 + with open("hbo_likes_performance.txt") as f: + values = [map(float, line.strip().split("\t")) for line in f] + a, im, aps, apso = zip(*values) + a = np.arange(0, 1.001, 0.1) + plt.gca().set_yscale("log") + #plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0)) + plt.plot(a, map(mq, im), label="Max. degr.") + plt.plot(a, map(mq, aps), label="Adapt. seed. (rest.)") + plt.plot(a, map(mq, apso), label="Adapt. seed.") + plt.xlabel("Budget") + plt.ylabel("Performance") + xlim(xmax=1.1) + plt.legend(loc="lower right") + plt.savefig("hbo_likes.pdf") + + +def plot_3d(): + for dist in ["beta", "gauss"]: + fig = plt.figure() + with open("coachella_performance_p_" + dist + ".txt") as f: + values = [map(float, line.strip().split("\t")) for line in f] + k = np.arange(0, 1.001, 0.1) + ps = np.arange(0.01, 0.99, 0.1) + x, y = np.meshgrid(k, ps) + perfs = [value[1:] for value in values] + perfs = zip(*perfs) + ax = fig.add_subplot(111, projection='3d') + ax.plot_wireframe(x, y, perfs, linewidth=0.1) + ticklabel_format(style='sci', axis='z', scilimits=(0, 0)) + xlabel("Budget (fraction of nodes)") + ylabel("Distribution mean") + ax.set_zlabel("Performance") + ax.invert_xaxis() + plt.savefig(dist + ".pdf") + plt.show() + + +def plot_time(): + plt.figure() + rcParams["font.size"] = 6 + a1 = np.loadtxt("time_aps_100.txt") + a2 = np.loadtxt("time_aps_500.txt") + lp1 = np.loadtxt("time_lp_100.txt") + lp2 = np.loadtxt("time_lp_500.txt") + subplot(2, 2, 1) + plot(a1[:, 0], a1[:, 1], "-", label="Comb.") + plot(lp1[:, 0], lp1[:, 1], "-", label="LP") + xlabel("n") + ylabel("time (s)") + xlim(0, 100000) + legend(loc="upper left") + ticklabel_format(style='sci', axis='x', scilimits=(0, 0)) + subplot(2, 2, 2) + plot(a1[:, 0], a1[:, 2], "-", label="Comb.") + plot(lp1[:, 0], lp1[:, 2], "-", label="LP") + ticklabel_format(style='sci', axis='x', scilimits=(0, 0)) + xlabel("n") + ylabel("\# cycles") + xlim(0, 100000) + legend(loc="upper left") + subplot(2, 2, 3) + plot(a2[:, 0], a2[:, 1], "-", label="Comb.") + plot(lp2[:, 0], lp2[:, 1], "-", label="LP") + ticklabel_format(style='sci', axis='x', scilimits=(0, 0)) + xlabel("n") + ylabel("time (s)") + xlim(0, 100000) + legend(loc="upper left") + subplot(2, 2, 4) + plot(a2[:, 0], a2[:, 2], "-", label="Comb.") + plot(lp2[:, 0], lp2[:, 2], "-", label="LP") + ticklabel_format(style='sci', axis='x', scilimits=(0, 0)) + xlabel("n") + ylabel("\# cycles") + xlim(0, 100000) + legend(loc="upper left") + tight_layout(h_pad=-0.5) + savefig("time.pdf") + + +if __name__ == "__main__": + SYNTH_DATASETS = ["b-a", "kk", "sw", "coachella"] + DATASETS = SYNTH_DATASETS + plot_all_performances() + #plot_3d() + #plot_hbo_likes() + #compare_performance() + #plot_perf_prob() + #compare_dist() + #plot_time() + #plot_degree_distributions() + #for style in plt.style.available: + # plt.style.use(style) + # compare_performance("performance_" + style + ".pdf") + #compare_performance2("comp4_" + ".pdf") diff --git a/facebook_analysis/setup.py b/facebook_analysis/setup.py new file mode 100644 index 0000000..043e734 --- /dev/null +++ b/facebook_analysis/setup.py @@ -0,0 +1,4 @@ +from distutils.core import setup +from Cython.Build import cythonize + +setup(ext_modules=cythonize("ads.pyx")) |
