From 36eb1fee5492e57368846cbf4e107f1e4cb31589 Mon Sep 17 00:00:00 2001 From: Thibaut Horel Date: Sat, 22 Nov 2014 21:08:41 -0500 Subject: WWW version --- facebook_analysis/seed.py | 125 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 94 insertions(+), 31 deletions(-) (limited to 'facebook_analysis/seed.py') diff --git a/facebook_analysis/seed.py b/facebook_analysis/seed.py index 7e2b851..cba45e1 100644 --- a/facebook_analysis/seed.py +++ b/facebook_analysis/seed.py @@ -1,23 +1,80 @@ from analyze import sd_users, build_graph, DATASETS, SYNTH_DATASETS import matplotlib.pyplot as plt from matplotlib import rcParams, cm -from matplotlib.colors import Normalize from matplotlib.pyplot import plot, legend, savefig, xlabel, ylabel,\ hist, title, subplot, tight_layout, ticklabel_format, xlim, ylim from mpl_toolkits.mplot3d import Axes3D import numpy as np -import itertools mq = lambda x: x * 4 +def voter(): + with open("epinions_voter.txt") as f: + ep = [float(line) for line in f] + with open("epinions_voter_influence.txt") as f: + epr = [float(line) for line in f] + with open("slashdot_voter.txt") as f: + sl = [float(line) for line in f] + with open("slashdot_voter_influence.txt") as f: + slr = [float(line) for line in f] + a = range(1, 51) + plt.figure(figsize=(7, 3)) + plt.subplot(1, 2, 1) + plt.plot(a, ep, label="Adapt. Seeding") + plt.plot(a, epr, label="Inf. Max.") + plt.legend() + plt.title("Epinions") + plt.xlabel("t") + plt.ylabel("Performance") + plt.subplot(1, 2, 2) + plt.plot(a, sl, label="Adapt. Seeding") + plt.plot(a, slr, label="Inf. Max") + plt.legend() + plt.title("Slashdot") + plt.xlabel("t") + plt.ylabel("Performance") + plt.savefig("voter.pdf") + + +def sampling(): + with open("hbo_sampling.txt") as f: + values = [line.strip().replace(",", "").split() for line in f] + ks, ts, cs = zip(*values) + ks = map(int, ks) + ts = map(float, ts) + cs = map(float, cs) + with open("hbo_sans_sampling.txt") as f: + values = [line.strip().replace(",", "").split() for line in f] + k, t, c = zip(*values) + k = map(int, k) + t = map(float, t) + c = map(float, c) + plt.figure(figsize=(7, 3)) + plt.subplot(1, 2, 1) + plt.gca().set_yscale("log") + plt.plot(ks, ts, label="Sampling based") + plt.plot(ks, t, label="Comb. alg.") + plt.xlabel("Size") + plt.ylabel("Time (s)") + plt.legend(loc="upper left") + plt.subplot(1, 2, 2) + plt.gca().set_yscale("log") + plt.plot(ks, cs, label="Sampling based") + plt.plot(ks, c, label="Comb. alg.") + plt.legend(loc="upper left") + plt.xlabel("Size") + plt.ylabel("\# Cycles") + plt.savefig("sampling2.pdf") + + def plot_degree_distributions(): plt.figure(figsize=(7, 3)) graph, degrees = build_graph("kiva") fd_degrees = list(degrees[user] for user in graph) sd_degrees = list(degrees[user] for user in sd_users(graph)) n, bins, patches = plt.hist(fd_degrees, bins=50, cumulative=True, - label="Initial users", normed=True, + label="Core set", normed=True, alpha=0.5, histtype="stepfilled") n, bins, patches = plt.hist(sd_degrees, bins=50, cumulative=True, histtype="stepfilled", normed=True, alpha=0.5, @@ -30,25 +87,27 @@ def plot_degree_distributions(): def plot_all_performances(): - plt.figure(figsize=(7, 14)) + plt.figure(figsize=(6, 6)) for i, dataset in enumerate(DATASETS): values = [map(float, line.strip().split("\t")) for line in open(dataset + "_performance.txt")] a, im, rd, rdf, aps = zip(*values) a, im, rd, rdf, aps = [map(mq, l) for l in (a, im, rd, rdf, aps)] a = np.arange(0, 1.001, 0.1) - ax = plt.subplot(5, 2, i + 1) + ax = plt.subplot(2, 2, i + 1) #ax.set_yscale("log") - plt.plot(a, im, label="Max deg.") - plt.plot(a, rd, label="Rand.") - plt.plot(a, rdf, label="Rand. friend") + plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0)) + plt.plot(a, im, label="Inf. Max") + plt.plot(a, rd, label="Rand. Node") + plt.plot(a, rdf, label="Rand. Friend") plt.plot(a, aps, label="Adapt. Seeding") - plt.xlabel("Budget (fraction of the total number of users)") + plt.xlabel("Budget") plt.ylabel("Performance") + titl = dataset if dataset == "sw": - titl = "SmallWord" - if dataset == "coachella": - titl = "Conf. Model" + titl = "SmallWorld" + #if dataset == "coachella": + # titl = "Conf. Model" if dataset == "kk": titl = "Kronecker" if dataset == "b-a": @@ -58,7 +117,7 @@ def plot_all_performances(): plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0, 0, 1, 1.03), bbox_transform=plt.gcf().transFigure) plt.tight_layout() - plt.savefig("test2.pdf") + plt.savefig("perf10.pdf") def compare_performance(fn): @@ -81,7 +140,7 @@ def compare_performance(fn): def compare_performance2(fn): plots = {} - plt.figure() + plt.figure(figsize=(5, 3)) for dataset in DATASETS: values = [map(float, line.strip().split("\t")) for line in open(dataset + "_performance.txt")] @@ -97,7 +156,7 @@ def compare_performance2(fn): width = 0.35 plt.bar(ind, means, width, linewidth=0.1) plt.errorbar([i + width / 2. for i in ind], means, [mini, maxi], elinewidth=1.2, fmt="none") - plt.xticks([i + width / 2. for i in ind], a[1:]) + plt.xticks([i + width / 2. for i in ind], [100, 150, 200, 250, 300, 350, 400, 450, 500, 550]) plt.xlim(-width, len(ind) - 1 + 2 * width) plt.xlabel("Budget") plt.ylabel("Relative improvement") @@ -116,7 +175,7 @@ def compare_dist(): sd.append(np.mean(sd_degrees)) ind = range(len(DATASETS)) width = 0.35 - plt.bar(ind, fd, width, label="Initial users", color=next(cm)) + plt.bar(ind, fd, width, label="Core users", color=next(cm)) plt.bar([i + width for i in ind], sd, width, label="Friends", color=next(cm)) plt.xlim(-width, len(ind) - 1 + 3 * width) @@ -128,6 +187,7 @@ def compare_dist(): def plot_perf_prob(): plt.figure() + rcParams["font.size"] = 10 with open("peet_performance_p.txt") as f: values = [map(float, line.strip().split("\t")) for line in f] values = zip(*values) @@ -138,36 +198,38 @@ def plot_perf_prob(): with open("peet_performance.txt") as f: values = [map(float, line.strip().split("\t")) for line in f] values = zip(*values) - plt.gca().set_yscale("log") + #plt.gca().set_yscale("log") + plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0)) plt.xlabel("Budget") plt.ylabel("Performance") - plt.plot(values[0], values[1], label="Max. degree") - plt.legend(loc="lower right", fontsize="small", ncol=2) + plt.plot(values[0], values[1], label="Inf. Max.") + plt.legend(loc="upper left", fontsize="small", ncol=2) xlim(xmax=450) plt.savefig("prob.pdf") def plot_hbo_likes(): plt.figure() - rcParams["font.size"] = 6 + rcParams["font.size"] = 10 with open("hbo_likes_performance.txt") as f: values = [map(float, line.strip().split("\t")) for line in f] a, im, aps, apso = zip(*values) a = np.arange(0, 1.001, 0.1) - plt.gca().set_yscale("log") - #plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0)) - plt.plot(a, map(mq, im), label="Max. degr.") - plt.plot(a, map(mq, aps), label="Adapt. seed. (rest.)") + #plt.gca().set_yscale("log") + plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0)) + plt.plot(a, map(mq, im), label="Inf. Max.") + plt.plot(a, map(mq, aps), label="Adapt. seed. (subgraph)") plt.plot(a, map(mq, apso), label="Adapt. seed.") plt.xlabel("Budget") plt.ylabel("Performance") xlim(xmax=1.1) - plt.legend(loc="lower right") + plt.legend(loc="upper left") plt.savefig("hbo_likes.pdf") def plot_3d(): - for dist in ["beta", "gauss"]: + rcParams["font.size"] = 7 + for dist in ["beta", "gauss", "power", "deg"]: fig = plt.figure() with open("coachella_performance_p_" + dist + ".txt") as f: values = [map(float, line.strip().split("\t")) for line in f] @@ -180,7 +242,7 @@ def plot_3d(): ax.plot_wireframe(x, y, perfs, linewidth=0.1) ticklabel_format(style='sci', axis='z', scilimits=(0, 0)) xlabel("Budget (fraction of nodes)") - ylabel("Distribution mean") + ylabel("Mean") ax.set_zlabel("Performance") ax.invert_xaxis() plt.savefig(dist + ".pdf") @@ -232,8 +294,8 @@ def plot_time(): if __name__ == "__main__": SYNTH_DATASETS = ["b-a", "kk", "sw", "coachella"] - DATASETS = SYNTH_DATASETS - plot_all_performances() + DATASETS = ["lp", "gp", "google", "coachella"] + #plot_all_performances() #plot_3d() #plot_hbo_likes() #compare_performance() @@ -243,5 +305,6 @@ if __name__ == "__main__": #plot_degree_distributions() #for style in plt.style.available: # plt.style.use(style) - # compare_performance("performance_" + style + ".pdf") - #compare_performance2("comp4_" + ".pdf") + # compare_performance2("comp4_" + style + ".pdf") + sampling() + #voter() -- cgit v1.2.3-70-g09d2