diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2014-10-24 12:16:51 -0400 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2014-10-24 12:16:51 -0400 |
| commit | ece1d828d53d6123fcecb5ea8bf9b126d1728ccc (patch) | |
| tree | b669382d0e5f1234556d1aeb7fa919891510b24d /facebook_analysis/seed.py | |
| parent | 7426d8ff0e7969eb1a86bdb5bec8a0c971309e2b (diff) | |
| download | fast-seeding-ece1d828d53d6123fcecb5ea8bf9b126d1728ccc.tar.gz | |
Add code
Diffstat (limited to 'facebook_analysis/seed.py')
| -rw-r--r-- | facebook_analysis/seed.py | 247 |
1 files changed, 247 insertions, 0 deletions
"""Plotting script for the seeding experiments (facebook_analysis/seed.py).

Each ``plot_*`` / ``compare_*`` function reads precomputed, tab-separated
result files from the current working directory and writes one PDF figure.
Graph construction is delegated to the sibling ``analyze`` module.
"""
from analyze import sd_users, build_graph, DATASETS, SYNTH_DATASETS
import matplotlib.pyplot as plt
from matplotlib import rcParams, cm
from matplotlib.colors import Normalize
from matplotlib.pyplot import plot, legend, savefig, xlabel, ylabel,\
    hist, title, subplot, tight_layout, ticklabel_format, xlim, ylim
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import itertools


def mq(x):
    """Scale a raw value by 4 before plotting.

    NOTE(review): the meaning of the factor 4 is not visible in this file.
    """
    return x * 4


def _read_table(path):
    """Return the rows of a tab-separated file as lists of floats.

    The file is closed before returning (the original code leaked the
    handle by calling ``open`` inside a comprehension).
    """
    with open(path) as f:
        return [[float(field) for field in line.strip().split("\t")]
                for line in f]


def _read_columns(path):
    """Return the columns of a tab-separated float file as a list of tuples.

    A real list is returned (not a lazy ``zip``) so that callers can index
    and slice it on Python 3 as well as Python 2.
    """
    return list(zip(*_read_table(path)))


# Subplot titles for the datasets known to plot_all_performances.
# NOTE(review): "SmallWord" is probably a typo for "SmallWorld"; the string
# is kept unchanged because it is rendered in the figure.
_DATASET_TITLES = {
    "sw": "SmallWord",
    "coachella": "Conf. Model",
    "kk": "Kronecker",
    "b-a": "Barabasi-Albert",
}


def plot_degree_distributions():
    """Plot cumulative degree histograms for the "kiva" graph.

    Two histograms are overlaid: the degrees of the users in ``graph``
    ("Initial users") and the degrees of ``sd_users(graph)`` ("Friends").
    The figure is saved to ``dist.pdf``.
    """
    plt.figure(figsize=(7, 3))
    graph, degrees = build_graph("kiva")
    fd_degrees = [degrees[user] for user in graph]
    sd_degrees = [degrees[user] for user in sd_users(graph)]
    # NOTE(review): `normed` was removed from newer matplotlib releases in
    # favour of `density`; kept for the matplotlib version this script was
    # written against -- confirm the target version.
    plt.hist(fd_degrees, bins=50, cumulative=True,
             label="Initial users", normed=True,
             alpha=0.5, histtype="stepfilled")
    plt.hist(sd_degrees, bins=50, cumulative=True,
             histtype="stepfilled", normed=True, alpha=0.5,
             label="Friends")
    ylim(ymax=1.1)
    plt.xlabel("Degree")
    plt.ylabel("Probability")
    plt.legend(loc="lower right")
    plt.savefig("dist.pdf")


def plot_all_performances():
    """Plot the four seeding strategies for every dataset in ``DATASETS``.

    For each dataset, ``<dataset>_performance.txt`` must contain five
    tab-separated columns; the last four are plotted as "Max deg.",
    "Rand.", "Rand. friend" and "Adapt. Seeding" after scaling with ``mq``.
    The first column is ignored: the x axis is fixed to the 11 budget
    fractions 0, 0.1, ..., 1. The figure is saved to ``test2.pdf``.
    """
    plt.figure(figsize=(7, 14))
    budgets = np.arange(0, 1.001, 0.1)
    for i, dataset in enumerate(DATASETS):
        _, im, rd, rdf, aps = _read_columns(dataset + "_performance.txt")
        im, rd, rdf, aps = [[mq(v) for v in column]
                            for column in (im, rd, rdf, aps)]
        plt.subplot(5, 2, i + 1)
        plt.plot(budgets, im, label="Max deg.")
        plt.plot(budgets, rd, label="Rand.")
        plt.plot(budgets, rdf, label="Rand. friend")
        plt.plot(budgets, aps, label="Adapt. Seeding")
        plt.xlabel("Budget (fraction of the total number of users)")
        plt.ylabel("Performance")
        # Fall back to the raw dataset name: previously an unknown dataset
        # raised UnboundLocalError or silently reused the previous title.
        plt.title(_DATASET_TITLES.get(dataset, dataset))
        xlim(xmax=1.1)
    # One legend for the whole figure, positioned in figure coordinates.
    plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0, 0, 1, 1.03),
               bbox_transform=plt.gcf().transFigure)
    plt.tight_layout()
    plt.savefig("test2.pdf")


def compare_performance(fn):
    """Plot, per dataset, the ratio of adaptive seeding to max degree.

    The ratio is the fifth column of ``<dataset>_performance.txt`` divided
    by the second one; the first row of each file is skipped. The x values
    are the first column of the last dataset read, scaled with ``mq``.

    fn -- path of the output figure.
    """
    plots = {}
    plt.figure()
    for dataset in DATASETS:
        a, im, rd, rdf, aps = _read_columns(dataset + "_performance.txt")
        plots[dataset] = [j * 1. / i
                          for (j, i) in list(zip(aps, im))[1:]]
        a = [mq(v) for v in a]
    for dataset in DATASETS:
        plt.plot(a[1:], plots[dataset], label=dataset)
    xlim(xmax=550)
    plt.xlabel("Budget")
    plt.ylabel("Performance")
    plt.legend(loc="lower right", ncol=2, fontsize="small")
    plt.savefig(fn)


def compare_performance2(fn):
    """Bar chart of the mean adaptive-seeding / max-degree ratio per budget.

    Ratios are computed as in ``compare_performance``. For every budget the
    bar shows the mean over all datasets and the error bar spans the
    minimum and maximum ratio.

    fn -- path of the output figure.
    """
    plots = {}
    plt.figure()
    for dataset in DATASETS:
        a, im, rd, rdf, aps = _read_columns(dataset + "_performance.txt")
        plots[dataset] = [j * 1. / i
                          for (j, i) in list(zip(aps, im))[1:]]
        a = [int(mq(v)) for v in a]
    # One tuple per budget, holding the ratio of every dataset.
    per_budget = list(zip(*plots.values()))
    means = [np.mean(w) for w in per_budget]
    # errorbar() interprets yerr as offsets from the data point, so pass
    # the distances to the extremes rather than the extremes themselves.
    lower = [m - np.min(w) for m, w in zip(means, per_budget)]
    upper = [np.max(w) - m for m, w in zip(means, per_budget)]
    tick_labels = a[1:]
    ind = list(range(len(tick_labels)))
    width = 0.35
    centers = [i + width / 2. for i in ind]
    plt.bar(ind, means, width, linewidth=0.1)
    plt.errorbar(centers, means, [lower, upper], elinewidth=1.2, fmt="none")
    plt.xticks(centers, tick_labels)
    plt.xlim(-width, len(ind) - 1 + 2 * width)
    plt.xlabel("Budget")
    plt.ylabel("Relative improvement")
    plt.savefig(fn)


def compare_dist():
    """Bar chart of the average degree of initial users vs. friends.

    For every dataset in ``DATASETS`` the mean degree over the users of the
    graph and over ``sd_users(graph)`` is drawn side by side. The figure is
    saved to ``para.pdf``.
    """
    fd, sd = [], []
    plt.figure(figsize=(5, 3))
    # NOTE(review): "axes.color_cycle" was replaced by "axes.prop_cycle" in
    # newer matplotlib releases -- confirm the target version.
    colors = iter(rcParams["axes.color_cycle"])
    for dataset in DATASETS:
        graph, degrees = build_graph(dataset)
        fd.append(np.mean([degrees[user] for user in graph]))
        sd.append(np.mean([degrees[user] for user in sd_users(graph)]))
    ind = list(range(len(DATASETS)))
    width = 0.35
    plt.bar(ind, fd, width, label="Initial users", color=next(colors))
    plt.bar([i + width for i in ind], sd, width, label="Friends",
            color=next(colors))
    plt.xlim(-width, len(ind) - 1 + 3 * width)
    plt.xticks([i + width for i in ind], DATASETS)
    plt.ylabel("Avg. degree")
    plt.legend()
    plt.savefig("para.pdf")


def plot_perf_prob():
    """Plot performance against budget for several values of ``p``.

    ``peet_performance_p.txt`` provides the x values in its first column
    and one column per probability; columns 0, 1, 2, 3, 5 and 9 of the
    probability list are drawn. The second column of
    ``peet_performance.txt`` is added as "Max. degree". The y axis is
    logarithmic and the figure is saved to ``prob.pdf``.
    """
    plt.figure()
    values = _read_columns("peet_performance_p.txt")
    probabilities = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    for i in [0, 1, 2, 3, 5, 9]:
        plt.plot(values[0], values[i + 1],
                 label="$p = " + str(probabilities[i]) + "$")
    plt.legend()
    values = _read_columns("peet_performance.txt")
    plt.gca().set_yscale("log")
    plt.xlabel("Budget")
    plt.ylabel("Performance")
    plt.plot(values[0], values[1], label="Max. degree")
    plt.legend(loc="lower right", fontsize="small", ncol=2)
    xlim(xmax=450)
    plt.savefig("prob.pdf")


def plot_hbo_likes():
    """Plot the three strategies stored in ``hbo_likes_performance.txt``.

    The file must have four tab-separated columns; the last three are
    scaled with ``mq`` and drawn on a logarithmic y axis against the 11
    budget fractions 0, 0.1, ..., 1. The figure is saved to
    ``hbo_likes.pdf``. Also sets the global matplotlib font size to 6.
    """
    plt.figure()
    rcParams["font.size"] = 6
    _, im, aps, apso = _read_columns("hbo_likes_performance.txt")
    budgets = np.arange(0, 1.001, 0.1)
    plt.gca().set_yscale("log")
    plt.plot(budgets, [mq(v) for v in im], label="Max. degr.")
    plt.plot(budgets, [mq(v) for v in aps], label="Adapt. seed. (rest.)")
    plt.plot(budgets, [mq(v) for v in apso], label="Adapt. seed.")
    plt.xlabel("Budget")
    plt.ylabel("Performance")
    xlim(xmax=1.1)
    plt.legend(loc="lower right")
    plt.savefig("hbo_likes.pdf")


def plot_3d():
    """Draw performance wireframes for the "beta" and "gauss" files.

    For each distribution, ``coachella_performance_p_<dist>.txt`` is read,
    the first column of every row is dropped and the remaining values are
    drawn over a (budget, distribution mean) grid. Each figure is saved to
    ``<dist>.pdf`` and then shown.
    """
    for dist in ["beta", "gauss"]:
        fig = plt.figure()
        values = _read_table("coachella_performance_p_" + dist + ".txt")
        k = np.arange(0, 1.001, 0.1)
        ps = np.arange(0.01, 0.99, 0.1)
        x, y = np.meshgrid(k, ps)
        # Transpose so that rows follow `ps` and columns follow `k`.
        perfs = np.array(list(zip(*[row[1:] for row in values])))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_wireframe(x, y, perfs, linewidth=0.1)
        ticklabel_format(style='sci', axis='z', scilimits=(0, 0))
        xlabel("Budget (fraction of nodes)")
        ylabel("Distribution mean")
        ax.set_zlabel("Performance")
        ax.invert_xaxis()
        plt.savefig(dist + ".pdf")
        plt.show()


def _time_panel(position, comb, lp, column, y_label):
    """Draw one panel of the 2x2 timing figure.

    position -- index of the panel in the 2x2 grid (1-based).
    comb, lp -- 2-D arrays whose first column holds the x values.
    column   -- index of the column plotted on the y axis.
    y_label  -- label of the y axis.
    """
    subplot(2, 2, position)
    plot(comb[:, 0], comb[:, column], "-", label="Comb.")
    plot(lp[:, 0], lp[:, column], "-", label="LP")
    ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    xlabel("n")
    ylabel(y_label)
    xlim(0, 100000)
    legend(loc="upper left")


def plot_time():
    """Compare the "Comb." and "LP" measurements in a 2x2 figure.

    The top row uses ``time_aps_100.txt`` / ``time_lp_100.txt`` and the
    bottom row ``time_aps_500.txt`` / ``time_lp_500.txt``. The left panels
    plot column 1 ("time (s)"), the right panels column 2 ("# cycles"),
    both against column 0. The figure is saved to ``time.pdf``. Also sets
    the global matplotlib font size to 6.
    """
    plt.figure()
    rcParams["font.size"] = 6
    a1 = np.loadtxt("time_aps_100.txt")
    a2 = np.loadtxt("time_aps_500.txt")
    lp1 = np.loadtxt("time_lp_100.txt")
    lp2 = np.loadtxt("time_lp_500.txt")
    # Raw string: same value as before, without the invalid "\#" escape.
    cycles_label = r"\# cycles"
    _time_panel(1, a1, lp1, 1, "time (s)")
    _time_panel(2, a1, lp1, 2, cycles_label)
    _time_panel(3, a2, lp2, 1, "time (s)")
    _time_panel(4, a2, lp2, 2, cycles_label)
    tight_layout(h_pad=-0.5)
    savefig("time.pdf")


if __name__ == "__main__":
    # When run as a script, restrict the plots to the synthetic datasets.
    SYNTH_DATASETS = ["b-a", "kk", "sw", "coachella"]
    DATASETS = SYNTH_DATASETS
    plot_all_performances()
    #plot_3d()
    #plot_hbo_likes()
    #compare_performance()
    #plot_perf_prob()
    #compare_dist()
    #plot_time()
    #plot_degree_distributions()
    #for style in plt.style.available:
    #    plt.style.use(style)
    #    compare_performance("performance_" + style + ".pdf")
    #compare_performance2("comp4_" + ".pdf")
