summary refs log tree commit diff stats
path: root/facebook_analysis/seed.py
diff options
context:
space:
mode:
author Thibaut Horel <thibaut.horel@gmail.com> 2014-10-24 12:16:51 -0400
committer Thibaut Horel <thibaut.horel@gmail.com> 2014-10-24 12:16:51 -0400
commit ece1d828d53d6123fcecb5ea8bf9b126d1728ccc (patch)
tree b669382d0e5f1234556d1aeb7fa919891510b24d /facebook_analysis/seed.py
parent 7426d8ff0e7969eb1a86bdb5bec8a0c971309e2b (diff)
download fast-seeding-ece1d828d53d6123fcecb5ea8bf9b126d1728ccc.tar.gz
Add code
Diffstat (limited to 'facebook_analysis/seed.py')
-rw-r--r-- facebook_analysis/seed.py 247
1 files changed, 247 insertions, 0 deletions
diff --git a/facebook_analysis/seed.py b/facebook_analysis/seed.py
new file mode 100644
index 0000000..7e2b851
--- /dev/null
+++ b/facebook_analysis/seed.py
@@ -0,0 +1,247 @@
+from analyze import sd_users, build_graph, DATASETS, SYNTH_DATASETS
+import matplotlib.pyplot as plt
+from matplotlib import rcParams, cm
+from matplotlib.colors import Normalize
+from matplotlib.pyplot import plot, legend, savefig, xlabel, ylabel,\
+ hist, title, subplot, tight_layout, ticklabel_format, xlim, ylim
+from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
+import itertools
+
def mq(x):
    """Return *x* multiplied by four.

    The plotting functions in this module apply it to columns read from
    the ``*_performance.txt`` files before drawing them.
    """
    return x * 4
+
+
def plot_degree_distributions():
    """Plot cumulative degree histograms for the "kiva" graph.

    Two normalized cumulative histograms are overlaid on one figure: the
    degrees of the users in the graph returned by ``build_graph``
    ("Initial users") and the degrees of the users returned by
    ``sd_users`` ("Friends").  The figure is written to ``dist.pdf``.
    """
    plt.figure(figsize=(7, 3))
    graph, degrees = build_graph("kiva")
    samples = (
        ("Initial users", [degrees[user] for user in graph]),
        ("Friends", [degrees[user] for user in sd_users(graph)]),
    )
    for label, sample in samples:
        # ``density`` is the replacement for the ``normed`` keyword, which
        # was removed from ``hist`` in Matplotlib 3.1.
        plt.hist(sample, bins=50, cumulative=True, density=True,
                 alpha=0.5, histtype="stepfilled", label=label)
    ylim(ymax=1.1)
    plt.xlabel("Degree")
    plt.ylabel("Probability")
    plt.legend(loc="lower right")
    plt.savefig("dist.pdf")
+
+
def plot_all_performances():
    """Draw one performance subplot per dataset and save ``test2.pdf``.

    For every name in ``DATASETS`` the tab-separated file
    ``<name>_performance.txt`` is read.  Each row must hold five floats;
    the first column is ignored and the remaining four are scaled with
    ``mq`` and plotted against a fixed budget axis running from 0 to 1 in
    steps of 0.1 (so the file is expected to contain 11 rows).
    """
    titles = {
        "sw": "SmallWord",
        "coachella": "Conf. Model",
        "kk": "Kronecker",
        "b-a": "Barabasi-Albert",
    }
    curve_labels = ("Max deg.", "Rand.", "Rand. friend", "Adapt. Seeding")
    budgets = np.arange(0, 1.001, 0.1)

    plt.figure(figsize=(7, 14))
    for i, dataset in enumerate(DATASETS):
        # Use a context manager so the file is closed deterministically.
        with open(dataset + "_performance.txt") as f:
            rows = [[float(field) for field in line.strip().split("\t")]
                    for line in f]
        _, im, rd, rdf, aps = zip(*rows)
        plt.subplot(5, 2, i + 1)
        for column, label in zip((im, rd, rdf, aps), curve_labels):
            # Build real lists: a lazy ``map`` object cannot be plotted
            # on Python 3.
            plt.plot(budgets, [mq(value) for value in column], label=label)
        plt.xlabel("Budget (fraction of the total number of users)")
        plt.ylabel("Performance")
        # Fall back to the raw dataset name instead of hitting an unbound
        # (or stale) title for datasets without a pretty name.
        plt.title(titles.get(dataset, dataset))
        xlim(xmax=1.1)
    # NOTE(review): the indentation of this file was lost; the single
    # figure-level legend is assumed to be drawn once, after the loop.
    plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0, 0, 1, 1.03),
               bbox_transform=plt.gcf().transFigure)
    plt.tight_layout()
    plt.savefig("test2.pdf")
+
+
def compare_performance(fn):
    """Plot, per dataset, the ratio of the ``aps`` to the ``im`` column.

    For every name in ``DATASETS`` the five-column, tab-separated file
    ``<name>_performance.txt`` is read and the element-wise ratio of its
    fifth column to its second column is computed, skipping the first row.
    All ratios are drawn on one figure against the first column (scaled
    with ``mq``) of the last dataset read.

    fn -- path of the output figure, passed to ``plt.savefig``.
    """
    ratios = {}
    budgets = []
    plt.figure()
    for dataset in DATASETS:
        # Use a context manager so the file is closed deterministically.
        with open(dataset + "_performance.txt") as f:
            rows = [[float(field) for field in line.strip().split("\t")]
                    for line in f]
        a, im, _, _, aps = zip(*rows)
        # Slice the columns rather than the ``zip`` result, which is not
        # subscriptable on Python 3.
        ratios[dataset] = [j * 1. / i for j, i in zip(aps[1:], im[1:])]
        budgets = [mq(value) for value in a]
    for dataset in DATASETS:
        plt.plot(budgets[1:], ratios[dataset], label=dataset)
    xlim(xmax=550)
    plt.xlabel("Budget")
    plt.ylabel("Performance")
    plt.legend(loc="lower right", ncol=2, fontsize="small")
    plt.savefig(fn)
+
+
def compare_performance2(fn):
    """Draw a bar chart of the mean ``aps``/``im`` ratio per budget.

    For every name in ``DATASETS`` the five-column, tab-separated file
    ``<name>_performance.txt`` is read and the ratio of its fifth column to
    its second column is computed, skipping the first row.  For each
    remaining row index the mean ratio across datasets is drawn as a bar,
    with an error bar spanning the minimum and maximum ratio.  Tick labels
    are the first column of the last dataset read, scaled with ``mq`` and
    truncated to ``int``.

    fn -- path of the output figure, passed to ``plt.savefig``.
    """
    ratios = {}
    budgets = []
    plt.figure()
    for dataset in DATASETS:
        # Use a context manager so the file is closed deterministically.
        with open(dataset + "_performance.txt") as f:
            rows = [[float(field) for field in line.strip().split("\t")]
                    for line in f]
        a, im, _, _, aps = zip(*rows)
        ratios[dataset] = [j * 1. / i for j, i in zip(aps[1:], im[1:])]
        budgets = [int(mq(value)) for value in a]

    # Materialize the transposed ratios: they are traversed several times,
    # which a one-shot Python 3 ``zip`` iterator would not allow.
    per_budget = list(zip(*ratios.values()))
    means = [np.mean(group) for group in per_budget]
    # ``errorbar`` interprets ``yerr`` as offsets from the data point, so
    # convert the absolute extrema into distances below/above the mean.
    below = [mean - np.min(group) for mean, group in zip(means, per_budget)]
    above = [np.max(group) - mean for mean, group in zip(means, per_budget)]

    tick_labels = budgets[1:]
    ind = list(range(len(tick_labels)))
    width = 0.35
    centers = [i + width / 2. for i in ind]
    # The ``i + width / 2`` centers assume left-edge-aligned bars, which
    # stopped being the default in Matplotlib 2.0; request it explicitly.
    plt.bar(ind, means, width, linewidth=0.1, align="edge")
    plt.errorbar(centers, means, [below, above], elinewidth=1.2, fmt="none")
    plt.xticks(centers, tick_labels)
    plt.xlim(-width, len(ind) - 1 + 2 * width)
    plt.xlabel("Budget")
    plt.ylabel("Relative improvement")
    plt.savefig(fn)
+
+
def compare_dist():
    """Compare average degrees of two user groups for every dataset.

    For each name in ``DATASETS`` the graph is built with ``build_graph``
    and the mean degree is computed over the users in the graph
    ("Initial users") and over the users returned by ``sd_users``
    ("Friends").  The two means are drawn as side-by-side bars and the
    figure is written to ``para.pdf``.
    """
    fd, sd = [], []
    plt.figure(figsize=(5, 3))
    # ``axes.color_cycle`` was removed in Matplotlib 2.0; fall back to the
    # colors of ``axes.prop_cycle`` when the old key is unavailable.
    try:
        palette = rcParams["axes.color_cycle"]
    except KeyError:
        palette = rcParams["axes.prop_cycle"].by_key()["color"]
    colors = iter(palette)

    for dataset in DATASETS:
        graph, degrees = build_graph(dataset)
        fd.append(np.mean([degrees[user] for user in graph]))
        sd.append(np.mean([degrees[user] for user in sd_users(graph)]))

    ind = list(range(len(DATASETS)))
    width = 0.35
    shifted = [i + width for i in ind]
    # The offsets assume left-edge-aligned bars, which stopped being the
    # default in Matplotlib 2.0; request that alignment explicitly.
    plt.bar(ind, fd, width, label="Initial users", color=next(colors),
            align="edge")
    plt.bar(shifted, sd, width, label="Friends", color=next(colors),
            align="edge")
    plt.xlim(-width, len(ind) - 1 + 3 * width)
    plt.xticks(shifted, DATASETS)
    plt.ylabel("Avg. degree")
    plt.legend()
    plt.savefig("para.pdf")
+
+
def plot_perf_prob():
    """Plot performance curves for several values of ``p`` on a log scale.

    Columns of ``peet_performance_p.txt`` (tab-separated floats) are
    plotted against that file's first column for a subset of the
    probabilities listed in ``a``.  The second column of
    ``peet_performance.txt`` is added as the "Max. degree" curve.  The
    figure is written to ``prob.pdf``.
    """
    plt.figure()
    with open("peet_performance_p.txt") as f:
        rows = [[float(field) for field in line.strip().split("\t")]
                for line in f]
    # Materialize the transposed table: the columns are indexed below,
    # which a Python 3 ``zip`` iterator does not support.
    columns = list(zip(*rows))
    a = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    for i in [0, 1, 2, 3, 5, 9]:
        # Column 0 is the x axis, so probability ``a[i]`` lives in
        # column ``i + 1``.
        plt.plot(columns[0], columns[i + 1],
                 label="$p = " + str(a[i]) + "$")
    plt.legend()
    with open("peet_performance.txt") as f:
        rows = [[float(field) for field in line.strip().split("\t")]
                for line in f]
    columns = list(zip(*rows))
    plt.gca().set_yscale("log")
    plt.xlabel("Budget")
    plt.ylabel("Performance")
    plt.plot(columns[0], columns[1], label="Max. degree")
    plt.legend(loc="lower right", fontsize="small", ncol=2)
    xlim(xmax=450)
    plt.savefig("prob.pdf")
+
+
def plot_hbo_likes():
    """Plot the three curves stored in ``hbo_likes_performance.txt``.

    Each row of the tab-separated file must hold four floats.  The first
    column is ignored; the other three are scaled with ``mq`` and plotted
    on a logarithmic y axis against a fixed budget axis running from 0 to
    1 in steps of 0.1 (so the file is expected to contain 11 rows).  The
    figure is written to ``hbo_likes.pdf``.

    Side effect: sets the global ``rcParams["font.size"]`` to 6.
    """
    plt.figure()
    rcParams["font.size"] = 6
    with open("hbo_likes_performance.txt") as f:
        rows = [[float(field) for field in line.strip().split("\t")]
                for line in f]
    _, im, aps, apso = zip(*rows)
    budgets = np.arange(0, 1.001, 0.1)
    plt.gca().set_yscale("log")
    curves = (
        (im, "Max. degr."),
        (aps, "Adapt. seed. (rest.)"),
        (apso, "Adapt. seed."),
    )
    for column, label in curves:
        # Build real lists: a lazy ``map`` object cannot be plotted on
        # Python 3.
        plt.plot(budgets, [mq(value) for value in column], label=label)
    plt.xlabel("Budget")
    plt.ylabel("Performance")
    xlim(xmax=1.1)
    plt.legend(loc="lower right")
    plt.savefig("hbo_likes.pdf")
+
+
def plot_3d():
    """Draw a 3-D wireframe of performance for two distributions.

    For ``dist`` in ``"beta"`` and ``"gauss"`` the tab-separated file
    ``coachella_performance_p_<dist>.txt`` is read.  The first column of
    every row is dropped and the remaining values are plotted as a surface
    over a budget axis (0 to 1 in steps of 0.1) and a distribution-mean
    axis (0.01 to 0.91 in steps of 0.1).  Each figure is saved to
    ``<dist>.pdf`` and then shown.
    """
    for dist in ["beta", "gauss"]:
        fig = plt.figure()
        with open("coachella_performance_p_" + dist + ".txt") as f:
            rows = [[float(field) for field in line.strip().split("\t")]
                    for line in f]
        k = np.arange(0, 1.001, 0.1)
        ps = np.arange(0.01, 0.99, 0.1)
        x, y = np.meshgrid(k, ps)
        # ``plot_wireframe`` needs a 2-D array for Z; transpose so that the
        # rows follow ``ps`` and the columns follow ``k``, matching the
        # shape produced by ``meshgrid``.
        perfs = np.array([row[1:] for row in rows]).T
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_wireframe(x, y, perfs, linewidth=0.1)
        ticklabel_format(style='sci', axis='z', scilimits=(0, 0))
        xlabel("Budget (fraction of nodes)")
        ylabel("Distribution mean")
        ax.set_zlabel("Performance")
        ax.invert_xaxis()
        plt.savefig(dist + ".pdf")
        # NOTE(review): the indentation of this file was lost; confirm
        # whether ``show`` belongs inside the loop or after it.
        plt.show()
+
+
def plot_time():
    """Plot the "Comb." and "LP" measurements in a 2x2 grid.

    Four whitespace-separated tables are loaded with ``np.loadtxt``:
    ``time_aps_{100,500}.txt`` ("Comb.") and ``time_lp_{100,500}.txt``
    ("LP").  Column 0 of each table is the x axis ("n"); column 1 is
    plotted as "time (s)" and column 2 as "\\# cycles".  The top row uses
    the ``*_100`` files and the bottom row the ``*_500`` files.  The
    figure is written to ``time.pdf``.

    Side effect: sets the global ``rcParams["font.size"]`` to 6.
    """
    plt.figure()
    rcParams["font.size"] = 6
    a1 = np.loadtxt("time_aps_100.txt")
    a2 = np.loadtxt("time_aps_500.txt")
    lp1 = np.loadtxt("time_lp_100.txt")
    lp2 = np.loadtxt("time_lp_500.txt")

    # Raw string: "\#" is not a valid escape sequence, and the backslash
    # must reach Matplotlib unchanged.
    cycles_label = r"\# cycles"
    # (combinatorial table, LP table, data column, y-axis label) for each
    # panel, in subplot order.
    panels = (
        (a1, lp1, 1, "time (s)"),
        (a1, lp1, 2, cycles_label),
        (a2, lp2, 1, "time (s)"),
        (a2, lp2, 2, cycles_label),
    )
    for position, (comb, lp, column, y_label) in enumerate(panels, 1):
        subplot(2, 2, position)
        plot(comb[:, 0], comb[:, column], "-", label="Comb.")
        plot(lp[:, 0], lp[:, column], "-", label="LP")
        ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
        xlabel("n")
        ylabel(y_label)
        xlim(0, 100000)
        legend(loc="upper left")
    tight_layout(h_pad=-0.5)
    savefig("time.pdf")
+
+
if __name__ == "__main__":
    # Rebind both module-level dataset lists to the four synthetic
    # datasets before plotting.
    DATASETS = SYNTH_DATASETS = ["b-a", "kk", "sw", "coachella"]
    plot_all_performances()
    # Other figures, currently disabled:
    #   plot_3d()
    #   plot_hbo_likes()
    #   compare_performance()
    #   plot_perf_prob()
    #   compare_dist()
    #   plot_time()
    #   plot_degree_distributions()
    #   for style in plt.style.available:
    #       plt.style.use(style)
    #       compare_performance("performance_" + style + ".pdf")
    #   compare_performance2("comp4_" + ".pdf")