Add code

author: Thibaut Horel <thibaut.horel@gmail.com> 2014-10-24 12:16:51 -0400
committer: Thibaut Horel <thibaut.horel@gmail.com> 2014-10-24 12:16:51 -0400
commit: ece1d828d53d6123fcecb5ea8bf9b126d1728ccc (patch)
tree: b669382d0e5f1234556d1aeb7fa919891510b24d /facebook_analysis/seed.py
parent: 7426d8ff0e7969eb1a86bdb5bec8a0c971309e2b (diff)
download: fast-seeding-ece1d828d53d6123fcecb5ea8bf9b126d1728ccc.tar.gz
1 files changed, 247 insertions, 0 deletions
diff --git a/facebook_analysis/seed.py b/facebook_analysis/seed.py
new file mode 100644
index 0000000..7e2b851
--- /dev/null
+++ b/facebook_analysis/seed.py
@@ -0,0 +1,247 @@
+from analyze import sd_users, build_graph, DATASETS, SYNTH_DATASETS
+import matplotlib.pyplot as plt
+from matplotlib import rcParams, cm
+from matplotlib.colors import Normalize
+from matplotlib.pyplot import plot, legend, savefig, xlabel, ylabel,\
+    hist, title, subplot, tight_layout, ticklabel_format, xlim, ylim
+from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
+import itertools
+
+mq = lambda x: x * 4  # scales values parsed from *_performance.txt files
+
+
+def plot_degree_distributions():
+    """Save cumulative degree histograms for the "kiva" graph as dist.pdf."""
+    plt.figure(figsize=(7, 3))
+    graph, degrees = build_graph("kiva")
+    # One series per node set: the graph's own nodes, then sd_users(graph).
+    series = [("Initial users", [degrees[u] for u in graph]),
+              ("Friends", [degrees[u] for u in sd_users(graph)])]
+    for name, sample in series:
+        plt.hist(sample, bins=50, cumulative=True, normed=True, alpha=0.5,
+                 histtype="stepfilled", label=name)
+    # Leave some headroom above the plotted values.
+    ylim(ymax=1.1)
+    plt.xlabel("Degree")
+    plt.ylabel("Probability")
+    plt.legend(loc="lower right")
+    plt.savefig("dist.pdf")
+
+
+def plot_all_performances():
+    """Plot the four performance curves of every dataset; save to test2.pdf.
+
+    For each name in DATASETS, reads "<name>_performance.txt" (tab-separated
+    floats, five columns), scales the values with mq and draws columns 2-5
+    against a fixed 0..1 budget axis in one subplot of a 5x2 grid.
+    """
+    # Unknown datasets fall back to their own name instead of a stale title.
+    titles = {"sw": "Small-World", "coachella": "Conf. Model",
+              "kk": "Kronecker", "b-a": "Barabasi-Albert"}
+    labels = ("Max deg.", "Rand.", "Rand. friend", "Adapt. Seeding")
+    budget = np.arange(0, 1.001, 0.1)
+    plt.figure(figsize=(7, 14))
+    for i, dataset in enumerate(DATASETS):
+        with open(dataset + "_performance.txt") as f:
+            rows = [[float(x) for x in line.strip().split("\t")]
+                    for line in f]
+        # The first column is unused: the x axis is the fixed budget grid.
+        _, im, rd, rdf, aps = zip(*rows)
+        plt.subplot(5, 2, i + 1)
+        for column, label in zip((im, rd, rdf, aps), labels):
+            plt.plot(budget, [mq(v) for v in column], label=label)
+        plt.xlabel("Budget (fraction of the total number of users)")
+        plt.ylabel("Performance")
+        plt.title(titles.get(dataset, dataset))
+        xlim(xmax=1.1)
+    plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0, 0, 1, 1.03),
+               bbox_transform=plt.gcf().transFigure)
+    plt.tight_layout()
+    plt.savefig("test2.pdf")
+
+
+def compare_performance(fn):
+    """Plot, per dataset, the ratio of column 5 to column 2; save to fn."""
+    plt.figure()
+    for dataset in DATASETS:
+        # X values are column 1 scaled by mq; the first row is skipped.
+        with open(dataset + "_performance.txt") as f:  # closed after parsing
+            rows = [[float(x) for x in line.strip().split("\t")]
+                    for line in f][1:]
+        # Each dataset is drawn against its own budget column.
+        plt.plot([mq(r[0]) for r in rows], [r[4] / r[1] for r in rows],
+                 label=dataset)
+    xlim(xmax=550)
+    plt.xlabel("Budget")
+    plt.ylabel("Performance")
+    plt.legend(loc="lower right", ncol=2, fontsize="small")
+    plt.savefig(fn)
+
+
+def compare_performance2(fn):
+    """Bar-plot the mean column-5/column-2 ratio per budget; save to fn.
+
+    Whiskers span the smallest and largest ratio across DATASETS.
+    """
+    plt.figure()
+    tables = [np.loadtxt(dataset + "_performance.txt", ndmin=2)[1:]
+              for dataset in DATASETS]
+    ratios = np.array([t[:, 4] / t[:, 1] for t in tables])
+    # Tick labels come from the first column of the last file, scaled by mq.
+    budgets = [int(mq(b)) for b in tables[-1][:, 0]]
+    means = ratios.mean(axis=0)
+    # errorbar expects distances from the data point, not absolute bounds.
+    spread = [means - ratios.min(axis=0), ratios.max(axis=0) - means]
+    ind = np.arange(len(budgets))
+    width = 0.35
+    plt.bar(ind, means, width, linewidth=0.1)
+    plt.errorbar(ind + width / 2., means, spread, elinewidth=1.2, fmt="none")
+    plt.xticks(ind + width / 2., budgets)
+    plt.xlim(-width, len(ind) - 1 + 2 * width)
+    plt.xlabel("Budget")
+    plt.ylabel("Relative improvement")
+    plt.savefig(fn)
+
+
+def compare_dist():
+    """Save to para.pdf paired bars of the mean degree for each dataset."""
+    plt.figure(figsize=(5, 3))
+    colors = iter(rcParams["axes.color_cycle"])
+    mean_fd, mean_sd = [], []
+    # First series: nodes of the graph; second: nodes given by sd_users.
+    for name in DATASETS:
+        graph, degrees = build_graph(name)
+        mean_fd.append(np.mean([degrees[u] for u in graph]))
+        mean_sd.append(np.mean([degrees[u] for u in sd_users(graph)]))
+    bar_w = 0.35
+    left = range(len(DATASETS))
+    shifted = [pos + bar_w for pos in left]
+    plt.bar(left, mean_fd, bar_w, label="Initial users", color=next(colors))
+    plt.bar(shifted, mean_sd, bar_w, label="Friends", color=next(colors))
+    plt.xlim(-bar_w, len(left) - 1 + 3 * bar_w)
+    plt.xticks(shifted, DATASETS)
+    plt.ylabel("Avg. degree")
+    plt.legend()
+    plt.savefig("para.pdf")
+
+
+def plot_perf_prob():
+    """Plot performance against budget for several p values into prob.pdf."""
+    plt.figure()
+    with open("peet_performance_p.txt") as f:
+        rows = [[float(x) for x in line.strip().split("\t")] for line in f]
+    cols = list(zip(*rows))
+    for k, p in ((0, 0.01), (1, 0.1), (2, 0.2), (3, 0.3), (5, 0.5), (9, 0.9)):
+        plt.plot(cols[0], cols[k + 1], label="$p = " + str(p) + "$")
+    plt.legend()
+    with open("peet_performance.txt") as f:
+        rows = [[float(x) for x in line.strip().split("\t")] for line in f]
+    cols = list(zip(*rows))
+    plt.gca().set_yscale("log")
+    plt.xlabel("Budget")
+    plt.ylabel("Performance")
+    plt.plot(cols[0], cols[1], label="Max. degree")
+    plt.legend(loc="lower right", fontsize="small", ncol=2)
+    xlim(xmax=450)
+    plt.savefig("prob.pdf")
+
+
+def plot_hbo_likes():
+    """Plot the curves of hbo_likes_performance.txt into hbo_likes.pdf."""
+    plt.figure()
+    rcParams["font.size"] = 6  # global setting, not restored afterwards
+    with open("hbo_likes_performance.txt") as f:
+        rows = [[float(x) for x in line.strip().split("\t")] for line in f]
+    _, im, aps, apso = zip(*rows)  # the first column is not plotted
+    budget = np.arange(0, 1.001, 0.1)
+    plt.gca().set_yscale("log")
+    names = ("Max. degr.", "Adapt. seed. (rest.)", "Adapt. seed.")
+    for column, name in zip((im, aps, apso), names):
+        plt.plot(budget, [mq(v) for v in column], label=name)
+    plt.xlabel("Budget")
+    plt.ylabel("Performance")
+    xlim(xmax=1.1)
+    plt.legend(loc="lower right")
+    plt.savefig("hbo_likes.pdf")
+
+
+def plot_3d():
+    """Save and show a 3D performance wireframe for "beta" and "gauss"."""
+    budgets = np.arange(0, 1.001, 0.1)
+    dist_means = np.arange(0.01, 0.99, 0.1)
+    for dist in ("beta", "gauss"):
+        fig = plt.figure()
+        with open("coachella_performance_p_" + dist + ".txt") as f:
+            rows = [[float(x) for x in line.strip().split("\t")] for line in f]
+        grid_x, grid_y = np.meshgrid(budgets, dist_means)
+        surface = list(zip(*[row[1:] for row in rows]))
+        axes = fig.add_subplot(111, projection='3d')
+        axes.plot_wireframe(grid_x, grid_y, surface, linewidth=0.1)
+        ticklabel_format(style='sci', axis='z', scilimits=(0, 0))
+        xlabel("Budget (fraction of nodes)")
+        ylabel("Distribution mean")
+        axes.set_zlabel("Performance")
+        axes.invert_xaxis()
+        plt.savefig(dist + ".pdf")
+        plt.show()
+
+
+def _plot_time_panel(index, comb, lp, column, label):
+    """Draw one panel of the 2x2 grid.
+
+    Plots `column` of `comb` and of `lp` against their column 0 in subplot
+    number `index`, using `label` as the y-axis label.
+    """
+    subplot(2, 2, index)
+    plot(comb[:, 0], comb[:, column], "-", label="Comb.")
+    plot(lp[:, 0], lp[:, column], "-", label="LP")
+    ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
+    xlabel("n")
+    ylabel(label)
+    xlim(0, 100000)
+    legend(loc="upper left")
+
+
+def plot_time():
+    """Plot the timing comparison "Comb." vs "LP" as a 2x2 grid in time.pdf.
+
+    Rows of the grid correspond to the "_100" and "_500" input files;
+    columns show column 1 ("time (s)") and column 2 ("# cycles") of each
+    file against column 0 ("n").
+
+    Note: this sets rcParams["font.size"] to 6 and does not restore it.
+    """
+    plt.figure()
+    rcParams["font.size"] = 6
+    # Raw string: "\#" is not a valid escape sequence in a plain literal.
+    # The backslash itself is kept as part of the label text.
+    measures = ((1, "time (s)"), (2, r"\# cycles"))
+    panel = 0
+    # One grid row per file suffix, one grid column per measure.
+    for suffix in ("100", "500"):
+        comb = np.loadtxt("time_aps_" + suffix + ".txt")
+        lp = np.loadtxt("time_lp_" + suffix + ".txt")
+        for column, label in measures:
+            panel += 1
+            _plot_time_panel(panel, comb, lp, column, label)
+    # Negative h_pad pulls the two rows of panels closer together.
+    tight_layout(h_pad=-0.5)
+    savefig("time.pdf")
+
+
+if __name__ == "__main__":
+    SYNTH_DATASETS = ["b-a", "kk", "sw", "coachella"]  # overrides the import
+    DATASETS = SYNTH_DATASETS  # global read by the plot functions above
+    plot_all_performances()  # the only figure currently enabled
+    #plot_3d()
+    #plot_hbo_likes()
+    #compare_performance()  # NOTE: needs the output file name as argument
+    #plot_perf_prob()
+    #compare_dist()
+    #plot_time()
+    #plot_degree_distributions()
+    #for style in plt.style.available:
+    #    plt.style.use(style)
+    #    compare_performance("performance_" + style + ".pdf")
+    #compare_performance2("comp4_" + ".pdf")
author	Thibaut Horel <thibaut.horel@gmail.com>	2014-10-24 12:16:51 -0400
committer	Thibaut Horel <thibaut.horel@gmail.com>	2014-10-24 12:16:51 -0400
commit	ece1d828d53d6123fcecb5ea8bf9b126d1728ccc (patch)
tree	b669382d0e5f1234556d1aeb7fa919891510b24d /facebook_analysis/seed.py
parent	7426d8ff0e7969eb1a86bdb5bec8a0c971309e2b (diff)
download	fast-seeding-ece1d828d53d6123fcecb5ea8bf9b126d1728ccc.tar.gz