From 36eb1fee5492e57368846cbf4e107f1e4cb31589 Mon Sep 17 00:00:00 2001
From: Thibaut Horel <thibaut.horel@gmail.com>
Date: Sat, 22 Nov 2014 21:08:41 -0500
Subject: WWW version

---
 facebook_analysis/seed.py | 125 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 94 insertions(+), 31 deletions(-)

(limited to 'facebook_analysis/seed.py')

diff --git a/facebook_analysis/seed.py b/facebook_analysis/seed.py
index 7e2b851..cba45e1 100644
--- a/facebook_analysis/seed.py
+++ b/facebook_analysis/seed.py
@@ -1,23 +1,80 @@
 from analyze import sd_users, build_graph, DATASETS, SYNTH_DATASETS
 import matplotlib.pyplot as plt
 from matplotlib import rcParams, cm
-from matplotlib.colors import Normalize
 from matplotlib.pyplot import plot, legend, savefig, xlabel, ylabel,\
     hist, title, subplot, tight_layout, ticklabel_format, xlim, ylim
 from mpl_toolkits.mplot3d import Axes3D
 import numpy as np
-import itertools
 
 mq = lambda x: x * 4
 
 
+def voter():
+    """Plot Epinions and Slashdot voter results; save voter.pdf.
+
+    Reads one float per line from <prefix>_voter.txt and
+    <prefix>_voter_influence.txt for each of the two datasets.
+    """
+    def read(name):
+        with open(name) as f:
+            return [float(line) for line in f]
+
+    panels = [(name, read(prefix + "_voter.txt"),
+               read(prefix + "_voter_influence.txt"))
+              for name, prefix in (("Epinions", "epinions"),
+                                   ("Slashdot", "slashdot"))]
+    plt.figure(figsize=(7, 3))
+    for i, (name, adaptive, inf_max) in enumerate(panels):
+        # x runs 1..N over the points read (was hard-coded to 1..50).
+        steps = range(1, len(adaptive) + 1)
+        plt.subplot(1, 2, i + 1)
+        plt.plot(steps, adaptive, label="Adapt. Seeding")
+        plt.plot(steps, inf_max, label="Inf. Max.")
+        plt.legend()
+        plt.title(name)
+        plt.xlabel("t")
+        plt.ylabel("Performance")
+    plt.savefig("voter.pdf")
+
+
+def sampling():
+    """Plot time and cycle counts against size; save sampling2.pdf.
+
+    Reads hbo_sampling.txt and hbo_sans_sampling.txt. Each non-blank line
+    holds three whitespace-separated fields, parsed as (int, float, float)
+    after commas are removed.
+    """
+    def read(name):
+        with open(name) as f:
+            rows = [line.replace(",", "").split() for line in f
+                    if line.strip()]
+        # Real lists (not map()) so a column can be plotted more than once.
+        return [[conv(row[i]) for row in rows]
+                for i, conv in enumerate((int, float, float))]
+
+    curves = [("Sampling based", read("hbo_sampling.txt")),
+              ("Comb. alg.", read("hbo_sans_sampling.txt"))]
+    plt.figure(figsize=(7, 3))
+    # Panel 1 plots column 1 (time), panel 2 plots column 2 (cycles).
+    for col, ylab in ((1, "Time (s)"), (2, r"\# Cycles")):
+        plt.subplot(1, 2, col)
+        plt.gca().set_yscale("log")
+        for label, data in curves:
+            # Each curve is drawn against the sizes from its own file.
+            plt.plot(data[0], data[col], label=label)
+        plt.xlabel("Size")
+        plt.ylabel(ylab)
+        plt.legend(loc="upper left")
+    plt.savefig("sampling2.pdf")
+
+
 def plot_degree_distributions():
     plt.figure(figsize=(7, 3))
     graph, degrees = build_graph("kiva")
     fd_degrees = list(degrees[user] for user in graph)
     sd_degrees = list(degrees[user] for user in sd_users(graph))
     n, bins, patches = plt.hist(fd_degrees, bins=50, cumulative=True,
-                                label="Initial users", normed=True,
+                                label="Core set", normed=True,
                                 alpha=0.5, histtype="stepfilled")
     n, bins, patches = plt.hist(sd_degrees, bins=50, cumulative=True,
                                 histtype="stepfilled", normed=True, alpha=0.5,
@@ -30,25 +87,27 @@ def plot_degree_distributions():
 
 
 def plot_all_performances():
-    plt.figure(figsize=(7, 14))
+    plt.figure(figsize=(6, 6))
     for i, dataset in enumerate(DATASETS):
         values = [map(float, line.strip().split("\t"))
                   for line in open(dataset + "_performance.txt")]
         a, im, rd, rdf, aps = zip(*values)
         a, im, rd, rdf, aps = [map(mq, l) for l in (a, im, rd, rdf, aps)]
         a = np.arange(0, 1.001, 0.1)
-        ax = plt.subplot(5, 2, i + 1)
+        ax = plt.subplot(2, 2, i + 1)
         #ax.set_yscale("log")
-        plt.plot(a, im, label="Max deg.")
-        plt.plot(a, rd, label="Rand.")
-        plt.plot(a, rdf, label="Rand. friend")
+        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+        plt.plot(a, im, label="Inf. Max")
+        plt.plot(a, rd, label="Rand. Node")
+        plt.plot(a, rdf, label="Rand. Friend")
         plt.plot(a, aps, label="Adapt. Seeding")
-        plt.xlabel("Budget (fraction of the total number of users)")
+        plt.xlabel("Budget")
         plt.ylabel("Performance")
+        titl = dataset
         if dataset == "sw":
-            titl = "SmallWord"
-        if dataset == "coachella":
-            titl = "Conf. Model"
+            titl = "SmallWorld"
+        #if dataset == "coachella":
+        #    titl = "Conf. Model"
         if dataset == "kk":
             titl = "Kronecker"
         if dataset == "b-a":
@@ -58,7 +117,7 @@ def plot_all_performances():
     plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0, 0, 1, 1.03),
                bbox_transform=plt.gcf().transFigure)
     plt.tight_layout()
-    plt.savefig("test2.pdf")
+    plt.savefig("perf10.pdf")
 
 
 def compare_performance(fn):
@@ -81,7 +140,7 @@ def compare_performance(fn):
 
 def compare_performance2(fn):
     plots = {}
-    plt.figure()
+    plt.figure(figsize=(5, 3))
     for dataset in DATASETS:
         values = [map(float, line.strip().split("\t"))
                   for line in open(dataset + "_performance.txt")]
@@ -97,7 +156,7 @@ def compare_performance2(fn):
     width = 0.35
     plt.bar(ind, means, width, linewidth=0.1)
     plt.errorbar([i + width / 2. for i in ind], means, [mini, maxi], elinewidth=1.2, fmt="none")
-    plt.xticks([i + width / 2. for i in ind], a[1:])
+    plt.xticks([i + width / 2. for i in ind], [100, 150, 200, 250, 300, 350, 400, 450, 500, 550])
     plt.xlim(-width, len(ind) - 1 + 2 * width)
     plt.xlabel("Budget")
     plt.ylabel("Relative improvement")
@@ -116,7 +175,7 @@ def compare_dist():
         sd.append(np.mean(sd_degrees))
     ind = range(len(DATASETS))
     width = 0.35
-    plt.bar(ind, fd, width, label="Initial users", color=next(cm))
+    plt.bar(ind, fd, width, label="Core users", color=next(cm))
     plt.bar([i + width for i in ind], sd, width, label="Friends",
             color=next(cm))
     plt.xlim(-width, len(ind) - 1 + 3 * width)
@@ -128,6 +187,7 @@ def compare_dist():
 
 def plot_perf_prob():
     plt.figure()
+    rcParams["font.size"] = 10
     with open("peet_performance_p.txt") as f:
         values = [map(float, line.strip().split("\t")) for line in f]
         values = zip(*values)
@@ -138,36 +198,38 @@ def plot_perf_prob():
     with open("peet_performance.txt") as f:
         values = [map(float, line.strip().split("\t")) for line in f]
         values = zip(*values)
-        plt.gca().set_yscale("log")
+        #plt.gca().set_yscale("log")
+        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
         plt.xlabel("Budget")
         plt.ylabel("Performance")
-        plt.plot(values[0], values[1], label="Max. degree")
-        plt.legend(loc="lower right", fontsize="small", ncol=2)
+        plt.plot(values[0], values[1], label="Inf. Max.")
+        plt.legend(loc="upper left", fontsize="small", ncol=2)
         xlim(xmax=450)
         plt.savefig("prob.pdf")
 
 
 def plot_hbo_likes():
     plt.figure()
-    rcParams["font.size"] = 6
+    rcParams["font.size"] = 10
     with open("hbo_likes_performance.txt") as f:
         values = [map(float, line.strip().split("\t")) for line in f]
     a, im, aps, apso = zip(*values)
     a = np.arange(0, 1.001, 0.1)
-    plt.gca().set_yscale("log")
-    #plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
-    plt.plot(a, map(mq, im), label="Max. degr.")
-    plt.plot(a, map(mq, aps), label="Adapt. seed. (rest.)")
+    #plt.gca().set_yscale("log")
+    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+    plt.plot(a, map(mq, im), label="Inf. Max.")
+    plt.plot(a, map(mq, aps), label="Adapt. seed. (subgraph)")
     plt.plot(a, map(mq, apso), label="Adapt. seed.")
     plt.xlabel("Budget")
     plt.ylabel("Performance")
     xlim(xmax=1.1)
-    plt.legend(loc="lower right")
+    plt.legend(loc="upper left")
     plt.savefig("hbo_likes.pdf")
 
 
 def plot_3d():
-    for dist in ["beta", "gauss"]:
+    rcParams["font.size"] = 7
+    for dist in ["beta", "gauss", "power", "deg"]:
         fig = plt.figure()
         with open("coachella_performance_p_" + dist + ".txt") as f:
             values = [map(float, line.strip().split("\t")) for line in f]
@@ -180,7 +242,7 @@ def plot_3d():
         ax.plot_wireframe(x, y, perfs, linewidth=0.1)
         ticklabel_format(style='sci', axis='z', scilimits=(0, 0))
         xlabel("Budget (fraction of nodes)")
-        ylabel("Distribution mean")
+        ylabel("Mean")
         ax.set_zlabel("Performance")
         ax.invert_xaxis()
         plt.savefig(dist + ".pdf")
@@ -232,8 +294,8 @@ def plot_time():
 
 if __name__ == "__main__":
     SYNTH_DATASETS = ["b-a", "kk", "sw", "coachella"]
-    DATASETS = SYNTH_DATASETS
-    plot_all_performances()
+    DATASETS = ["lp", "gp", "google", "coachella"]
+    #plot_all_performances()
     #plot_3d()
     #plot_hbo_likes()
     #compare_performance()
@@ -243,5 +305,6 @@ if __name__ == "__main__":
     #plot_degree_distributions()
     #for style in plt.style.available:
     #    plt.style.use(style)
-    #    compare_performance("performance_" + style + ".pdf")
-    #compare_performance2("comp4_" + ".pdf")
+    #    compare_performance2("comp4_" + style +  ".pdf")
+    sampling()
+    #voter()
-- 
cgit v1.2.3-70-g09d2