4 files changed, 154 insertions, 38 deletions
diff --git a/facebook_analysis/Makefile b/facebook_analysis/Makefile
index e3df848..0b011a2 100644
--- a/facebook_analysis/Makefile
+++ b/facebook_analysis/Makefile
@@ -1,3 +1,7 @@
 all:
 	python2 setup.py build_ext --inplace
 	cython2 -a ads.pyx
+.PHONY: all clean  # neither target names a real file
+clean:  # -f: succeed even when some artifacts were never built
+	python2 setup.py clean
+	rm -f ads.c ads.html ads.so
diff --git a/facebook_analysis/ads.pyx b/facebook_analysis/ads.pyx
index 2682456..40d6391 100644
--- a/facebook_analysis/ads.pyx
+++ b/facebook_analysis/ads.pyx
@@ -91,14 +91,16 @@ cdef float merge_opt_p_sample(list l1, list l2, int t, dict degrees, float p):
     i = j = 0
     n = s = 0.
     cdef int k
+    cdef int l
     cdef dict a
     cdef float r
     k = 0
     r = 0
+    l = 0
     a = {}
-    for k in xrange(len(degrees)**2):
-        for d in degrees:
-            a[d] = random.random()
+    for k in xrange(t**2):
+        for l in xrange(t):
+            random.random()
         while n < t:
             if i == n1 and j == n2:
                 break
diff --git a/facebook_analysis/analyze.py b/facebook_analysis/analyze.py
index c5e6feb..9b7f893 100644
--- a/facebook_analysis/analyze.py
+++ b/facebook_analysis/analyze.py
@@ -10,10 +10,14 @@ import pulp
 import sys
 from random import seed, betavariate, normalvariate
 import matplotlib.pyplot as plt
+from scipy.sparse import coo_matrix
+from sklearn.preprocessing import normalize as nm
 
 DATA_DIR = "../facebook_data"
-DATASETS = ["hbo", "nyt",  "lp", "google", "lmpt", "gp", "kiva", "coachella",
-            "peet", "gap"]
+# NOTE(review): "lmpt" and "peet" (present in the previous 10-entry list)
+# are excluded below; the reason is not recorded -- confirm before re-adding.
+DATASETS = ["hbo", "nyt",  "lp", "google", "gp", "kiva", "coachella",
+            "gap"]
 SYNTH_DIR = "../apgl"
 SYNTH_DATASETS = ["b-a", "kk", "sw"]
 
@@ -74,6 +78,48 @@ def build_graph(dataset):
         return build_graph1(dataset)
 
 
+def build_graph3(dataset):
+    """Load `dataset`.txt into a row-normalized matrix and an adjacency dict.
+
+    Every line holds two integer node ids "u v".  Entries (u, v), (v, u)
+    and (u, u) of the matrix are set to 1 before each row is l1-normalized;
+    the dict maps u to the list of v found on its lines.
+    """
+    entries = {}
+    adjacency = {}
+    with open(dataset + ".txt") as f:
+        for line in f:
+            u, v = map(int, line.strip().split())
+            entries[u, v] = entries[v, u] = entries[u, u] = 1
+            adjacency.setdefault(u, []).append(v)
+    rows, cols = zip(*entries.keys())
+    m = coo_matrix((entries.values(), (rows, cols)), dtype="float")
+    m = nm(m, norm='l1', axis=1, copy=False)
+    return m, adjacency
+
+
+def voter(mat, node, t):
+    """Sum of the entries of mat^t applied to the indicator vector of `node`."""
+    size = mat.shape[0]
+    state = np.zeros(size)
+    state[node] = 1
+    for _ in xrange(t):
+        state = mat.dot(state)
+    return state.dot(np.ones(size))
+
+
+def influence_exp(dataset, size):
+    """Print im(graph, degrees, size) for t = 0..99 on `size` sampled users."""
+    mat, graph = build_graph3(dataset)
+    graph = {s: graph[s] for s in sample(graph.keys(), size)}
+    nodes = list(sd_users(graph)) + graph.keys()
+    for t in xrange(100):
+        # The voter() value of every sampled user and of every node returned
+        # by sd_users() at step count t is what gets passed as `degrees`.
+        degrees = {s: voter(mat, s, t) for s in nodes}
+        print im(graph, degrees, size)
+
+
 def print_graph(dataset):
     graph, degrees = build_graph(dataset)
     with open(dataset + "_single_graph.txt", "w") as f:
@@ -257,7 +303,7 @@ def lp_time():
 
 
 def aps_time():
-    graph, degrees = build_graph("big")
+    graph, degrees = build_graph("hbo")
     sp = sample(graph.keys(), int(sys.argv[2]))
     graph = {s: graph[s] for s in sp}
     a = int(sys.argv[1])
@@ -326,7 +372,7 @@ def stats():
 if __name__ == "__main__":
     #for dataset in SYNTH_DATASETS:
     #    compute_performance(dataset)
-    compute_performance_p("coachella", "deg")
+    #compute_performance_p("coachella", "power")
     #compute_performance("coachella")
     #hbo_likes()
     #lp_perf()
@@ -338,3 +384,4 @@ if __name__ == "__main__":
     # with open("coachella_degrees.txt", "w") as fh:
     #     for deg in degrees.itervalues():
     #         fh.write(str(deg) + "\n")
+    influence_exp("slashdot", 100)
diff --git a/facebook_analysis/seed.py b/facebook_analysis/seed.py
index 7e2b851..cba45e1 100644
--- a/facebook_analysis/seed.py
+++ b/facebook_analysis/seed.py
@@ -1,23 +1,80 @@
 from analyze import sd_users, build_graph, DATASETS, SYNTH_DATASETS
 import matplotlib.pyplot as plt
 from matplotlib import rcParams, cm
-from matplotlib.colors import Normalize
 from matplotlib.pyplot import plot, legend, savefig, xlabel, ylabel,\
     hist, title, subplot, tight_layout, ticklabel_format, xlim, ylim
 from mpl_toolkits.mplot3d import Axes3D
 import numpy as np
-import itertools
 
 mq = lambda x: x * 4
 
 
+def voter():
+    """Plot the voter-experiment curves for Epinions and Slashdot.
+
+    Reads one float per line from <name>_voter.txt ("Adapt. Seeding") and
+    <name>_voter_influence.txt ("Inf. Max.") for name in epinions, slashdot,
+    draws each pair against t = 1..50 in its own panel and saves the
+    figure to voter.pdf.
+    """
+    def read_floats(fname):
+        with open(fname) as f:
+            return [float(line) for line in f]
+
+    panels = [(name.capitalize(), read_floats(name + "_voter.txt"),
+               read_floats(name + "_voter_influence.txt"))
+              for name in ("epinions", "slashdot")]
+    a = range(1, 51)
+    plt.figure(figsize=(7, 3))
+    for idx, (titl, seeding, inf_max) in enumerate(panels):
+        plt.subplot(1, 2, idx + 1)
+        plt.plot(a, seeding, label="Adapt. Seeding")
+        plt.plot(a, inf_max, label="Inf. Max.")
+        plt.legend()
+        plt.title(titl)
+        plt.xlabel("t")
+        plt.ylabel("Performance")
+    plt.savefig("voter.pdf")
+
+
+def sampling():
+    """Plot the "Sampling based" and "Comb. alg." time and cycle curves.
+
+    hbo_sampling.txt and hbo_sans_sampling.txt hold one "size time cycles"
+    record per line (commas are stripped before splitting).  Both panels
+    use a log y axis; the figure is saved to sampling2.pdf.
+    """
+    def read_columns(fname):
+        with open(fname) as f:
+            values = [line.strip().replace(",", "").split() for line in f]
+        sizes, times, cycles = zip(*values)
+        return map(int, sizes), map(float, times), map(float, cycles)
+
+    ks, ts, cs = read_columns("hbo_sampling.txt")
+    k, t, c = read_columns("hbo_sans_sampling.txt")
+    plt.figure(figsize=(7, 3))
+    # Raw string: the backslash must reach matplotlib untouched
+    # (presumably the label is rendered through LaTeX -- confirm).
+    panels = [(ts, t, "Time (s)"), (cs, c, r"\# Cycles")]
+    for idx, (sampled, plain, label) in enumerate(panels):
+        plt.subplot(1, 2, idx + 1)
+        plt.gca().set_yscale("log")
+        plt.plot(ks, sampled, label="Sampling based")
+        # Each curve is drawn against the sizes read from its own file.
+        plt.plot(k, plain, label="Comb. alg.")
+        plt.xlabel("Size")
+        plt.ylabel(label)
+        plt.legend(loc="upper left")
+    plt.savefig("sampling2.pdf")
+
+
 def plot_degree_distributions():
     plt.figure(figsize=(7, 3))
     graph, degrees = build_graph("kiva")
     fd_degrees = list(degrees[user] for user in graph)
     sd_degrees = list(degrees[user] for user in sd_users(graph))
     n, bins, patches = plt.hist(fd_degrees, bins=50, cumulative=True,
-                                label="Initial users", normed=True,
+                                label="Core set", normed=True,
                                 alpha=0.5, histtype="stepfilled")
     n, bins, patches = plt.hist(sd_degrees, bins=50, cumulative=True,
                                 histtype="stepfilled", normed=True, alpha=0.5,
@@ -30,25 +87,27 @@ def plot_degree_distributions():
 
 
 def plot_all_performances():
-    plt.figure(figsize=(7, 14))
+    plt.figure(figsize=(6, 6))
     for i, dataset in enumerate(DATASETS):
         values = [map(float, line.strip().split("\t"))
                   for line in open(dataset + "_performance.txt")]
         a, im, rd, rdf, aps = zip(*values)
         a, im, rd, rdf, aps = [map(mq, l) for l in (a, im, rd, rdf, aps)]
         a = np.arange(0, 1.001, 0.1)
-        ax = plt.subplot(5, 2, i + 1)
+        ax = plt.subplot(2, 2, i + 1)
         #ax.set_yscale("log")
-        plt.plot(a, im, label="Max deg.")
-        plt.plot(a, rd, label="Rand.")
-        plt.plot(a, rdf, label="Rand. friend")
+        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+        plt.plot(a, im, label="Inf. Max")
+        plt.plot(a, rd, label="Rand. Node")
+        plt.plot(a, rdf, label="Rand. Friend")
         plt.plot(a, aps, label="Adapt. Seeding")
-        plt.xlabel("Budget (fraction of the total number of users)")
+        plt.xlabel("Budget")
         plt.ylabel("Performance")
+        titl = dataset
         if dataset == "sw":
-            titl = "SmallWord"
-        if dataset == "coachella":
-            titl = "Conf. Model"
+            titl = "SmallWorld"
+        #if dataset == "coachella":
+        #    titl = "Conf. Model"
         if dataset == "kk":
             titl = "Kronecker"
         if dataset == "b-a":
@@ -58,7 +117,7 @@ def plot_all_performances():
     plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0, 0, 1, 1.03),
                bbox_transform=plt.gcf().transFigure)
     plt.tight_layout()
-    plt.savefig("test2.pdf")
+    plt.savefig("perf10.pdf")
 
 
 def compare_performance(fn):
@@ -81,7 +140,7 @@ def compare_performance(fn):
 
 def compare_performance2(fn):
     plots = {}
-    plt.figure()
+    plt.figure(figsize=(5, 3))
     for dataset in DATASETS:
         values = [map(float, line.strip().split("\t"))
                   for line in open(dataset + "_performance.txt")]
@@ -97,7 +156,7 @@ def compare_performance2(fn):
     width = 0.35
     plt.bar(ind, means, width, linewidth=0.1)
     plt.errorbar([i + width / 2. for i in ind], means, [mini, maxi], elinewidth=1.2, fmt="none")
-    plt.xticks([i + width / 2. for i in ind], a[1:])
+    plt.xticks([i + width / 2. for i in ind], [100, 150, 200, 250, 300, 350, 400, 450, 500, 550])
     plt.xlim(-width, len(ind) - 1 + 2 * width)
     plt.xlabel("Budget")
     plt.ylabel("Relative improvement")
@@ -116,7 +175,7 @@ def compare_dist():
         sd.append(np.mean(sd_degrees))
     ind = range(len(DATASETS))
     width = 0.35
-    plt.bar(ind, fd, width, label="Initial users", color=next(cm))
+    plt.bar(ind, fd, width, label="Core users", color=next(cm))
     plt.bar([i + width for i in ind], sd, width, label="Friends",
             color=next(cm))
     plt.xlim(-width, len(ind) - 1 + 3 * width)
@@ -128,6 +187,7 @@ def compare_dist():
 
 def plot_perf_prob():
     plt.figure()
+    rcParams["font.size"] = 10
     with open("peet_performance_p.txt") as f:
         values = [map(float, line.strip().split("\t")) for line in f]
         values = zip(*values)
@@ -138,36 +198,38 @@ def plot_perf_prob():
     with open("peet_performance.txt") as f:
         values = [map(float, line.strip().split("\t")) for line in f]
         values = zip(*values)
-        plt.gca().set_yscale("log")
+        #plt.gca().set_yscale("log")
+        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
         plt.xlabel("Budget")
         plt.ylabel("Performance")
-        plt.plot(values[0], values[1], label="Max. degree")
-        plt.legend(loc="lower right", fontsize="small", ncol=2)
+        plt.plot(values[0], values[1], label="Inf. Max.")
+        plt.legend(loc="upper left", fontsize="small", ncol=2)
         xlim(xmax=450)
         plt.savefig("prob.pdf")
 
 
 def plot_hbo_likes():
     plt.figure()
-    rcParams["font.size"] = 6
+    rcParams["font.size"] = 10  # global matplotlib setting, not per-figure
     with open("hbo_likes_performance.txt") as f:
         values = [map(float, line.strip().split("\t")) for line in f]
     a, im, aps, apso = zip(*values)
     a = np.arange(0, 1.001, 0.1)
-    plt.gca().set_yscale("log")
-    #plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
-    plt.plot(a, map(mq, im), label="Max. degr.")
-    plt.plot(a, map(mq, aps), label="Adapt. seed. (rest.)")
+    # Linear y axis (log scale dropped); ticks use scientific notation.
+    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+    plt.plot(a, map(mq, im), label="Inf. Max.")
+    plt.plot(a, map(mq, aps), label="Adapt. seed. (subgraph)")
     plt.plot(a, map(mq, apso), label="Adapt. seed.")
     plt.xlabel("Budget")
     plt.ylabel("Performance")
     xlim(xmax=1.1)
-    plt.legend(loc="lower right")
+    plt.legend(loc="upper left")
     plt.savefig("hbo_likes.pdf")
 
 
 def plot_3d():
-    for dist in ["beta", "gauss"]:
+    rcParams["font.size"] = 7
+    for dist in ["beta", "gauss", "power", "deg"]:
         fig = plt.figure()
         with open("coachella_performance_p_" + dist + ".txt") as f:
             values = [map(float, line.strip().split("\t")) for line in f]
@@ -180,7 +242,7 @@ def plot_3d():
         ax.plot_wireframe(x, y, perfs, linewidth=0.1)
         ticklabel_format(style='sci', axis='z', scilimits=(0, 0))
         xlabel("Budget (fraction of nodes)")
-        ylabel("Distribution mean")
+        ylabel("Mean")
         ax.set_zlabel("Performance")
         ax.invert_xaxis()
         plt.savefig(dist + ".pdf")
@@ -232,8 +294,8 @@ def plot_time():
 
 if __name__ == "__main__":
     SYNTH_DATASETS = ["b-a", "kk", "sw", "coachella"]
-    DATASETS = SYNTH_DATASETS
-    plot_all_performances()
+    DATASETS = ["lp", "gp", "google", "coachella"]
+    #plot_all_performances()
     #plot_3d()
     #plot_hbo_likes()
     #compare_performance()
@@ -243,5 +305,6 @@ if __name__ == "__main__":
     #plot_degree_distributions()
     #for style in plt.style.available:
     #    plt.style.use(style)
-    #    compare_performance("performance_" + style + ".pdf")
-    #compare_performance2("comp4_" + ".pdf")
+    #    compare_performance2("comp4_" + style +  ".pdf")
+    sampling()
+    #voter()