Diffstat (limited to 'facebook_analysis')
-rw-r--r--  facebook_analysis/Makefile   |   4
-rw-r--r--  facebook_analysis/ads.pyx    |   8
-rw-r--r--  facebook_analysis/analyze.py |  55
-rw-r--r--  facebook_analysis/seed.py    | 125
4 files changed, 154 insertions(+), 38 deletions(-)
diff --git a/facebook_analysis/Makefile b/facebook_analysis/Makefile
index e3df848..0b011a2 100644
--- a/facebook_analysis/Makefile
+++ b/facebook_analysis/Makefile
@@ -1,3 +1,7 @@
all:
python2 setup.py build_ext --inplace
cython2 -a ads.pyx
+
+clean:
+ python2 setup.py clean
+ rm ads.c ads.html ads.so
diff --git a/facebook_analysis/ads.pyx b/facebook_analysis/ads.pyx
index 2682456..40d6391 100644
--- a/facebook_analysis/ads.pyx
+++ b/facebook_analysis/ads.pyx
@@ -91,14 +91,16 @@ cdef float merge_opt_p_sample(list l1, list l2, int t, dict degrees, float p):
i = j = 0
n = s = 0.
cdef int k
+ cdef int l
cdef dict a
cdef float r
k = 0
r = 0
+ l = 0
a = {}
- for k in xrange(len(degrees)**2):
- for d in degrees:
- a[d] = random.random()
+ for k in xrange(t**2):
+ for l in xrange(t):
+ random.random()
while n < t:
if i == n1 and j == n2:
break
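Note on the hunk above: the rewritten warm-up loop in merge_opt_p_sample now draws and discards t**3 values from Python's module-level generator instead of filling the scratch dict a with len(degrees)**3 draws. Storing or discarding the values does not change the generator's state; only the number of calls to random.random() matters for any draws made afterwards. A minimal sketch of that property, with a hypothetical seed and burn count (the reproducibility motive is an assumption, not stated in the diff):

    import random

    def draw_after_burn(seed_value, burn, store):
        # Seed, make `burn` draws (kept or discarded), then return the next draw.
        random.seed(seed_value)
        kept = []
        for _ in range(burn):
            x = random.random()
            if store:
                kept.append(x)  # keeping the value does not touch the generator state
        return random.random()

    # Same seed and same number of burned draws => identical downstream value,
    # whether or not the burned draws were stored.
    assert draw_after_burn(42, burn=3 ** 3, store=True) == \
           draw_after_burn(42, burn=3 ** 3, store=False)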
diff --git a/facebook_analysis/analyze.py b/facebook_analysis/analyze.py
index c5e6feb..9b7f893 100644
--- a/facebook_analysis/analyze.py
+++ b/facebook_analysis/analyze.py
@@ -10,10 +10,14 @@ import pulp
import sys
from random import seed, betavariate, normalvariate
import matplotlib.pyplot as plt
+from scipy.sparse import coo_matrix
+from sklearn.preprocessing import normalize as nm
DATA_DIR = "../facebook_data"
-DATASETS = ["hbo", "nyt", "lp", "google", "lmpt", "gp", "kiva", "coachella",
- "peet", "gap"]
+#DATASETS = ["hbo", "nyt", "lp", "google", "lmpt", "gp", "kiva", "coachella",
+# "peet", "gap"]
+DATASETS = ["hbo", "nyt", "lp", "google", "gp", "kiva", "coachella",
+ "gap"]
SYNTH_DIR = "../apgl"
SYNTH_DATASETS = ["b-a", "kk", "sw"]
@@ -74,6 +78,48 @@ def build_graph(dataset):
return build_graph1(dataset)
+def build_graph3(dataset):
+ d = {}
+ e = {}
+ with open(dataset + ".txt") as f:
+ for line in f:
+ u, v = map(int, line.strip().split())
+ d[u, v] = 1
+ d[v, u] = 1
+ d[u, u] = 1
+ if u in e:
+ e[u].append(v)
+ else:
+ e[u] = [v]
+ i, j = zip(*d.keys())
+ v = d.values()
+ m = coo_matrix((v, (i, j)), dtype="float")
+ m = nm(m, norm='l1', axis=1, copy=False)
+ return m, e
+
+
+def voter(mat, node, t):
+ n = mat.shape[0]
+ v = np.zeros(n)
+ u = np.ones(n)
+ v[node] = 1
+ for i in xrange(t):
+ v = mat.dot(v)
+ return v.dot(u)
+
+
+def influence_exp(dataset, size):
+ mat, graph = build_graph3(dataset)
+ sp = sample(graph.keys(), size)
+ graph = {s: graph[s] for s in sp}
+ sd = list(sd_users(graph))
+ sd += graph.keys()
+ for t in xrange(100):
+ degrees = {s: voter(mat, s, t) for s in sd}
+ #aps(graph, degrees, size)
+ print im(graph, degrees, size)
+
+
def print_graph(dataset):
graph, degrees = build_graph(dataset)
with open(dataset + "_single_graph.txt", "w") as f:
@@ -257,7 +303,7 @@ def lp_time():
def aps_time():
- graph, degrees = build_graph("big")
+ graph, degrees = build_graph("hbo")
sp = sample(graph.keys(), int(sys.argv[2]))
graph = {s: graph[s] for s in sp}
a = int(sys.argv[1])
@@ -326,7 +372,7 @@ def stats():
if __name__ == "__main__":
#for dataset in SYNTH_DATASETS:
# compute_performance(dataset)
- compute_performance_p("coachella", "deg")
+ #compute_performance_p("coachella", "power")
#compute_performance("coachella")
#hbo_likes()
#lp_perf()
@@ -338,3 +384,4 @@ if __name__ == "__main__":
# with open("coachella_degrees.txt", "w") as fh:
# for deg in degrees.itervalues():
# fh.write(str(deg) + "\n")
+ influence_exp("slashdot", 100)
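Note on the hunk above: the new build_graph3/voter pair builds a row-normalised sparse adjacency matrix and then scores a node by pushing a unit of probability mass through t sparse matrix-vector products; influence_exp sweeps that score over t for a sampled set of users. A self-contained sketch of the same computation on a hypothetical toy edge list (assumes numpy, scipy and scikit-learn are available; the real code reads the edge list from "<dataset>.txt"):

    import numpy as np
    from scipy.sparse import coo_matrix
    from sklearn.preprocessing import normalize

    # Hypothetical toy edge list standing in for "<dataset>.txt".
    edges = [(0, 1), (1, 2), (2, 0), (2, 3)]

    entries = {}
    for u, v in edges:
        entries[u, v] = entries[v, u] = 1.0  # undirected edge
        entries[u, u] = 1.0                  # self-loop on the source node, as in build_graph3
    rows, cols = zip(*entries.keys())
    mat = coo_matrix((list(entries.values()), (rows, cols)), dtype="float")
    mat = normalize(mat, norm="l1", axis=1)  # each row sums to 1

    def voter(mat, node, t):
        # Unit mass on `node`, t steps of the walk, then sum the resulting
        # vector (the original returns v.dot(u) with u a vector of ones).
        v = np.zeros(mat.shape[0])
        v[node] = 1.0
        for _ in range(t):
            v = mat.dot(v)
        return float(v.sum())

    print(voter(mat, node=2, t=5))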
diff --git a/facebook_analysis/seed.py b/facebook_analysis/seed.py
index 7e2b851..cba45e1 100644
--- a/facebook_analysis/seed.py
+++ b/facebook_analysis/seed.py
@@ -1,23 +1,80 @@
from analyze import sd_users, build_graph, DATASETS, SYNTH_DATASETS
import matplotlib.pyplot as plt
from matplotlib import rcParams, cm
-from matplotlib.colors import Normalize
from matplotlib.pyplot import plot, legend, savefig, xlabel, ylabel,\
hist, title, subplot, tight_layout, ticklabel_format, xlim, ylim
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
-import itertools
mq = lambda x: x * 4
+def voter():
+ with open("epinions_voter.txt") as f:
+ ep = [float(line) for line in f]
+ with open("epinions_voter_influence.txt") as f:
+ epr = [float(line) for line in f]
+ with open("slashdot_voter.txt") as f:
+ sl = [float(line) for line in f]
+ with open("slashdot_voter_influence.txt") as f:
+ slr = [float(line) for line in f]
+ a = range(1, 51)
+ plt.figure(figsize=(7, 3))
+ plt.subplot(1, 2, 1)
+ plt.plot(a, ep, label="Adapt. Seeding")
+ plt.plot(a, epr, label="Inf. Max.")
+ plt.legend()
+ plt.title("Epinions")
+ plt.xlabel("t")
+ plt.ylabel("Performance")
+ plt.subplot(1, 2, 2)
+ plt.plot(a, sl, label="Adapt. Seeding")
+ plt.plot(a, slr, label="Inf. Max")
+ plt.legend()
+ plt.title("Slashdot")
+ plt.xlabel("t")
+ plt.ylabel("Performance")
+ plt.savefig("voter.pdf")
+
+
+def sampling():
+ with open("hbo_sampling.txt") as f:
+ values = [line.strip().replace(",", "").split() for line in f]
+ ks, ts, cs = zip(*values)
+ ks = map(int, ks)
+ ts = map(float, ts)
+ cs = map(float, cs)
+ with open("hbo_sans_sampling.txt") as f:
+ values = [line.strip().replace(",", "").split() for line in f]
+ k, t, c = zip(*values)
+ k = map(int, k)
+ t = map(float, t)
+ c = map(float, c)
+ plt.figure(figsize=(7, 3))
+ plt.subplot(1, 2, 1)
+ plt.gca().set_yscale("log")
+ plt.plot(ks, ts, label="Sampling based")
+ plt.plot(ks, t, label="Comb. alg.")
+ plt.xlabel("Size")
+ plt.ylabel("Time (s)")
+ plt.legend(loc="upper left")
+ plt.subplot(1, 2, 2)
+ plt.gca().set_yscale("log")
+ plt.plot(ks, cs, label="Sampling based")
+ plt.plot(ks, c, label="Comb. alg.")
+ plt.legend(loc="upper left")
+ plt.xlabel("Size")
+ plt.ylabel("\# Cycles")
+ plt.savefig("sampling2.pdf")
+
+
def plot_degree_distributions():
plt.figure(figsize=(7, 3))
graph, degrees = build_graph("kiva")
fd_degrees = list(degrees[user] for user in graph)
sd_degrees = list(degrees[user] for user in sd_users(graph))
n, bins, patches = plt.hist(fd_degrees, bins=50, cumulative=True,
- label="Initial users", normed=True,
+ label="Core set", normed=True,
alpha=0.5, histtype="stepfilled")
n, bins, patches = plt.hist(sd_degrees, bins=50, cumulative=True,
histtype="stepfilled", normed=True, alpha=0.5,
@@ -30,25 +87,27 @@ def plot_degree_distributions():
def plot_all_performances():
- plt.figure(figsize=(7, 14))
+ plt.figure(figsize=(6, 6))
for i, dataset in enumerate(DATASETS):
values = [map(float, line.strip().split("\t"))
for line in open(dataset + "_performance.txt")]
a, im, rd, rdf, aps = zip(*values)
a, im, rd, rdf, aps = [map(mq, l) for l in (a, im, rd, rdf, aps)]
a = np.arange(0, 1.001, 0.1)
- ax = plt.subplot(5, 2, i + 1)
+ ax = plt.subplot(2, 2, i + 1)
#ax.set_yscale("log")
- plt.plot(a, im, label="Max deg.")
- plt.plot(a, rd, label="Rand.")
- plt.plot(a, rdf, label="Rand. friend")
+ plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+ plt.plot(a, im, label="Inf. Max")
+ plt.plot(a, rd, label="Rand. Node")
+ plt.plot(a, rdf, label="Rand. Friend")
plt.plot(a, aps, label="Adapt. Seeding")
- plt.xlabel("Budget (fraction of the total number of users)")
+ plt.xlabel("Budget")
plt.ylabel("Performance")
+ titl = dataset
if dataset == "sw":
- titl = "SmallWord"
- if dataset == "coachella":
- titl = "Conf. Model"
+ titl = "SmallWorld"
+ #if dataset == "coachella":
+ # titl = "Conf. Model"
if dataset == "kk":
titl = "Kronecker"
if dataset == "b-a":
@@ -58,7 +117,7 @@ def plot_all_performances():
plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0, 0, 1, 1.03),
bbox_transform=plt.gcf().transFigure)
plt.tight_layout()
- plt.savefig("test2.pdf")
+ plt.savefig("perf10.pdf")
def compare_performance(fn):
@@ -81,7 +140,7 @@ def compare_performance(fn):
def compare_performance2(fn):
plots = {}
- plt.figure()
+ plt.figure(figsize=(5, 3))
for dataset in DATASETS:
values = [map(float, line.strip().split("\t"))
for line in open(dataset + "_performance.txt")]
@@ -97,7 +156,7 @@ def compare_performance2(fn):
width = 0.35
plt.bar(ind, means, width, linewidth=0.1)
plt.errorbar([i + width / 2. for i in ind], means, [mini, maxi], elinewidth=1.2, fmt="none")
- plt.xticks([i + width / 2. for i in ind], a[1:])
+ plt.xticks([i + width / 2. for i in ind], [100, 150, 200, 250, 300, 350, 400, 450, 500, 550])
plt.xlim(-width, len(ind) - 1 + 2 * width)
plt.xlabel("Budget")
plt.ylabel("Relative improvement")
@@ -116,7 +175,7 @@ def compare_dist():
sd.append(np.mean(sd_degrees))
ind = range(len(DATASETS))
width = 0.35
- plt.bar(ind, fd, width, label="Initial users", color=next(cm))
+ plt.bar(ind, fd, width, label="Core users", color=next(cm))
plt.bar([i + width for i in ind], sd, width, label="Friends",
color=next(cm))
plt.xlim(-width, len(ind) - 1 + 3 * width)
@@ -128,6 +187,7 @@ def compare_dist():
def plot_perf_prob():
plt.figure()
+ rcParams["font.size"] = 10
with open("peet_performance_p.txt") as f:
values = [map(float, line.strip().split("\t")) for line in f]
values = zip(*values)
@@ -138,36 +198,38 @@ def plot_perf_prob():
with open("peet_performance.txt") as f:
values = [map(float, line.strip().split("\t")) for line in f]
values = zip(*values)
- plt.gca().set_yscale("log")
+ #plt.gca().set_yscale("log")
+ plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
plt.xlabel("Budget")
plt.ylabel("Performance")
- plt.plot(values[0], values[1], label="Max. degree")
- plt.legend(loc="lower right", fontsize="small", ncol=2)
+ plt.plot(values[0], values[1], label="Inf. Max.")
+ plt.legend(loc="upper left", fontsize="small", ncol=2)
xlim(xmax=450)
plt.savefig("prob.pdf")
def plot_hbo_likes():
plt.figure()
- rcParams["font.size"] = 6
+ rcParams["font.size"] = 10
with open("hbo_likes_performance.txt") as f:
values = [map(float, line.strip().split("\t")) for line in f]
a, im, aps, apso = zip(*values)
a = np.arange(0, 1.001, 0.1)
- plt.gca().set_yscale("log")
- #plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
- plt.plot(a, map(mq, im), label="Max. degr.")
- plt.plot(a, map(mq, aps), label="Adapt. seed. (rest.)")
+ #plt.gca().set_yscale("log")
+ plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+ plt.plot(a, map(mq, im), label="Inf. Max.")
+ plt.plot(a, map(mq, aps), label="Adapt. seed. (subgraph)")
plt.plot(a, map(mq, apso), label="Adapt. seed.")
plt.xlabel("Budget")
plt.ylabel("Performance")
xlim(xmax=1.1)
- plt.legend(loc="lower right")
+ plt.legend(loc="upper left")
plt.savefig("hbo_likes.pdf")
def plot_3d():
- for dist in ["beta", "gauss"]:
+ rcParams["font.size"] = 7
+ for dist in ["beta", "gauss", "power", "deg"]:
fig = plt.figure()
with open("coachella_performance_p_" + dist + ".txt") as f:
values = [map(float, line.strip().split("\t")) for line in f]
@@ -180,7 +242,7 @@ def plot_3d():
ax.plot_wireframe(x, y, perfs, linewidth=0.1)
ticklabel_format(style='sci', axis='z', scilimits=(0, 0))
xlabel("Budget (fraction of nodes)")
- ylabel("Distribution mean")
+ ylabel("Mean")
ax.set_zlabel("Performance")
ax.invert_xaxis()
plt.savefig(dist + ".pdf")
@@ -232,8 +294,8 @@ def plot_time():
if __name__ == "__main__":
SYNTH_DATASETS = ["b-a", "kk", "sw", "coachella"]
- DATASETS = SYNTH_DATASETS
- plot_all_performances()
+ DATASETS = ["lp", "gp", "google", "coachella"]
+ #plot_all_performances()
#plot_3d()
#plot_hbo_likes()
#compare_performance()
@@ -243,5 +305,6 @@ if __name__ == "__main__":
#plot_degree_distributions()
#for style in plt.style.available:
# plt.style.use(style)
- # compare_performance("performance_" + style + ".pdf")
- #compare_performance2("comp4_" + ".pdf")
+ # compare_performance2("comp4_" + style + ".pdf")
+ sampling()
+ #voter()
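Note on the seed.py additions: the new plotting helpers only read pre-computed result files. voter() expects one float per line (50 lines per file, plotted against range(1, 51)), and sampling() expects three columns per row, which its plot labels identify as size k, time in seconds, and cycle count. A hypothetical row parsed the same way sampling() parses its input (the numbers are made up for illustration):

    # Hypothetical row from "hbo_sampling.txt": size k, time (s), cycle count.
    line = "100, 0.52, 1300.0\n"
    k, t, c = line.strip().replace(",", "").split()
    k, t, c = int(k), float(t), float(c)
    print(k, t, c)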