summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2014-11-22 21:08:41 -0500
committerThibaut Horel <thibaut.horel@gmail.com>2014-11-22 21:08:41 -0500
commit36eb1fee5492e57368846cbf4e107f1e4cb31589 (patch)
tree6380028284779e10d01fb9ff51f3c561ae9ce57c
parent4f7d4804234f5515a4dded8b05d9568653b7ae3c (diff)
downloadfast-seeding-36eb1fee5492e57368846cbf4e107f1e4cb31589.tar.gz
WWW version
-rw-r--r--.gitignore4
-rw-r--r--facebook_analysis/Makefile4
-rw-r--r--facebook_analysis/ads.pyx8
-rw-r--r--facebook_analysis/analyze.py55
-rw-r--r--facebook_analysis/seed.py125
-rw-r--r--paper/images/adaptive_seeding.pdfbin0 -> 15465 bytes
-rw-r--r--paper/images/beta.pdfbin0 -> 150198 bytes
-rw-r--r--paper/images/comp2.pdfbin0 -> 108450 bytes
-rw-r--r--paper/images/deg.pdfbin0 -> 150213 bytes
-rw-r--r--paper/images/dist.pdfbin106817 -> 106821 bytes
-rw-r--r--paper/images/gauss.pdfbin0 -> 150195 bytes
-rw-r--r--paper/images/hbo_likes.pdfbin106401 -> 136434 bytes
-rw-r--r--paper/images/para.pdfbin105344 -> 105140 bytes
-rw-r--r--paper/images/perf2.pdfbin0 -> 145513 bytes
-rw-r--r--paper/images/perf3.pdfbin0 -> 148148 bytes
-rw-r--r--paper/images/perf_synth.pdfbin0 -> 140975 bytes
-rw-r--r--paper/images/power.pdfbin0 -> 150214 bytes
-rw-r--r--paper/images/prob.pdfbin194160 -> 223953 bytes
-rw-r--r--paper/images/sampling.pdfbin0 -> 138009 bytes
-rw-r--r--paper/images/synthetic.pdfbin0 -> 110720 bytes
-rw-r--r--paper/images/voter.pdf (renamed from paper/images/perf.pdf)bin121589 -> 108920 bytes
-rw-r--r--paper/main.bib (renamed from paper/paper.bib)151
-rw-r--r--paper/main.tex25
-rw-r--r--paper/sections/abstract.tex7
-rw-r--r--paper/sections/adaptivity.tex221
-rw-r--r--paper/sections/algorithms.log1500
-rw-r--r--paper/sections/algorithms.tex508
-rw-r--r--paper/sections/appendix.tex349
-rw-r--r--paper/sections/combinatorial.tex299
-rw-r--r--paper/sections/experiments.log0
-rw-r--r--paper/sections/experiments.tex475
-rw-r--r--paper/sections/introduction.log0
-rw-r--r--paper/sections/introduction.tex301
-rw-r--r--paper/sections/lp.log0
-rw-r--r--paper/sections/model.tex67
-rw-r--r--paper/sections/performance.tex1
-rw-r--r--paper/sections/related.tex15
37 files changed, 1669 insertions, 2446 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6af7529
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+facebook_scraping/*.txt
+facebook_scraping/*.csv
+facebook_analysis/*.txt
+facebook_analysis/*.pdf
diff --git a/facebook_analysis/Makefile b/facebook_analysis/Makefile
index e3df848..0b011a2 100644
--- a/facebook_analysis/Makefile
+++ b/facebook_analysis/Makefile
@@ -1,3 +1,7 @@
all:
python2 setup.py build_ext --inplace
cython2 -a ads.pyx
+
+clean:
+ python2 setup.py clean
+ rm ads.c ads.html ads.so
diff --git a/facebook_analysis/ads.pyx b/facebook_analysis/ads.pyx
index 2682456..40d6391 100644
--- a/facebook_analysis/ads.pyx
+++ b/facebook_analysis/ads.pyx
@@ -91,14 +91,16 @@ cdef float merge_opt_p_sample(list l1, list l2, int t, dict degrees, float p):
i = j = 0
n = s = 0.
cdef int k
+ cdef int l
cdef dict a
cdef float r
k = 0
r = 0
+ l = 0
a = {}
- for k in xrange(len(degrees)**2):
- for d in degrees:
- a[d] = random.random()
+ for k in xrange(t**2):
+ for l in xrange(t):
+ random.random()
while n < t:
if i == n1 and j == n2:
break
diff --git a/facebook_analysis/analyze.py b/facebook_analysis/analyze.py
index c5e6feb..9b7f893 100644
--- a/facebook_analysis/analyze.py
+++ b/facebook_analysis/analyze.py
@@ -10,10 +10,14 @@ import pulp
import sys
from random import seed, betavariate, normalvariate
import matplotlib.pyplot as plt
+from scipy.sparse import coo_matrix
+from sklearn.preprocessing import normalize as nm
DATA_DIR = "../facebook_data"
-DATASETS = ["hbo", "nyt", "lp", "google", "lmpt", "gp", "kiva", "coachella",
- "peet", "gap"]
+#DATASETS = ["hbo", "nyt", "lp", "google", "lmpt", "gp", "kiva", "coachella",
+# "peet", "gap"]
+DATASETS = ["hbo", "nyt", "lp", "google", "gp", "kiva", "coachella",
+ "gap"]
SYNTH_DIR = "../apgl"
SYNTH_DATASETS = ["b-a", "kk", "sw"]
@@ -74,6 +78,48 @@ def build_graph(dataset):
return build_graph1(dataset)
+def build_graph3(dataset):
+ d = {}
+ e = {}
+ with open(dataset + ".txt") as f:
+ for line in f:
+ u, v = map(int, line.strip().split())
+ d[u, v] = 1
+ d[v, u] = 1
+ d[u, u] = 1
+ if u in e:
+ e[u].append(v)
+ else:
+ e[u] = [v]
+ i, j = zip(*d.keys())
+ v = d.values()
+ m = coo_matrix((v, (i, j)), dtype="float")
+ m = nm(m, norm='l1', axis=1, copy=False)
+ return m, e
+
+
+def voter(mat, node, t):
+ n = mat.shape[0]
+ v = np.zeros(n)
+ u = np.ones(n)
+ v[node] = 1
+ for i in xrange(t):
+ v = mat.dot(v)
+ return v.dot(u)
+
+
+def influence_exp(dataset, size):
+ mat, graph = build_graph3(dataset)
+ sp = sample(graph.keys(), size)
+ graph = {s: graph[s] for s in sp}
+ sd = list(sd_users(graph))
+ sd += graph.keys()
+ for t in xrange(100):
+ degrees = {s: voter(mat, s, t) for s in sd}
+ #aps(graph, degrees, size)
+ print im(graph, degrees, size)
+
+
def print_graph(dataset):
graph, degrees = build_graph(dataset)
with open(dataset + "_single_graph.txt", "w") as f:
@@ -257,7 +303,7 @@ def lp_time():
def aps_time():
- graph, degrees = build_graph("big")
+ graph, degrees = build_graph("hbo")
sp = sample(graph.keys(), int(sys.argv[2]))
graph = {s: graph[s] for s in sp}
a = int(sys.argv[1])
@@ -326,7 +372,7 @@ def stats():
if __name__ == "__main__":
#for dataset in SYNTH_DATASETS:
# compute_performance(dataset)
- compute_performance_p("coachella", "deg")
+ #compute_performance_p("coachella", "power")
#compute_performance("coachella")
#hbo_likes()
#lp_perf()
@@ -338,3 +384,4 @@ if __name__ == "__main__":
# with open("coachella_degrees.txt", "w") as fh:
# for deg in degrees.itervalues():
# fh.write(str(deg) + "\n")
+ influence_exp("slashdot", 100)
diff --git a/facebook_analysis/seed.py b/facebook_analysis/seed.py
index 7e2b851..cba45e1 100644
--- a/facebook_analysis/seed.py
+++ b/facebook_analysis/seed.py
@@ -1,23 +1,80 @@
from analyze import sd_users, build_graph, DATASETS, SYNTH_DATASETS
import matplotlib.pyplot as plt
from matplotlib import rcParams, cm
-from matplotlib.colors import Normalize
from matplotlib.pyplot import plot, legend, savefig, xlabel, ylabel,\
hist, title, subplot, tight_layout, ticklabel_format, xlim, ylim
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
-import itertools
mq = lambda x: x * 4
+def voter():
+ with open("epinions_voter.txt") as f:
+ ep = [float(line) for line in f]
+ with open("epinions_voter_influence.txt") as f:
+ epr = [float(line) for line in f]
+ with open("slashdot_voter.txt") as f:
+ sl = [float(line) for line in f]
+ with open("slashdot_voter_influence.txt") as f:
+ slr = [float(line) for line in f]
+ a = range(1, 51)
+ plt.figure(figsize=(7, 3))
+ plt.subplot(1, 2, 1)
+ plt.plot(a, ep, label="Adapt. Seeding")
+ plt.plot(a, epr, label="Inf. Max.")
+ plt.legend()
+ plt.title("Epinions")
+ plt.xlabel("t")
+ plt.ylabel("Performance")
+ plt.subplot(1, 2, 2)
+ plt.plot(a, sl, label="Adapt. Seeding")
+ plt.plot(a, slr, label="Inf. Max")
+ plt.legend()
+ plt.title("Slashdot")
+ plt.xlabel("t")
+ plt.ylabel("Performance")
+ plt.savefig("voter.pdf")
+
+
+def sampling():
+ with open("hbo_sampling.txt") as f:
+ values = [line.strip().replace(",", "").split() for line in f]
+ ks, ts, cs = zip(*values)
+ ks = map(int, ks)
+ ts = map(float, ts)
+ cs = map(float, cs)
+ with open("hbo_sans_sampling.txt") as f:
+ values = [line.strip().replace(",", "").split() for line in f]
+ k, t, c = zip(*values)
+ k = map(int, k)
+ t = map(float, t)
+ c = map(float, c)
+ plt.figure(figsize=(7, 3))
+ plt.subplot(1, 2, 1)
+ plt.gca().set_yscale("log")
+ plt.plot(ks, ts, label="Sampling based")
+ plt.plot(ks, t, label="Comb. alg.")
+ plt.xlabel("Size")
+ plt.ylabel("Time (s)")
+ plt.legend(loc="upper left")
+ plt.subplot(1, 2, 2)
+ plt.gca().set_yscale("log")
+ plt.plot(ks, cs, label="Sampling based")
+ plt.plot(ks, c, label="Comb. alg.")
+ plt.legend(loc="upper left")
+ plt.xlabel("Size")
+ plt.ylabel("\# Cycles")
+ plt.savefig("sampling2.pdf")
+
+
def plot_degree_distributions():
plt.figure(figsize=(7, 3))
graph, degrees = build_graph("kiva")
fd_degrees = list(degrees[user] for user in graph)
sd_degrees = list(degrees[user] for user in sd_users(graph))
n, bins, patches = plt.hist(fd_degrees, bins=50, cumulative=True,
- label="Initial users", normed=True,
+ label="Core set", normed=True,
alpha=0.5, histtype="stepfilled")
n, bins, patches = plt.hist(sd_degrees, bins=50, cumulative=True,
histtype="stepfilled", normed=True, alpha=0.5,
@@ -30,25 +87,27 @@ def plot_degree_distributions():
def plot_all_performances():
- plt.figure(figsize=(7, 14))
+ plt.figure(figsize=(6, 6))
for i, dataset in enumerate(DATASETS):
values = [map(float, line.strip().split("\t"))
for line in open(dataset + "_performance.txt")]
a, im, rd, rdf, aps = zip(*values)
a, im, rd, rdf, aps = [map(mq, l) for l in (a, im, rd, rdf, aps)]
a = np.arange(0, 1.001, 0.1)
- ax = plt.subplot(5, 2, i + 1)
+ ax = plt.subplot(2, 2, i + 1)
#ax.set_yscale("log")
- plt.plot(a, im, label="Max deg.")
- plt.plot(a, rd, label="Rand.")
- plt.plot(a, rdf, label="Rand. friend")
+ plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+ plt.plot(a, im, label="Inf. Max")
+ plt.plot(a, rd, label="Rand. Node")
+ plt.plot(a, rdf, label="Rand. Friend")
plt.plot(a, aps, label="Adapt. Seeding")
- plt.xlabel("Budget (fraction of the total number of users)")
+ plt.xlabel("Budget")
plt.ylabel("Performance")
+ titl = dataset
if dataset == "sw":
- titl = "SmallWord"
- if dataset == "coachella":
- titl = "Conf. Model"
+ titl = "SmallWorld"
+ #if dataset == "coachella":
+ # titl = "Conf. Model"
if dataset == "kk":
titl = "Kronecker"
if dataset == "b-a":
@@ -58,7 +117,7 @@ def plot_all_performances():
plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0, 0, 1, 1.03),
bbox_transform=plt.gcf().transFigure)
plt.tight_layout()
- plt.savefig("test2.pdf")
+ plt.savefig("perf10.pdf")
def compare_performance(fn):
@@ -81,7 +140,7 @@ def compare_performance(fn):
def compare_performance2(fn):
plots = {}
- plt.figure()
+ plt.figure(figsize=(5, 3))
for dataset in DATASETS:
values = [map(float, line.strip().split("\t"))
for line in open(dataset + "_performance.txt")]
@@ -97,7 +156,7 @@ def compare_performance2(fn):
width = 0.35
plt.bar(ind, means, width, linewidth=0.1)
plt.errorbar([i + width / 2. for i in ind], means, [mini, maxi], elinewidth=1.2, fmt="none")
- plt.xticks([i + width / 2. for i in ind], a[1:])
+ plt.xticks([i + width / 2. for i in ind], [100, 150, 200, 250, 300, 350, 400, 450, 500, 550])
plt.xlim(-width, len(ind) - 1 + 2 * width)
plt.xlabel("Budget")
plt.ylabel("Relative improvement")
@@ -116,7 +175,7 @@ def compare_dist():
sd.append(np.mean(sd_degrees))
ind = range(len(DATASETS))
width = 0.35
- plt.bar(ind, fd, width, label="Initial users", color=next(cm))
+ plt.bar(ind, fd, width, label="Core users", color=next(cm))
plt.bar([i + width for i in ind], sd, width, label="Friends",
color=next(cm))
plt.xlim(-width, len(ind) - 1 + 3 * width)
@@ -128,6 +187,7 @@ def compare_dist():
def plot_perf_prob():
plt.figure()
+ rcParams["font.size"] = 10
with open("peet_performance_p.txt") as f:
values = [map(float, line.strip().split("\t")) for line in f]
values = zip(*values)
@@ -138,36 +198,38 @@ def plot_perf_prob():
with open("peet_performance.txt") as f:
values = [map(float, line.strip().split("\t")) for line in f]
values = zip(*values)
- plt.gca().set_yscale("log")
+ #plt.gca().set_yscale("log")
+ plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
plt.xlabel("Budget")
plt.ylabel("Performance")
- plt.plot(values[0], values[1], label="Max. degree")
- plt.legend(loc="lower right", fontsize="small", ncol=2)
+ plt.plot(values[0], values[1], label="Inf. Max.")
+ plt.legend(loc="upper left", fontsize="small", ncol=2)
xlim(xmax=450)
plt.savefig("prob.pdf")
def plot_hbo_likes():
plt.figure()
- rcParams["font.size"] = 6
+ rcParams["font.size"] = 10
with open("hbo_likes_performance.txt") as f:
values = [map(float, line.strip().split("\t")) for line in f]
a, im, aps, apso = zip(*values)
a = np.arange(0, 1.001, 0.1)
- plt.gca().set_yscale("log")
- #plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
- plt.plot(a, map(mq, im), label="Max. degr.")
- plt.plot(a, map(mq, aps), label="Adapt. seed. (rest.)")
+ #plt.gca().set_yscale("log")
+ plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
+ plt.plot(a, map(mq, im), label="Inf. Max.")
+ plt.plot(a, map(mq, aps), label="Adapt. seed. (subgraph)")
plt.plot(a, map(mq, apso), label="Adapt. seed.")
plt.xlabel("Budget")
plt.ylabel("Performance")
xlim(xmax=1.1)
- plt.legend(loc="lower right")
+ plt.legend(loc="upper left")
plt.savefig("hbo_likes.pdf")
def plot_3d():
- for dist in ["beta", "gauss"]:
+ rcParams["font.size"] = 7
+ for dist in ["beta", "gauss", "power", "deg"]:
fig = plt.figure()
with open("coachella_performance_p_" + dist + ".txt") as f:
values = [map(float, line.strip().split("\t")) for line in f]
@@ -180,7 +242,7 @@ def plot_3d():
ax.plot_wireframe(x, y, perfs, linewidth=0.1)
ticklabel_format(style='sci', axis='z', scilimits=(0, 0))
xlabel("Budget (fraction of nodes)")
- ylabel("Distribution mean")
+ ylabel("Mean")
ax.set_zlabel("Performance")
ax.invert_xaxis()
plt.savefig(dist + ".pdf")
@@ -232,8 +294,8 @@ def plot_time():
if __name__ == "__main__":
SYNTH_DATASETS = ["b-a", "kk", "sw", "coachella"]
- DATASETS = SYNTH_DATASETS
- plot_all_performances()
+ DATASETS = ["lp", "gp", "google", "coachella"]
+ #plot_all_performances()
#plot_3d()
#plot_hbo_likes()
#compare_performance()
@@ -243,5 +305,6 @@ if __name__ == "__main__":
#plot_degree_distributions()
#for style in plt.style.available:
# plt.style.use(style)
- # compare_performance("performance_" + style + ".pdf")
- #compare_performance2("comp4_" + ".pdf")
+ # compare_performance2("comp4_" + style + ".pdf")
+ sampling()
+ #voter()
diff --git a/paper/images/adaptive_seeding.pdf b/paper/images/adaptive_seeding.pdf
new file mode 100644
index 0000000..0b20ac0
--- /dev/null
+++ b/paper/images/adaptive_seeding.pdf
Binary files differ
diff --git a/paper/images/beta.pdf b/paper/images/beta.pdf
new file mode 100644
index 0000000..3264317
--- /dev/null
+++ b/paper/images/beta.pdf
Binary files differ
diff --git a/paper/images/comp2.pdf b/paper/images/comp2.pdf
new file mode 100644
index 0000000..6505ace
--- /dev/null
+++ b/paper/images/comp2.pdf
Binary files differ
diff --git a/paper/images/deg.pdf b/paper/images/deg.pdf
new file mode 100644
index 0000000..4cb2298
--- /dev/null
+++ b/paper/images/deg.pdf
Binary files differ
diff --git a/paper/images/dist.pdf b/paper/images/dist.pdf
index 18d6467..0ac9c59 100644
--- a/paper/images/dist.pdf
+++ b/paper/images/dist.pdf
Binary files differ
diff --git a/paper/images/gauss.pdf b/paper/images/gauss.pdf
new file mode 100644
index 0000000..7f4c1f6
--- /dev/null
+++ b/paper/images/gauss.pdf
Binary files differ
diff --git a/paper/images/hbo_likes.pdf b/paper/images/hbo_likes.pdf
index c5b7ab4..fb3eee7 100644
--- a/paper/images/hbo_likes.pdf
+++ b/paper/images/hbo_likes.pdf
Binary files differ
diff --git a/paper/images/para.pdf b/paper/images/para.pdf
index 95b0f1a..eae1c16 100644
--- a/paper/images/para.pdf
+++ b/paper/images/para.pdf
Binary files differ
diff --git a/paper/images/perf2.pdf b/paper/images/perf2.pdf
new file mode 100644
index 0000000..fe08879
--- /dev/null
+++ b/paper/images/perf2.pdf
Binary files differ
diff --git a/paper/images/perf3.pdf b/paper/images/perf3.pdf
new file mode 100644
index 0000000..4df052d
--- /dev/null
+++ b/paper/images/perf3.pdf
Binary files differ
diff --git a/paper/images/perf_synth.pdf b/paper/images/perf_synth.pdf
new file mode 100644
index 0000000..fdf6bdf
--- /dev/null
+++ b/paper/images/perf_synth.pdf
Binary files differ
diff --git a/paper/images/power.pdf b/paper/images/power.pdf
new file mode 100644
index 0000000..4ecb11e
--- /dev/null
+++ b/paper/images/power.pdf
Binary files differ
diff --git a/paper/images/prob.pdf b/paper/images/prob.pdf
index 8c38488..a4125fe 100644
--- a/paper/images/prob.pdf
+++ b/paper/images/prob.pdf
Binary files differ
diff --git a/paper/images/sampling.pdf b/paper/images/sampling.pdf
new file mode 100644
index 0000000..517cf2e
--- /dev/null
+++ b/paper/images/sampling.pdf
Binary files differ
diff --git a/paper/images/synthetic.pdf b/paper/images/synthetic.pdf
new file mode 100644
index 0000000..cd7893c
--- /dev/null
+++ b/paper/images/synthetic.pdf
Binary files differ
diff --git a/paper/images/perf.pdf b/paper/images/voter.pdf
index 35c7769..68a3ec7 100644
--- a/paper/images/perf.pdf
+++ b/paper/images/voter.pdf
Binary files differ
diff --git a/paper/paper.bib b/paper/main.bib
index d3ff37e..56c0ed0 100644
--- a/paper/paper.bib
+++ b/paper/main.bib
@@ -6,7 +6,6 @@
year = {2007},
pages = {281-286},
ee = {http://dx.doi.org/10.1007/978-3-540-77105-0_27},
- crossref = {DBLP:conf/wine/2007},
bibsource = {DBLP, http://dblp.uni-trier.de}
}
@@ -69,8 +68,6 @@
}
@proceedings{DBLP:conf/stoc/2011,
- editor = {Lance Fortnow and
- Salil P. Vadhan},
title = {Proceedings of the 43rd ACM Symposium on Theory of Computing,
STOC 2011, San Jose, CA, USA, 6-8 June 2011},
booktitle = {STOC},
@@ -295,8 +292,8 @@
@inproceedings{dean2004approximating,
title={Approximating the stochastic knapsack problem: The benefit of adaptivity},
- author={Dean, Brian C and Goemans, Michel X and Vondrdk, J},
- booktitle={Foundations of Computer Science, 2004. Proceedings. 45th Annual IEEE Symposium on},
+ author={Dean, Brian C and Goemans, Michel X and Vondr{\'a}k, J},
+ booktitle={Foundations of Computer Science, 2004},
pages={208--217},
year={2004},
organization={IEEE}
@@ -733,16 +730,6 @@
}
-@inproceedings{MR07,
- author = {Elchanan Mossel and
- S{\'e}bastien Roch},
- title = {On the submodularity of influence in social networks},
- booktitle = {STOC},
- year = {2007},
- pages = {128-134},
-}
-
-
@inproceedings{B07,
author = {Liad Blumrosen},
title = {Implementing the Maximum of Monotone Algorithms},
@@ -751,8 +738,6 @@
pages = {30-35},
}
-
-
@inproceedings{JM07,
author = {Kamal Jain and Mohammad Mahdian},
title = {Cost Sharing},
@@ -1152,17 +1137,11 @@ Characterization of Implementable Choice Rules", BOOKTITLE =
author = { Silvio Lattanzi and Yaron Singer
},
title = {The power of random neighbors in social networks},
- journal = {Working paper},
+ journal = {WSDM 2015},
}
-@article{borgs2012influence,
- title={Influence Maximization in Social Networks: Towards an Optimal Algorithmic Solution},
- author={Borgs, Christian and Brautbar, Michael and Chayes, Jennifer and Lucier, Brendan},
- journal={arXiv preprint arXiv:1212.0884},
- year={2012}
-}
-@inproceedings{borgs2014maximizing,
+@inproceedings{borgs2012influence,
title={Maximizing social influence in nearly optimal time},
author={Borgs, Christian and Brautbar, Michael and Chayes, Jennifer and Lucier, Brendan},
booktitle={SODA},
@@ -1233,7 +1212,6 @@ Characterization of Implementable Choice Rules", BOOKTITLE =
year = {2005},
pages = {1127-1138},
ee = {http://dx.doi.org/10.1007/11523468_91},
- crossref = {DBLP:conf/icalp/2005},
bibsource = {DBLP, http://dblp.uni-trier.de}
}
@@ -1272,3 +1250,124 @@ Characterization of Implementable Choice Rules", BOOKTITLE =
booktitle={NIPS '13: Advances in Neural Information Processing Systems},
year={2013}
}
+
+@inproceedings{kronecker,
+ author = {Jure Leskovec and
+ Deepayan Chakrabarti and
+ Jon M. Kleinberg and
+ Christos Faloutsos},
+ title = {Realistic, Mathematically Tractable Graph Generation and Evolution,
+ Using Kronecker Multiplication},
+ booktitle = {{PKDD} 2005},
+ year = {2005},
+ pages = {133--145},
+ crossref = {DBLP:conf/pkdd/2005},
+ url = {http://dx.doi.org/10.1007/11564126_17},
+ doi = {10.1007/11564126_17},
+ timestamp = {Tue, 28 Oct 2014 16:46:01 +0100},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/pkdd/LeskovecCKF05},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@proceedings{DBLP:conf/pkdd/2005,
+ title = {Knowledge Discovery in Databases: {PKDD} 2005, 9th European Conference
+ on Principles and Practice of Knowledge Discovery in Databases, Porto,
+ Portugal, October 3-7, 2005, Proceedings},
+ series = {Lecture Notes in Computer Science},
+ year = {2005},
+ volume = {3721},
+ publisher = {Springer},
+ isbn = {3-540-29244-6},
+ timestamp = {Tue, 28 Oct 2014 16:46:01 +0100},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/pkdd/2005},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+
+@inproceedings{GWG12,
+ author = {Sharad Goel and
+ Duncan J. Watts and
+ Daniel G. Goldstein},
+ title = {The structure of online diffusion networks},
+ booktitle = {{EC} '12, Valencia, Spain,
+ June 4-8, 2012},
+ year = {2012},
+ pages = {623--638},
+ url = {http://doi.acm.org/10.1145/2229012.2229058},
+ doi = {10.1145/2229012.2229058},
+ timestamp = {Thu, 30 Oct 2014 18:50:03 +0100},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/sigecom/GoelWG12},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@MISC{ZHGS10,
+ author = {Tauhid R. Zaman and Ralf Herbrich and Jurgen Van Gael and David Stern},
+ title = {Predicting Information Spreading in Twitter},
+ booktitle = {Workshop on Computational Social Science and the Wisdom of Crowds , NIPS},
+ year = {2010}
+}
+
+@inproceedings{CADKL14,
+ author = {Cheng, Justin and Adamic, Lada and Dow, P. Alex and Kleinberg, Jon Michael and Leskovec, Jure},
+ title = {Can Cascades Be Predicted?},
+ booktitle = {},
+ series = {WWW '14},
+ year = {2014},
+ isbn = {978-1-4503-2744-2},
+ location = {Seoul, Korea},
+ pages = {925--936},
+ numpages = {12},
+ url = {http://doi.acm.org/10.1145/2566486.2567997},
+ doi = {10.1145/2566486.2567997},
+ acmid = {2567997},
+ publisher = {ACM},
+ address = {New York, NY, USA},
+ keywords = {cascade prediction, contagion, information diffusion},
+}
+
+@inproceedings{mr,
+ author = {Ravi Kumar and
+ Benjamin Moseley and
+ Sergei Vassilvitskii and
+ Andrea Vattani},
+ title = {Fast greedy algorithms in mapreduce and streaming},
+ booktitle = {{SPAA} 2013},
+ year = {2013},
+ pages = {1--10},
+ crossref = {DBLP:conf/spaa/2013},
+ url = {http://doi.acm.org/10.1145/2486159.2486168},
+ doi = {10.1145/2486159.2486168},
+ timestamp = {Tue, 04 Nov 2014 04:50:52 +0100},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/spaa/KumarMVV13},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@proceedings{DBLP:conf/spaa/2013,
+ editor = {Guy E. Blelloch and
+ Berthold V{\"{o}}cking},
+ title = {25th {ACM} Symposium on Parallelism in Algorithms and Architectures,
+ {SPAA} '13, Montreal, QC, Canada - July 23 - 25, 2013},
+ year = {2013},
+ publisher = {{ACM}},
+ url = {http://dl.acm.org/citation.cfm?id=2486159},
+ isbn = {978-1-4503-1572-2},
+ timestamp = {Tue, 04 Nov 2014 04:50:52 +0100},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/spaa/2013},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@misc{snapnets,
+ author = {Jure Leskovec and Andrej Krevl},
+ title = {{SNAP Datasets}: {Stanford} Large Network Dataset Collection},
+ howpublished = {\url{http://snap.stanford.edu/data}},
+ month = jun,
+ year = 2014
+}
+
+@misc{full,
+ author = {Thibaut Horel and
+ Yaron Singer},
+ title = {Scalable Methods for Adaptively Seeding a Social Network},
+ year = {2014},
+ howpublished = {\url{http://thibaut.horel.org/sas.pdf}},
+}
diff --git a/paper/main.tex b/paper/main.tex
index e25212a..60751e5 100644
--- a/paper/main.tex
+++ b/paper/main.tex
@@ -3,6 +3,7 @@
\usepackage{amsmath,amsfonts}
%yaron: removed class amsthm
\usepackage[pagebackref=true,breaklinks=true,colorlinks=true]{hyperref}
+\usepackage[english]{babel}
\usepackage{algorithmic,algorithm}
\usepackage{booktabs}
\usepackage{graphicx}
@@ -43,28 +44,30 @@ Yaron Singer\\
\section{Introduction}
\input{sections/introduction}
-\input{sections/related}
\section{Model}
+\label{sec:model}
\input{sections/model}
+\section{Non-adaptive Optimization}
+\label{sec:adaptivity}
+\input{sections/adaptivity}
+
\section{Algorithms}
+\label{sec:algorithms}
\input{sections/algorithms}
-%\section{The Combinatorial Algorithm}
-%\input{sections/combinatorial}
-
\section{Experiments}
+\label{sec:experiments}
\input{sections/experiments}
+\section{Related work}
+\input{sections/related}
-\footnotesize{
\bibliographystyle{abbrv}
-\bibliography{paper}
-%\balancecolumns
-%\appendix
-%\balancecolumns % GM June 2007
-%\input{sections/appendix}
-}
+\bibliography{main}
+\appendix
+\input{sections/appendix}
+\balancecolumns % GM June 2007
\end{document}
diff --git a/paper/sections/abstract.tex b/paper/sections/abstract.tex
index b166d6a..f5f5f64 100644
--- a/paper/sections/abstract.tex
+++ b/paper/sections/abstract.tex
@@ -1,6 +1,3 @@
-In recent years social networking platforms have developed into extraordinary channels for spreading and consuming information.
-Along with the rise of such infrastructure, there is continuous progress on techniques for spreading information effectively through influential users.
-
-In this paper, we describe scalable algorithms for a new method of information diffusion called adaptive seeding. In many applications, one is restricted to select influencers from a set of users who engaged with the topic being promoted, and due to the structure of social networks, these users often rank low in terms of their influence potential. To overcome this hurdle, adaptive seeding aims to select users in a manner which targets their influential neighbors.
+In recent years, social networking platforms have developed into extraordinary channels for spreading and consuming information. Along with the rise of such infrastructure, there is continuous progress on techniques for spreading information effectively through influential users. In many applications, one is restricted to select influencers from a set of users who engaged with the topic being promoted, and due to the structure of social networks, these users often rank low in terms of their influence potential. An alternative approach one can consider is an adaptive method which selects users in a manner which targets their influential neighbors. The advantage of such an approach is that it leverages the friendship paradox in social networks: while users are often not influential, they often know someone who is.
-Despite the various complexities involved with the optimization problem, we show that scalable adaptive seeding is achievable. In particular, we develop algorithms for linear influence models with provable approximation guarantees that can be gracefully parallelized. To show the effectiveness of our methods we collected data from various verticals social network users follow. For each vertical, we collected data on the users who responded to a certain post as well as their neighbors, and applied our methods on this data. Our experiments show that adaptive seeding is scalable, and importantly, that it obtains dramatic improvements over standard approaches of information dissemination.
+Despite the various complexities in such optimization problems, we show that scalable adaptive seeding is achievable. In particular, we develop algorithms for linear influence models with provable approximation guarantees that can be gracefully parallelized. To show the effectiveness of our methods we collected data from various verticals social network users follow. For each vertical, we collected data on the users who responded to a certain post as well as their neighbors, and applied our methods on this data. Our experiments show that adaptive seeding is scalable, and importantly, that it obtains dramatic improvements over standard approaches of information dissemination.
diff --git a/paper/sections/adaptivity.tex b/paper/sections/adaptivity.tex
new file mode 100644
index 0000000..d590408
--- /dev/null
+++ b/paper/sections/adaptivity.tex
@@ -0,0 +1,221 @@
+The challenging aspect of the adaptive seeding problem expressed in
+Equation~\ref{eq:problem} is its adaptivity: a seed set must be selected during
+the first stage such that in expectation a high influence value can be reached
+when adaptively selecting nodes on the second stage. A standard approach in
+stochastic optimization for overcoming this challenge is to use sampling to
+estimate the expectation of the influence value reachable on the second stage.
+However, as will be discussed in Section~\ref{sec:experiments}, this approach
+quickly becomes infeasible even with modest size graphs.
+
+In this section we develop an approach which avoids sampling and allows
+designing adaptive seeding algorithms that can be applied to large graphs. We
+show that for additive influence functions one can optimize a relaxation of the
+problem which we refer to as the \emph{non-adaptive} version of the problem.
+After defining the non-adaptive version, we show in Section~\ref{sec:gap} that
+the optimal solution for the non-adaptive version is an upper bound on the
+optimal solution of the adaptive seeding problem. We then argue in
+Section~\ref{sec:round} that any solution to the non-adaptive version of the
+problem can be converted to an adaptive solution, losing an arbitrarily small
+factor in the approximation ratio. Together, this implies that one can design
+algorithms for the non-adaptive problem instead, as we do in
+Section~\ref{sec:algorithms}.\newline
+
+%
+%, namely \emph{non-adaptive}. As we will discuss in
+%Sections~\ref{sec:gap} and~\ref{sec:round}, these non-adaptive policies can be
+%used as tool to construct adaptive solutions.
+%
+
+\noindent \textbf{Non-adaptive policies.} We say that a policy is \emph{non-adaptive} if it selects a set of nodes $S
+\subseteq X$ to be seeded in the first stage and a vector of probabilities
+$\mathbf{q}\in[0,1]^n$, such that each neighbor $u$ of $S$ which is realized is
+included in the solution independently with probability $q_u$. The constraint
+will now be that the budget is only respected in expectation, \emph{i.e.} $|S|
++ \textbf{p}^T\textbf{q} \leq k$. Formally the optimization problem for
+non-adaptive policies can be written as:
+\begin{equation}\label{eq:relaxed}
+ \begin{split}
+ %&\max_{}
+ \max_{\substack{S\subseteq X\\\textbf{q}\in[0,1]^n} }& \;
+ \sum_{R\subseteq\neigh{X}} \Big (\prod_{u\in R} p_uq_u\prod_{u\in\neigh{X}\setminus
+ R}(1-p_uq_u) \Big )
+ f(R)\\
+ \text{s.t. } & \; |S|+\textbf{p}^T\textbf{q} \leq k,\;
+q_u \leq \mathbf{1}\{u\in\neigh{S}\}
+\end{split}
+\end{equation}
+%$\textbf{p}\odot \textbf{q}$ denotes componentwise multiplication and $\textbf{q}_R$ denotes the positive entries of $\textbf{q}$ on nodes in $R$:
+%
+%\begin{displaymath}
+% \textbf{p}\circ \textbf{q}_R = \prod_{u\in R} p_uq_u\prod_{u\in\neigh{X}\setminus
+% R}(1-p_uq_u)
+%\end{displaymath}
+%
+%\begin{displaymath}
+% \Pr[R \ |\ \textbf{p}\odot \textbf{q} ] = \prod_{u\in R} p_uq_u\prod_{u\in\neigh{X}\setminus
+% R}(1-p_uq_u)
+%\end{displaymath}
+where we denote by $\mathbf{1}\{E\}$ the indicator variable of the event $E$.
+Note that because of the condition $q_u\leq \mathbf{1}\{u\in\neigh{S}\}$,
+the summand associated with $R$ in \eqref{eq:relaxed} vanishes whenever $R$
+contains $u\in\neigh{X}\setminus\neigh{S}$. Hence, the summation is restricted
+to $R\subseteq\neigh{S}$ as in \eqref{eq:problem}.
+
+%The relaxed non-adaptive optimization \eqref{eq:relaxed} problem can be written
+%more concisely using the \emph{multi-linear extension} of $f$:
+%\begin{displaymath}
+% \forall p\in[0,1]^m,\; F(p)
+% \defeq\mathbb{E}_{p_R}\big[f(R)\big]
+% =\sum_{R\subseteq\neigh{X}} p_R f(R)
+%\end{displaymath}
+%This \emph{extension by expectation} is known to present cross-convexity
+%properties which can be exploited in maximization problems when $f$ is
+%a submodular function \cite{dughmi, vondrak}. Using this definition, \eqref{eq:relaxed}
+%can be re-written as:
+%\begin{equation}\label{eq:relaxed-multi}
+% \begin{split}
+% &\max_{S\subseteq X} \max_{q\in[0,1]^n}F(p\circ q)\\
+% &\text{s.t. }|S|+\sum_{u\in\neigh{X}}p_uq_u \leq k,\;
+%q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}
+%\end{split}
+%\end{equation}
+%
+%When $f$ is an influence function from the triggering model, it has been proved
+%in \cite{singer} that adaptive seeding has a bounded \emph{adaptivity gap}:
+%denoting by $OPT$ the optimal solution of \eqref{eq:problem} and by $OPT_{NA}$
+%the optimal solution of \eqref{eq:relaxed}, we have $OPT\leq\alpha OPT_{NA}$.
+%This inequality justifies using non-adaptive policies to approximate solutions
+%to the adaptive seeding problem.
+
+\subsection{Adaptivity Gap}\label{sec:gap}
+
+We will now justify the use of non-adaptive strategies by showing that the
+optimal non-adaptive strategy of this form yields a value at least as high as
+that of the optimal adaptive strategy. For brevity, given a probability vector $\pi\in[0,1]^m$ we
+write:
+\begin{equation}\label{eq:multi}
+ F(\pi) \defeq
+ \sum_{R\subseteq\neigh{X}}\left(\prod_{u\in
+ R}\pi_u\prod_{u\in\neigh{X}\setminus R}(1-\pi_u)\right)
+ f(R)
+\end{equation}
+as well as $\textbf{p}\otimes \textbf{q}$ to denote the component-wise
+multiplication between vectors $\textbf{p}$ and $\textbf{q}$. Finally, we write
+$\mathcal{F}_{A} \defeq \{S \subseteq X : |S|\leq k\}$, and $\mathcal{F}_{NA}
+\defeq\{(S,\textbf{q}), |S|+\textbf{p}^T\textbf{q} \leq k, q_u \leq
+\mathbf{1}_{\{u\in\neigh{S}\}}\}$ to denote the feasible regions of the
+adaptive and non-adaptive problems, respectively.
+
+%this form of non-adap
+%We already know that the adaptivity gap is bounded for a general class of
+%influence function. For adaptive functions, we get a stronger result:
+%\emph{relaxed-non adaptive policies are stronger than non-adaptive policies}.
+%
+%
+\begin{proposition}\label{prop:gap}
+For additive functions given by \eqref{eq:voter}, the value of the optimal
+adaptive policy is upper bounded by the optimal non-adaptive policy:
+ \begin{displaymath}
+ \begin{aligned}[t]
+ &\max_{S\subseteq X} \sum_{R\subseteq\neigh{S}} p_R
+ \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)\\
+ &\text{s.t. }S \in \mathcal{F}_{A}
+ \end{aligned}
+ \leq
+ \begin{aligned}[t]
+ &\max_{\substack{S\subseteq X\\\textbf{q}\in[0,1]^n}}
+ F(\mathbf{p}\otimes\mathbf{q})\\
+ &\text{s.t. } (S,\textbf{q}) \in \mathcal{F}_{NA}
+ \end{aligned}
+ \end{displaymath}
+\end{proposition}
+
+%\begin{proposition}\label{prop:gap}
+%Let $\mathcal{F}_{A} := \{T \subseteq X : |T|\leq k\}$, $\mathcal{F}_{NA} :=\{(T,\textbf{q}), |S|+\textbf{p}^T\textbf{q} \leq k, q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}\}$.
+%For additive functions of the form given by \eqref{eq:voter}, non-adaptive
+% policies are stronger than adaptive policies:
+% \begin{displaymath}
+% \begin{aligned}[t]
+% &\max_{S\subseteq X} \sum_{R\subseteq\neigh{S}} p_R
+% \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)\\
+% &\text{s.t. }|S|\leq k
+% \end{aligned}
+% \leq
+% \begin{aligned}[t]
+% &\max_{S\subseteq X} \max_{q\in[0,1]^n}F(p\circ q)\\
+% &\text{s.t. }|S|+\sum_{u\in\neigh{X}}p_uq_u \leq k,\;
+% q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}
+% \end{aligned}
+% \end{displaymath}
+%\end{proposition}
+
+The proof of this proposition can be found in the full version of the
+paper~\cite{full} and relies on the following fact: the optimal adaptive policy
+can be written as a feasible non-adaptive policy, hence it provides a lower
+bound on the value of the optimal non-adaptive policy.
+
+\subsection{From Non-Adaptive to Adaptive Solutions}\label{sec:round}
+
+From the above proposition we now know that the optimal non-adaptive solution
+attains a value at least as high as any adaptive solution. Given a non-adaptive solution
+$(S,\mathbf{q})$, a possible scheme would be to use $S$ as an adaptive
+solution. But since $(S, \mathbf{q})$ is a solution to the non-adaptive
+problem, Proposition~\ref{prop:gap} does not provide any guarantee on how well
+$S$ performs as an adaptive solution.
+
+However, we show that from a non-adaptive solution $(S,\mathbf{q})$, we can
+obtain a lower bound on the adaptive value of $S$, that is, the influence
+attainable in expectation over all possible arrivals of neighbors of
+$S$. Starting from $S$, in every realization of neighbors $R$, sample every
+node $u \in R \cap \mathcal{N}(S)$ with probability $q_{u}$, to obtain a random
+set of nodes $I_R \subseteq R \cap \mathcal{N}(S)$. $(S, \mathbf{q})$ being a non-adaptive
+solution, it could be that selecting $I_R$ exceeds our budget. Indeed, the only
+guarantee that we have is that $|S| + \mathbb{E}\big[|I_R|\big]\leq k$. As
+a consequence, an adaptive solution starting from $S$ might not be able to
+select $I_R$ on the second stage.
+
+%execute an adaptive policy by selecting a set $S$ in the first stage, and then in the second stage, while there is still budget remaining, select every neighbor $u$ of $S$ that realized with probability $q_{u}$
+
+
+%To do so, one needs to prove that in expectation over all the realizations, the average values obtainable in the second stage, are close to the value of the non-adaptive objective evaluated over $(S,\mathbf{q})$.
+
+%the objective function of the adaptive problem \eqref{eq:relaxed}. However, the set resulting from the sampling might exceed the budget on the second stage, hence preventing us from directly obtaining a feasible adaptive solution.
+%Given a a non-adaptive policy $(S,\mathbf{q})$ one can execute an adaptive policy by selecting a set $S$ in the first stage, and then in the second stage, while there is still budget remaining, select every neighbor $u$ of $S$ that realized with probability $q_{u}$.
+%In order to use the above lemma, one needs to show that in almost all realizations
+%non-adaptive policy
+
+%Fortunately, by using contention resolution schemes~\cite{vondrak} given a feasible solution $\mathbf{q}$ to the non-adaptive version, one can compute a \emph{feasible} random set $I$, such that:
+%\begin{equation}\label{eq:cr}
+%\mathbb{E}\big[f(I)\big]
+%\geq (1-\varepsilon) F(\mathbf{q})
+%\end{equation}
+
+Fortunately, the probability of exceeding the budget is small enough and with
+high probability $I_R$ will be feasible. This is exploited in \cite{vondrak} to
+design a randomized rounding method with approximation guarantees. These
+rounding methods are called \emph{contention resolution schemes}.
+%More precisely, we note that once the set $S$ is fixed, the feasibility constraint of problem~\eqref{eq:relaxed} becomes a single Knapsack constraint, for which \cite{vondrak} constructs a $(1-\varepsilon, 1-\varepsilon)$-balanced contention resolution scheme.
+Theorem~1.3 of~\cite{vondrak} gives us a contention resolution scheme which will
+compute from $\mathbf{q}$ and for any realization $R$ a \emph{feasible} set
+$\tilde{I}_R$, such that:
+\begin{equation}
+ \label{eq:cr}
+ \mathbb{E}_R\big[f(\tilde{I}_R)\big] \geq (1-\varepsilon) F(\mathbf{q})
+\end{equation}
+What this means is that starting from a non-adaptive solution $(S,\mathbf{q})$,
+there is a way to construct a random \emph{feasible} subset on the second stage
+such that in expectation, this set attains almost the same influence value as
+the non-adaptive solution. Since the adaptive solution starting from $S$ will
+select optimally from the realizations $R\subseteq\neigh{S}$,
+$\mathbb{E}_R[f(\tilde{I}_R)]$ provides a lower bound on the adaptive value of
+$S$ that we denote by $A(S)$.
+
+More precisely, denoting by $\text{OPT}_A$ the optimal value of the adaptive
+problem~\eqref{eq:problem}, we have the following proposition whose proof can
+be found in the full version of this paper~\cite{full}.
+\begin{proposition}\label{prop:cr}
+ Let $(S,\textbf{q})$ be an $\alpha$-approximate solution to the
+ non-adaptive problem \eqref{eq:relaxed}, then $\mathrm{A}(S) \geq \alpha
+ \mathrm{OPT}_A$.
+\end{proposition}
+
diff --git a/paper/sections/algorithms.log b/paper/sections/algorithms.log
deleted file mode 100644
index 6265672..0000000
--- a/paper/sections/algorithms.log
+++ /dev/null
@@ -1,1500 +0,0 @@
-This is pdfTeX, Version 3.1415926-2.5-1.40.14 (TeX Live 2013) (format=pdflatex 2013.5.30) 19 FEB 2014 20:53
-entering extended mode
- restricted \write18 enabled.
- file:line:error style messages enabled.
- %&-line parsing enabled.
-**algorithms.tex
-(./algorithms.tex
-LaTeX2e <2011/06/27>
-Babel <3.9f> and hyphenation patterns for 78 languages loaded.
-
-./algorithms.tex:4: LaTeX Error: Missing \begin{document}.
-
-See the LaTeX manual or LaTeX Companion for explanation.
-Type H <return> for immediate help.
- ...
-
-l.4 W
- e will say that a policy is non-adaptive if it selects a set of nodes $...
-
-?
-Missing character: There is no W in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no - in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-LaTeX Font Info: External font `cmex10' loaded for size
-(Font) <7> on input line 4.
-LaTeX Font Info: External font `cmex10' loaded for size
-(Font) <5> on input line 4.
-Missing character: There is no t in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no , in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no . in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no . in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no z in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no . in font nullfont!
-Missing character: There is no T in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no x in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no , in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no . in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no . in font nullfont!
-Missing character: There is no . in font nullfont!
-Missing character: There is no N in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no - in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no q in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no - in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no . in font nullfont!
-Missing character: There is no F in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no z in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no - in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no : in font nullfont!
-
-Overfull \hbox (20.0pt too wide) in paragraph at lines 4--10
-[]
- []
-
-
-Overfull \hbox (17.26382pt too wide) in paragraph at lines 4--10
-\OML/cmm/m/it/10 S \OMS/cmsy/m/n/10 ^^R
- []
-
-
-Overfull \hbox (9.06943pt too wide) in paragraph at lines 4--10
-\OML/cmm/m/it/10 X$
- []
-
-
-Overfull \hbox (6.06941pt too wide) in paragraph at lines 4--10
-[]$
- []
-
-
-Overfull \hbox (5.72458pt too wide) in paragraph at lines 4--10
-\OML/cmm/m/it/10 u$
- []
-
-
-Overfull \hbox (6.70831pt too wide) in paragraph at lines 4--10
-\OML/cmm/m/it/10 S$
- []
-
-
-Overfull \hbox (9.69218pt too wide) in paragraph at lines 4--10
-\OML/cmm/m/it/10 q[]$
- []
-
-
-Overfull \hbox (20.04169pt too wide) in paragraph at lines 4--10
-\OMS/cmsy/m/n/10 j\OML/cmm/m/it/10 S\OMS/cmsy/m/n/10 j \OT1/cmr/m/n/10 +
- []
-
-
-Overfull \hbox (29.31343pt too wide) in paragraph at lines 4--10
-[][][] \OMS/cmsy/m/n/10 ^^T
- []
-
-
-Overfull \hbox (5.52084pt too wide) in paragraph at lines 4--10
-\OML/cmm/m/it/10 k$
- []
-
-
-./algorithms.tex:11: LaTeX Error: Missing \begin{document}.
-
-See the LaTeX manual or LaTeX Companion for explanation.
-Type H <return> for immediate help.
- ...
-
-l.11 \begin{equation}
- \label{eq:relaxed}
-?
-
-Overfull \hbox (20.0pt too wide) in paragraph at lines 11--11
-[]
- []
-
-
-./algorithms.tex:12: LaTeX Error: Environment split undefined.
-
-See the LaTeX manual or LaTeX Companion for explanation.
-Type H <return> for immediate help.
- ...
-
-l.12 \begin{split}
-
-?
-./algorithms.tex:14: Undefined control sequence.
-l.14 \max_{\substack
- {S\subseteq X\\q\in[0,1]^n} }& \;
-?
-./algorithms.tex:14: Misplaced alignment tab character &.
-l.14 ...x_{\substack{S\subseteq X\\q\in[0,1]^n} }&
- \;
-?
-./algorithms.tex:15: Undefined control sequence.
-l.15 \sum_{R\subseteq\neigh
- {X}}\textbf{p}\odot\textbf{q}_Rf(R)\\
-?
-./algorithms.tex:16: Undefined control sequence.
-<recently read> \text
-
-l.16 \text
- {s.t. } & \; |S|+\textbf{p}^T\textbf{q} \leq k,\;
-?
-./algorithms.tex:16: Misplaced alignment tab character &.
-l.16 \text{s.t. } &
- \; |S|+\textbf{p}^T\textbf{q} \leq k,\;
-?
-./algorithms.tex:17: Undefined control sequence.
-l.17 q_u \leq \mathbf{1}_{\{u\in\neigh
- {S}\}}
-?
-
-./algorithms.tex:18: LaTeX Error: \begin{equation} on input line 11 ended by \e
-nd{split}.
-
-See the LaTeX manual or LaTeX Companion for explanation.
-Type H <return> for immediate help.
- ...
-
-l.18 \end{split}
-
-?
-
-Overfull \hbox (255.8566pt too wide) detected at line 19
-[][] [] [] \OMS/cmsy/m/n/10 ^^L [][]\OML/cmm/m/it/10 f\OT1/cmr/m/n/10 (\OML/cm
-m/m/it/10 R\OT1/cmr/m/n/10 )[] [] \OMS/cmsy/m/n/10 j\OML/cmm/m/it/10 S\OMS/cmsy
-/m/n/10 j \OT1/cmr/m/n/10 + [][][] \OMS/cmsy/m/n/10 ^^T \OML/cmm/m/it/10 k; q[
-] \OMS/cmsy/m/n/10 ^^T \OT1/cmr/bx/n/10 1[]
- []
-
-
-./algorithms.tex:21: LaTeX Error: Missing \begin{document}.
-
-See the LaTeX manual or LaTeX Companion for explanation.
-Type H <return> for immediate help.
- ...
-
-l.21 w
- here $n\defeq|\neigh{X}|$,$\textbf{p}\odot \textbf{q}$ denotes compone...
-
-?
-Missing character: There is no w in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no e in font nullfont!
-./algorithms.tex:21: Undefined control sequence.
-l.21 where $n\defeq
- |\neigh{X}|$,$\textbf{p}\odot \textbf{q}$ denotes compone...
-
-?
-./algorithms.tex:21: Undefined control sequence.
-l.21 where $n\defeq|\neigh
- {X}|$,$\textbf{p}\odot \textbf{q}$ denotes compone...
-
-?
-Missing character: There is no , in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no : in font nullfont!
-
-Overfull \hbox (20.0pt too wide) in paragraph at lines 21--22
-[]
- []
-
-
-Overfull \hbox (34.794pt too wide) in paragraph at lines 21--22
-\OML/cmm/m/it/10 n\OMS/cmsy/m/n/10 j\OML/cmm/m/it/10 X\OMS/cmsy/m/n/10 j$$[] ^^
-L
- []
-
-
-Overfull \hbox (6.06941pt too wide) in paragraph at lines 21--22
-[]$
- []
-
-
-Overfull \hbox (12.6295pt too wide) in paragraph at lines 21--22
-[][]$
- []
-
-
-Overfull \hbox (6.06941pt too wide) in paragraph at lines 21--22
-[]$
- []
-
-
-Overfull \hbox (7.67015pt too wide) in paragraph at lines 21--22
-\OML/cmm/m/it/10 R$
- []
-
-
-./algorithms.tex:23: LaTeX Error: Missing \begin{document}.
-
-See the LaTeX manual or LaTeX Companion for explanation.
-Type H <return> for immediate help.
- ...
-
-l.23 \begin{displaymath}
-
-?
-
-Overfull \hbox (20.0pt too wide) in paragraph at lines 23--23
-[][]
- []
-
-./algorithms.tex:24: Undefined control sequence.
-l.24 \txtbf
- {p}\circ \textbf{q}_R = \prod_{u\in R} p_uq_u\prod_{u\in\neig...
-
-?
-./algorithms.tex:24: Undefined control sequence.
-l.24 ...R = \prod_{u\in R} p_uq_u\prod_{u\in\neigh
- {X}\setminus
-?
-
-Overfull \hbox (143.33928pt too wide) detected at line 26
-\OML/cmm/m/it/10 p \OMS/cmsy/m/n/10 ^^N [][] \OT1/cmr/m/n/10 = [] \OML/cmm/m/it
-/10 p[]q[] []\OT1/cmr/m/n/10 (1 \OMS/cmsy/m/n/10 ^^@ \OML/cmm/m/it/10 p[]q[]\OT
-1/cmr/m/n/10 )
- []
-
-
-./algorithms.tex:28: LaTeX Error: Missing \begin{document}.
-
-See the LaTeX manual or LaTeX Companion for explanation.
-Type H <return> for immediate help.
- ...
-
-l.28 N
- ote that because of the condition $q_u\leq \mathbf{1}_{\{u\in\neigh{S}...
-
-?
-Missing character: There is no N in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-./algorithms.tex:28: Undefined control sequence.
-l.28 ...ondition $q_u\leq \mathbf{1}_{\{u\in\neigh
- {S}\}}$,
-?
-Missing character: There is no , in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no f in font nullfont!
-./algorithms.tex:30: Undefined control sequence.
-<recently read> \neigh
-
-l.30 $\neigh
- {X}\setminus\neigh{S}$. Hence, the summation in \eqref{eq:relaxe...
-
-?
-./algorithms.tex:30: Undefined control sequence.
-l.30 $\neigh{X}\setminus\neigh
- {S}$. Hence, the summation in \eqref{eq:relaxe...
-
-?
-Missing character: There is no . in font nullfont!
-Missing character: There is no H in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no , in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-./algorithms.tex:30: Undefined control sequence.
-l.30 ...\neigh{S}$. Hence, the summation in \eqref
- {eq:relaxed} is
-?
-Missing character: There is no e in font nullfont!
-Missing character: There is no q in font nullfont!
-Missing character: There is no : in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no x in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no o in font nullfont!
-./algorithms.tex:31: Undefined control sequence.
-l.31 restricted to $R\subseteq\neigh
- {S}$ as in \eqref{eq:problem}.
-?
-Missing character: There is no a in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-./algorithms.tex:31: Undefined control sequence.
-l.31 ...cted to $R\subseteq\neigh{S}$ as in \eqref
- {eq:problem}.
-?
-Missing character: There is no e in font nullfont!
-Missing character: There is no q in font nullfont!
-Missing character: There is no : in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no . in font nullfont!
-
-Overfull \hbox (20.0pt too wide) in paragraph at lines 28--32
-[]
- []
-
-
-Overfull \hbox (20.2477pt too wide) in paragraph at lines 28--32
-\OML/cmm/m/it/10 q[] \OMS/cmsy/m/n/10 ^^T
- []
-
-
-Overfull \hbox (29.85446pt too wide) in paragraph at lines 28--32
-\OT1/cmr/bx/n/10 1[]$
- []
-
-
-Overfull \hbox (10.03127pt too wide) in paragraph at lines 28--32
-\OML/cmm/m/it/10 p \OMS/cmsy/m/n/10 ^^N
- []
-
-
-Overfull \hbox (21.57973pt too wide) in paragraph at lines 28--32
-\OML/cmm/m/it/10 q[] \OT1/cmr/m/n/10 =
- []
-
-
-Overfull \hbox (5.00002pt too wide) in paragraph at lines 28--32
-\OT1/cmr/m/n/10 0$
- []
-
-
-Overfull \hbox (7.67015pt too wide) in paragraph at lines 28--32
-\OML/cmm/m/it/10 R$
- []
-
-
-Overfull \hbox (14.06944pt too wide) in paragraph at lines 28--32
-\OML/cmm/m/it/10 X \OMS/cmsy/m/n/10 n
- []
-
-
-Overfull \hbox (6.70831pt too wide) in paragraph at lines 28--32
-\OML/cmm/m/it/10 S$
- []
-
-
-Overfull \hbox (18.22566pt too wide) in paragraph at lines 28--32
-\OML/cmm/m/it/10 R \OMS/cmsy/m/n/10 ^^R
- []
-
-
-Overfull \hbox (6.70831pt too wide) in paragraph at lines 28--32
-\OML/cmm/m/it/10 S$
- []
-
-./algorithms.tex:60: Undefined control sequence.
-l.60 \subsection
- {Adaptivity gap for additive functions}
-?
-
-./algorithms.tex:60: LaTeX Error: Missing \begin{document}.
-
-See the LaTeX manual or LaTeX Companion for explanation.
-Type H <return> for immediate help.
- ...
-
-l.60 \subsection{A
- daptivity gap for additive functions}
-?
-Missing character: There is no A in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no W in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no j in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no - in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no - in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no . in font nullfont!
-
-Overfull \hbox (20.0pt too wide) in paragraph at lines 60--62
-[]
- []
-
-
-./algorithms.tex:63: LaTeX Error: Missing \begin{document}.
-
-See the LaTeX manual or LaTeX Companion for explanation.
-Type H <return> for immediate help.
- ...
-
-l.63 t
- his form of non-adap
-?
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no - in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no W in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no k in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no y in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no . in font nullfont!
-Missing character: There is no F in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no , in font nullfont!
-Missing character: There is no w in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no : in font nullfont!
-Missing character: There is no . in font nullfont!
-
-Overfull \hbox (20.0pt too wide) in paragraph at lines 63--67
-[]
- []
-
-
-Overfull \hbox (33.38863pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 relaxed-
- []
-
-
-Overfull \hbox (16.35547pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 non
- []
-
-
-Overfull \hbox (24.02208pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 adap-
- []
-
-
-Overfull \hbox (15.58876pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 tive
- []
-
-
-Overfull \hbox (18.91098pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 poli-
- []
-
-
-Overfull \hbox (16.35544pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 cies
- []
-
-
-Overfull \hbox (13.41655pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 are
- []
-
-
-Overfull \hbox (35.26639pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 stronger
- []
-
-
-Overfull \hbox (19.16655pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 than
- []
-
-
-Overfull \hbox (19.93321pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 non-
- []
-
-
-Overfull \hbox (36.0331pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 adaptive
- []
-
-
-Overfull \hbox (18.91098pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 poli-
- []
-
-
-Overfull \hbox (16.35544pt too wide) in paragraph at lines 63--67
-\OT1/cmr/m/it/10 cies
- []
-
-
-./algorithms.tex:69: LaTeX Error: Environment proposition undefined.
-
-See the LaTeX manual or LaTeX Companion for explanation.
-Type H <return> for immediate help.
- ...
-
-l.69 \begin{proposition}
- \label{prop:gap}
-?
-
-./algorithms.tex:70: LaTeX Error: Missing \begin{document}.
-
-See the LaTeX manual or LaTeX Companion for explanation.
-Type H <return> for immediate help.
- ...
-
-l.70 F
- or additive functions of the form given by \eqref{eq:voter}, non-a...
-
-?
-Missing character: There is no F in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no u in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no f in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no m in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no b in font nullfont!
-Missing character: There is no y in font nullfont!
-./algorithms.tex:70: Undefined control sequence.
-l.70 ...tive functions of the form given by \eqref
- {eq:voter}, non-adaptive
-?
-Missing character: There is no e in font nullfont!
-Missing character: There is no q in font nullfont!
-Missing character: There is no : in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no , in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no - in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no c in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no s in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no g in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no r in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no h in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no n in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no d in font nullfont!
-Missing character: There is no a in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no t in font nullfont!
-Missing character: There is no i in font nullfont!
-Missing character: There is no v in font nullfont!
-Missing character: There is no e in font nullfont!
-Missing character: There is no p in font nullfont!
-Missing character: There is no o in font nullfont!
-Missing character: There is no l in font nullfont!
-Missing character: There is no i in font n \ No newline at end of file
diff --git a/paper/sections/algorithms.tex b/paper/sections/algorithms.tex
index e59de2a..810653f 100644
--- a/paper/sections/algorithms.tex
+++ b/paper/sections/algorithms.tex
@@ -1,219 +1,22 @@
-%In our context, a non-adaptive policy is simply a policy that makes all its decision before the realizations occur. That is, before the algorithm sees which neighbors arrive, it decides on how to allocate its seeding budget. This is in contrast to the optimal adaptive policy which waits for nodes to arrive.
-%
-
-We start by introducing \emph{non-adaptive policies} for the adaptive seeding problem. These policies are used as a tool to obtain adaptive solutions as will be discussed in Sections~\ref{sec:gap} and~\ref{sec:round}.
-We will say that a policy is non-adaptive if it selects a set of nodes $S
-\subseteq X$ to be seeded in the first stage and a vector of probabilities
-$\mathbf{q}\in[0,1]^n$, s.t. each neighbor $u$ of $S$ which realizes
-is included in the solution independently with probability $q_u$. The
-constraint will now be that the budget is only respected in expectation, i.e.
-$|S| + \textbf{p}^T\textbf{q} \leq k$. Formally the optimization problem for non-adaptive policies can be written as:
-\begin{equation}\label{eq:relaxed}
- \begin{split}
- %&\max_{}
- \max_{\substack{S\subseteq X\\q\in[0,1]^n} }& \;
- \sum_{R\subseteq\neigh{X}} \Big (\prod_{u\in R} p_uq_u\prod_{u\in\neigh{X}\setminus
- R}(1-p_uq_u) \Big )
- f(R)\\
- \text{s.t. } & \; |S|+\textbf{p}^T\textbf{q} \leq k,\;
-q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}
-\end{split}
-\end{equation}
-
-%$\textbf{p}\odot \textbf{q}$ denotes componentwise multiplication and $\textbf{q}_R$ denotes the positive entries of $\textbf{q}$ on nodes in $R$:
-%
-%\begin{displaymath}
-% \textbf{p}\circ \textbf{q}_R = \prod_{u\in R} p_uq_u\prod_{u\in\neigh{X}\setminus
-% R}(1-p_uq_u)
-%\end{displaymath}
-%
-%\begin{displaymath}
-% \Pr[R \ |\ \textbf{p}\odot \textbf{q} ] = \prod_{u\in R} p_uq_u\prod_{u\in\neigh{X}\setminus
-% R}(1-p_uq_u)
-%\end{displaymath}
-
-Note that because of the condition $q_u\leq \mathbf{1}_{\{u\in\neigh{S}\}}$,
-the summand associated with $R$ in \eqref{eq:relaxed} vanishes whenever $R$
-contains $u\in\neigh{X}\setminus\neigh{S}$. Hence, the summation is restricted
-to $R\subseteq\neigh{S}$ as in \eqref{eq:problem}.
-
-%The relaxed non-adaptive optimization \eqref{eq:relaxed} problem can be written
-%more concisely using the \emph{multi-linear extension} of $f$:
-%\begin{displaymath}
-% \forall p\in[0,1]^m,\; F(p)
-% \defeq\mathbb{E}_{p_R}\big[f(R)\big]
-% =\sum_{R\subseteq\neigh{X}} p_R f(R)
-%\end{displaymath}
-%This \emph{extension by expectation} is known to present cross-convecity
-%properties which can exploited in maximization problems when $f$ is
-%a submodular function \cite{dughmi, vondrak}. Using this definition, \eqref{eq:relaxed}
-%can be re-written as:
-%\begin{equation}\label{eq:relaxed-multi}
-% \begin{split}
-% &\max_{S\subseteq X} \max_{q\in[0,1]^n}F(p\circ q)\\
-% &\text{s.t. }|S|+\sum_{u\in\neigh{X}}p_uq_u \leq k,\;
-%q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}
-%\end{split}
-%\end{equation}
-%
-%When $f$ is an ìnfluence function from the triggering model, it has been proved
-%in \cite{singer} that adaptive seeding has a bounded \emph{adaptivity gap}:
-%denoting by $OPT$ the optimal solution of \eqref{eq:problem} and by $OPT_{NA}$
-%the optimal solution of \eqref{eq:relaxed}, we have $OPT\leq\alpha OPT_{NA}$.
-%This inequality justifies using non-adaptive policies to approximate solutions
-%to the adaptive seeding problem.
-
-\subsection{Adaptivity gap for additive functions}\label{sec:gap}
-
-We will now justify the use of non-adaptive strategies by showing that the optimal solution for this form of non-adaptive strategies yields a higher value than adaptive ones.
-For brevity, given a probability vector $\mathbf{p}$ we will write:
-\begin{equation}\label{eq:multi}
- F(\textbf{p}) \defeq\mathbb{E}_{\mathbf{p}}\big[f(R)\big]
- =\sum_{R\subseteq\neigh{X}}p_R f(R)
-\end{equation}
-as well as $\textbf{p}\circ \textbf{q}$ to denote the component-wise multiplication between vectors $\textbf{p}$ and $\textbf{q}$. Finally, we will use $\mathcal{F}_{A} := \{S \subseteq X : |S|\leq k\}$, and $\mathcal{F}_{NA} :=\{(S,\textbf{q}), |S|+\textbf{p}^T\textbf{q} \leq k, q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}\}$ to denote the feasible regions of the adaptive and non adaptive problems, respectively.
-
-%this form of non-adap
-%We already know that the adaptivity gap is bounded for a general class of
-%influence function. For adaptive functions, we get a stronger result:
-%\emph{relaxed-non adaptive policies are stronger than non-adaptive policies}.
-%
-%
-\begin{proposition}\label{prop:gap}
-For additive functions given by \eqref{eq:voter}, non-adaptive
- policies are stronger than adaptive policies:
- \begin{displaymath}
- \begin{aligned}[t]
- &\max_{S\subseteq X} \sum_{R\subseteq\neigh{S}} p_R
- \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)\\
- &\text{s.t. }S \in \mathcal{F}_{A}
- \end{aligned}
- \leq
- \begin{aligned}[t]
- &\max_{\substack{S\subseteq X\\q\in[0,1]^n}}
- F(\mathbf{p}\circ\mathbf{q})\\
- &\text{s.t. } (S,q) \in \mathcal{F}_{NA}
- \end{aligned}
- \end{displaymath}
-\end{proposition}
-
-%\begin{proposition}\label{prop:gap}
-%Let $\mathcal{F}_{A} := \{T \subseteq X : |T|\leq k\}$, $\mathcal{F}_{NA} :=\{(T,\textbf{q}), |S|+\textbf{p}^T\textbf{q} \leq k, q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}\}$.
-%For additive functions of the form given by \eqref{eq:voter}, non-adaptive
-% policies are stronger than adaptive policies:
-% \begin{displaymath}
-% \begin{aligned}[t]
-% &\max_{S\subseteq X} \sum_{R\subseteq\neigh{S}} p_R
-% \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)\\
-% &\text{s.t. }|S|\leq k
-% \end{aligned}
-% \leq
-% \begin{aligned}[t]
-% &\max_{S\subseteq X} \max_{q\in[0,1]^n}F(p\circ q)\\
-% &\text{s.t. }|S|+\sum_{u\in\neigh{X}}p_uq_u \leq k,\;
-% q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}
-% \end{aligned}
-% \end{displaymath}
-%\end{proposition}
-
-\begin{proof}
- We will first show that the optimal adaptive policy can be interpreted as
- a non-adaptive policy. Let $S$ be the optimal adaptive
- solution and define $\delta_R:\neigh{X}\mapsto \{0,1\}$:
- \begin{displaymath}
- \delta_R(u) \defeq \begin{cases}
- 1&\text{if } u\in\argmax\big\{f(T);\; T\subseteq R,\; |T|\leq
- k-|S|\big\} \\
- 0&\text{otherwise}
- \end{cases},
- \end{displaymath}
- one can write
- \begin{displaymath}
- \begin{split}
- \sum_{R\subseteq\neigh{S}} p_R
- \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
- &=
- \sum_{R\subseteq\neigh{S}} p_R
- \sum_{u\in\neigh{X}}\delta_R(u)w_u\\
- &=
- \sum_{u\in\neigh{X}}w_u\sum_{R\subseteq\neigh{S}}p_R\delta_R(u).
- \end{split}
- \end{displaymath}
-
- Let us now define for $u\in\neigh{X}$:
- \begin{displaymath}
- q_u \defeq \begin{cases}
- \sum_{R\subseteq\neigh{S}}\frac{p_R}{p_u}\delta_R(u)
- &\text{if }p_u\neq 0\\
- 0&\text{otherwise}
- \end{cases}.
- \end{displaymath}
- This allows us to write:
- \begin{displaymath}
- \sum_{R\subseteq\neigh{S}} p_R
- \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
- = \sum_{u\in\neigh{X}}p_uq_uw_u = F(\mathbf{p}\circ\mathbf{q})
- \end{displaymath}
- where the last equality is obtained from \eqref{eq:multi} by successively using the linearity of the expectation and the linearity of $f$.
-
- Furthermore, observe that $q_u\in[0,1]$, $q_u=0$ if $u\notin\neigh{S}$ and:
- \begin{displaymath}
- \begin{split}
- |S|+\sum_{u\in\neigh{X}}p_uq_u
- &= |S|+\sum_{R\subseteq\neigh{S}}p_R\sum_{u\in\neigh{X}}\delta_R(u)\\
- &\leq |S| + \sum_{R\subseteq\neigh{S}}p_R(k-|S|)\leq k
- \end{split}
- \end{displaymath}
-
- Hence, $(S,\mathbf{q})\in\mathcal{F}_{NA}$. In other words, we have written the optimal adaptive solution as a relaxed
- non-adaptive solution. This conclude the proof of the proposition.
-\end{proof}
-
-\subsection{From non-adaptive to adaptive solutions}\label{sec:round}
-From the above proposition we now know that optimal non-adaptive solutions have
-higher values than adaptive solutions. A priori, this does not imply that non-adaptive solutions are good adaptive solutions. More precisely, a non-adaptive solution is a pair $(S, \mathbf{q})$ such that starting from $S$ on the first stage and sampling nodes with probability $\mathbf{q}$ on the second stage will result in a solution which is as good as $\mathrm{A}(S)$ in expectation, where $\mathrm{A}$ denotes the objective function of the adaptive problem \eqref{eq:relaxed}. However, the set resulting from the sampling might exceed the budget on the second stage, hence preventing us from directly obtaining a feasible adaptive solution.
-
-Fortunately, the probability of exceeding the budget is small enough and with high probability the sampled set will be feasible. This property is leveraged in \cite{vondrak} to design a randomized rounding method with approximation guarantees. This randomized roundings are called \emph{contention resolution schemes}. More precisely, we note that once the set $S$ is fixed, the feasibility constraint of problem~\eqref{eq:relaxed} becomes a single Knapsack constraint, for which \cite{vondrak} constructs a $(1-\varepsilon, 1-\varepsilon)$-balanced contention resolution scheme. Applying Theorem~1.3 of the same paper, this contention resolution scheme will compute from $\mathbf{q}$ a \emph{feasible} random set $I$, such that:
-\begin{equation}\label{eq:cr}
-\mathbb{E}\big[f(I)\big]
-\geq (1-\varepsilon) F(\mathbf{q})
-\end{equation}
-
-Equation~\eqref{eq:cr} allows us to prove the following proposition, where $\mathrm{OPT}_{NA}$ denotes the optimal value of problem~\eqref{eq:relaxed} and $\mathrm{OPT}_A$ denotes the optimal value of problem~\eqref{eq:problem}.
-
-\begin{proposition}\label{prop:cr}
- Let $(S,\textbf{q})$ be an $\alpha$-approximate solution to the non-adaptive problem \eqref{eq:relaxed}, then $\mathrm{A}(S) \geq \alpha \mathrm{OPT}_A$.
-\end{proposition}
-
-\begin{proof}
- Using the definition of $\mathrm{A}(S)$, one can write:
- \begin{displaymath}
- \mathrm{A}(S) = \sum_{R\subseteq\neigh{S}} p_R
- \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
- \geq \sum_{R\subseteq\neigh{S}} p_R \mathbf{E}\big[f(I)\big]
- \end{displaymath}
- where the inequality comes from the fact that $I$ is a feasible random set: $|I|\leq k-|S|$, hence the expected value of $f(I)$ is bounded by the maximum of $f$ over feasible sets.
-
-Equation~\eqref{eq:cr} then implies:
-\begin{equation}\label{eq:tmp}
- \mathrm{A}(S)
- \geq (1-\varepsilon)\sum_{R\subseteq\neigh{S}} p_R F(\mathbf{q})
- = (1-\varepsilon)F(\mathbf{p}\circ\mathbf{q}).
-\end{equation}
-
-Equation~\eqref{eq:tmp} holds for any $\varepsilon\geq 0$. In particular, for $\varepsilon$ smaller than $\inf_{S\neq T} |A(S)-A(T)|$, we obtain that $\mathrm{A}(S)\geq F(\mathbf{p}\circ\mathbf{q})$. Note that such a $\varepsilon$ is at most polynomially small in the size of the instance.
-$(S, \mathbf{q})$ is an $\alpha$-approximate non adaptive solution, hence $F(\mathbf{p}\circ\mathbf{q}) \geq \alpha\mathrm{OPT}_{NA}$. We can then conclude by applying Proposition~\ref{prop:gap}.
-\end{proof}
-
-Therefore the adaptive seeding problem reduces to the non-adaptive problem. We will now discuss two approaches to construct non-adaptive solutions. The first is an LP-based approach, and the second is a combinatorial algorithm. Both approaches have the same $(1-1/e)$ approximation ratio, which is then translated to a $(1-1/e)$ approximation ratio for the adaptive seeding problem~\eqref{eq:problem} via Proposition~\ref{prop:cr}.
+Section~\ref{sec:adaptivity} shows that the adaptive seeding problem reduces to
+the non-adaptive problem. We will now discuss two approaches to construct
+approximate non-adaptive solutions. The first is an LP-based approach, and the
+second is a combinatorial algorithm. Both approaches have the same $(1-1/e)$
+approximation ratio, which is then translated to a $(1-1/e)$ approximation
+ratio for the adaptive seeding problem~\eqref{eq:problem} via
+Proposition~\ref{prop:cr}. As we will show in Section~\ref{sec:experiments},
+both algorithms have their advantages and disadvantages in terms of
+scalability.
\subsection{An LP-Based Approach}
-Note that due to linearity of expectation, for a linear function $f$ of the form given by \eqref{eq:voter} we have:
+\label{sec:lp}
+Note that due to linearity of expectation, for a linear function $f$ of the
+form given by \eqref{eq:voter} we have:
\begin{equation}\label{eq:multi-voter}
\begin{split}
F(\textbf{p})
- &=\mathbb{E}_{p_R}\big[f(R)\big]
- =\mathbb{E}_{p_R}\Bigg[\sum_{u\in\neigh{X}}w_u\mathbf{1}_{\{u\in
+ &=\mathbb{E}_{R}\big[f(R)\big]
+ =\mathbb{E}_{R}\Bigg[\sum_{u\in\neigh{X}}w_u\mathbf{1}_{\{u\in
R\}}\Bigg]\\
&=\sum_{u\in\neigh{X}}w_u\mathbb{P}[u\in R]
=\sum_{u\in\neigh{X}}p_uw_u
@@ -223,10 +26,10 @@ Note that due to linearity of expectation, for a linear function $f$ of the form
Thus, the non-adaptive optimization problem \eqref{eq:relaxed} can be written as:
\begin{displaymath}
\begin{split}
- \max_{\substack{S\subseteq X\\q\in[0,1]^n} }
+ \max_{\substack{S\subseteq X\\\mathbf{q}\in[0,1]^n} }
& \sum_{u\in\neigh{X}}p_uq_uw_u\\
\text{s.t. } & |S|+ \textbf{p}^T\textbf{q} \leq k,\;
- q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}
+ q_u \leq \mathbf{1}\{u\in\neigh{S}\}
\end{split}
\end{displaymath}
@@ -235,49 +38,70 @@ $\lambda_v\in[0,1]$ for each $v\in X$. We obtain the following
LP for the adaptive seeding problem:
\begin{equation}\label{eq:lp}
\begin{split}
- \max_{\substack{q\in[0,1]^n\\\lambda\in[0,1]^m}}
+ \max_{\substack{\mathbf{q}\in[0,1]^n\\\boldsymbol\lambda\in[0,1]^m}}
& \;\sum_{u\in\neigh{X}}p_uq_uw_u\\
\text{s.t. } & \sum_{v\in X}\lambda_v+\textbf{p}^T\textbf{q} \leq k,\;
q_u \leq \sum_{v\in\neigh{u}} \lambda_v
\end{split}
\end{equation}
-An optimal solution to the above problem can be found using standard LP-solvers, in polynomial time. The solution returned by the LP is \emph{fractional}, and requires a rounding procedure to return a feasible solution to our problem, where $S$ is integral. We briefly describe our approach which uses the \emph{pipage rounding} method.\newline
-
-\noindent \textbf{Pipage Rounding.}
-The pipage rounding method~\cite{pipage} is a deterministic rounding method that can be applied to a variety of problems. In particular, it can be applied to LP-relaxations of the \textsc{Max-K-Cover} problem where we are given a family of sets that cover elements of a universe and the goal is to find $k$ subsets whose union has the maximal cardinality. The LP-relaxation is a fractional solution over subsets, and the pipage rounding procedure then rounds the allocation in linear time, and the integral solution is guaranteed to be within a factor of $(1-1/e)$ of the fractional solution.
-We make the following key observation: for any given $\textbf{q}$, one can remove all elements in $\mathcal{N}(X)$ for which $q_{u}=0$, without changing the value of any solution $(\lambda,\textbf{q})$.
-Our rounding procedure can therefore be described as follows: given a solution $(\lambda,\textbf{q})$ we remove all nodes $u \in \mathcal{N}(X)$ for which $q_{u}=0$, which leaves us with a fractional solution to a (weighted) version of the \textsc{Max-K-Cover} problem where nodes in $X$ are the sets and the universe is the set of weighted nodes in $\mathcal{N}(X)$ that were not removed. We can therefore apply pipage rounding and lose only a factor of $(1-1/e)$ in quality of the solution.
+An optimal solution to the above problem can be found in polynomial time using
+standard LP-solvers. The solution returned by the LP is \emph{fractional}, and
+requires a rounding procedure to return a feasible solution to our problem,
+where $S$ is integral. To round the solution we use the pipage rounding
+method~\cite{pipage}. We defer the details to the full version of the paper~\cite{full}.
\begin{lemma}
- For \mbox{\textsc{AdaptiveSeeding-LP}} as defined in \eqref{eq:lp}, any fractional solution $(\lambda, \mathbf{q})\in[0,1]^m\times[0,1]^n$ can be rounded to an integral solution $\bar{\lambda} \in \{0,1\}^{m}$ s.t. $(1-1/e) F(\mathbf{p}\circ\mathbf{q}) \leq A(\bar{\lambda})$ in $O(m + n)$ steps.
+ For \mbox{\textsc{AdaptiveSeeding-LP}} defined in \eqref{eq:lp}, any fractional solution $(\boldsymbol\lambda, \mathbf{q})\in[0,1]^m\times[0,1]^n$ can be rounded to an integral solution $\bar{\boldsymbol\lambda} \in \{0,1\}^{m}$ s.t. $(1-1/e) F(\mathbf{p}\circ\mathbf{q}) \leq A(\bar{\lambda})$ in $O(m + n)$ steps.
\end{lemma}
\subsection{A Combinatorial Algorithm}
-In this section, we will introduce a combinatorial algorithm with an identical approximation guarantee to the LP-based approach. The main idea is to reduce the problem to a monotone submodular maximization problem and apply a variant of the celebrated greedy algorithm~\cite{nemhauser}.
-This property is quite a remarkable feature of linear influence models; for influence models such as independent cascade and linear threshold, the adaptive seeding problem does not reduce to submodular maximization, and a completely different approach is required~\cite{singer}. In contrast with standard influence maximization, the submodularity of the non-adaptive seeding problem is not simply a consequence of properties of the influence function. It also strongly relies on the combinatorial structure of the two stage optimization.
+\label{sec:comb}
+
+In this section, we introduce a combinatorial algorithm with an identical
+approximation guarantee to the LP-based approach. However, its running time,
+stated in Proposition~\ref{prop:running_time} can be better than the one given
+by LP solvers depending on the relative sizes of the budget and the number of
+nodes in the graph. Furthermore, as we discuss at the end of this section, this
+algorithm is amenable to parallelization.
+
+The main idea is to reduce the problem to a monotone submodular maximization
+problem and apply a variant of the celebrated greedy
+algorithm~\cite{nemhauser}.
+%This property is quite a remarkable feature of
+%linear influence models; for influence models such as independent cascade and
+%linear threshold, the adaptive seeding problem does not reduce to submodular
+%maximization, and a completely different approach is required~\cite{singer}.
+In contrast to standard influence maximization, the submodularity of the
+non-adaptive seeding problem is not simply a consequence of properties of the
+influence function; it also strongly relies on the combinatorial structure of
+the two-stage optimization.
-Intuitively, we can think of our problem as trying to find a set $S$ in the first stage, for which the nodes that can be seeded on the second stage have the largest possible value. To formalize this, for a budget $b\in\reals^+$ used in the second stage and a set of neighbors $T\subseteq\mathcal{N}(X)$, we will use
-$\mathcal{O}(T,b)$ to denote the solution to:
+Intuitively, we can think of our problem as trying to find a set $S$ in the
+first stage, for which the nodes that can be seeded on the second stage have
+the largest possible value. To formalize this, for a budget $b\in\reals^+$
+used in the second stage and a set of neighbors $T\subseteq\mathcal{N}(X)$, we
+will use $\mathcal{O}(T,b)$ to denote the solution to:
\begin{equation}\label{eq:knap}
\begin{split}
\mathcal{O}(T,b)\defeq
- \max_{q\in[0,1]^n} & \sum_{u\in\neigh{X}} p_uq_uw_u\\
- \text{s.t. } & \mathbf{p}^T\mathbf{q}\leq b\text{ and } q_u=0\text{ if
- }u\notin T
+ \max_{\textbf{q}\in[0,1]^n} & \sum_{u\in\neigh{X} \cap T} p_uq_uw_u\\
+ \text{s.t. } & \mathbf{p}^T\mathbf{q}\leq b
+ %\text{ and } q_u=0\text{ if}u\notin T
\end{split}
\end{equation}
The optimization problem \eqref{eq:relaxed} for
non-adaptive policies can now be written as:
\begin{equation}\label{eq:sub}
- \begin{split}
- \max_{S\subseteq X} &\; \mathcal{O}\big(\neigh{S},k-|S|\big)\\
- \text{s.t. } & |S|\leq k
- \end{split}
+ \max_{S\subseteq X} \; \mathcal{O}\big(\neigh{S},k-|S|\big)
+ \quad \text{s.t. } |S|\leq k
\end{equation}
-We start by proving in Proposition~\ref{prop:sub} that for fixed $t$, $\mathcal{O}(\neigh{\cdot}, t)$ is submodular. This proposition relies on lemmas~\ref{lemma:nd} and~\ref{lemma:sub} about the properties of $\mathcal{O}(T,b)$ first seen as a function $b$, then as a function of $T$.
+We start by proving in Proposition~\ref{prop:sub} that for fixed $t$,
+$\mathcal{O}(\neigh{\cdot}, t)$ is submodular. This proposition relies on
+lemmas~\ref{lemma:nd} and~\ref{lemma:sub} about the properties of
+$\mathcal{O}(T,b)$.
\begin{lemma}\label{lemma:nd}
Let $T \subseteq \mathcal{N}(X)$ and $x \in \mathcal{N}(X)$, then
@@ -285,169 +109,51 @@ We start by proving in Proposition~\ref{prop:sub} that for fixed $t$, $\mathcal{
non-decreasing in $b$.
\end{lemma}
-\begin{proof}
-\emph{W.l.o.g} we can rename and order the pairs in $T$ so that $w_1\geq w_2\geq\ldots\geq w_m $.
-Then, $\mathcal{O}(T,b)$ has the following simple piecewise linear expression:
-\begin{displaymath}\label{eq:pw}
- \mathcal{O}(T,b) =
- \begin{cases}
- b w_1&\text{if }0\leq b<p_1\\
- \displaystyle
- \sum_{k=1}^{i-1}p_k(w_k-w_i)
- + b w_i
- &\displaystyle\text{if } 0\leq b - \sum_{k=1}^{i-1}p_k< p_i\\
- \displaystyle
- \sum_{k=1}^m p_kw_k
- &\displaystyle\text{if } b\geq\sum_{i=1}^m p_k
-
- \end{cases}
-\end{displaymath}
-
-Let us define for $t\in\reals^+$, $n(t)\defeq \inf\Big\{i\text{ s.t.
-} \sum_{k=1}^i p_k > t\Big \}$ with $n(t)=+\infty$ when the set is empty. In
-particular, note that $x\mapsto n(t)$ is non-decreasing. Denoting
-$\partial_+\mathcal{O}_T$ the right derivative of $\mathcal{O}(T,\cdot)$, one
-can write $\partial_+\mathcal{O}_T(t)=w_{n(t)}$, with the convention that
-$w_\infty = 0$.
-
-Writing $i \defeq \sup\Big\{j\text{ s.t.
-} w_j\geq w_x\Big\}$, it is easy to see that
-$\partial_+\mathcal{O}_{T\cup\{x\}}\geq\partial_+\mathcal{O}_T$. Indeed:
-\begin{enumerate}
- \item if $n(t)\leq i$ then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
- = \partial_+\mathcal{O}_T(t)= w_{n(t)}$.
- \item if $n(t)\geq i+1$ and $n(t-c)\leq i$ then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
- = w_x\geq w_{n(t)}= \partial_+\mathcal{O}_T(t)$.
- \item if $n(t-c)\geq i+1$, then $\partial_+\mathcal{O}_{T\cup\{x\}}
- = w_{n(t-c)}\geq w_{n(t)}=\partial_+\mathcal{O}_T(t)$.
-\end{enumerate}
-
-Let us now consider $b$ and $c$ such that $b\leq c$. Then, using the integral
-representation of $\mathcal{O}(T\cup\{x\},\cdot)$ and $\mathcal{O}(T,\cdot)$, we get:
+The proof of this lemma can be found in the full version of the paper~\cite{full}. The main
+idea consists in writing:
\begin{multline*}
- \mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{T\cup\{x\}}(t)dt\\
- \geq\int_b^c\partial_+\mathcal{O}_T(t)dt=\mathcal{O}(T,c)-\mathcal{O}(T,b)
+ \mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{T\cup\{x\}}(t)dt
+\end{multline*}
+where $\partial_+\mathcal{O}_T$ denotes the right derivative of
+$\mathcal{O}(T,\cdot)$. For a fixed $T$ and $b$, $\mathcal{O}(T,b)$ defines
+a fractional Knapsack problem over the set $T$. Knowing the form of the optimal
+fractional solution, we can verify that
+$\partial_+\mathcal{O}_{T\cup\{x\}}\geq\partial_+\mathcal{O}_T$ and obtain:
+\begin{multline*}
+ \mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T\cup\{x\},b)\geq
+ \mathcal{O}(T,c)-\mathcal{O}(T,b)
\end{multline*}
-Re-ordering the terms, $\mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T,c)\geq
-\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)$
-which concludes the proof of the lemma.
-\end{proof}
\begin{lemma}\label{lemma:sub}
For any $b\in\reals^+$, $\mathcal{O}(T,b)$ is submodular in $T$, $T\subseteq\neigh{X}$.
\end{lemma}
-\begin{proof}
- Let $T\subseteq\neigh{X}$ and $x, y\in\neigh{X}\setminus T$. Using the
- second-order characterization of submodular functions, it suffices to show
- that:
- \begin{displaymath}\label{eq:so}
+The proof of this lemma is more technical. For $T\subseteq\neigh{X}$ and $x,
+y\in\neigh{X}\setminus T$, we need to show that:
+ \begin{displaymath}
\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)\geq
\mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
\end{displaymath}
+ This can be done by partitioning the set $T$ into ``high value
+ items'' (those with weight greater than $w_x$) and ``low value items'' and
+ carefully applying Lemma~\ref{lemma:nd} to the associated subproblems.
+ The proof is in the full version of the paper~\cite{full}.
- We distinguish two cases based on the relative position of $w_x$ and $w_y$.
- The following notations will be useful: $S_T^x \defeq \big\{u\in
- T\text{ s.t. }w_x\leq w_u\big\}$ and $P_T^x\defeq
- T\setminus S_T^x$.
-
- \textbf{Case 1:} If $w_y\geq w_x$, then one can
- write:
- \begin{gather*}
- \mathcal{O}(T\cup\{y,x\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)+
- \mathcal{O}(S_T^y\cup\{x\},b_2)\\
- \mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)
- + \mathcal{O}(S_T^y,b_2)
- \end{gather*}
- where $b_1$ is the fraction of the budget $b$ spent on $P_T^y\cup\{y\}$ and
- $b_2=b-b_1$.
-
- Similarly:
- \begin{gather*}
- \mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y\cup\{x\},c_2)\\
- \mathcal{O}(T, b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y,c_2)
- \end{gather*}
- where $c_1$ is the fraction of the budget $b$ spent on $P_T^y$ and $c_2
- = b - c_1$.
-
- Note that $b_1\geq c_1$: an optimal solution will first spent as much
- budget as possible on $P_T^y\cup\{y\}$ before adding elements in
- $S_T^y\cup\{x\}$.
-
- In this case:
- \begin{displaymath}
- \begin{split}
- \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)&=
- \mathcal{O}(S_T^y\cup\{x\},c_2)+\mathcal{O}(S_T^y,c_2)\\
- &\geq \mathcal{O}(S_T^y\cup\{x\},b_2)+\mathcal{O}(S_T^y,b_2)\\
- & = \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
- \end{split}
- \end{displaymath}
- where the inequality comes from Lemma~\ref{lemma:nd} and
- $c_2\geq b_2$.
-
- \textbf{Case 2:} If $w_x > w_y$, we now decompose
- the solution on $P_T^x$ and $S_T^x$:
- \begin{gather*}
- \mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1)
- + \mathcal{O}(S_T^x,b_2)\\
- \mathcal{O}(T,b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x,c_2)\\
- %\intertext{and}
- \mathcal{O}(T\cup\{y, x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1)
- + \mathcal{O}(S_T^x\cup\{y\},b_2)\\
- \mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x\cup\{y\},c_2)
- \end{gather*}
- with $b_1+b_2=b$, $c_1+c_2=b$ and $b_2\leq c_2$.
-
- In this case again:
- \begin{multline*}
- \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)=
- \mathcal{O}(S_T^x,b_2)-\mathcal{O}(S_T^x,c_2)\\
- \geq \mathcal{O}(S_T^x\cup\{y\},b_2)-\mathcal{O}(S_T^x\cup\{y\},c_2)\\
- = \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
- \end{multline*}
- where the inequality uses Lemma~\ref{lemma:nd} and $c_2\geq b_2$.
-
- In both cases, we were able to obtain the second-order characterization of submodularity. This concludes the proof of the lemma.
-\end{proof}
+Finally, Lemma~\ref{lemma:sub} can be used to show Proposition~\ref{prop:sub}
+whose proof can be found in the full version~\cite{full}.
\begin{proposition}\label{prop:sub}
- Let $b\in\mathbf{R}^+$, then $\mathcal{O}(\neigh{S},b)$ is submodular in $S$, $S\subseteq X$.
+ Let $b\in\mathbf{R}^+$, then $\mathcal{O}(\neigh{S},b)$ is monotone and
+ submodular in $S$, $S\subseteq X$.
\end{proposition}
-\begin{proof}
- Let us consider $S$ and $T$ such that $S\subseteq T\subseteq X$ and $x\in
- X\setminus T$. In particular, note that $\neigh{S}\subseteq\neigh{T}$.
-
- Let us write $\neigh{S\cup\{x\}}=\neigh{S}\cup R$ with $\neigh{S}\cap
- R=\emptyset$ and similarly, $\neigh{T\cup\{x\}}=\neigh{T}\cup R'$ with
- $\neigh{T}\cap R'=\emptyset$. It is clear that $R'\subseteq R$. Writing $R'=\{u_1,\ldots,u_k\}$:
- \begin{multline*}
- \mathcal{O}(\neigh{T\cup\{x\}},b)- \mathcal{O}(\neigh{T},b)\\
- =\sum_{i=1}^k\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_i\},b)
- -\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_{i-1}\},b)\\
- \leq \sum_{i=1}^k\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_i\},b)
- -\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_{i-1}\},b)\\
- =\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
- \end{multline*}
- where the inequality comes from the submodularity of $\mathcal{O}(\cdot,b)$ proved in Lemma~\ref{lemma:sub}. This same function is also obviously set-increasing, hence:
- \begin{multline*}
- \mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)\\
- \leq \mathcal{O}(\neigh{S}\cup R,b)-\mathcal{O}(\neigh{S},b)\\
- =\mathcal{O}(\neigh{S\cup\{x\}},b)-\mathcal{O}(\neigh{S},b)
- \end{multline*}
- This concludes the proof of the proposition.
-\end{proof}
-
We can now use Proposition~\ref{prop:sub} to reduce \eqref{eq:sub} to a monotone submodular maximization problem. First, we note that~\eqref{eq:sub} can be rewritten:
\begin{equation}\label{eq:sub-mod}
- \begin{split}
- &\max_{t}\max_{S\subseteq X} \; \mathcal{O}\big(\neigh{S},t\big)\\
- &\text{s.t. } |S| + t\leq k
- \end{split}
+ \max_{\substack{S\subseteq X\\ t \in \mathbb{N}}} \; \mathcal{O}\big(\neigh{S},t\big)
+ \quad\text{s.t. } |S| + t\leq k
\end{equation}
-Intuitively, we fix $t$ arbitrarily so that the inner $\max$ in \eqref{eq:sub-mod} becomes a submodular maximization problem with fixed budget $t$. We then optimize over the value of $t$. Combining this observation with the greedy algorithm for monotone submodular maximization~\cite{nemhauser}, we obtain Algorithm~\ref{alg:comb}, whose performance guarantee is summarized in Proposition~\ref{prop:main_result}.
+Intuitively, we fix $t$ arbitrarily so that the maximization above becomes a submodular maximization problem with fixed budget $t$. We then optimize over the value of $t$. Combining this observation with the greedy algorithm for monotone submodular maximization~\cite{nemhauser}, we obtain Algorithm~\ref{alg:comb}, whose performance guarantee is summarized in Proposition~\ref{prop:main_result}.
\begin{algorithm}
\caption{Combinatorial algorithm}
@@ -477,32 +183,44 @@ Intuitively, we fix $t$ arbitrarily so that the inner $\max$ in \eqref{eq:sub-mo
stage. Then $\mathrm{A}(S) \geq (1-1/e)\mathrm{OPT}_A$.
\end{proposition}
-\begin{proof}
- We simply note that the content of the outer \textsf{for} loop on line 2 of Algorithm~\ref{alg:comb} is the greedy submodular maximization algorithm of \cite{nemhauser}. Since $\mathcal{O}(\neigh{\cdot}, t)$ is submodular (Proposition~\ref{prop:sub}), this solves the inner $\max$ in \eqref{eq:sub-mod} with an approximation ratio of $(1-1/e)$. The outer \textsf{for} loop then computes the outer $\max$ of \eqref{eq:sub-mod}.
- As a consequence, Algorithm~\ref{alg:comb} computes a $(1-1/e)$-approximate non-adaptive solution. We conclude by applying Proposition~\ref{prop:cr}.
-\end{proof}
-\subsection{Running time}
-The algorithm described above considers all possible ways to split the seeding budget between the first and the second stage. For each possible split $\{(t,k-t)\}_{t=1\ldots,k-1}$, the algorithm computes an approximation to the optimal non adaptive solution that uses $k-t$ nodes in the first stage and $t$ nodes in the second stage, and returns the solution for the split with the highest value (breaking ties arbitrarily). This process can be trivially parallelized across $k-1$ machines, each performing a computation of a single split.
+%\subsubsection{Scalability}
+
+\noindent\textbf{Parallelization.} The algorithm described above considers all
+possible ways to split the seeding budget between the first and the second
+stage. For each possible split $\{(t,k-t)\}_{t=1\ldots,k-1}$, the algorithm
+computes an approximation to the optimal non adaptive solution that uses $k-t$
+nodes in the first stage and $t$ nodes in the second stage, and returns the
+solution for the split with the highest value (breaking ties arbitrarily).
+This process can be trivially parallelized across $k-1$ machines, each
+performing a computation of a single split. With slightly more effort, for any
+$\epsilon>0$ one can parallelize over $\log_{1+\epsilon}n$ machines at the cost
+of losing a factor of $(1+\epsilon)$ in the approximation guarantee (see full
+version of the paper~\cite{full} for details).\newline
+
+\noindent \textbf{Implementation in MapReduce.} While the previous paragraph
+describes how to parallelize the outer \texttt{for} loop of
+Algorithm~\ref{alg:comb}, we note that its inner loop can also be parallelized
+in the MapReduce framework. Indeed, it corresponds to the greedy algorithm
+applied to the function $\mathcal{O}\left(\neigh{\cdot}, t\right)$. The
+\textsc{Sample\&Prune} approach successfully applied in \cite{mr} to obtain
+MapReduce algorithms for various submodular maximizations can also be applied
+to Algorithm~\ref{alg:comb} to cast it in the MapReduce framework. The details
+of the algorithm can be found in the full version of the paper~\cite{full}.
+\newline
-A slightly more sophisticated approach is to consider only $\log n$ splits: $(1,k-1),(2,k-2),\ldots,(2^{\lfloor \log n \rfloor},1)$ and then select the best solution from this set. It is not hard to see that in comparison to the previous approach, this would reduce the approximation guarantee by a factor of at most 2: if the optimal solution is obtained by spending $t$ on the first stage and $k-t$ in the second stage, then since $t \leq 2\cdot2^{\lfloor \log t \rfloor}$ the solution computed for $(2^{\lfloor \log t \rfloor}, k - 2^{\lfloor \log t \rfloor})$ will have at least half that value.
-More generally, for any $\epsilon>0$ one can parallelize over $\log_{1+\epsilon}n$ machines at the cost of losing a factor of $(1+\epsilon)$ in the approximation guarantee.
+%A slightly more sophisticated approach is to consider only $\log n$ splits: $(1,k-1),(2,k-2),\ldots,(2^{\lfloor \log n \rfloor},1)$ and then select the best solution from this set. It is not hard to see that in comparison to the previous approach, this would reduce the approximation guarantee by a factor of at most 2: if the optimal solution is obtained by spending $t$ on the first stage and $k-t$ in the second stage, then since $t \leq 2\cdot2^{\lfloor \log t \rfloor}$ the solution computed for $(2^{\lfloor \log t \rfloor}, k - 2^{\lfloor \log t \rfloor})$ will have at least half that value.
+%More generally, for any $\epsilon>0$ one can parallelize over $\log_{1+\epsilon}n$ machines at the cost of losing a factor of $(1+\epsilon)$ in the approximation guarantee.
-To implement Algorithm~\ref{alg:comb} efficiently, the computation of the $\argmax$ on line 5 must be dealt with carefully. $\mathcal{O}(\neigh{S_t\cup\{x\}},t)$ is the optimal solution to the fractional Knapsack problem~\eqref{eq:knap} with budget $t$ and can be computed in time $\min(\frac{t}{p_\text{min}},n)$ by iterating over the list of nodes in $\neigh{S_t\cup\{x\}}$ by decreasing order of the degrees. This decreasing order of $\neigh{S_t}$ can be maintained throughout the greedy construction of $S_t$ by:
+\noindent \textbf{Algorithmic speedups.} To implement Algorithm~\ref{alg:comb} efficiently, the computation of the $\argmax$ on line 5 must be dealt with carefully. $\mathcal{O}(\neigh{S_t\cup\{x\}},t)$ is the optimal solution to the fractional Knapsack problem~\eqref{eq:knap} with budget $t$ and can be computed in time $\min(\frac{t}{p_\text{min}},n)$ by iterating over the list of nodes in $\neigh{S_t\cup\{x\}}$ in decreasing order of the degrees. This decreasing order of $\neigh{S_t}$ can be maintained throughout the greedy construction of $S_t$ by:
\begin{itemize}
-\item ordering the list of neighbors of nodes in $X$ by decreasing order of the degrees when initially constructing the graph. This is responsible for the $n\log n$ additive factor.
+ \item ordering the list of neighbors of nodes in $X$ by decreasing order of the degrees when initially constructing the graph. This is responsible for a $O(n\log n)$ pre-processing time.
\item when adding node $x$ to $S_t$, observe that $\neigh{S_t\cup\{x\}} = \neigh{S_t}\cup\neigh{\{x\}}$. Hence, if $\neigh{S_t}$ and $\neigh{\{x\}}$ are sorted lists, then $\mathcal{O}(\neigh{S_t\cup\{x\}},t)$ can be computed in a single iteration of length $\min(\frac{t}{p_\text{min}},n)$ where the two sorted lists are merged on the fly.
\end{itemize}
As a consequence, the running time of line 5 is bounded from above by $m\min(\frac{t}{p_\text{min}},n)$. The two nested \textsf{for} loops are responsible for the additional $k^2$ factor. The running time of Algorithm~\ref{alg:comb} is summarized in Proposition~\ref{prop:running_time}.
\begin{proposition}\label{prop:running_time}
- Algorithm~\ref{alg:comb} runs in time $O\big(n\log
- n + k^2 m \min(\frac{k}{p_\text{min}},n)\big)$, with $p_\text{min} =\min\{p_u,u\in\neigh{X}\}$.
+ Let $p_\text{min} =\min\{p_u,u\in\neigh{X}\}$, then Algorithm~\ref{alg:comb} runs in time
+ ${O\big(n\log n + k^2 m \min(\frac{k}{p_\text{min}},n)\big)}$.
\end{proposition}
-
-
-
-
-
-
diff --git a/paper/sections/appendix.tex b/paper/sections/appendix.tex
index 64ca77e..aa0ef1d 100644
--- a/paper/sections/appendix.tex
+++ b/paper/sections/appendix.tex
@@ -1,76 +1,309 @@
-
-\noindent \textbf{From expectation to high probability.}
-
-%\begin{lemma}\label{lemma:nd}
-% Let $A\subseteq\mathcal{U}$ and $x=(v,c)\in\mathcal{U}\setminus A$, then
-% the function $b\mapsto \mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)$ is
-% non-decreasing over $\reals^+$.
-%\end{lemma}
-
-
-
-
-
-
-\begin{proof}[of Proposition~\ref{main_result}]
- For each realization of the neighbors $R$, let us define by $X_R$ the
- random set which includes each $u\in R$ with probability $q_u$ on the
- second day and:
+\section{Adaptivity proofs}
+\begin{proof}[of Proposition~\ref{prop:gap}]
+ We will first show that the optimal adaptive policy can be interpreted as
+ a non-adaptive policy. Let $S$ be the optimal adaptive
+ solution and define $\delta_R:\neigh{X}\mapsto \{0,1\}$:
\begin{displaymath}
- \tilde{X}_R=
- \begin{cases}
- X_R&\text{if } |X_R|\leq t\\
- \emptyset&\text{otherwise}
- \end{cases}
+ \delta_R(u) \defeq \begin{cases}
+ 1&\text{if } u\in\argmax\big\{f(T);\; T\subseteq R,\; |T|\leq
+ k-|S|\big\} \\
+ 0&\text{otherwise}
+ \end{cases},
\end{displaymath}
- where $t = k-|S|$. $\tilde{X}_R$ is a random variable over the feasible
- solutions on the second day. As a consequence:
+ one can write
\begin{displaymath}
- A(S) \geq \sum_{R\subseteq\neigh{S}} p_R\mathbb{E}\big[f(\tilde{X}_R)\big]
+ \begin{split}
+ \sum_{R\subseteq\neigh{S}} p_R
+ \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
+ &=
+ \sum_{R\subseteq\neigh{S}} p_R
+ \sum_{u\in\neigh{X}}\delta_R(u)w_u\\
+ &=
+ \sum_{u\in\neigh{X}}w_u\sum_{R\subseteq\neigh{S}}p_R\delta_R(u).
+ \end{split}
\end{displaymath}
- Let us define by $Y$ the random set which includes each $u\in\neigh{X}$
- with probability $p_uq_u$ and:
+
+ Let us now define for $u\in\neigh{X}$:
\begin{displaymath}
- \tilde{Y}=
- \begin{cases}
- Y&\text{if } |Y|\leq t\\
- \emptyset&\text{otherwise}
+ q_u \defeq \begin{cases}
+ \sum_{R\subseteq\neigh{S}}\frac{p_R}{p_u}\delta_R(u)
+ &\text{if }p_u\neq 0\\
+ 0&\text{otherwise}
\end{cases}.
\end{displaymath}
- It is easy to see that $\tilde{X}_R$ is the conditional expectation of
- $\tilde{Y}$ given
- $R$. Hence:
+ This allows us to write:
\begin{displaymath}
- \sum_{R\subseteq\neigh{S}} p_R\mathbb{E}\big[f(\tilde{X}_R)\big]
- = \mathbb{E}\big[f(\tilde{Y})\big]
+ \sum_{R\subseteq\neigh{S}} p_R
+ \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
+ = \sum_{u\in\neigh{X}}p_uq_uw_u = F(\mathbf{p}\circ\mathbf{q})
\end{displaymath}
- But:
- \begin{align*}
- \mathbb{E}\big[f(\tilde{Y})\big]
- &=\sum_{T\subseteq\neigh{X}}\mathbb{P}\big[\tilde{Y}=T\big]f(T)
- = \sum_{\substack{T\subseteq\neigh{X}\\|T|\leq
- t}}\mathbb{P}\big[Y=T\big]f(T)\\
- &= \mathbb{E}\big[f(Y)\big]-\sum_{\substack{T\subseteq\neigh{X}\\|T|>
- t}}\mathbb{P}\big[Y=T\big]f(T)
- \end{align*}
- Roughly:
+ where the last equality is obtained from \eqref{eq:multi} by successively using the linearity of the expectation and the linearity of $f$.
+
+ Furthermore, observe that $q_u\in[0,1]$, $q_u=0$ if $u\notin\neigh{S}$ and:
\begin{displaymath}
- \sum_{\substack{T\subseteq\neigh{X}\\|T|>
- t}}\mathbb{P}\big[Y=T\big]f(T)\leq
- \frac{1}{2}\mathbb{E}\big[f(Y)\big]
+ \begin{split}
+ |S|+\sum_{u\in\neigh{X}}p_uq_u
+ &= |S|+\sum_{R\subseteq\neigh{S}}p_R\sum_{u\in\neigh{X}}\delta_R(u)\\
+ &\leq |S| + \sum_{R\subseteq\neigh{S}}p_R(k-|S|)\leq k
+ \end{split}
\end{displaymath}
- Combining the above inequalities we get:
+
+ Hence, $(S,\mathbf{q})\in\mathcal{F}_{NA}$. In other words, we have written the optimal adaptive solution as a relaxed
+ non-adaptive solution. This concludes the proof of the proposition.
+\end{proof}
+
+\vspace{0.5em}
+
+\begin{proof}[of Proposition~\ref{prop:cr}]
+ Using the definition of $\mathrm{A}(S)$, one can write:
\begin{displaymath}
- A(S)\geq \frac{1}{2} \mathbb{E}\big[f(Y)\big]
+ \mathrm{A}(S) = \sum_{R\subseteq\neigh{S}} p_R
+ \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
+ \geq \sum_{R\subseteq\neigh{S}} p_R \mathbf{E}\big[f(I)\big]
\end{displaymath}
- But $\mathbb{E}\big[f(Y)\big]$ is precisely the value of the non-adaptive
- solution computed by Algorithm~\ref{alg:comb} which is
- a $\left(1-\frac{1}{e}\right)$ approximation of the
- optimal non adaptive solution. Hence:
+ where the inequality comes from the fact that $I$ is a feasible random set: $|I|\leq k-|S|$, hence the expected value of $f(I)$ is bounded by the maximum of $f$ over feasible sets.
+
+Equation~\eqref{eq:cr} then implies:
+\begin{equation}\label{eq:tmp}
+ \mathrm{A}(S)
+ \geq (1-\varepsilon)\sum_{R\subseteq\neigh{S}} p_R F(\mathbf{q})
+ = (1-\varepsilon)F(\mathbf{p}\circ\mathbf{q}).
+\end{equation}
+
+Equation~\eqref{eq:tmp} holds for any $\varepsilon\geq 0$. In particular, for $\varepsilon$ smaller than $\inf_{S\neq T} |\mathrm{A}(S)-\mathrm{A}(T)|$, we obtain that $\mathrm{A}(S)\geq F(\mathbf{p}\circ\mathbf{q})$. Note that such an $\varepsilon$ is at most polynomially small in the size of the instance.
+$(S, \mathbf{q})$ is an $\alpha$-approximate non adaptive solution, hence $F(\mathbf{p}\circ\mathbf{q}) \geq \alpha\mathrm{OPT}_{NA}$. We can then conclude by applying Proposition~\ref{prop:gap}.
+\end{proof}
+
+\section{Algorithms Proofs}
+We first discuss the NP-hardness of the problem.
+
+\noindent \textbf{\textsc{NP}-Hardness.} In contrast to standard influence maximization, adaptive seeding is already \textsc{NP}-Hard even for the simplest cases. In the case when $f(S)=|S|$ and all probabilities equal one, the decision problem is whether given a budget $k$ and target value $\ell$ there exists a subset of $X$ of size $k-t$ which yields a solution with expected value of $\ell$ using $t$ nodes in $\mathcal{N}(X)$. This is equivalent to deciding whether there are $k-t$ nodes in $X$ that have $t$ neighbors in $\mathcal{N}(X)$. To see this is \textsc{NP}-hard, consider reducing from \textsc{Set-Cover} where there is one node $i$ for each input set $T_i$, $1\leq i\leq n$, with $\neigh{i}= T_i$ and integers $k,\ell$, and the output is ``yes'' if there is a family of $k$ sets in the input which cover at least $\ell$ elements, and ``no'' otherwise.
+
+
+\subsection{LP-based approach}
+In the LP-based approach we rounded the solution using the pipage rounding method. We discuss this with greater detail here.
+
+\noindent \textbf{Pipage Rounding.}
+The pipage rounding method~\cite{pipage} is a deterministic rounding method that can be applied to a variety of problems. In particular, it can be applied to LP-relaxations of the \textsc{Max-K-Cover} problem where we are given a family of sets that cover elements of a universe and the goal is to find $k$ subsets whose union has the maximal cardinality. The LP-relaxation is a fractional solution over subsets, and the pipage rounding procedure then rounds the allocation in linear time, and the integral solution is guaranteed to be within a factor of $(1-1/e)$ of the fractional solution.
+We make the following key observation: for any given $\textbf{q}$, one can remove all elements in $\mathcal{N}(X)$ for which $q_{u}=0$, without changing the value of any solution $(\boldsymbol\lambda,\textbf{q})$.
+Our rounding procedure can therefore be described as follows: given a solution $(\boldsymbol\lambda,\textbf{q})$ we remove all nodes $u \in \mathcal{N}(X)$ for which $q_{u}=0$, which leaves us with a fractional solution to a (weighted) version of the \textsc{Max-K-Cover} problem where nodes in $X$ are the sets and the universe is the set of weighted nodes in $\mathcal{N}(X)$ that were not removed. We can therefore apply pipage rounding and lose only a factor of $(1-1/e)$ in quality of the solution.
+
+\subsection{Combinatorial Algorithm}
+We include the missing proofs from the combinatorial algorithm section. The scalability and implementation in MapReduce are discussed in this section as well.
+
+\begin{proof}[of Lemma~\ref{lemma:nd}]
+\emph{W.l.o.g} we can rename and order the pairs in $T$ so that $w_1\geq w_2\geq\ldots\geq w_m $.
+Then, $\mathcal{O}(T,b)$ has the following simple piecewise linear expression:
+\begin{displaymath}\label{eq:pw}
+ \mathcal{O}(T,b) =
+ \begin{cases}
+ b w_1&\text{if }0\leq b<p_1\\
+ \displaystyle
+ \sum_{k=1}^{i-1}p_k(w_k-w_i)
+ + b w_i
+ &\displaystyle\text{if } 0\leq b - \sum_{k=1}^{i-1}p_k< p_i\\
+ \displaystyle
+ \sum_{k=1}^m p_kw_k
+ &\displaystyle\text{if } b\geq\sum_{i=1}^m p_k
+
+ \end{cases}
+\end{displaymath}
+
+Let us define for $t\in\reals^+$, $n(t)\defeq \inf\Big\{i\text{ s.t.
+} \sum_{k=1}^i p_k > t\Big \}$ with $n(t)=+\infty$ when the set is empty. In
+particular, note that $t\mapsto n(t)$ is non-decreasing. Denoting
+$\partial_+\mathcal{O}_T$ the right derivative of $\mathcal{O}(T,\cdot)$, one
+can write $\partial_+\mathcal{O}_T(t)=w_{n(t)}$, with the convention that
+$w_\infty = 0$.
+
+Writing $i \defeq \sup\Big\{j\text{ s.t.
+} w_j\geq w_x\Big\}$, it is easy to see that
+$\partial_+\mathcal{O}_{T\cup\{x\}}\geq\partial_+\mathcal{O}_T$. Indeed:
+\begin{enumerate}
+ \item if $n(t)\leq i$ then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
+ = \partial_+\mathcal{O}_T(t)= w_{n(t)}$.
+ \item if $n(t)\geq i+1$ and $n(t-p_x)\leq i$ then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
+ = w_x\geq w_{n(t)}= \partial_+\mathcal{O}_T(t)$.
+ \item if $n(t-p_x)\geq i+1$, then $\partial_+\mathcal{O}_{T\cup\{x\}}
+ = w_{n(t-p_x)}\geq w_{n(t)}=\partial_+\mathcal{O}_T(t)$.
+\end{enumerate}
+
+Let us now consider $b$ and $c$ such that $b\leq c$. Then, using the integral
+representation of $\mathcal{O}(T\cup\{x\},\cdot)$ and $\mathcal{O}(T,\cdot)$, we get:
+\begin{multline*}
+ \mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{T\cup\{x\}}(t)dt\\
+ \geq\int_b^c\partial_+\mathcal{O}_T(t)dt=\mathcal{O}(T,c)-\mathcal{O}(T,b)
+\end{multline*}
+Re-ordering the terms, $\mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T,c)\geq
+\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)$
+which concludes the proof of the lemma.
+\end{proof}
+
+\vspace{0.5em}
+
+\begin{proof}[of Lemma~\ref{lemma:sub}]
+ Let $T\subseteq\neigh{X}$ and $x, y\in\neigh{X}\setminus T$. Using the
+ second-order characterization of submodular functions, it suffices to show
+ that:
+ \begin{displaymath}\label{eq:so}
+ \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)\geq
+ \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
+ \end{displaymath}
+
+ We distinguish two cases based on the relative position of $w_x$ and $w_y$.
+ The following notations will be useful: $S_T^x \defeq \big\{u\in
+ T\text{ s.t. }w_x\leq w_u\big\}$ and $P_T^x\defeq
+ T\setminus S_T^x$.
+
+ \textbf{Case 1:} If $w_y\geq w_x$, then one can
+ write:
+ \begin{gather*}
+ \mathcal{O}(T\cup\{y,x\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)+
+ \mathcal{O}(S_T^y\cup\{x\},b_2)\\
+ \mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)
+ + \mathcal{O}(S_T^y,b_2)
+ \end{gather*}
+ where $b_1$ is the fraction of the budget $b$ spent on $P_T^y\cup\{y\}$ and
+ $b_2=b-b_1$.
+
+ Similarly:
+ \begin{gather*}
+ \mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y\cup\{x\},c_2)\\
+ \mathcal{O}(T, b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y,c_2)
+ \end{gather*}
+ where $c_1$ is the fraction of the budget $b$ spent on $P_T^y$ and $c_2
+ = b - c_1$.
+
+ Note that $b_1\geq c_1$: an optimal solution will first spend as much
+ budget as possible on $P_T^y\cup\{y\}$ before adding elements in
+ $S_T^y\cup\{x\}$.
+
+ In this case:
\begin{displaymath}
- A(S) \geq \frac{1}{2}\left(1-\frac{1}{e}\right) OPT_{NA}
+ \begin{split}
+ \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)&=
+ \mathcal{O}(S_T^y\cup\{x\},c_2)-\mathcal{O}(S_T^y,c_2)\\
+ &\geq \mathcal{O}(S_T^y\cup\{x\},b_2)-\mathcal{O}(S_T^y,b_2)\\
+ & = \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
+ \end{split}
\end{displaymath}
- Finally, we conclude with Proposition~\ref{prop:gap}
+ where the inequality comes from Lemma~\ref{lemma:nd} and
+ $c_2\geq b_2$.
+
+ \textbf{Case 2:} If $w_x > w_y$, we now decompose
+ the solution on $P_T^x$ and $S_T^x$:
+ \begin{gather*}
+ \mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1)
+ + \mathcal{O}(S_T^x,b_2)\\
+ \mathcal{O}(T,b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x,c_2)\\
+ %\intertext{and}
+ \mathcal{O}(T\cup\{y, x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1)
+ + \mathcal{O}(S_T^x\cup\{y\},b_2)\\
+ \mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x\cup\{y\},c_2)
+ \end{gather*}
+ with $b_1+b_2=b$, $c_1+c_2=b$ and $b_2\leq c_2$.
+
+ In this case again:
+ \begin{multline*}
+ \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)=
+ \mathcal{O}(S_T^x,b_2)-\mathcal{O}(S_T^x,c_2)\\
+ \geq \mathcal{O}(S_T^x\cup\{y\},b_2)-\mathcal{O}(S_T^x\cup\{y\},c_2)\\
+ = \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
+ \end{multline*}
+ where the inequality uses Lemma~\ref{lemma:nd} and $c_2\geq b_2$.
+
+ In both cases, we were able to obtain the second-order characterization of submodularity. This concludes the proof of the lemma.
\end{proof}
+\vspace{0.5em}
+
+\begin{proof}[of Proposition~\ref{prop:sub}]
+ Let us consider $S$ and $T$ such that $S\subseteq T\subseteq X$ and $x\in
+ X\setminus T$. In particular, note that $\neigh{S}\subseteq\neigh{T}$.
+
+ Let us write $\neigh{S\cup\{x\}}=\neigh{S}\cup R$ with $\neigh{S}\cap
+ R=\emptyset$ and similarly, $\neigh{T\cup\{x\}}=\neigh{T}\cup R'$ with
+ $\neigh{T}\cap R'=\emptyset$. It is clear that $R'\subseteq R$. Writing $R'=\{u_1,\ldots,u_k\}$:
+ \begin{multline*}
+ \mathcal{O}(\neigh{T\cup\{x\}},b)- \mathcal{O}(\neigh{T},b)\\
+ =\sum_{i=1}^k\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_i\},b)
+ -\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_{i-1}\},b)\\
+ \leq \sum_{i=1}^k\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_i\},b)
+ -\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_{i-1}\},b)\\
+ =\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
+ \end{multline*}
+ where the inequality comes from the submodularity of $\mathcal{O}(\cdot,b)$ proved in Lemma~\ref{lemma:sub}. This same function is also obviously set-increasing, hence:
+ \begin{multline*}
+ \mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)\\
+ \leq \mathcal{O}(\neigh{S}\cup R,b)-\mathcal{O}(\neigh{S},b)\\
+ =\mathcal{O}(\neigh{S\cup\{x\}},b)-\mathcal{O}(\neigh{S},b)
+ \end{multline*}
+ This concludes the proof of the proposition.
+\end{proof}
+
+
+\begin{proof}[of Proposition~\ref{prop:main_result}]
+ We simply note that the content of the outer \textsf{for} loop on line 2 of Algorithm~\ref{alg:comb} is the greedy submodular maximization algorithm of \cite{nemhauser}. Since $\mathcal{O}(\neigh{\cdot}, t)$ is submodular (Proposition~\ref{prop:sub}), this solves the inner $\max$ in \eqref{eq:sub-mod} with an approximation ratio of $(1-1/e)$. The outer \textsf{for} loop then computes the outer $\max$ of \eqref{eq:sub-mod}.
+
+ As a consequence, Algorithm~\ref{alg:comb} computes a $(1-1/e)$-approximate non-adaptive solution. We conclude by applying Proposition~\ref{prop:cr}.
+\end{proof}
+
+\subsection{Parallelization}
+As discussed in the body of the paper, the algorithm can be parallelized across $k-1$ different machines, each one computing an approximation for a fixed budget $k-t$ in the first stage and $t$ in the second.
+A slightly more sophisticated approach is to consider only $\log n$ splits: $(1,k-1),(2,k-2),\ldots,(2^{\lfloor \log n \rfloor},1)$ and then select the best solution from this set. It is not hard to see that in comparison to the previous approach, this would reduce the approximation guarantee by a factor of at most 2: if the optimal solution is obtained by spending $t$ on the first stage and $k-t$ in the second stage, then since $t \leq 2\cdot2^{\lfloor \log t \rfloor}$ the solution computed for $(2^{\lfloor \log t \rfloor}, k - 2^{\lfloor \log t \rfloor})$ will have at least half that value.
+More generally, for any $\epsilon>0$ one can parallelize over $\log_{1+\epsilon}n$ machines at the cost of losing a factor of $(1+\epsilon)$ in the approximation guarantee.
+
+\subsection{Implementation in MapReduce}
+
+As noted in Section~\ref{sec:comb}, lines 4 to 7 of Algorithm~\ref{alg:comb}
+correspond to the greedy heuristic of \cite{nemhauser} applied to the
+submodular function $f_t(S) \defeq \mathcal{O}\big(\neigh{S}, t\big)$.
+A variant of this heuristic, namely the $\varepsilon$-greedy heuristic,
+combined with the \textsc{Sample\&Prune} method of \cite{mr} allows us to write
+a MapReduce version of Algorithm~\ref{alg:comb}. The resulting algorithm is
+described in Algorithm~\ref{alg:combmr}.
+
+\begin{algorithm}
+ \caption{Combinatorial algorithm, MapReduce}
+ \label{alg:combmr}
+ \algsetup{indent=2em}
+ \begin{algorithmic}[1]
+ \STATE $S\leftarrow \emptyset$
+ \FOR{$t=1$ \TO $k-1$}
+ \STATE $S_t\leftarrow \emptyset$
+ \FOR{$i=1$ \TO $\log_{1+\varepsilon}\Delta$}
+ \STATE $U\leftarrow X$, $S'\leftarrow \emptyset$
+ \WHILE{$|U|>0$}
+ \STATE $R\leftarrow$ sample from $U$ w.p. $\min\left(1,
+ \frac{\ell}{|U|}\right)$
+ \WHILE{$|R|>0$ \AND $|S_t\cup S'|< k$}
+ \STATE $x\leftarrow$ some element from $R$
+ \IF{$\nabla f_t(S_t\cup S', x)\geq\frac{\Delta}{(1+\varepsilon)^i}$}
+ \STATE $S'\leftarrow S'\cup\{x\}$
+ \ENDIF
+ \STATE $R\leftarrow R\setminus\{x\}$
+ \ENDWHILE
+ \STATE $S_t\leftarrow S_t\cup S'$
+ \STATE $U\leftarrow\{x\in U\,|\, \nabla f_t(S_t,
+ x)\geq\frac{\Delta}{(1+\varepsilon)^i}\}$
+ \ENDWHILE
+ \ENDFOR
+ \IF{$\mathcal{O}(\neigh{S_t},t)>\mathcal{O}(\neigh{S},k-|S|)$}
+ \STATE $S\leftarrow S_t$
+ \ENDIF
+ \ENDFOR
+ \RETURN $S$
+ \end{algorithmic}
+\end{algorithm}
+
+We denoted by $\nabla f_t(S, x)$ the marginal increment of $x$ to the set $S$
+for the function $f_t$, $\nabla f_t(S, x) = f_t(S\cup\{x\}) - f_t(S)$.
+$\Delta$ is an upper bound on the marginal contribution of any element. In our
+case, $\Delta = \max_{u\in\neigh{X}} w_u$ provides such an upper bound. The
+sampling in line 7 selects a small enough number of elements that the
+\texttt{while} loop from lines 8 to 14 can be executed on a single machine.
+Furthermore, lines 7 and 16 can be implemented in one round of MapReduce each.
+The approximation ratio of Algorithm~\ref{alg:combmr} is
+$1-\frac{1}{e}-\varepsilon$. The proof of this result as well as the optimal
+choice of $\ell$ follow from Theorem 10 in \cite{mr}.
diff --git a/paper/sections/combinatorial.tex b/paper/sections/combinatorial.tex
deleted file mode 100644
index b744564..0000000
--- a/paper/sections/combinatorial.tex
+++ /dev/null
@@ -1,299 +0,0 @@
-\subsection{Submodularity of the problem}
-Let $\mathcal{U} = \{(v_u,c_u);\, u\in\neigh{X}\}$ with $v_u=p_uw_u$ and
-$c_u=p_u$. Let $b\in\reals^+$ and $A\subseteq\mathcal{U}$, we denote by
-$\mathcal{O}(A,b)$ the optimal solution to:
-\begin{displaymath}
- \begin{split}
- &\max_{q\in[0,1]^n}\sum_{u\in\neigh{X}}q_uv_u\\
- &\text{s.t. }\sum_{u\in\neigh{X}}q_uc_u\leq b\text{ and } q_u=0\text{ if
- }u\notin A
-\end{split}
-\end{displaymath}
-so that the optimization problem \eqref{eq:relaxed-multi} for relaxed
-non-adaptive policies can be written:
-\begin{equation}\label{eq:sub}
- \begin{split}
- &\max_{S\subseteq X}\mathcal{O}\big(\neigh{S},k-|S|\big)\\
- &\text{s.t. }|S|\leq k
- \end{split}
-\end{equation}
-
-In this section, we will show how \eqref{eq:sub} can be reduced to a submodular
-maximization problem, leading to a fast combinatorial algorithm for the
-adaptive seeding model in the voter model.
-
-\begin{lemma}\label{lemma:nd}
- Let $A\subseteq\mathcal{U}$ and $x=(v,c)\in\mathcal{U}\setminus A$, then
- the function $b\mapsto \mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)$ is
- non-decreasing over $\reals^+$.
-\end{lemma}
-
-\begin{proof}
-\emph{W.l.o.g} we can rename and order the pairs in $A$ so that:
-\begin{displaymath}
-\frac{v_1}{c_1}\geq \frac{v_2}{c_2}\geq\ldots\geq\frac{v_m}{c_m}.
-\end{displaymath}
-Then, $\mathcal{O}(A,b)$ has the following simple piecewise linear expression:
-\begin{equation}\label{eq:pw}
- \mathcal{O}(A,b) =
- \begin{cases}
- b\frac{v_1}{c_1}&\text{if }0\leq b<c_1\\
- \sum_{k=1}^{i-1}v_k + \left(b-\sum_{k=1}^{i-1}c_k\right)\frac{v_i}{c_i}
- &\text{if } \sum_{k=1}^{i-1}c_k\leq b< \sum_{k=1}^ic_k\\
- \sum_{k=1}^m v_k&\text{if } b\geq\sum_{i=1}^m c_k
-
- \end{cases}
-\end{equation}
-
-Let us define for $t\in\reals^+$, $n(t)\defeq \inf\Big\{i\text{ s.t.
-} \sum_{k=1}^i c_k > t\Big \}$ with $n(t)=+\infty$ when the set is empty. In
-particular, note that $x\mapsto n(t)$ is non-decreasing. Denoting
-$\partial_+\mathcal{O}_A$ the right derivative of $\mathcal{O}(A,\cdot)$, one
-can write $\partial_+\mathcal{O}_A(t)
-=\frac{v_{n(t)}}{c_{n(t)}}$, with the convention that
-$\frac{v_\infty}{c_\infty} = 0$.
-
-Writing $i \defeq \sup\Big\{j\text{ s.t.
-} \frac{v_j}{c_j}\geq\frac{v}{c}\Big\}$, it is easy to see that
-$\partial_+\mathcal{O}_{A\cup\{x\}}\geq\partial_+\mathcal{O}_A$. Indeed:
-\begin{enumerate}
- \item if $n(t)\leq i$ then $\partial_+\mathcal{O}_{A\cup\{x\}}(t)
- = \partial_+\mathcal{O}_A(t)= \frac{v_{n(t)}}{c_{n(t)}}$.
- \item if $n(t)\geq i+1$ and $n(t-c)\leq i$ then $\partial_+\mathcal{O}_{A\cup\{x\}}(t)
- = \frac{v}{c}\geq\frac{v_{n(t)}}{c_{n(t)}}= \partial_+\mathcal{O}_A(t)$.
- \item if $n(t-c)\geq i+1$, then $\partial_+\mathcal{O}_{A\cup\{x\}}
- = \frac{v_{n(t-c)}}{c_{n(t-c)}}\geq
- \frac{v_{n(t)}}{c_{n(t)}}=\partial_+\mathcal{O}_A(t)$.
-\end{enumerate}
-
-Let us now consider $b$ and $c$ such that $b\leq c$. Then, using the integral
-representation of $\mathcal{O}(A\cup\{x\},\cdot)$ and $\mathcal{O}(A,\cdot)$, we get:
-\begin{multline*}
- \mathcal{O}(A\cup\{x\},c)-\mathcal{O}(A\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{A\cup\{x\}}(t)dt\\
- \geq\int_b^c\partial_+\mathcal{O}_A(t)dt=\mathcal{O}(A,c)-\mathcal{O}(A,b)
-\end{multline*}
-Re-ordering the terms, $\mathcal{O}(A\cup\{x\},c)-\mathcal{O}(A,c)\geq
-\mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)$
-which concludes the proof of the lemma.
-\end{proof}
-
-\begin{lemma}\label{lemma:sub}
- Let $b\in\reals^+$, then the function $A\mapsto \mathcal{O}(A,b)$ is submodular.
-\end{lemma}
-
-\begin{proof}
- Let $A\subseteq\mathcal{U}$ and $x, y\in\mathcal{U}\setminus A$. Using the
- second-order characterization of submodular functions, it suffices to show
- that:
- \begin{equation}\label{eq:so}
- \mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)\geq
- \mathcal{O}(A\cup\{y\}\cup\{x\},b)-\mathcal{O}(A\cup\{y\},b)
- \end{equation}
-
- Let us write $x=(v_x, c_x)$ and $y=(v_y,c_y)$. We distinguish two cases
- based on the relative position of $\frac{v_x}{c_x}$ and $\frac{v_y}{c_y}$.
- The following notations will be useful: $S_A^x \defeq \big\{(v,c)\in
- A\text{ s.t. }\frac{v_x}{c_x}\leq\frac{v}{c}\big\}$ and $P_A^x\defeq
- A\setminus S_A^x$.
-
- \textbf{Case 1:} If $\frac{v_y}{c_y}\geq \frac{v_x}{c_x}$, then one can
- write:
- \begin{gather*}
- \mathcal{O}(A\cup\{y\}\cup\{x\},b) = \mathcal{O}(P_A^y\cup\{y\},b_1)+
- \mathcal{O}(S_A^y\cup\{x\},b_2)\\
- \mathcal{O}(A\cup\{y\},b) = \mathcal{O}(P_A^y\cup\{y\},b_1)
- + \mathcal{O}(S_A^y,b_2)
- \end{gather*}
- where $b_1$ is the fraction of the budget $b$ spent on $P_A^y\cup\{y\}$ and
- $b_2=b-b_1$.
-
- Similarly:
- \begin{gather*}
- \mathcal{O}(A\cup\{x\},b) = \mathcal{O}(P_A^y, c_1) + \mathcal{O}(S_A^y\cup\{x\},c_2)\\
- \mathcal{O}(A, b) = \mathcal{O}(P_A^y, c_1) + \mathcal{O}(S_A^y,c_2)
- \end{gather*}
- where $c_1$ is the fraction of the budget $b$ spent on $P_A^y$ and $c_2
- = b - c_1$.
-
- Note that $b_1\geq c_1$: an optimal solution will first spent as much
- budget as possible on $P_A^y\cup\{y\}$ before adding elements in
- $S_A^y\cup\{x\}$. We can now rewrite \eqref{eq:so}:
- \begin{displaymath}
- \mathcal{O}(S_A^y\cup\{x\},c_2)+\mathcal{O}(S_A^y,c_2)\geq
- \mathcal{O}(S_A^y\cup\{x\},b_2)+\mathcal{O}(S_A^y,b_2)
- \end{displaymath}
- with $c_2\geq b_2$. This inequality is true by Lemma~\ref{lemma:nd}.
-
- \textbf{Case 2:} If $\frac{v_x}{c_x}\geq \frac{v_y}{c_y}$, we now decompose
- the solution on $P_A^x$ and $S_A^x$:
- \begin{gather*}
- \mathcal{O}(A\cup\{x\},b) = \mathcal{O}(P_A^x\cup\{x\},b_1)
- + \mathcal{O}(S_A^x,b_2)\\
- \mathcal{O}(A,b) = \mathcal{O}(P_A^x,c_1)+\mathcal{O}(S_A^x,c_2)
- \intertext{and}
- \mathcal{O}(A\cup\{y\}\cup\{x\},b) = \mathcal{O}(P_A^x\cup\{x\},b_1)
- + \mathcal{O}(S_A^x\cup\{y\},b_2)\\
- \mathcal{O}(A\cup\{y\},b) = \mathcal{O}(P_A^x,c_1)+\mathcal{O}(S_A^x\cup\{y\},c_2)
- \end{gather*}
- with $b_1+b_2=b$, $c_1+c_2=b$ and $b_2\leq c_2$. The equation \eqref{eq:so}
- can be rewritten:
- \begin{displaymath}
- \mathcal{O}(S_A^x,b_2)-\mathcal{O}(S_A^x,c_2)\geq
- \mathcal{O}(S_A^x\cup\{u\},b_2)-\mathcal{O}(S_A^x\cup\{y\},c_2)
- \end{displaymath}
- Reordering the terms, we get:
- \begin{displaymath}
- \mathcal{O}(S_A^x\cup\{y\},c_2) -\mathcal{O}(S_A^x,c_2)\geq
- \mathcal{O}(S_A^x\cup\{u\},b_2)-\mathcal{O}(S_A^x,b_2)
- \end{displaymath}
- with $c_2\geq b_2$. This is again true by Lemma~\ref{lemma:nd}.
-\end{proof}
-
-\begin{proposition}\label{prop:sub}
- Let $b\in\mathbf{R}^+$, then the function
- $S\mapsto\mathcal{O}(\neigh{S},b)$ is submodular over $2^X$.
-\end{proposition}
-
-\begin{proof}
- Let us consider $S$ and $T$ such that $S\subseteq T\subseteq X$ and $x\in
- X\setminus T$. In particular, note that $\neigh{S}\subseteq\neigh{T}$.
-
- Let us write $\neigh{S\cup\{x\}}=\neigh{S}\cup R$ with $\neigh{S}\cap
- R=\emptyset$ and similarly, $\neigh{T\cup\{x\}}=\neigh{T}\cup R'$ with
- $\neigh{T}\cap R'=\emptyset$. Using these notations, it is clear that
- $R'\subseteq R$. Let us enumerate the elements in $R'$ using an arbitrary
- order: $R'=\{u_1,\ldots,u_k\}$. Then one can write:
- \begin{displaymath}
- \begin{split}
- \mathcal{O}(\neigh{T\cup\{x\}},b)&=\mathcal{O}(\neigh{T}\cup R',b)\\
- &=\sum_{i=1}^k\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_i\},b)
- -\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_{i-1}\},b)\\
- &\leq \sum_{i=1}^k\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_i\},b)
- -\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_{i-1}\},b)\\
- &=\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
- \end{split}
- \end{displaymath}
- where the inequality comes from the submodularity of
- $A\mapsto\mathcal{O}(A,b)$ proved in Lemma~\ref{lemma:sub}. This same
- function is also obviously set-increasing, hence:
- \begin{displaymath}
- \begin{split}
- \mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
- &\leq \mathcal{O}(\neigh{S}\cup R,b)-\mathcal{O}(\neigh{S},b)\\
- &=\mathcal{O}(\neigh{S\cup\{x\}},b)-\mathcal{O}(\neigh{S},b)
- \end{split}
- \end{displaymath}
- This concludes the proof of the proposition.
-\end{proof}
-
-Using Proposition~\ref{prop:sub}, we can finally reduce \eqref{eq:sub} to
-a submodular optimization problem by treating $|S|$ as a variable to optimize.
-Specifically, \eqref{eq:sub} is equivalent to the following problem:
-\begin{equation}\label{eq:sub2}
-\begin{split}
- \max_{1\leq t\leq k}\max_{|S|\leq k-t}\mathcal{O}(\neigh{S},t)
-\end{split}
-\end{equation}
-
-\begin{algorithm}
- \caption{Combinatorial algorithm}
- \label{alg:comb}
- \algsetup{indent=2em}
- \begin{algorithmic}[1]
- \STATE $S\leftarrow \emptyset$
- \FOR{$t=1$ \TO $k-1$}
- \STATE $S_t\leftarrow \emptyset$
- \FOR{$i=1$ \TO $k-t$}
- \STATE $x^*\leftarrow\argmax_{x\in
- X\setminus S_t}\mathcal{O}(\neigh{S_t\cup\{x\}},t)
- -\mathcal{O}(\neigh{S_t},t)$\label{line:argmax}
- \STATE $S_t\leftarrow S_t\cup\{x^*\}$
- \ENDFOR
- \IF{$\mathcal{O}(\neigh{S_t},t)>\mathcal{O}(\neigh{S},k-|S|)$}
- \STATE $S\leftarrow S_t$
- \ENDIF
- \ENDFOR
- \RETURN $S$
- \end{algorithmic}
-\end{algorithm}
-
-\begin{proposition}
- Let $S$ be the set computed by Algorithm~\ref{alg:comb} and let us denote
- by $A(S)$ the value of the adaptive policy which selects $S$ on the first
- day. Then:
- \begin{displaymath}
- A(S) \geq \frac{1}{2}\left(1-\frac{1}{e}\right)OPT.
- \end{displaymath}
- Furthermore, Algorithm~\ref{alg:comb} runs in time $O\left(n\log
- n+k^2m\min(\frac{k}{p_\text{min}},n)\right)$, where $m=|X|$, $n=|\neigh{X}|$
- and $p_\text{min} =\min\{p_u,u\in\neigh{X}\}$.
-\end{proposition}
-
-\begin{proof}
- For each realization of the neighbors $R$, let us define by $X_R$ the
- random set which includes each $u\in R$ with probability $q_u$ on the
- second day and:
- \begin{displaymath}
- \tilde{X}_R=
- \begin{cases}
- X_R&\text{if } |X_R|\leq t\\
- \emptyset&\text{otherwise}
- \end{cases}
- \end{displaymath}
- where $t = k-|S|$. $\tilde{X}_R$ is a random variable over the feasible
- solutions on the second day. As a consequence:
- \begin{displaymath}
- A(S) \geq \sum_{R\subseteq\neigh{S}} p_R\mathbb{E}\big[f(\tilde{X}_R)\big]
- \end{displaymath}
- Let us define by $Y$ the random set which includes each $u\in\neigh{X}$
- with probability $p_uq_u$ and:
- \begin{displaymath}
- \tilde{Y}=
- \begin{cases}
- Y&\text{if } |Y|\leq t\\
- \emptyset&\text{otherwise}
- \end{cases}.
- \end{displaymath}
- It is easy to see that $\tilde{X}_R$ is the conditional expectation of
- $\tilde{Y}$ given
- $R$. Hence:
- \begin{displaymath}
- \sum_{R\subseteq\neigh{S}} p_R\mathbb{E}\big[f(\tilde{X}_R)\big]
- = \mathbb{E}\big[f(\tilde{Y})\big]
- \end{displaymath}
- But:
- \begin{align*}
- \mathbb{E}\big[f(\tilde{Y})\big]
- &=\sum_{T\subseteq\neigh{X}}\mathbb{P}\big[\tilde{Y}=T\big]f(T)
- = \sum_{\substack{T\subseteq\neigh{X}\\|T|\leq
- t}}\mathbb{P}\big[Y=T\big]f(T)\\
- &= \mathbb{E}\big[f(Y)\big]-\sum_{\substack{T\subseteq\neigh{X}\\|T|>
- t}}\mathbb{P}\big[Y=T\big]f(T)
- \end{align*}
- Roughly:
- \begin{displaymath}
- \sum_{\substack{T\subseteq\neigh{X}\\|T|>
- t}}\mathbb{P}\big[Y=T\big]f(T)\leq
- \frac{1}{2}\mathbb{E}\big[f(Y)\big]
- \end{displaymath}
- Combining the above inequalities we get:
- \begin{displaymath}
- A(S)\geq \frac{1}{2} \mathbb{E}\big[f(Y)\big]
- \end{displaymath}
- But $\mathbb{E}\big[f(Y)\big]$ is precisely the value of the non-adaptive
- solution computed by Algorithm~\ref{alg:comb} which is
- a $\left(1-\frac{1}{e}\right)$ approximation of the
- optimal non adaptive solution. Hence:
- \begin{displaymath}
- A(S) \geq \frac{1}{2}\left(1-\frac{1}{e}\right) OPT_{NA}
- \end{displaymath}
- Finally, we conclude with Proposition~\ref{prop:gap}
-\end{proof}
-
-\subsection{From expectation to high probability}
-
-\subsection{Questions}
-
-\begin{itemize}
- \item Can it be parallelized (map-reduce)?
-\end{itemize}
diff --git a/paper/sections/experiments.log b/paper/sections/experiments.log
deleted file mode 100644
index e69de29..0000000
--- a/paper/sections/experiments.log
+++ /dev/null
diff --git a/paper/sections/experiments.tex b/paper/sections/experiments.tex
index 1a085d1..201066a 100644
--- a/paper/sections/experiments.tex
+++ b/paper/sections/experiments.tex
@@ -1,14 +1,88 @@
-In this section, we give experimental evidence of the validity of our approach. Specifically, we show that our algorithms obtain significant improvement over standard influence maximization, that these improvements are observed in various scenarios (different verticals, different models of influence propagation), and that our approach is efficient in terms of running-time and scalable to large social networks.
+In this section we validate the adaptive seeding approach through experimentation.
+%In the sections above we showed how to design algorithms that have provable performance guarantees for adaptive seeding, which and even parallelizable.
+Specifically, we show that our algorithms for adaptive seeding obtain
+significant improvement over standard influence maximization, that these
+improvements are robust to changes in environment variables, and that our
+approach is efficient in terms of running-time and scalable to large social
+networks.
-\subsection{Data description}
-Our main testbed is the friendship graph of the social network service Facebook~\cite{facebook}. We start by observing that an efficient campaign run on a social network will primarily target users who have already expressed interests in the topic being promoted. In the adaptive seeding terminology, these users constitute the set of initial nodes that can be targeted during the seeding phase.
+\subsection{Experimental setup}
+We tested our algorithms on three types of datasets. Each of them allows us to
+experiment on a different aspect of the adaptive seeding problem. The Facebook
+Pages dataset that we collected ourselves has a central place in our
+experiments since it is the one which is closest to actual applications of
+adaptive seeding.
-In order to reproduce this process, we selected ten different verticals (topics) and for each of these verticals, a person, an institution or an entity whose associated Facebook page is regularly used for promotional posts related to this topic. On each of these pages, we selected a recent post (posted no later than January 2014) with approximately 1,000 \emph{likes}. The set of users who liked those posts constitute our initial sets of nodes.
+\textbf{Synthetic networks.} Using standard models of social networks we
+generated large-scale graphs to model the social network. To emulate the
+process of users following a topic (the core set $X$) we sampled subsets of
+nodes at random, and applied our algorithms on the sample and their neighbors.
+The main advantage of these data sets is that they allow us to generate graphs
+of arbitrary sizes and experiment with various parameters that govern the
+structure of the graph. The disadvantages are that users who follow a topic
+are not necessarily random samples, and that social networks often have
+structural properties that are not captured in generative models.
-We then crawled the social network of these sets: for each user, we collected her list of friends, and the degrees (number of friends) of these friends. Table~\ref{tab:data} summarizes statistics about the collected data.
+\textbf{Real networks.} We used publicly available data sets of real social
+networks available at \cite{snapnets}. As for synthetic networks, we used
+a random sample of nodes to emulate users who follow a topic, which is the main
+disadvantage of this approach. The advantage however, is that such datasets
+contain an entire network which allows testing different propagation
+parameters.
+
+\textbf{Facebook Pages.} We collected data from several Facebook Pages, each
+associated with a commercial entity that uses the Facebook page to communicate
+with its followers. For each page, we selected a post and then collected data
+about the users who expressed interest (``liked'') the post and their friends.
+The advantage of this data set is that it is highly representative of the
+scenario we study here. Campaigns run on a social network will primarily target
+users who have already expressed interests in the topic being promoted. The
+main disadvantage of this method is that such data is extremely difficult to
+collect due to the crawling restrictions that Facebook applies and gives us
+only the 2-hop neighborhood around a post. This makes it difficult to
+experiment with different propagation parameters. Fortunately, as we soon
+discuss, we were able to circumvent some of the crawling restrictions and
+collect large networks, and the properties of the voter influence model are
+such that these datasets suffice to accurately account for influence
+propagation in the graph.\newline
-\begin{table}[h!]
+
+\begin{figure}[t]
+ \centering
+ \includegraphics[width=0.4\textwidth]{images/para.pdf}
+ \caption{Comparison of the average degree of the core set users and the
+ average degree of their friends.}
+ \label{fig:paradox}
+ \vspace{-10pt}
+\end{figure}
+
+\noindent\textbf{Data collection.}
+We selected Facebook Pages in different verticals (topics). Each page is
+operated by an institution or an entity whose associated Facebook Page is
+regularly used for promotional posts related to this topic. On each of these
+pages, we selected a recent post (posted no later than January 2014) with
+approximately 1,000 \emph{likes}. The set of users who liked those posts
+constitute our core set. We then crawled the social network of
+these sets: for each user, we collected her list of friends, and the degrees
+(number of friends) of these friends.\newline
+
+
+\noindent\textbf{Data description.} Among the several verticals we collected,
+we selected eight of them for which we will report our results. We obtained
+similar results for the other ones. Table~\ref{tab:data} summarizes statistics
+about the selected verticals. We note that depending on the privacy settings
+of the core set users, it was not always possible to access their list of
+friends. We decided to remove these users since their ability to spread
+information could not be readily determined. This effect, combined with various
+errors encountered during the data collection, accounts for an approximate 15\%
+reduction between the users who liked a post and the number of users in the
+datasets we used. Following our discussion in the introduction, we observe
+that on average, the degrees of core set users are much lower than the degrees of
+their friends. This is highlighted on Figure~\ref{fig:paradox} and justifies
+our approach.
+
+\begin{table}[t]
\small
\centering
\setlength{\tabcolsep}{3pt}
@@ -18,104 +92,403 @@ We then crawled the social network of these sets: for each user, we collected he
\midrule
Charity & Kiva & 978 & 131334 \\%& 134.29 & 1036.26\\
Travel & Lonely Planet & 753 & 113250 \\%& 150.40 & 898.50\\
- Public Action & LaManifPourTous & 1041 & 97959 \\%& 94.10 & 722.02\\
+ %Public Action & LaManifPourTous & 1041 & 97959 \\%& 94.10 & 722.02\\
Fashion & GAP & 996 & 115524 \\%& 115.99 & 681.98\\
Events & Coachella & 826 & 102291 \\%& 123.84 & 870.16\\
Politics & Green Party & 1044 & 83490 \\%& 79.97 & 1053.25\\
Technology & Google Nexus & 895 & 137995 \\%& 154.19 & 827.84\\
News & The New York Times & 894 & 156222 \\%& 174.74 & 1033.94 \\
- Consumption & Peet's & 776 & 56268 \\%& 72.51 & 520.47\\
+ %Consumption & Peet's & 776 & 56268 \\%& 72.51 & 520.47\\
Entertainment & HBO & 828 & 108699 \\%& 131.28 & 924.09\\
\bottomrule
\end{tabular}
-\caption{Dataset statistics. $m$: number of initial users, $n$: number of friends of the initial users.}
+\caption{Dataset statistics. $m$: number of users in the core set, $n$: number
+of friends of core set users.}
%$S$: avg. degree of an initial user, $F$: avg. degree of a friend of an initial user.}
\label{tab:data}
+ \vspace{-10pt}
\end{table}
-We note that depending on the privacy settings of the initial users, it was not always possible to access their list of friends. We decided to remove these users since their ability to spread information could not be readily determined. This effect, combined with various errors encountered during the data collection, accounts for an approximate 15\% reduction between the users who liked a post and the number of users in the datasets we used.
-
-\begin{figure}[t!]
- \centering
- \includegraphics[scale=0.7]{images/para.pdf}
- \caption{Comparison of the average degree of the initial users and the average degree of their friends.}
- \label{fig:para}
-\end{figure}
-
-Following our discussion in the introduction, we observe that on average, the degrees of initial users is much lower than the degrees of their friends. This is highlighted on Figure~\ref{fig:para} and justifies our approach.
-
\subsection{Performance of Adaptive Seeding}
-\label{sec:performance}
+\label{sec:performance} For a given problem instance with a budget of $k$ we
+applied the adaptive seeding algorithm (the combinatorial version). Recall from
+Section~\ref{sec:model} that performance is defined as the expected influence
+that the seeder can obtain by optimally selecting users on the second stage,
+where \emph{influence} is defined as the sum of the degrees of the selected
+users. We tested our algorithm against the following benchmarks:
-We start by comparing the performance of adaptive seeding to standard influence maximization approaches. The following heuristics were considered:
\begin{itemize}
- \item \emph{Max degree:} selecting as many initial users as the budget permits, in decreasing order of degrees.
- \item \emph{Random:} selecting a random sample of initial users of the size permitted by the budget.
- \item \emph{Random friend:} spending half of the budget to select a random sample of initial users. For each of the selected random users, we then select randomly one of her friend (hence spending the total budget overall).
+ \item \emph{Random Node} (\textsf{RN}): we randomly select $k$ users from
+ the core set. This is a typical benchmark in comparing influence
+ maximization algorithms~\cite{KKT03}.
+ \item \emph{Influence Maximization} (\textsf{IM}): we apply the optimal
+ influence maximization algorithm on the core set. This is the naive
+ application of influence maximization. For the voter model, when the
+ propagation time is polynomially large in the network size, the optimal
+ solution is to simply take the $k$ highest degree nodes~\cite{even-dar}.
+ We study the case of bounded time horizons in Section~\ref{sec:inf}.
+ \item \emph{Random Friend} (\textsf{RF}): we implement a naive two-stage approach:
+ randomly select $k/2$ nodes from the core set, and for each
+ node select a random neighbor (hence spending the budget of $k$ rewards
+ overall). This method was recently shown to outperform standard
+ influence maximization when the core set is random~\cite{LS13}.
\end{itemize}
-\begin{figure*}[p]
- \centerline{\includegraphics{images/perf.pdf}}
- \caption{Performance of adaptive seeding compared to other influence maximization approaches. The influence, on the $y$-axis is displayed in logarithmic scale and is measured by the sum of the degrees of the selected users~\eqref{eq:voter}. The budget is represented on the $x$-axis as a fraction of the seed size.}
+
+\begin{figure*}[t]
+ \centerline{\includegraphics[width=0.99\textwidth]{images/perf2.pdf}}
+ \vspace{-5pt}
+ \caption{\small{Performance of adaptive seeding compared to other influence
+ maximization approaches. The horizontal axis represents the budget used as
+a fraction of the size of the core set. The vertical axis is the
+expected influence reachable by optimally selecting nodes on the second stage.}}
\label{fig:performance}
+ \vspace{-10pt}
\end{figure*}
-Figure~\ref{fig:performance} compares the performance of \emph{Adaptive seeding}, our own approach, to the afore-mentioned approaches for all the verticals we collected. For this figure, the probability of initial users' friends joining on the second day was set to one. Section~\ref{sec:robustness} explores other influence propagation scenarios.
+\subsection{Performance on Facebook Pages} Figure~\ref{fig:performance}
+compares the performance of \emph{adaptive seeding}, our own approach, to the
+afore-mentioned approaches for all the verticals we collected. In this first
+experiment we made simplifying assumptions about the parameters of the model.
+The first assumption is that all probabilities in the adaptive seeding model
+are equal to one. This implicitly assumes that every friend of a user who
+followed a certain topic is interested in promoting the topic given a reward.
+Although this is a strong assumption that we will revisit, we note that the
+probabilities can be controlled to some extent by the social networking service
+on which the campaign is being run by showing prominently the campaign material
+(sponsored links, fund-raising banners, etc.). The second assumption is that
+the measure of influence is the sum of the degrees of the selected set. This
+measure is an appealing proxy as it is known that in the voter model, after
+polynomially many time steps, the influence of each node is proportional to its
+degree with high probability~\cite{even-dar}. Since the influence process
+cannot be controlled by the designer, the assumption is often that the
+influence process runs until it stabilizes (in linear thresholds and
+independent cascades for example, the process terminates after a linear number
+of steps~\cite{KKT03}). We perform a set of experiments for different time
+horizons in Section~\ref{sec:inf}.
+
+
-We note that the \emph{Random friend} heuristic significantly outperforms \emph{Max degree}. Using the same budget, the degree gain induced by moving from initial users to their friends is such that selecting at random among the initial users' friends already does better than the best heuristic restricted only on initial users. Using \emph{Adaptive seeding} to optimize the choice of initial users based on their friends' degrees then results in an order of magnitude increase over \emph{Random friend}, consistently for all the verticals.
+%One can conside
+%
+%In the next sections we perform tests with different parameters of probabilities and time horizons. We
+%
+%\begin{itemize}
+%\item In the first experiment represented the probability of initial users'
+%friends joining on the second day was set to one, i.e. it assumes that every friend of a user who followed a certain topic is interested in promoting the topic given a reward.
+%In the following sections we analyze cases when the probabilities vary.
+%We note however that this assumption is not unrealistic since the probabilities can be controlled to some extent by the social networking service on which the campaign is being run. By showing prominently the campaign
+%material (sponsored links, fund-raising banners, etc.).
+%%, the conversion rate can be increased beyond what would happen via regular word-of-mouth propagation.
+%
+%\item
+%The measure of influence reported in this experiment, is the sum of the degrees of the selected set.
+%This measure is an appealing proxy due to known results about influence maximization. It is shown in~\cite{ES08} that in the voter model, after polynomially many time steps the influence of each node is proportional to its degree, with high probability. Since the influence process cannot be controlled by the designer, the assumption is often that the influence process runs until it stabilizes (in linear thresholds and independent cascades for example, the process terminates after a linear number of steps~\cite{KKT03}). We perform a set of experiments for different time horizons in the following section.
+%\end{itemize}
-We also compare the relative improvements of \emph{Adaptive seeding} over \emph{Max degree} across the different verticals. The results are shown in Figure~\ref{fig:compare}.
-\begin{figure}[ht!]
- \centerline{ \includegraphics[scale=0.7]{images/comp.pdf} }
+%\noindent\textbf{Performance.}
+It is striking to see how well adaptive seeding does in comparison to other
+methods. Even when using a small budget (0.1 fraction of the core set, which
+in these cases is about 100 nodes), adaptive seeding improves influence by
+a factor of at least 10, across all verticals. To confirm this, we plot the
+relative improvements of \emph{adaptive seeding} over \textsf{IM} in aggregate
+over the different pages. The results are shown in Figure~\ref{fig:compare}.
+This dramatic improvement is largely due to the friendship paradox phenomenon
+that adaptive seeding leverages. Returning to Figure~\ref{fig:performance}, it
+is also interesting to note that the \textsf{RF} heuristic significantly
+outperforms the standard \textsf{IM} benchmark. Using the same budget, the
+degree gain induced by moving from the core set to its neighborhood is such
+that selecting at random among the core set users' friends already does better
+than the best heuristic restricted only on the core set. Using \emph{adaptive
+seeding} to optimize the choice of core set users based on their friends'
+degrees then results in an order of magnitude increase over \textsf{RF},
+consistently for all the pages.\newline
+
+\begin{figure}[t]
+ \vspace{-10pt}
+ \centerline{ \includegraphics[width=0.4\textwidth]{images/comp2.pdf} }
\vspace{-10pt}
- \caption{Ratio of the performance of adaptive seeding to max. degree for several verticals.}
+ \caption{Ratio of the performance of adaptive seeding to \textsf{IM}. Bars represent the mean improvement across all verticals, and the ``error bar'' represents the range of improvement across verticals.}
\label{fig:compare}
+ \vspace{-15pt}
\end{figure}
-\subsection{Robustness to spread of influence frictions}
+\subsection{The Effect of the Probabilistic Model}
\label{sec:robustness}
-The results presented in Section~\ref{sec:performance} were computed assuming a probability of propagation between the two stages of adaptive seeding equal to one. Estimating this probability is a research problem on its own; however, we note that it can be controlled to some extent by the social networking service on which the campaign is being run. By showing prominently the campaign material (sponsored links, fund-raising banners, etc.), the conversion rate can be increased beyond what would happen via regular word-of-mouth propagation.
+The results presented in Section~\ref{sec:performance} were computed assuming
+the probabilities in the adaptive seeding model are one. We now describe
+several experiments we performed with the Facebook Pages data set that test the
+advantages of adaptive seeding under different probability models.\newline
+
+%Estimating this probability is a research problem on its own; however,
+%we note that it can be controlled to some extent by the social networking
+%service on which the campaign is being run. By showing prominently the campaign
+%material (sponsored links, fund-raising banners, etc.), the conversion rate can
+%be increased beyond what would happen via regular word-of-mouth propagation.
+%\newline
-Figure~\ref{fig:prob} shows the impact of the probability of propagation between the two stages. For several values of $p$, we computed the performance of \emph{Adaptive seeding} when each friend of a seeded initial user joins during the second stage independently with probability $p$. We see that even with $p=0.01$, \emph{Adaptive seeding} still outperforms \emph{Max degree}. As $p$ increases, the performance of \emph{Adaptive seeding} quickly increases and reaches $80\%$ of the values of Figure~\ref{fig:performance} at $p=0.5$.
+\noindent\textbf{Impact of the Bernoulli parameter.}
+Figure~\ref{fig:prob} shows the impact of the probability of nodes realizing in
+the second stage. We computed the performance of \emph{adaptive seeding} when
+each friend of a seeded user in the core set joins during the second stage
+independently with probability $p$, using different values of $p$. We call $p$
+the \emph{Bernoulli} parameter, since the event that a given user joins on the
+second stage of adaptive seeding is governed by a Bernoulli variable of
+parameter $p$. We see that even with $p=0.01$, \emph{adaptive seeding} still
+outperforms \textsf{IM}. As $p$ increases, the performance of \emph{adaptive
+seeding} quickly increases and reaches $80\%$ of the values of
+Figure~\ref{fig:performance} at $p=0.5$.\newline
-\begin{figure}[ht!]
+\begin{figure}[t]
\begin{subfigure}[t]{0.23\textwidth}
- \includegraphics[scale=0.45]{images/prob.pdf}
+ \includegraphics[scale=0.48]{images/prob.pdf}
\vspace{-10pt}
\caption{}
\label{fig:prob}
\end{subfigure}
+\hspace{1pt}
\begin{subfigure}[t]{0.23\textwidth}
- \includegraphics[scale=0.45]{images/hbo_likes.pdf}
- \vspace{-10pt}
+ \includegraphics[scale=0.48]{images/hbo_likes.pdf}
\caption{}
\label{fig:killer}
\end{subfigure}
- \caption{(a) Performance of Adaptive seeding in logarithmic scale for various propagation probabilities. (b) In logarithmic scale, performance of \emph{Adaptive seeding} when restricted to users who \emph{liked} HBO (\textsf{Adapt. seed. (rest.)}), compared to \emph{Max degree} and the unrestricted \emph{Adaptive seeding}.}
+ \vspace{-5pt}
+ \caption{\small{(a) Performance of adaptive seeding for various propagation
+ probabilities. (b) Performance of \emph{adaptive seeding} when restricted
+ to the subgraph of users who \emph{liked} HBO (red line).}}
+ \vspace{-20pt}
\end{figure}
-In practice, the propagation probability will vary among individuals. However, for those who have already expressed interest in the promoted content, we can expect this probability to be close to one. For our next experiment, we chose a vertical (HBO) and trimmed the social graph we collected by only keeping on the second stage users who indicated this vertical (HBO) in their list of interests. Figure~\ref{fig:killer} shows that even on this very restricted set of users, \emph{Adaptive seeding} still outperforms \emph{Max degree} and reaches approximately $50\%$ of the unrestricted adaptive seeding.
+\noindent\textbf{Coarse estimation of probabilities.}
+In practice, the probability that a user is interested in promoting a campaign
+her friend is promoting may vary. However, for those who have already expressed
+interest in the promoted content, we can expect this probability to be close to
+one. We therefore conducted the following experiment. We chose a page (HBO)
+and trimmed the social graph we collected by only keeping on the second stage
+users who indicated this page (HBO) in their list of interests. This is
+a coarse estimation of the probabilities as it assumes that if a friend follows
+HBO she will be willing to promote with probability 1 (given a reward), and
+otherwise the probability of her promoting anything for HBO is 0.
+Figure~\ref{fig:killer} shows that even on this very restricted set of users,
+\emph{adaptive seeding} still outperforms \textsf{IM} and reaches approximately
+$50\%$ of the unrestricted adaptive seeding.\newline
-%\item Fix budget; plot the number of people that tweeted coffee between 9am to 10am, between 10am to 11am, between 11am to noon,...,
-%\item Argue: if the probability for a friend joining is small, then as long as each person has a few high degree friends, we will get a constant fraction of high degree friends joining, in expectation.
-%\item Experiment only on friends who also followed the topic: go to a vertical with a lot of followers (but hopefully number of ``likes'' are under 1000) and perform the adaptive seeding experiment only on people who also follow that vertical. Should roughly be 3x requests as other experiments.
-%\end{itemize}
+\noindent\textbf{Impact of the probability distribution.} In order to test
+scenarios where users have a rich spectrum of probabilities of realizing on the
+second stage, we consider a setting where the Bernoulli parameter $p$ is drawn
+from a distribution. We considered four different distributions; for each
+distribution and for fixed values of the budget and the parameter $p$, we tuned
+the parameters of the distribution so that its mean is exactly $p$. We then
+plotted the performance as a function of the budget and the mean $p$.
+
+For the Beta distribution, we fixed $\beta=5$ and tuned the $\alpha$ parameter
+to obtain a mean of $p$, thus obtaining a unimodal distribution. For the normal
+distribution, we chose a standard deviation of $0.01$ to obtain a distribution
+more concentrated around its mean than the Beta distribution. Finally, for the
+inverse degree distribution, we took the probability of a node joining on
+the second stage to be proportional to the inverse of its degree (scaled so that on
+average, nodes join with probability $p$). The results are shown in
+Figure~\ref{fig:bernouilli}.
+
+We observe that the results are comparable to those we obtained in the
+uniform case in Figure~\ref{fig:prob}, except in the case of the inverse degree
+distribution for which the performance is roughly halved. Remember that the
+value of a user $v$ on the second stage of adaptive seeding is given by $p_v
+d_v$ where $d_v$ is its degree and $p_v$ is its probability of realizing on
+the second stage. Choosing $p_v$ to be proportional to ${1}/{d_v}$ has the
+effect of normalizing the nodes on the second stage and is a strong
+perturbation of the original degree distribution of the nodes available on the
+second stage.
+
+\begin{figure*}[t!]
+\centering
+\begin{subfigure}[b]{0.25\textwidth}
+\includegraphics[width=\textwidth]{images/beta.pdf}
+\caption{Beta distribution}
+\end{subfigure}
+\begin{subfigure}[b]{0.25\textwidth}
+\includegraphics[width=\textwidth]{images/gauss.pdf}
+\caption{Normal Distribution}
+\end{subfigure}
+\begin{subfigure}[b]{0.24\textwidth}
+\includegraphics[width=\textwidth]{images/power.pdf}
+\caption{Power-law distribution}
+\end{subfigure}
+\begin{subfigure}[b]{0.24\textwidth}
+\includegraphics[width=\textwidth]{images/deg.pdf}
+\caption{Inverse degree}
+\end{subfigure}
+\caption{Performance of adaptive seeding as a function of the budget and the
+mean of the distribution from which the Bernoulli parameters are drawn. The
+details of the parameters for each distribution can be found in
+Section~\ref{sec:robustness}.}
+\label{fig:bernouilli}
+\vspace{-10pt}
+\end{figure*}
+
+\subsection{Impact of the Influence Model}
+\label{sec:inf}
+
+The Facebook Pages data set we collected is limited in that we only have access
+to the 2-hop neighborhood around the seed users and we use the degree of the
+second stage users as a proxy for their influence. As proved in
+\cite{even-dar}, in the voter model, the influence of nodes converges to their
+degree with high probability when the number of time steps becomes polynomially
+large in the network size.
+
+\begin{figure}
+ \centering
+ \includegraphics[width=0.48\textwidth]{images/voter.pdf}
+ \vspace{-20pt}
+ \caption{Performance of adaptive seeding compared to \textsf{IM} for the voter
+ influence model with $t$ steps.}
+ \vspace{-10pt}
+ \label{fig:voter}
+\end{figure}
+
+In order to analyze the expected number of nodes influenced according to the
+voter model that terminates after some fixed number of time steps, we use
+publicly available data sets from \cite{snapnets} where the entire network is
+at our disposal. As discussed above, we sample nodes uniformly at random to
+model the core set. We then run the voter model for $t$ time steps
+to compute the influence of the second stage users. Figure~\ref{fig:voter}
+shows the performance of adaptive seeding as a function of $t$ compared to the
+performance of the \textsf{IM} benchmark. In this experiment, the budget was
+set to half the size of the core set.
+
+We see that the performance of adaptive seeding quickly converges (5 time steps
+for \textsf{Slashdot}, 15 time steps for \textsf{Epinions}). In practice, the
+voter model converges much faster than the theoretical guarantee of
+\cite{even-dar}, which justifies using the degree of the second stage users as
+measure of influence as we did for the Facebook Pages data sets.
+Furthermore, we see that similarly to the Facebook data sets, adaptive seeding
+significantly outperforms \textsf{IM}.
+
+
+\subsection{Performance on Synthetic Networks}
+
+In order to analyze the impact of topological variations we generated synthetic
+graphs using standard network models. All the generated graphs have $100,000$
+vertices; for each model, we tuned the generative parameters to obtain, when
+possible, a degree distribution (or graph density otherwise) similar to what we
+observed in the Facebook Pages data sets.
+
+\begin{itemize}
+ \item \emph{Barabási-Albert:} this well-known model is often used to model
+ social graphs because its degree distribution is a power law. We took
+ 10 initial vertices and added 10 vertices at each step, using the
+ preferential attachment model, until we reached 100,000 vertices.
+ \item \emph{Small-World:} also known as the Watts-Strogatz model. This
+ model was one of the first models proposed for social networks. Its
+ diameter and clustering coefficient are more representative of a social
+ network than what one would get with the Erdős–Rényi model. We started
+ from a regular lattice of degree 200 and rewired each edge with
+ probability 0.3.
+ \item \emph{Kronecker:} Kronecker graphs were more recently introduced in
+ \cite{kronecker} as a scalable and easy-to-fit model for social
+ networks. We started from a star graph with 4 vertices and computed
+ Kronecker products until we reached 100,000 nodes.
+ \item \emph{Configuration model:} The configuration model allows us to
+ construct a graph with a given degree distribution. We chose a page
+ (GAP) and generated a graph with the same degree distribution using the
+ configuration model.
+\end{itemize}
+The performance of adaptive seeding compared to our benchmarks can be found in
+Figure~\ref{fig:synth}. We note that the improvement obtained by adaptive
+seeding is comparable to the one we had on real data except for the
+\emph{Small-World} model. This is explained by the nature of the model:
+starting from a regular lattice, some edges are re-wired at random. This model
+has similar properties to a random graph where the friendship paradox does not
+hold~\cite{LS13}. Since adaptive seeding is designed to leverage the friendship
+paradox, such graphs are not amenable to this approach.
+
+\begin{figure}[t]
+ \centering
+ \includegraphics[width=0.48\textwidth]{images/perf_synth.pdf}
+ \vspace{-15pt}
+ \caption{Performance of adaptive seeding on synthetic networks.}
+ \label{fig:synth}
+ \vspace{-10pt}
+\end{figure}
\subsection{Scalability}\label{sec:scalability}
+To test the scalability of adaptive seeding we were guided by two central
+questions. First, we were interested in the benefit our non-sampling
+approach has over the standard SAA method. Second, we wanted to understand
+when one should prefer to use the LP-based approach from Section~\ref{sec:lp}
+over the combinatorial one from Section~\ref{sec:comb}. The computations in
+this section were run on Intel Core i5 CPU 4x2.40Ghz. For each computation, we
+plot the time and number of CPU cycles it took.\newline
+
+\noindent\textbf{Comparison with SAA.} The objective function of the
+non-adaptive problem \eqref{eq:relaxed} is an expectation over exponentially
+many sets, all possible realizations of the neighbors in the second stage.
+Following the sampling-based approach introduced in \cite{singer}, this
+expectation can be computed by averaging the values obtained in
+$O\left(n^2\right)$ independent sample realizations of the second stage users
+($n$ is the number of neighbors of core set users). One important aspect of
+the algorithms introduced in this paper is that in the additive case, this
+expectation can be computed exactly without sampling, thus significantly
+improving the theoretical complexity.
-Finally, we look at the scalability of our approaches. More specifically, we compare the running time of the LP-based approach and the combinatorial approach for different instance sizes.
+In Figure~\ref{fig:sampling}, we compare the running time of our combinatorial
+algorithm to the same algorithm where the expectation is computed via sampling.
+We note that this sampling-based algorithm is still simpler than the algorithm
+introduced in \cite{singer} for general influence models. However, we observe
+a significant gap between its running time and the one of the combinatorial
+algorithm. Since each sample takes linear time to compute, this gap is in fact
+$O(n^3)$, quickly leading to impracticable running times as the size of the
+graph increases. This highlights the importance of the \emph{sans-sampling}
+approach underlying the algorithms we introduced.\newline
-Figure~\ref{fig:time} shows the running time and number of CPU cycles used by the LP algorithm and the combinatorial algorithm as a function of the network size $n$. The varying size of the network was obtained by randomly sampling a varying fraction of initial users and then trimming the social graph by only keeping friends of this random sample on the second stage. The computations were run on Intel Core i5 CPU 4x2.40Ghz. The LP solver used was CLP~\cite{clp}.
+\begin{figure}[t]
+ \centerline{ \includegraphics[width=0.48\textwidth]{images/sampling.pdf} }
+ \vspace{-10pt}
+ \caption{Running time and number of CPU cycles used by the sampling-based
+ algorithm and the combinatorial adaptive seeding algorithm for different
+sizes of the core set.}
+ \label{fig:sampling}
+ \vspace{-15pt}
+\end{figure}
+
+\noindent\textbf{Combinatorial vs. LP algorithm.}
+We now compare the running time of the LP-based approach and the combinatorial
+approach for different instance sizes.
+
+Figure~\ref{fig:time} shows the running time and number of CPU cycles used by
+the LP algorithm and the combinatorial algorithm as a function of the network
+size $n$. The varying size of the network was obtained by randomly sampling
+a varying fraction of core set users and then trimming the social graph by only
+keeping friends of this random sample on the second stage. The LP solver used
+was CLP~\cite{clp}.
-\begin{figure}[h!]
+\begin{figure}[t]
\centerline{ \includegraphics[scale=0.9]{images/time.pdf} }
\vspace{-10pt}
\caption{Running time and number of CPU cycles of the combinatorial algorithm and the LP algorithm as a function of the number of nodes $n$. First row with budget $k=100$, second row with budget $k=500$.}
\label{fig:time}
+ \vspace{-10pt}
\end{figure}
-We observe that for a \emph{small} value of the budget $k$ ($k=100$, first row of Figure~\ref{fig:time}), the combinatorial algorithm outperforms the LP algorithm. When $k$ becomes \emph{large} ($k=500$, second row of Figure~\ref{fig:time}), the LP algorithm becomes faster. This can be explained by the $k^2$ factor in the running time guarantee of the combinatorial algorithm (see Proposition~\ref{prop:running_time}). Even though the asymptotic $n\log n$ guarantee of the combinatorial algorithm should theoretically outperform the LP-based approach for large $n$, we were not able to observe it for our instance sizes. In practice, one can choose which of the two algorithms to apply depending on the relative sizes of $k$ and $n$.
+We observe that for a \emph{small} value of the budget $k$ (first row
+of Figure~\ref{fig:time}), the combinatorial algorithm outperforms the LP
+algorithm. When $k$ becomes \emph{large} (second row of
+Figure~\ref{fig:time}), the LP algorithm becomes faster. This can be explained
+by the $k^2$ factor in the running time of the combinatorial
+algorithm (see Proposition~\ref{prop:running_time}). Even though the asymptotic
+guarantee of the combinatorial algorithm should theoretically
+outperform the LP-based approach for large $n$, we were not able to observe it
+for our instance sizes. In practice, one can choose which of the two algorithms
+to apply depending on the relative sizes of $k$ and $n$.\newline
+
+
+%\begin{figure}
+% \centerline{ \includegraphics[width=0.47\textwidth]{images/sampling.pdf} }
+% \vspace{-10pt}
+% \caption{}
+% \label{fig:sampling}
+%\end{figure}
%\noindent \textbf{Others}
%\begin{itemize}
diff --git a/paper/sections/introduction.log b/paper/sections/introduction.log
deleted file mode 100644
index e69de29..0000000
--- a/paper/sections/introduction.log
+++ /dev/null
diff --git a/paper/sections/introduction.tex b/paper/sections/introduction.tex
index 6c2e345..bd99791 100644
--- a/paper/sections/introduction.tex
+++ b/paper/sections/introduction.tex
@@ -5,51 +5,226 @@
%
%
%Effective means to spread information through this platform are continuously being developed, and in particular methods for s
-The massive adoption of social networking services in recent years creates a unique platform for promoting ideas and spreading information. The joy and ease of communicating through online social networks leaves traces of behavioral data which allow observing, predicting and even engineering processes of information diffusion. First posed by Domingos and Richardson~\cite{DR01,RD02} and elegantly formulated and further developed by Kempe, Kleinberg, and Tardos~\cite{KKT03}, \emph{influence maximization} is the algorithmic challenge of selecting individuals who can serve as early adopters of a new idea, product, or technology in a manner that will trigger a large cascade in the social network. Since its inception, numerous techniques and improvements have been developed ranging from sophisticated predictive models of influence ~\cite{LAH06,RLK10,BHMW11,ACKS13,manuel2013icml,du13nips} to fast approximation methods~\cite{LKGFVG07,MR07,C08,KDD11,borgs2012influence}.
+The massive adoption of social networking services in recent years creates a unique platform for promoting ideas and spreading information. Communication through online social networks leaves traces of behavioral data which allow observing, predicting and even engineering processes of information diffusion. First posed by Domingos and Richardson~\cite{DR01,RD02} and elegantly formulated and further developed by Kempe, Kleinberg, and Tardos~\cite{KKT03}, \emph{influence maximization} is the algorithmic challenge of selecting a fixed number of individuals who can serve as early adopters of a new idea, product, or technology in a manner that will trigger a large cascade in the social network.
+%Since its inception, numerous techniques and improvements have been developed ranging from sophisticated predictive models of influence ~\cite{LAH06,RLK10,BHMW11,ACKS13,manuel2013icml,du13nips} to fast approximation methods~\cite{LKGFVG07,MR07,C08,KDD11,borgs2012influence}.
+%While there has been a great deal of progress on efficient algorithmic methods for this problem and impressive methods for learning models of influence from data, a fundamental problem has been largely overlooked.
-While there has been a great deal of progress on efficient algorithmic methods for this problem and impressive methods for learning models of influence from data, a fundamental problem has been largely overlooked. Due to the heavy-tailed degree distribution of social networks, influential users are rare, and thus the application of influence maximization techniques can often become ineffective.
+In many cases where influence maximization methods are applied one cannot
+select any user in the network but is limited to some subset of users. As an
+example, consider an online retailer who wishes to promote a product through
+word-of-mouth by rewarding influential customers who purchased the product.
+The retailer is then limited to select influential users from the set of users
+who purchased the product. In general, we will call the \emph{core set} the set
+of users an influence maximization campaign can access. When the goal is to
+select influential users from the core set, the laws that govern social
+networks can lead to poor outcomes. Due to the heavy-tailed degree
+distribution of social networks, high degree nodes are rare, and since
+influence maximization techniques often depend on the ability to select high
+%(not necessarily the highest)
+degree nodes, a naive application of influence
+maximization techniques to the core set can become ineffective.\newline
-For a concrete example, consider a scenario where the goal is to select influential users who visit an online store and reward them for promoting a product through their social network. In such a case, if the users who visit the online store are not influential, even the best techniques for identifying influential users would have poor performance. \newline
+%For a concrete example, consider a scenario where the goal is to select influential users who visit an online store and reward them for promoting a product through their social network. In such a case, if the users who visit the online store are not influential, even the best techniques for identifying influential users would have poor performance.
\begin{figure}
\centering
\includegraphics[scale=0.55]{images/dist.pdf}
+ \vspace{-15pt}
\caption{CDF of the degree distribution of users who liked a post by Kiva on Facebook and that of their friends.}
\label{fig:para}
+ \vspace{-15pt}
\end{figure}
+\noindent \textbf{An adaptive approach.}
+An alternative approach to spending the entire budget on the core set is an
+adaptive two-stage approach. In the first stage, one can spend a fraction of
+the budget on the core users so that they invite their friends to participate
+in the campaign, then in the second stage spend the rest of the budget
+on the influential friends who hopefully have arrived. The idea behind this
+approach is to leverage a structural phenomenon in social networks known as
+the friendship paradox~\cite{feld1991}. Intuitively, the friendship paradox
+says that individuals are not likely to have many friends, but they likely
+have a friend that does (``your friends have more friends than you''). In
+Figure~\ref{fig:para} we give an example of such an effect by plotting a CDF of
+the degree distribution of a core set of users who responded to a post on
+Facebook and the degree distribution of their friends. Remarkably, there are
+also formal guarantees of such effects. Recent work shows that for any network
+that has a power law degree distribution and a small fraction of random edges,
+there is an asymptotic gap between the average degree of small samples of nodes
+and that of their neighbors, with constant probability~\cite{LS13}. The
+implication is that when considering the core users (e.g. those who visit the
+online store) as random samples from a social network, any algorithm which can
+use their neighbors as influencers will have dramatic improvement over the
+direct application of influence maximization.\newline
-\noindent \textbf{The power of random neighbors.} Remarkably, while a random individual in a social networks is not likely to be influential, it turns out that she is likely to know someone who is. Recent work shows that for any network with a power law degree distribution with a small fraction of random edges, with constant probability there is an asymptotic gap between influence (either the total number of neighbors or sum of degrees) of small samples of the network and that of their neighbors~\cite{LS13}. %Intuitively, as influential nodes are well connected, one can hope that many users know an influential user who can serve as an early adopter.
-The implication is that when considering the accessible users (e.g. those who visit the online store) as random samples from a social network, any algorithm which can use their neighbors as influencers will have dramatic improvement over the direct application of influence maximization over the accessible users.\newline
+%The benefit of this method is that it can incentivizes influential nodes to influence their peers.
+%The idea of two-stage (or in general, a multi-stage) approach is to use the
+%nodes who arrive in the first stage to recruit influential users who can be
+%incentivized to spread information. In standard influence maximization, the
+%nodes who are not in the initial seed do not receive incentives to propagate
+%information, and cascades tend to die off
+%quickly~\cite{YC10,BHMW11,GWG12,CADKL14}.
+
+
+
+%\noindent \textbf{The friendship paradox.} Remarkably, while a random individual in a social network is not likely to be influential, it turns out that she is likely to know someone who is. Recent work shows that for any network that has a power law degree distribution, with a small fraction of random edges there is an asymptotic gap between influence (either the total number of neighbors or sum of degrees) of small samples of the network and that of their neighbors, with constant probability ~\cite{LS13}. %Intuitively, as influential nodes are well connected, one can hope that many users know an influential user who can serve as an early adopter.
+%The implication is that when considering the accessible users (e.g. those who visit the online store) as random samples from a social network, any algorithm which can use their neighbors as influencers will have dramatic improvement over the direct application of influence maximization over the accessible users.\newline
%\begin{figure*}[t!]
% \centering
-% \includegraphics[scale=0.60]{images/dist.pdf}
-% \caption{CDF of the degree distribution of seed users and their friends.}
+% \includegraphics[scale=0.08]{images/adaptive_seeding.pdf}
+% \caption{Adaptive seeding.}
% \label{fig:para}
%\end{figure*}
+%
+%
+%\noindent \textbf{An adaptive approach.}
+%Motivated by the friendship paradox, an alternative approach to spending the
+%entire budget on the users who are accessible is to adaptively select
+%influencers in two stages. In the first stage, one can spend a fraction of the
+%budget on the accessible users to invite their friends to participate in the
+%campaign, hope their friends arrive, and in the second stage spend the rest of
+%the budget on the friends who arrived.
+%%The benefit of this method is that it can incentivizes influential nodes to influence their peers.
+%The idea of two-stage (or in general, a multi-stage) approach is to use the
+%nodes who arrive in the first stage to recruit influential users who can be
+%incentivized to spread information. In standard influence maximization, the
+%nodes who are not in the initial seed do not receive incentives to propagate
+%information, and cascades tend to die off
+%quickly~\cite{YC10,BHMW11,GWG12,CADKL14}.
+%\newline
+\noindent \textbf{Warmup.} Suppose we are given a network, a random set of
+core users $X$ and a budget $k$, and the goal is to select a subset of nodes in
+$X$ of size $t\leq k$ which has the most influential set of neighbors of size
+$k-t$. For simplicity, assume for now that the influence of a set is simply
+its average degree. If we take the $k/2$ highest degree neighbors of $X$, then
+surely there is a set $S$ of size at most $k/2$ in $X$ connected to this set,
+and selecting $S$ would be a two-approximation to this problem. In comparison,
+the standard approach of influence maximization is to select the $k$ highest
+degree nodes in $X$. Thus, standard influence maximization would have $k$ of
+the most influential nodes in $X$ while the approximation algorithm we propose
+has $k/2$ of the most influential nodes from its set of neighbors. How much
+better or worse is it to use this approach over the standard one? If
+the network has a power-law degree distribution with a small fraction of random
+edges, and influence is measured in terms of sum of degrees of a set, then the
+results of~\cite{LS13} discussed above imply that the two-stage approach
+which allows seeding neighbors can do asymptotically (in the size of the
+network) better.
+%\footnote{\tiny{In fact, the results of~\cite{LS13} hold not only for measures of influence such as degree, but also for measures like coverage functions, and the voter model which we discuss in this paper.}}.
+Thus, at least intuitively, it looks as if two-stage approaches may be worth
+investigating.
+\newline
-\noindent \textbf{Adaptive Seeding.}
-An alternative approach to spending the entire budget on the users who are accessible is to follow a two-stage approach. In the first stage, one can spend a fraction of the budget on the accessible users to invite their friends to participate in the campaign, hope their friends arrive, and in the second stage spend the rest of the budget on the friends who arrived. This approach, called \emph{adaptive seeding} has been recently formalized in~\cite{singer}. As in the standard formulation of influence maximization, the setup in adaptive seeding includes a network, a budget, and an influence function. %which encodes the expected number of nodes influenced as a result of selecting a subset in the network.
-In addition, there is a subset of nodes $X$, the set of accessible users, and their neighbors $\mathcal{N}(X)$, where each neighbor has some probability indicating their likelihood to arrive if invited. The framework is a two-stage stochastic optimization problem. In the first stage any fraction of the budget can be used to select a subset of nodes in $X$, and as a result each neighbor of this set is then realized independently. In the second stage the remaining budget can be used to select from the set of neighbors that arrived. The goal is to select a subset of nodes in $X$, s.t. the influence function is maximized in expectation over all possible arrivals of the neighbors. The main result in~\cite{singer} is a constant factor approximation algorithm for well-studied influence models such as independent cascade and linear threshold.
-%More formally, adaptive seeding is phrased as a two-stage stochastic optimization problem.
-
-\subsection{A scalable approach to adaptive seeding}
-The constant-factor approximation guarantees for adaptive seeding are, at large, a theoretical triumph.
-Although the algorithms for adaptive seeding have desirable approximation guarantees~\cite{singer}, they rely on various forms of sampling, which creates significant blowup in the input size. While such techniques provide strong theoretical guarantees, for social network data sets which are often either large or massive, such approaches are inapplicable. The natural question is therefore:
+In this paper, our goal is to study the potential benefits of adaptive approaches for influence maximization. We are largely motivated by the following question.
\begin{center}
-\textit{Is adaptive seeding scalable?}
+\textit{Can adaptive optimization lead to significant improvements in influence maximization?}
\end{center}
-In this paper our main goal is to develop scalable approaches to adaptive seeding and to provide compelling experimental evidence of the dramatic impact such approaches can have in various applications of influence maximization.
-\newline
+To study this question we use the adaptive seeding model recently formalized
+in~\cite{singer}. The main distinctions from the caricature model in the
+warmup problem above are that in adaptive seeding the core set $X$ can be
+arbitrary (it does not have to be random), and every neighbor of $X$ is assumed
+to arrive with some independent probability. These probabilities are used to
+model the uncertainty we have in that the neighbors would be interested in
+promoting the product, as they are not in the core set. The goal in adaptive
+seeding is to select a subset of nodes in $X$ such that, in expectation over
+all possible arrivals of its neighbors, one can select a maximally influential
+set of neighbors with the remaining budget.\footnote{\tiny{The model can be
+ extended to the case where nodes take on different costs, and results
+ we present here largely generalize to such settings as well. Although
+ it seems quite plausible that the probability of attracting neighbors
+ could depend on the rewards they receive, the model deliberately
+ assumes unit costs, consistent with the celebrated
+Kempe-Kleinberg-Tardos model~\cite{KKT03}. Of course, if the likelihood of
+becoming an early adopter is inversely proportional to one's influence, then
+any influence maximization model loses substance.}} It is worth noting that
+by taking $X$ to be the entire set of nodes in the network we recover the
+Kempe-Kleinberg-Tardos model~\cite{KKT03}, and thus adaptive seeding can be
+seen as a generalization of this model.\newline
+
+%This is a simplified version of adaptive seeding where every neighbor realizes with probability one. A simple algorithm would be to find the most influential subset of size $k/2$ nodes from $\mathcal{N}(X)$, and then select its (at most) $k/2$ neighbors. For any submodular influence function, this gives a two-approximation.
+%
+%The crux comes when introducing probabilities.
+%
+%The standard approach to
+%
+%When the measure of influence is proportional to sum of degrees or cover of a set, then the discussion
+%
+%If we measure influence as the sum of degrees (or any submodular function)
+%
+
+
+
+
+%
+%More formally, adaptive seeding is phrased as a two-stage stochastic optimization problem.
+
+%\noindent \textbf{Adaptive Seeding.}
+%%An alternative approach to spending the entire budget on the users who are accessible is to follow a two-stage approach. In the first stage, one can spend a fraction of the budget on the accessible users to invite their friends to participate in the campaign, hope their friends arrive, and in the second stage spend the rest of the budget on the friends who arrived.
+%A two-stage approach, called \emph{adaptive seeding} was recently formalized in~\cite{singer}. As in the standard formulation of influence maximization, the setup in adaptive seeding includes a network, a budget, and an influence function. %which encodes the expected number of nodes influenced as a result of selecting a subset in the network.
+%In addition, there is a subset of nodes $X$, the set of accessible users, and their neighbors $\mathcal{N}(X)$, where each neighbor has some probability indicating their likelihood to arrive if invited. The framework is a two-stage stochastic optimization problem. In the first stage any fraction of the budget can be used to select a subset of nodes in $X$, and as a result each neighbor of this set is then realized independently
+%%~\footnote{The probability of a node being influenced does not model influence, but the likelihood of that node to be willing to disseminate information in exchange for the reward offered. In other words, this is a standard bayesian utility model with no externalities.}.
+%In the second stage the remaining budget can be used to select from the set of neighbors that arrived. The goal is to select a subset of nodes in $X$, s.t. the influence function is maximized in expectation over all possible arrivals of the neighbors.
+%%The goal is to recruit the neighbors to become influencers using the budget (free samples), as opposed to try influencing them to forward the information without incentives~\footnote{i.e. the assumption is that nodes have a standard bayesian utility model with no externalities.}. The function $f(\cdot)$ quantifies the number of individuals in the networks that will be influenced as a result of selecting a subset of early adopters. Influence functions are known to be special cases of submodular function~\cite{KKT03,MR07}.
+%Although it seems quite plausible that the probabilities of attracting neighbors could depend on the rewards they receive, the simplification assuming unit costs is deliberate.
+%This simplification is consistent with the celebrated Kempe-Kleinberg-Tardos model~\cite{KKT03}, and can be extended to the case where nodes take on different costs. Of course, if the likelihood of becoming an early adopter is inversely proportional to one's influence, then any influence maximization model loses substance.
+%
+
+
+%\subsection{A scalable approach}
+%The main result in~\cite{singer} is a constant factor approximation algorithm for well-studied influence models such as independent cascade and linear threshold. These constant-factor approximation guarantees for adaptive seeding are, at large, a theoretical triumph.
+%Although the algorithms for adaptive seeding have desirable approximation guarantees~\cite{singer}, they rely on various forms of sampling, which creates significant blowup in the input size. While such techniques provide strong theoretical guarantees, for social network data sets which are often either large or massive, such approaches are inapplicable. The natural question is therefore:
+%
+%\begin{center}
+%\textit{Is adaptive seeding scalable?}
+%\end{center}
+%
+%In this paper our main goal is to develop scalable approaches to adaptive seeding and to provide compelling experimental evidence of the dramatic impact such approaches can have in various applications of influence maximization.
+%\newline
+%
+%\noindent \textbf{Challenges.}
+
+\noindent \textbf{Scalability.} One of the challenges in adaptive seeding is
+scalability. This is largely due to the stochastic nature of the problem
+derived from uncertainty about arrival of neighbors. The main result
+in~\cite{singer} is a constant factor approximation algorithm for well-studied
+influence models such as independent cascade and linear threshold which is, at
+large, a theoretical triumph. These algorithms rely on various forms of
+sampling, which lead to a significant blowup in the input size. While such
+techniques provide strong theoretical guarantees, for social network data sets
+which are often either large or massive, such approaches are inapplicable. The
+main technical challenge we address in this work is how to design scalable
+adaptive optimization techniques for influence maximization which do not
+require sampling.\newline
+
+\noindent \textbf{Beyond random users.} The motivation for the adaptive
+approach hinges on the friendship paradox, but what if the core set is not
+a random sample? The results in~\cite{LS13} hold when the core set of users is
+random, but since users who follow a particular topic are not a random sample of
+the network, we must somehow evaluate adaptive seeding on representative data
+sets. The experimental challenge is to estimate the prominence of high degree
+neighbors in settings that are typical of viral marketing campaigns.
+Figure~\ref{fig:para} is a foreshadowing of the experimental methods we used to
+show that an effect similar to the friendship paradox exists in such cases as
+well. \newline
+
+%\noindent \textbf{Adaptive optimization sans sampling.}
+%To consider practical applications of adaptive approaches we require scalable methods.
+%The main technical idea we develop in this work can be described as follows. We say that a policy is \emph{adaptive} if it selects nodes in the first stage, and only after the realization of their neighbors, selects a subset of nodes with the remaining budget in the second stage. In contract, a \emph{non-adaptive} policy makes all its decisions before the realizations occur. The framework we develop in this work involves designing non-adaptive policies which could then be turned into adaptive ones. At a high level, the main idea would be to use a particular version of non-adaptive policies whose optimal solution is an upper bound to the optimal adaptive policy. We will then argue that a solution to the non-adaptive problem can be turned into a solution to the adaptive policy, without losing almost any value. This will therefore reduce our problem to that of designing solutions to the non-adaptive problem we define, for which we then develop specialized algorithms. The main advantage in the non-adaptive framework is that unlike standard approaches in stochastic optimization, it avoids using sampling. As we will later discuss, this dramatically reduces the running time of the algorithms, both in theory and in practice.\newline
+
+
+
+%The crux in designing adaptive seeding algorithms is due to both the combinatorial and stochastic nature of the problem. The combinatorial aspect involves the selection of the nodes in the first stage which affects which nodes will realize in the second stage. As we will later show, this aspect makes the problem $\textsc{NP}$-Hard even for the simplest of objective functions. The stochastic aspect requires the optimization to be over potentially exponentially-many realizations. A common approach in stochastic optimization is Sample Average Approximation (SAA). The main idea behind SAA is to sample realizations of the second stage, solve the optimization problem by taking a solution which is optimal on average over the sampled instances.
+%
+%For the instances we are interested in here that have size of over $N=10^5$ nodes, the required number of samples are on the order of $O(N^3)$, the algorithms can have $O(N^2)$ running time, and the total running time is then on the order of $10^{30}$ operations and SAA is simply infeasible.\footnote{In \cite{singer} SAA is used for linear models. For models such as independent cascade and linear threshold, a different sampling method is introduced which requires similar blowup in the input size to generate a concave relaxation of the optimization problem.}\newline
+
+
+
+
-\noindent \textbf{Challenges.}
-The challenges in designing adaptive seeding algorithms are due to both the combinatorial and stochastic nature of the problem. The combinatorial aspect involves the selection of the nodes in the first stage which affects which nodes will realize in the second stage. As we will later show, this aspect makes the problem $\textsc{NP}$-Hard even for the simplest of objective functions. The stochastic aspect requires the optimization to be over potentially exponentially-many realizations. A common approach in stochastic optimization is Sample Average Approximation (SAA). The main idea behind SAA is to sample realizations of the second stage, solve the optimization problem by taking a solution which is optimal on average over the sampled instances.
- For the instances we are interested in here that have size of over $N=10^5$ nodes, the required number of samples are on the order of $O(N^3)$, the algorithms can have $O(N^2)$ running time, and the total running time is then on the order of $10^{30}$ operations and SAA is simply infeasible.\footnote{In \cite{singer} SAA is used for linear models. For models such as independent cascade and linear threshold, a different sampling method is introduced which requires similar blowup in the input size to generate a concave relaxation of the optimization problem.}\newline
%The first of these challenges can be addressed by sampling realizations of the
%neighbors $R$ and averaging over them to get an arbitrarly-accurate
@@ -63,20 +238,84 @@ The challenges in designing adaptive seeding algorithms are due to both the comb
-\noindent \textbf{Stochastic optimization sans sampling.}
-In this paper we develop a framework for adaptive seeding which circumvents the use of sampling, while maintaining desirable approximation guarantees. We say that a policy is \emph{adaptive} if it selects nodes in the first stage, and only after the realization of their neighbors, selects a subset of nodes with the remaining budget in the second stage. In contract, a \emph{non-adaptive} policy makes all its decisions before the realizations occur.
-The framework we develop in this work involves designing non-adaptive policies which could then be turned into adaptive ones. At a high level, the main idea would be to use a particular version of non-adaptive policies whose optimal solution is an upper bound to the optimal adaptive policy. We will then argue that a solution to the non-adaptive problem can be turned into a solution to the adaptive policy, without losing almost any value. This will therefore reduce our problem to that of designing solutions to the non-adaptive problem we define, for which we then develop specialized algorithms. The main advantage in the non-adaptive framework is that unlike standard approaches in stochastic optimization, it avoids using sampling. As we will later discuss, this dramatically reduces the running time of the algorithms, both in theory and in practice.\newline
-
-%\noindent \textbf{Main results.}
-\subsection{Main results}
-Our results are for linear models of influence, i.e. models for which the influence of a set can be expressed as the sum of the influence of its members. While this class does not include models such as the independent cascade and the linear threshold model, it includes the well-studied \emph{voter model}~\cite{holley1975ergodic} and measures such as node degree and click-through-rate of users which serve as a natural proxies of influence in many settings. In comparison to submodular influence functions, the relative simplicity of linear models allows making substantial progress on this challenging problem.
-Using this framework, we develop two algorithms, both achieving an approximation ratio of $(1-1/e)$ for the adaptive problem. The first algorithm is implemented through a linear program, which proves to be extremely efficient over instances where there is a large budget. The second approach is a combinatorial algorithm with the same approximation guarantee which has good theoretical guarantees on its running time and does well on instances with smaller budgets.
-
-To show the applicability of these approaches, and the potential of adaptive seeding at large, we performed several experiments on real social network data sets. We collected publicly available data from Facebook on users who expressed interest (``liked'') a certain post from a topic they follow and data on their friends. The premise here is that such users mimic potential participants in a viral marketing campaign. The performance of our algorithms on these data sets shows adaptive seeding can have dramatic improvements over standard approaches for influence maximization.
+%\subsection{Main Results}
+%Our main results in this paper show that adaptive seeding is a scalable
+%approach which can dramatically improve upon standard approaches of influence
+%maximization in cases when limited to marketing through users that expressed
+%interest in a topic. We first design adaptive seeding algorithms that avoid
+%sampling. This allows implementation and experimentation with this approach on
+%large data sets. We then use these algorithms to conduct a series of
+%experiments to show the potential of adaptive approaches for influence
+%maximization.
+%
+%The guarantees of our algorithms hold for linear models of influence, i.e.
+%models for which the influence of a set can be expressed as the sum of the
+%influence of its members. While this class does not include models such as the
+%independent cascade and the linear threshold model, it includes the
+%well-studied \emph{voter model}~\cite{holley1975ergodic,even-dar} and measures
+%such as node degree and click-through-rate or retetweet measures of users which
+%serve as a natural proxies of influence in many settings~\cite{ZHGS10}. In
+%comparison to submodular influence functions, the relative simplicity of linear
+%models allows making substantial progress on this challenging problem. Using
+%this framework, we develop two algorithms, both achieving an approximation
+%ratio of $(1-1/e)$ for the adaptive problem. The first algorithm is
+%implemented through a linear program, which proves to be extremely efficient
+%over instances where there is a large budget. The second approach is
+%a combinatorial algorithm with the same approximation guarantee which can be
+%easily parallelized, has good theoretical guarantees on its running time and
+%does well on instances with smaller budgets.
+%
+%After obtaining scalable algorithms with desirable guarantees, we used the
+%algorithms in our experiments. We performed several experiments on synthetic
+%and real social network data sets. The main component of the experiments
+%involved collecting publicly available data from Facebook on users who
+%expressed interest (``liked'') a certain post from a topic they follow and data
+%on their friends. The premise here is that such users mimic potential
+%participants in a viral marketing campaign. The results on these data sets
+%suggest that adaptive seeding can have dramatic improvements over standard
+%influence maximization methods.\newline
+\noindent \textbf{Main results.}
+Our main results in this paper show that adaptive seeding is a scalable
+approach which can dramatically improve upon standard approaches of influence
+maximization. We present a general method that enables designing adaptive
+seeding algorithms in a manner that avoids sampling, and thus makes adaptive
+seeding scalable to large size graphs. We use this approach as a basis for
+designing two algorithms, both achieving an approximation ratio of $(1-1/e)$
+for the adaptive problem. The first algorithm is implemented through a linear
+program, which proves to be extremely efficient over instances where there is
+a large budget. The second approach is a combinatorial algorithm with the same
+approximation guarantee which can be easily parallelized, has good theoretical
+guarantees on its running time and does well on instances with smaller budgets.
+The guarantees of our algorithms hold for linear models of influence,
+\emph{i.e.} models for which the influence of a set can be expressed as the sum
+of the influence of its members. While this class does not include models such
+as the independent cascade and the linear threshold model, it includes the
+well-studied \emph{voter model}~\cite{holley1975ergodic,even-dar} and measures
+such as node degree, click-through-rate or retweet measures of users which
+serve as natural proxies of influence in many settings~\cite{ZHGS10}. In
+comparison to submodular influence functions, the relative simplicity of linear
+models allows making substantial progress on this challenging problem.
+We then use these algorithms to conduct a series of experiments to show the
+potential of adaptive approaches for influence maximization both on synthetic
+and real social networks. The main component of the experiments involved
+collecting publicly available data from Facebook on users who expressed
+interest (``liked'') a certain post from a topic they follow and data on their
+friends. The premise here is that such users mimic potential participants in
+a viral marketing campaign. The results on these data sets suggest that
+adaptive seeding can have dramatic improvements over standard influence
+maximization methods.\newline
+\noindent \textbf{Paper organization.}
+We begin by formally describing the model and the assumptions we make in the
+following section. In Section~\ref{sec:adaptivity} we describe the reduction
+of the adaptive seeding problem to a non-adaptive relaxation. In
+Section~\ref{sec:algorithms} we describe our non-adaptive algorithms for
+adaptive seeding. In Section~\ref{sec:experiments} we describe our
+experiments, and conclude with a brief discussion on related work.
+
%\subsection{Stochastic optimization sans sampling}
%A common approach in stochastic optimization is Sample Average Approximation (SAA).
%The main idea behind SAA is to sample realizations of the second stage, solve the optimization problem on the sampled instances, and average the solution. Often, when the number of samples is polynomial in the input size of the problem. In our case the problem is too large.
diff --git a/paper/sections/lp.log b/paper/sections/lp.log
deleted file mode 100644
index e69de29..0000000
--- a/paper/sections/lp.log
+++ /dev/null
diff --git a/paper/sections/model.tex b/paper/sections/model.tex
index 10c34d0..d26046a 100644
--- a/paper/sections/model.tex
+++ b/paper/sections/model.tex
@@ -1,21 +1,22 @@
%\subsection{Problem and notations}
-Let us begin by introducing the following notation.
+%Let us begin by introducing the following notation.
%We will use the following notation to formally discuss the model.
-Given a graph $G=(V,E)$, for a node $u\in V$ we denote by $\neigh{u}$
-the neighborhood of $u$. By extension, for any subset of nodes $S\subseteq V$,
-$\neigh{S}\defeq \bigcup_{u\in S}\neigh{u}$ will denote the neighborhood of
+Given a graph $G=(V,E)$, for a node $v\in V$ we denote by $\neigh{v}$
+the neighborhood of $v$. By extension, for any subset of nodes $S\subseteq V$,
+$\neigh{S}\defeq \bigcup_{v\in S}\neigh{v}$ will denote the neighborhood of
$S$. The notion of influence in the graph is captured by a function
$f:2^{|V|}\rightarrow \reals_+$ mapping a subset of nodes to a non-negative
influence value.\newline
\noindent \textbf{The adaptive seeding model.}
-The input of the \emph{adaptive seeding} problem is a set of initial nodes
+The input of the \emph{adaptive seeding} problem is a \emph{core set} of nodes
$X\subseteq V$ and for any node $u\in\neigh{X}$ a probability $p_u$ that $u$
-realizes if one of its neighbor in $X$ is seeded. We will write $m=|X|$ and $n=|\neigh{X}|$ the parameters controlling the input size. The seeding process
+realizes if one of its neighbors in $X$ is seeded. We will write $m=|X|$ and
+$n=|\neigh{X}|$ the parameters controlling the input size. The seeding process
is the following:
\begin{enumerate}
\item \emph{Seeding:} the seeder selects a subset of nodes $S\subseteq
- X$ among the initial nodes.
+ X$ in the core set.
\item \emph{Realization of the neighbors:} every node $u\in\neigh{S}$
realizes independently with probability $p_u$. We denote by
$R\subseteq\neigh{S}$ the subset of nodes that is realized during this
@@ -27,8 +28,8 @@ is the following:
There is a budget constraint $k$ on the total number of nodes that can be
selected: $S$ and $T$ must satisfy $|S|+|T|\leq k$. The seeder chooses the set
$S$ before observing the realization $R$ and thus wishes to select optimally in
-expectation over all such possible realizations. Formally, the objective can be stated
-as:
+expectation over all such possible realizations. Formally, the objective can be
+stated as:
\begin{equation}\label{eq:problem}
\begin{split}
&\max_{S\subseteq X} \sum_{R\subseteq\neigh{S}} p_R
@@ -36,26 +37,38 @@ as:
&\text{s.t. }|S|\leq k
\end{split}
\end{equation}
-where $p_R$ is the probability that the set $R$ realizes under the
-probabilities $p_u,\,u\in\neigh{S}$:
-\begin{displaymath}
- p_R \defeq \prod_{u\in R}p_u\prod_{u\in\neigh{S}\setminus R}(1-p_u)
-\end{displaymath}
+where $p_R$ is the probability that the set $R$ realizes,
+%\begin{displaymath}
+$p_R \defeq \prod_{u\in R}p_u\prod_{u\in\neigh{S}\setminus R}(1-p_u)$.
+%\end{displaymath}
-It is important to note that the process through which nodes arrive in the second stage is \emph{not} an influence process. The nodes in the second stage arrive if they are willing to spread information in exchange for a unit of the budget. Only when they have arrived does the influence process occur. This process is encoded in the influence function and occurs during the influence maximization stage without incentivizing nodes along the propagation path.\newline
+It is important to note that the process through which nodes arrive in the
+second stage is \emph{not} an influence process. The nodes in the second stage
+arrive if they are willing to spread information in exchange for a unit of the
+budget. Only when they have arrived does the influence process occur. This
+process is encoded in the influence function and occurs after the influence
+maximization stage without incentivizing nodes along the propagation path. In
+general, the idea of a two-stage (or in general, multi-stage) approach is to
+use the nodes who arrive in the first stage to recruit influential users who
+can be incentivized to spread information. In standard influence maximization,
+the nodes who are not in the core set do not receive incentives to propagate
+information, and cascades tend to die off
+quickly~\cite{YC10,BHMW11,GWG12,CADKL14}.\newline
\noindent \textbf{Influence functions.}
-In this paper we focus on \emph{linear} (or additive) influence models: influence models for which the value of a subset of nodes can be expressed as a weighted sum of their individual influence.
-One important example of such models is the \emph{voter model} \cite{richardson} used to represent the spread of opinions in a social network: at each time step, a node
-adopts an opinion with a probability equal to the fraction of its neighbors
-sharing this opinion at the previous time step. Formally, this can be written
-as a discrete-time Markov chain over opinion configurations of the network.
-In this model influence maximization amounts to ``converting'' the
-optimal subset of nodes to a given opinion at the initial time so as to
-maximize the number of converts after a given period of time.
-Remarkably, a simple analysis shows that under this model, the influence
-function $f$ is additive:
+In this paper we focus on \emph{linear} (or additive) influence models:
+in these models the value of a subset of nodes can be expressed as
+a weighted sum of their individual influence. One important example of such
+models is the \emph{voter model} \cite{richardson} used to represent the spread
+of opinions in a social network: at each time step, a node adopts an opinion
+with a probability equal to the fraction of its neighbors sharing this opinion
+at the previous time step. Formally, this can be written as a discrete-time
+Markov chain over opinion configurations of the network. In this model
+influence maximization amounts to ``converting'' the optimal subset of nodes to
+a given opinion at the initial time so as to maximize the number of converts
+after a given period of time. Remarkably, a simple analysis shows that under
+this model, the influence function $f$ is additive:
\begin{equation}\label{eq:voter}
\forall S\subseteq V,\; f(S) = \sum_{u\in S} w_u
\end{equation}
@@ -64,4 +77,6 @@ the transition matrix of the Markov chain. This observation led to the
development of fast algorithms for influence maximization under the voter
model~\cite{even-dar}.\newline
-\noindent \textbf{\textsc{NP}-Hardness.} In contrast to standard influence maximization, adaptive seeding is already \textsc{NP}-Hard even for the simplest influence functions. In the case when $f(S)=|S|$ and all probabilities equal one, the decision problem is whether given a budget $k$ and target value $\ell$ there exists a subset of $X$ of size $k-t$ which yields a solution with expected value of $\ell$ using $t$ nodes in $\mathcal{N}(X)$. This is equivalent to deciding whether there are $k-t$ nodes in $X$ that have $t$ neighbors in $\mathcal{N}(X)$. To see this is \textsc{NP}-hard, consider reducing from \textsc{Set-Cover} where there is one node $i$ for each input set $T_i$, $1\leq i\leq n$, with $\neigh{i}= T_i$ and integers $k,\ell$, and the output is ``yes'' if there is a family of $k$ sets in the input which cover at least $\ell$ elements, and ``no'' otherwise.
+\noindent \textbf{\textsc{NP}-Hardness.} In contrast to standard influence maximization, adaptive seeding is already \textsc{NP}-Hard even for the simplest influence functions, such as $f(S) = |S|$, even when all probabilities are one. We discuss this in the full version of the paper~\cite{full}.
+%In the case when $f(S)=|S|$ and all probabilities equal one, the decision problem is whether given a budget $k$ and target value $\ell$ there exists a subset of $X$ of size $k-t$ which yields a solution with expected value of $\ell$ using $t$ nodes in $\mathcal{N}(X)$. It is easy to see that this problem is \textsc{NP}-hard by reduction from \textsc{Set-Cover}.
+%This is equivalent to deciding whether there are $k-t$ nodes in $X$ that have $t$ neighbors in $\mathcal{N}(X)$. To see this is \textsc{NP}-hard, consider reducing from \textsc{Set-Cover} where there is one node $i$ for each input set $T_i$, $1\leq i\leq n$, with $\neigh{i}= T_i$ and integers $k,\ell$, and the output is ``yes'' if there is a family of $k$ sets in the input which cover at least $\ell$ elements, and ``no'' otherwise.
diff --git a/paper/sections/performance.tex b/paper/sections/performance.tex
deleted file mode 100644
index 8b13789..0000000
--- a/paper/sections/performance.tex
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/paper/sections/related.tex b/paper/sections/related.tex
index b4200c1..439dd3a 100644
--- a/paper/sections/related.tex
+++ b/paper/sections/related.tex
@@ -1,9 +1,14 @@
-\subsection{Related work}
-Influence maximization was introduced by Domingos and Richardson~\cite{DR01, RD02}, and later formulated by Kempe, Kleinberg and Tardos~\cite{KKT03, KKT05}, and has been extensively studied since. Kempe, Kleinberg and Tardos~\cite{KKT03, KKT05} were able to cast this problem as a submodular optimization problem for many classes of influence models, hence allowing for algorithms with good approximations guarantees. Further refinements of the influence models and associated approximation guarantees can be found in \cite{MR07, C08}. In~\cite{even-dar}, the authors look at the special case of the voter model and design efficient algorithms in this setting.
-
-Our two-stage model for influence maximization is related to the field of stochastic optimization where problems are commonly solved using the \emph{sample average approximation} method \cite{SampleAverage}. In \cite{dean2004approximating, gupta2012approximation}, the authors use a notion of non-adaptive solutions related though not equivalent to ours.
+Influence maximization was introduced by Domingos and Richardson~\cite{DR01, RD02}, formulated by Kempe, Kleinberg and Tardos~\cite{KKT03, KKT05}, and has been extensively studied since~\cite{MR07, C08, LKGFVG07, KDD11, borgs2012influence}.
+The main result in~\cite{KKT03, KKT05} is a characterization of influence processes as submodular functions, which implies good approximation guarantees for the influence maximization problem.
+%Kempe, Kleinberg and Tardos~\cite{KKT03, KKT05} were able to cast this problem as a submodular optimization problem for many classes of influence models, hence allowing for algorithms with good approximations guarantees.
+%Further refinements of the influence models and associated approximation guarantees can be found in \cite{MR07, C08}.
+In~\cite{even-dar}, the authors look at the special case of the voter model and design efficient algorithms in this setting.
+Our two-stage model for influence maximization is related to the field of stochastic optimization where problems are commonly solved using the \emph{sample average approximation} method~\cite{SampleAverage}. %In \cite{dean2004approximating, gupta2012approximation}, the authors use a different notion of non-adaptive solutions.
%The problem we study is the same as in \cite{singer} but our goals are different.
%Our main goal here is to obtain scalable algorithms that can be applied on the datasets we collected.
+%Other models of adaptive optimization have been previously studied in the context of influence maximization.
+%In \cite{asadpour2008stochastic}, the authors study a stochastic sequential submodular maximization problem where at each step an element is chosen, its realization is revealed and the next decision is made.
+%Golovin and Krause \cite{golovin2011adaptive} study a similar model and apply it to a multi-stage influence maximization problem. We note that contrary to our model, the decision made at a given stage does not affect the following stages as the entire set of nodes is available as potential seeds at every stage.
+Golovin and Krause \cite{golovin2011adaptive} study a stochastic sequential submodular maximization problem where at each step an element is chosen, its realization is revealed, and the next decision is made. We note that, in contrast to adaptive seeding, the decision made at a given stage does not affect the following stages, as the entire set of nodes is available as potential seeds at every stage.
-Other models of adaptive optimization have been previously studied in the context of influence maximization. In \cite{asadpour2008stochastic}, the authors study a stochastic sequential submodular maximization problem where at each step an element is chosen, its realization is revealed and the next decision is made. Golovin and Krause \cite{golovin2011adaptive} study a similar model and apply it to a multi-stage influence maximization problem. We note that contrary to our model, the decision made at a given stage does not affect the following stages as the entire set of nodes is available as potential seeds at every stage.