Diffstat (limited to 'paper/sections/algorithms.tex')
-rw-r--r--  paper/sections/algorithms.tex | 508
1 file changed, 113 insertions(+), 395 deletions(-)
diff --git a/paper/sections/algorithms.tex b/paper/sections/algorithms.tex
index e59de2a..810653f 100644
--- a/paper/sections/algorithms.tex
+++ b/paper/sections/algorithms.tex
@@ -1,219 +1,22 @@
-%In our context, a non-adaptive policy is simply a policy that makes all its decision before the realizations occur. That is, before the algorithm sees which neighbors arrive, it decides on how to allocate its seeding budget. This is in contrast to the optimal adaptive policy which waits for nodes to arrive.
-%
-
-We start by introducing \emph{non-adaptive policies} for the adaptive seeding problem. These policies are used as a tool to obtain adaptive solutions as will be discussed in Sections~\ref{sec:gap} and~\ref{sec:round}.
-We will say that a policy is non-adaptive if it selects a set of nodes $S
-\subseteq X$ to be seeded in the first stage and a vector of probabilities
-$\mathbf{q}\in[0,1]^n$ such that each neighbor $u$ of $S$ that realizes
-is included in the solution independently with probability $q_u$. The
-constraint is now that the budget need only be respected in expectation, i.e.
-$|S| + \textbf{p}^T\textbf{q} \leq k$. Formally, the optimization problem for non-adaptive policies can be written as:
-\begin{equation}\label{eq:relaxed}
- \begin{split}
- %&\max_{}
- \max_{\substack{S\subseteq X\\q\in[0,1]^n} }& \;
- \sum_{R\subseteq\neigh{X}} \Big (\prod_{u\in R} p_uq_u\prod_{u\in\neigh{X}\setminus
- R}(1-p_uq_u) \Big )
- f(R)\\
- \text{s.t. } & \; |S|+\textbf{p}^T\textbf{q} \leq k,\;
-q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}
-\end{split}
-\end{equation}
-
-%$\textbf{p}\odot \textbf{q}$ denotes componentwise multiplication and $\textbf{q}_R$ denotes the positive entries of $\textbf{q}$ on nodes in $R$:
-%
-%\begin{displaymath}
-% \textbf{p}\circ \textbf{q}_R = \prod_{u\in R} p_uq_u\prod_{u\in\neigh{X}\setminus
-% R}(1-p_uq_u)
-%\end{displaymath}
-%
-%\begin{displaymath}
-% \Pr[R \ |\ \textbf{p}\odot \textbf{q} ] = \prod_{u\in R} p_uq_u\prod_{u\in\neigh{X}\setminus
-% R}(1-p_uq_u)
-%\end{displaymath}
-
-Note that because of the condition $q_u\leq \mathbf{1}_{\{u\in\neigh{S}\}}$,
-the summand associated with $R$ in \eqref{eq:relaxed} vanishes whenever $R$
-contains $u\in\neigh{X}\setminus\neigh{S}$. Hence, the summation is restricted
-to $R\subseteq\neigh{S}$ as in \eqref{eq:problem}.
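To make the non-adaptive objective concrete, the following Python sketch (entirely ours; the instance encoding and the name eval_nonadaptive are illustrative assumptions, not notation from the paper) estimates the objective of \eqref{eq:relaxed} for a given pair $(S,\mathbf{q})$ by sampling realizations, after checking that the budget holds in expectation:

    import random

    def eval_nonadaptive(neighbors, p, q, w, S, k, samples=10000, seed=0):
        """Monte Carlo estimate of the non-adaptive objective of (S, q)
        for the additive f(R) = sum of w_u over u in R.

        neighbors: dict v -> set of second-stage neighbors of v, for v in X
        p, q, w:   dicts keyed by the second-stage nodes u in N(X)
        """
        N_S = set().union(*(neighbors[v] for v in S)) if S else set()
        # Constraint q_u <= 1{u in N(S)}.
        assert all(q[u] == 0 for u in q if u not in N_S)
        # Budget in expectation: |S| + p^T q <= k.
        assert len(S) + sum(p[u] * q[u] for u in q) <= k + 1e-9
        rng = random.Random(seed)
        total = 0.0
        for _ in range(samples):
            # u is seeded iff it realizes (prob p_u) and survives the q_u flip.
            total += sum(w[u] for u in N_S if rng.random() < p[u] * q[u])
        return total / samples

For the additive functions considered below, the expectation collapses to the closed form $\sum_u p_uq_uw_u$, so the sampling is only for illustration.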
-
-%The relaxed non-adaptive optimization \eqref{eq:relaxed} problem can be written
-%more concisely using the \emph{multi-linear extension} of $f$:
-%\begin{displaymath}
-% \forall p\in[0,1]^m,\; F(p)
-% \defeq\mathbb{E}_{p_R}\big[f(R)\big]
-% =\sum_{R\subseteq\neigh{X}} p_R f(R)
-%\end{displaymath}
-%This \emph{extension by expectation} is known to present cross-convexity
-%properties which can be exploited in maximization problems when $f$ is
-%a submodular function \cite{dughmi, vondrak}. Using this definition, \eqref{eq:relaxed}
-%can be re-written as:
-%\begin{equation}\label{eq:relaxed-multi}
-% \begin{split}
-% &\max_{S\subseteq X} \max_{q\in[0,1]^n}F(p\circ q)\\
-% &\text{s.t. }|S|+\sum_{u\in\neigh{X}}p_uq_u \leq k,\;
-%q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}
-%\end{split}
-%\end{equation}
-%
-%When $f$ is an influence function from the triggering model, it has been proved
-%in \cite{singer} that adaptive seeding has a bounded \emph{adaptivity gap}:
-%denoting by $OPT$ the optimal solution of \eqref{eq:problem} and by $OPT_{NA}$
-%the optimal solution of \eqref{eq:relaxed}, we have $OPT\leq\alpha OPT_{NA}$.
-%This inequality justifies using non-adaptive policies to approximate solutions
-%to the adaptive seeding problem.
-
-\subsection{Adaptivity gap for additive functions}\label{sec:gap}
-
-We will now justify the use of non-adaptive policies by showing that the optimal non-adaptive policy of this form yields a value at least as high as that of the optimal adaptive policy.
-For brevity, given a probability vector $\mathbf{p}$ we will write:
-\begin{equation}\label{eq:multi}
- F(\textbf{p}) \defeq\mathbb{E}_{\mathbf{p}}\big[f(R)\big]
- =\sum_{R\subseteq\neigh{X}}p_R f(R)
-\end{equation}
-as well as $\textbf{p}\circ \textbf{q}$ to denote the component-wise multiplication between vectors $\textbf{p}$ and $\textbf{q}$. Finally, we will use $\mathcal{F}_{A} := \{S \subseteq X : |S|\leq k\}$ and $\mathcal{F}_{NA} :=\{(S,\textbf{q}) : |S|+\textbf{p}^T\textbf{q} \leq k,\; q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}\}$ to denote the feasible regions of the adaptive and non-adaptive problems, respectively.
-
-%this form of non-adap
-%We already know that the adaptivity gap is bounded for a general class of
-%influence function. For adaptive functions, we get a stronger result:
-%\emph{relaxed-non adaptive policies are stronger than non-adaptive policies}.
-%
-%
-\begin{proposition}\label{prop:gap}
-For additive functions given by \eqref{eq:voter}, non-adaptive
- policies are stronger than adaptive policies:
- \begin{displaymath}
- \begin{aligned}[t]
- &\max_{S\subseteq X} \sum_{R\subseteq\neigh{S}} p_R
- \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)\\
- &\text{s.t. }S \in \mathcal{F}_{A}
- \end{aligned}
- \leq
- \begin{aligned}[t]
- &\max_{\substack{S\subseteq X\\q\in[0,1]^n}}
- F(\mathbf{p}\circ\mathbf{q})\\
- &\text{s.t. } (S,q) \in \mathcal{F}_{NA}
- \end{aligned}
- \end{displaymath}
-\end{proposition}
-
-%\begin{proposition}\label{prop:gap}
-%Let $\mathcal{F}_{A} := \{T \subseteq X : |T|\leq k\}$, $\mathcal{F}_{NA} :=\{(T,\textbf{q}), |S|+\textbf{p}^T\textbf{q} \leq k, q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}\}$.
-%For additive functions of the form given by \eqref{eq:voter}, non-adaptive
-% policies are stronger than adaptive policies:
-% \begin{displaymath}
-% \begin{aligned}[t]
-% &\max_{S\subseteq X} \sum_{R\subseteq\neigh{S}} p_R
-% \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)\\
-% &\text{s.t. }|S|\leq k
-% \end{aligned}
-% \leq
-% \begin{aligned}[t]
-% &\max_{S\subseteq X} \max_{q\in[0,1]^n}F(p\circ q)\\
-% &\text{s.t. }|S|+\sum_{u\in\neigh{X}}p_uq_u \leq k,\;
-% q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}
-% \end{aligned}
-% \end{displaymath}
-%\end{proposition}
-
-\begin{proof}
- We will first show that the optimal adaptive policy can be interpreted as
- a non-adaptive policy. Let $S$ be the optimal adaptive
-  solution and, for each $R\subseteq\neigh{S}$, let $T^*_R$ be a fixed
-  maximizer of $f$ over $\big\{T\subseteq R,\; |T|\leq k-|S|\big\}$.
-  Define $\delta_R:\neigh{X}\to \{0,1\}$ by:
-  \begin{displaymath}
-    \delta_R(u) \defeq \begin{cases}
-      1&\text{if } u\in T^*_R \\
-      0&\text{otherwise}
-    \end{cases},
-  \end{displaymath}
- one can write
- \begin{displaymath}
- \begin{split}
- \sum_{R\subseteq\neigh{S}} p_R
- \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
- &=
- \sum_{R\subseteq\neigh{S}} p_R
- \sum_{u\in\neigh{X}}\delta_R(u)w_u\\
- &=
- \sum_{u\in\neigh{X}}w_u\sum_{R\subseteq\neigh{S}}p_R\delta_R(u).
- \end{split}
- \end{displaymath}
-
- Let us now define for $u\in\neigh{X}$:
- \begin{displaymath}
- q_u \defeq \begin{cases}
- \sum_{R\subseteq\neigh{S}}\frac{p_R}{p_u}\delta_R(u)
- &\text{if }p_u\neq 0\\
- 0&\text{otherwise}
- \end{cases}.
- \end{displaymath}
- This allows us to write:
- \begin{displaymath}
- \sum_{R\subseteq\neigh{S}} p_R
- \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
- = \sum_{u\in\neigh{X}}p_uq_uw_u = F(\mathbf{p}\circ\mathbf{q})
- \end{displaymath}
- where the last equality is obtained from \eqref{eq:multi} by successively using the linearity of the expectation and the linearity of $f$.
-
- Furthermore, observe that $q_u\in[0,1]$, $q_u=0$ if $u\notin\neigh{S}$ and:
- \begin{displaymath}
- \begin{split}
- |S|+\sum_{u\in\neigh{X}}p_uq_u
- &= |S|+\sum_{R\subseteq\neigh{S}}p_R\sum_{u\in\neigh{X}}\delta_R(u)\\
- &\leq |S| + \sum_{R\subseteq\neigh{S}}p_R(k-|S|)\leq k
- \end{split}
- \end{displaymath}
-
-  Hence, $(S,\mathbf{q})\in\mathcal{F}_{NA}$. In other words, we have written the optimal adaptive solution as
-  a non-adaptive solution. This concludes the proof of the proposition.
-\end{proof}
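The construction in this proof is easy to check numerically. The brute-force Python sketch below (our illustration; all names are hypothetical) enumerates realizations $R$, takes the top-$(k-|S|)$ nodes of $R$ by weight as $T^*_R$, accumulates $q_u=\sum_R p_R\delta_R(u)/p_u$, and verifies both the value identity $F(\mathbf{p}\circ\mathbf{q})=\mathrm{A}(S)$ and the expected-budget bound:

    from itertools import chain, combinations

    def subsets(iterable):
        s = list(iterable)
        return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))

    def adaptive_to_nonadaptive(N_S, p, w, budget2):
        """Build q from the optimal adaptive policy for additive f,
        following the proof; budget2 = k - |S|."""
        q = {u: 0.0 for u in N_S}
        value = 0.0
        for R in subsets(N_S):
            p_R = 1.0
            for u in N_S:
                p_R *= p[u] if u in R else (1 - p[u])
            # For additive f, the best T in R with |T| <= budget2 is the
            # top-budget2 elements of R by weight: this is T*_R.
            T_star = sorted(R, key=lambda u: -w[u])[:budget2]
            value += p_R * sum(w[u] for u in T_star)
            for u in T_star:
                q[u] += p_R / p[u]    # q_u = sum_R p_R * delta_R(u) / p_u
        return q, value

    # Toy check: F(p o q) equals the adaptive value, and p^T q <= budget2.
    p = {'a': 0.5, 'b': 0.8, 'c': 0.3}
    w = {'a': 3.0, 'b': 2.0, 'c': 1.0}
    q, val = adaptive_to_nonadaptive(['a', 'b', 'c'], p, w, budget2=2)
    assert abs(sum(p[u] * q[u] * w[u] for u in p) - val) < 1e-9
    assert sum(p[u] * q[u] for u in p) <= 2 + 1e-9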
-
-\subsection{From non-adaptive to adaptive solutions}\label{sec:round}
-From the above proposition we now know that optimal non-adaptive solutions have
-values at least as high as adaptive ones. A priori, this does not imply that non-adaptive solutions yield good adaptive solutions. More precisely, a non-adaptive solution is a pair $(S, \mathbf{q})$: seeding $S$ in the first stage and sampling nodes with probabilities $\mathbf{q}$ in the second stage yields, in expectation, the objective value of \eqref{eq:relaxed}. However, since the budget is only respected in expectation, the set resulting from the sampling might exceed the budget of the second stage, preventing us from directly obtaining a feasible adaptive solution.
-
-Fortunately, the probability of exceeding the budget is small enough that with high probability the sampled set will be feasible. This property is leveraged in \cite{vondrak} to design randomized rounding methods with approximation guarantees, called \emph{contention resolution schemes}. More precisely, we note that once the set $S$ is fixed, the feasibility constraint of problem~\eqref{eq:relaxed} becomes a single Knapsack constraint, for which \cite{vondrak} constructs a $(1-\varepsilon, 1-\varepsilon)$-balanced contention resolution scheme. Applying Theorem~1.3 of the same paper, this contention resolution scheme computes from $\mathbf{q}$ and the realized set $R$ a \emph{feasible} random set $I\subseteq R$ such that:
-\begin{equation}\label{eq:cr}
-\mathbb{E}\big[f(I)\big]
-\geq (1-\varepsilon) F(\mathbf{q}_R)
-\end{equation}
-where $\mathbf{q}_R$ denotes the restriction of $\mathbf{q}$ to the realized set $R$ (i.e., $(\mathbf{q}_R)_u = q_u$ if $u\in R$ and $0$ otherwise).
-
-Equation~\eqref{eq:cr} allows us to prove the following proposition, where $\mathrm{OPT}_{NA}$ denotes the optimal value of problem~\eqref{eq:relaxed} and $\mathrm{OPT}_A$ denotes the optimal value of problem~\eqref{eq:problem}.
-
-\begin{proposition}\label{prop:cr}
-  If $(S,\textbf{q})$ is an $\alpha$-approximate solution to the non-adaptive problem \eqref{eq:relaxed}, then $\mathrm{A}(S) \geq \alpha \mathrm{OPT}_A$.
-\end{proposition}
-
-\begin{proof}
- Using the definition of $\mathrm{A}(S)$, one can write:
-  \begin{displaymath}
-    \mathrm{A}(S) = \sum_{R\subseteq\neigh{S}} p_R
-    \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
-    \geq \sum_{R\subseteq\neigh{S}} p_R \mathbb{E}\big[f(I)\big]
-  \end{displaymath}
- where the inequality comes from the fact that $I$ is a feasible random set: $|I|\leq k-|S|$, hence the expected value of $f(I)$ is bounded by the maximum of $f$ over feasible sets.
-
-Equation~\eqref{eq:cr} then implies:
-\begin{equation}\label{eq:tmp}
-  \mathrm{A}(S)
-  \geq (1-\varepsilon)\sum_{R\subseteq\neigh{S}} p_R F(\mathbf{q}_R)
-  = (1-\varepsilon)F(\mathbf{p}\circ\mathbf{q})
-\end{equation}
-where the equality uses the linearity of $f$ and the fact that $\mathbb{P}[u\in R]=p_u$ for $u\in\neigh{S}$.
-
-Equation~\eqref{eq:tmp} holds for any $\varepsilon\geq 0$. In particular, choosing $\varepsilon$ smaller than $\inf_{S\neq T} |A(S)-A(T)|$, we obtain that $\mathrm{A}(S)\geq F(\mathbf{p}\circ\mathbf{q})$. Note that such an $\varepsilon$ need only be polynomially small in the size of the instance.
-Since $(S, \mathbf{q})$ is an $\alpha$-approximate non-adaptive solution, $F(\mathbf{p}\circ\mathbf{q}) \geq \alpha\mathrm{OPT}_{NA}$. We can then conclude by applying Proposition~\ref{prop:gap}.
-\end{proof}
-
-Therefore the adaptive seeding problem reduces to the non-adaptive problem. We will now discuss two approaches to construct non-adaptive solutions. The first is an LP-based approach, and the second is a combinatorial algorithm. Both approaches have the same $(1-1/e)$ approximation ratio, which is then translated to a $(1-1/e)$ approximation ratio for the adaptive seeding problem~\eqref{eq:problem} via Proposition~\ref{prop:cr}.
+Section~\ref{sec:adaptivity} shows that the adaptive seeding problem reduces to
+the non-adaptive problem. We will now discuss two approaches to construct
+approximate non-adaptive solutions. The first is an LP-based approach, and the
+second is a combinatorial algorithm. Both approaches have the same $(1-1/e)$
+approximation ratio, which is then translated to a $(1-1/e)$ approximation
+ratio for the adaptive seeding problem~\eqref{eq:problem} via
+Proposition~\ref{prop:cr}. As we will show in Section~\ref{sec:experiments},
+both algorithms have their advantages and disadvantages in terms of
+scalability.
\subsection{An LP-Based Approach}
-Note that due to linearity of expectation, for a linear function $f$ of the form given by \eqref{eq:voter} we have:
+\label{sec:lp}
+Note that due to linearity of expectation, for a linear function $f$ of the
+form given by \eqref{eq:voter} we have:
\begin{equation}\label{eq:multi-voter}
\begin{split}
F(\textbf{p})
- &=\mathbb{E}_{p_R}\big[f(R)\big]
- =\mathbb{E}_{p_R}\Bigg[\sum_{u\in\neigh{X}}w_u\mathbf{1}_{\{u\in
+ &=\mathbb{E}_{R}\big[f(R)\big]
+ =\mathbb{E}_{R}\Bigg[\sum_{u\in\neigh{X}}w_u\mathbf{1}_{\{u\in
R\}}\Bigg]\\
&=\sum_{u\in\neigh{X}}w_u\mathbb{P}[u\in R]
=\sum_{u\in\neigh{X}}p_uw_u
@@ -223,10 +26,10 @@ Note that due to linearity of expectation, for a linear function $f$ of the form
Thus, the non-adaptive optimization problem \eqref{eq:relaxed} can be written as:
\begin{displaymath}
\begin{split}
- \max_{\substack{S\subseteq X\\q\in[0,1]^n} }
+ \max_{\substack{S\subseteq X\\\mathbf{q}\in[0,1]^n} }
& \sum_{u\in\neigh{X}}p_uq_uw_u\\
\text{s.t. } & |S|+ \textbf{p}^T\textbf{q} \leq k,\;
- q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}
+ q_u \leq \mathbf{1}\{u\in\neigh{S}\}
\end{split}
\end{displaymath}
@@ -235,49 +38,70 @@ $\lambda_v\in[0,1]$ for each $v\in X$. We obtain the following
LP for the adaptive seeding problem:
\begin{equation}\label{eq:lp}
\begin{split}
- \max_{\substack{q\in[0,1]^n\\\lambda\in[0,1]^m}}
+ \max_{\substack{\mathbf{q}\in[0,1]^n\\\boldsymbol\lambda\in[0,1]^m}}
& \;\sum_{u\in\neigh{X}}p_uq_uw_u\\
\text{s.t. } & \sum_{v\in X}\lambda_v+\textbf{p}^T\textbf{q} \leq k,\;
q_u \leq \sum_{v\in\neigh{u}} \lambda_v
\end{split}
\end{equation}
-An optimal solution to the above problem can be found using standard LP-solvers, in polynomial time. The solution returned by the LP is \emph{fractional}, and requires a rounding procedure to return a feasible solution to our problem, where $S$ is integral. We briefly describe our approach which uses the \emph{pipage rounding} method.\newline
-
-\noindent \textbf{Pipage Rounding.}
-The pipage rounding method~\cite{pipage} is a deterministic rounding method that can be applied to a variety of problems. In particular, it can be applied to LP-relaxations of the \textsc{Max-K-Cover} problem where we are given a family of sets that cover elements of a universe and the goal is to find $k$ subsets whose union has the maximal cardinality. The LP-relaxation is a fractional solution over subsets, and the pipage rounding procedure then rounds the allocation in linear time, and the integral solution is guaranteed to be within a factor of $(1-1/e)$ of the fractional solution.
-We make the following key observation: for any given $\textbf{q}$, one can remove all elements in $\mathcal{N}(X)$ for which $q_{u}=0$, without changing the value of any solution $(\lambda,\textbf{q})$.
-Our rounding procedure can therefore be described as follows: given a solution $(\lambda,\textbf{q})$ we remove all nodes $u \in \mathcal{N}(X)$ for which $q_{u}=0$, which leaves us with a fractional solution to a (weighted) version of the \textsc{Max-K-Cover} problem where nodes in $X$ are the sets and the universe is the set of weighted nodes in $\mathcal{N}(X)$ that were not removed. We can therefore apply pipage rounding and lose only a factor of $(1-1/e)$ in quality of the solution.
+An optimal solution to the above problem can be found in polynomial time using
+standard LP-solvers. The solution returned by the LP is \emph{fractional}, and
+requires a rounding procedure to return a feasible solution to our problem,
+where $S$ is integral. To round the solution we use the pipage rounding
+method~\cite{pipage}. We defer the details to the full version of the paper~\cite{full}.
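As one concrete (hypothetical) way to assemble and solve \eqref{eq:lp}, the sketch below uses SciPy's linprog; the variable layout (first the $\lambda_v$, then the $q_u$) and all function names are our own:

    import numpy as np
    from scipy.optimize import linprog

    def solve_adaptive_seeding_lp(X, neigh, p, w, k):
        """Solve the LP relaxation (eq:lp).

        X:     list of first-stage nodes
        neigh: dict u -> list of v in X adjacent to u
        p, w:  dicts keyed by the second-stage nodes u in N(X)
        """
        U = sorted(p)                    # second-stage nodes N(X)
        m, n = len(X), len(U)
        # Maximize sum_u p_u q_u w_u  <=>  minimize the negated objective.
        c = np.concatenate([np.zeros(m), [-p[u] * w[u] for u in U]])
        A_ub, b_ub = [], []
        # Budget: sum_v lambda_v + sum_u p_u q_u <= k.
        A_ub.append(np.concatenate([np.ones(m), [p[u] for u in U]]))
        b_ub.append(k)
        # Coverage: q_u - sum_{v in N(u)} lambda_v <= 0 for every u.
        for j, u in enumerate(U):
            row = np.zeros(m + n)
            row[m + j] = 1.0
            for v in neigh[u]:
                row[X.index(v)] -= 1.0
            A_ub.append(row)
            b_ub.append(0.0)
        res = linprog(c, A_ub=np.array(A_ub), b_ub=np.array(b_ub),
                      bounds=[(0, 1)] * (m + n), method="highs")
        lam = dict(zip(X, res.x[:m]))
        q = dict(zip(U, res.x[m:]))
        return lam, q, -res.fun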
\begin{lemma}
- For \mbox{\textsc{AdaptiveSeeding-LP}} as defined in \eqref{eq:lp}, any fractional solution $(\lambda, \mathbf{q})\in[0,1]^m\times[0,1]^n$ can be rounded to an integral solution $\bar{\lambda} \in \{0,1\}^{m}$ s.t. $(1-1/e) F(\mathbf{p}\circ\mathbf{q}) \leq A(\bar{\lambda})$ in $O(m + n)$ steps.
+  For \mbox{\textsc{AdaptiveSeeding-LP}} defined in \eqref{eq:lp}, any fractional solution $(\boldsymbol\lambda, \mathbf{q})\in[0,1]^m\times[0,1]^n$ can be rounded to an integral solution $\bar{\boldsymbol\lambda} \in \{0,1\}^{m}$ s.t. $(1-1/e) F(\mathbf{p}\circ\mathbf{q}) \leq A(\bar{\boldsymbol\lambda})$ in $O(m + n)$ steps.
\end{lemma}
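Since the rounding details are deferred to the full version, the sketch below only conveys the flavor of the pipage idea under our own simplifying assumptions (all names ours): we round $\boldsymbol\lambda$ against the coverage value $\sum_u p_uw_u(1-\prod_{v\in\neigh{u}}(1-\lambda_v))$, which is convex along any direction $e_i-e_j$, so moving mass between two fractional coordinates to the better endpoint never decreases it. This is an illustration, not the exact procedure of~\cite{pipage}.

    import math

    def cov(lam, neigh, p, w):
        # F(lambda) = sum_u p_u w_u * (1 - prod_{v in N(u)} (1 - lambda_v)).
        return sum(p[u] * w[u] * (1 - math.prod(1 - lam[v] for v in neigh[u]))
                   for u in neigh)

    def pipage_round(lam, neigh, p, w, eps=1e-9):
        """Push mass between two fractional lambda coordinates; the sum
        of lambda is preserved, so the first-stage budget stays feasible."""
        lam = dict(lam)
        while True:
            frac = [v for v, x in lam.items() if eps < x < 1 - eps]
            if len(frac) < 2:
                # At most one genuinely fractional coordinate remains; its
                # exact handling is part of the deferred analysis.
                return {v: float(round(x)) for v, x in lam.items()}
            i, j = frac[:2]
            cands = []
            for d in (min(1 - lam[i], lam[j]), -min(lam[i], 1 - lam[j])):
                c = dict(lam)
                c[i] += d          # move until i or j hits an integer bound
                c[j] -= d
                cands.append(c)
            # By convexity along e_i - e_j, the better endpoint is no worse
            # than the current fractional point.
            lam = max(cands, key=lambda c: cov(c, neigh, p, w))

Each iteration makes at least one coordinate integral, so the loop runs at most $m$ times.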
\subsection{A Combinatorial Algorithm}
-In this section, we will introduce a combinatorial algorithm with an identical approximation guarantee to the LP-based approach. The main idea is to reduce the problem to a monotone submodular maximization problem and apply a variant of the celebrated greedy algorithm~\cite{nemhauser}.
-This property is quite a remarkable feature of linear influence models; for influence models such as independent cascade and linear threshold, the adaptive seeding problem does not reduce to submodular maximization, and a completely different approach is required~\cite{singer}. In contrast with standard influence maximization, the submodularity of the non-adaptive seeding problem is not simply a consequence of properties of the influence function. It also strongly relies on the combinatorial structure of the two stage optimization.
+\label{sec:comb}
+
+In this section, we introduce a combinatorial algorithm with an identical
+approximation guarantee to the LP-based approach. However, its running time,
+stated in Proposition~\ref{prop:running_time}, can be better than that of
+LP solvers, depending on the relative sizes of the budget and the number of
+nodes in the graph. Furthermore, as we discuss at the end of this section, this
+algorithm is amenable to parallelization.
+
+The main idea is to reduce the problem to a monotone submodular maximization
+problem and apply a variant of the celebrated greedy
+algorithm~\cite{nemhauser}.
+%This property is quite a remarkable feature of
+%linear influence models; for influence models such as independent cascade and
+%linear threshold, the adaptive seeding problem does not reduce to submodular
+%maximization, and a completely different approach is required~\cite{singer}.
+In contrast to standard influence maximization, the submodularity of the
+non-adaptive seeding problem is not simply a consequence of properties of the
+influence function; it also strongly relies on the combinatorial structure of
+the two-stage optimization.
-Intuitively, we can think of our problem as trying to find a set $S$ in the first stage, for which the nodes that can be seeded on the second stage have the largest possible value. To formalize this, for a budget $b\in\reals^+$ used in the second stage and a set of neighbors $T\subseteq\mathcal{N}(X)$, we will use
-$\mathcal{O}(T,b)$ to denote the solution to:
+Intuitively, we can think of our problem as trying to find a set $S$ in the
+first stage, for which the nodes that can be seeded on the second stage have
+the largest possible value. To formalize this, for a budget $b\in\reals^+$
+used in the second stage and a set of neighbors $T\subseteq\mathcal{N}(X)$, we
+will use $\mathcal{O}(T,b)$ to denote the solution to:
\begin{equation}\label{eq:knap}
\begin{split}
\mathcal{O}(T,b)\defeq
- \max_{q\in[0,1]^n} & \sum_{u\in\neigh{X}} p_uq_uw_u\\
- \text{s.t. } & \mathbf{p}^T\mathbf{q}\leq b\text{ and } q_u=0\text{ if
- }u\notin T
+ \max_{\textbf{q}\in[0,1]^n} & \sum_{u\in\neigh{X} \cap T} p_uq_uw_u\\
+ \text{s.t. } & \mathbf{p}^T\mathbf{q}\leq b
+ %\text{ and } q_u=0\text{ if}u\notin T
\end{split}
\end{equation}
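For a fixed $T$ and $b$, \eqref{eq:knap} is a fractional knapsack problem: sort the elements of $T$ by decreasing weight and spend the budget greedily. A minimal sketch (the function name is ours):

    def frac_knapsack(T, b, p, w):
        """Compute O(T, b): spend budget b on the elements of T in
        decreasing order of weight w_u; u consumes up to p_u units."""
        value = 0.0
        for u in sorted(T, key=lambda u: -w[u]):
            take = min(p[u], b)     # implicitly q_u = take / p_u in [0, 1]
            value += take * w[u]
            b -= take
            if b <= 0:
                break
        return value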
The optimization problem \eqref{eq:relaxed} for
non-adaptive policies can now be written as:
\begin{equation}\label{eq:sub}
- \begin{split}
- \max_{S\subseteq X} &\; \mathcal{O}\big(\neigh{S},k-|S|\big)\\
- \text{s.t. } & |S|\leq k
- \end{split}
+ \max_{S\subseteq X} \; \mathcal{O}\big(\neigh{S},k-|S|\big)
+ \quad \text{s.t. } |S|\leq k
\end{equation}
-We start by proving in Proposition~\ref{prop:sub} that for fixed $t$, $\mathcal{O}(\neigh{\cdot}, t)$ is submodular. This proposition relies on lemmas~\ref{lemma:nd} and~\ref{lemma:sub} about the properties of $\mathcal{O}(T,b)$ first seen as a function $b$, then as a function of $T$.
+We start by proving in Proposition~\ref{prop:sub} that for fixed $t$,
+$\mathcal{O}(\neigh{\cdot}, t)$ is submodular. This proposition relies on
+Lemmas~\ref{lemma:nd} and~\ref{lemma:sub} about the properties of
+$\mathcal{O}(T,b)$.
\begin{lemma}\label{lemma:nd}
Let $T \subseteq \mathcal{N}(X)$ and $x \in \mathcal{N}(X)$, then
@@ -285,169 +109,51 @@ We start by proving in Proposition~\ref{prop:sub} that for fixed $t$, $\mathcal{
non-decreasing in $b$.
\end{lemma}
-\begin{proof}
-\emph{W.l.o.g.}, we can relabel and order the elements of $T$ so that $w_1\geq w_2\geq\ldots\geq w_m$.
-Then, $\mathcal{O}(T,b)$ has the following simple piecewise linear expression:
-\begin{displaymath}\label{eq:pw}
- \mathcal{O}(T,b) =
- \begin{cases}
- b w_1&\text{if }0\leq b<p_1\\
- \displaystyle
- \sum_{k=1}^{i-1}p_k(w_k-w_i)
- + b w_i
- &\displaystyle\text{if } 0\leq b - \sum_{k=1}^{i-1}p_k< p_i\\
- \displaystyle
- \sum_{k=1}^m p_kw_k
-    &\displaystyle\text{if } b\geq\sum_{k=1}^m p_k
-
- \end{cases}
-\end{displaymath}
-
-Let us define for $t\in\reals^+$, $n(t)\defeq \inf\Big\{i\text{ s.t.
-} \sum_{k=1}^i p_k > t\Big \}$ with $n(t)=+\infty$ when the set is empty. In
-particular, note that $t\mapsto n(t)$ is non-decreasing. Denoting
-$\partial_+\mathcal{O}_T$ the right derivative of $\mathcal{O}(T,\cdot)$, one
-can write $\partial_+\mathcal{O}_T(t)=w_{n(t)}$, with the convention that
-$w_\infty = 0$.
-
-Writing $i \defeq \sup\Big\{j\text{ s.t.
-} w_j\geq w_x\Big\}$, it is easy to see that
-$\partial_+\mathcal{O}_{T\cup\{x\}}\geq\partial_+\mathcal{O}_T$. Indeed,
-inserting $x$ shifts the segments of weight smaller than $w_x$ by $p_x$:
-\begin{enumerate}
-  \item if $n(t)\leq i$, then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
-    = \partial_+\mathcal{O}_T(t)= w_{n(t)}$;
-  \item if $n(t)\geq i+1$ and $n(t-p_x)\leq i$, then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
-    = w_x\geq w_{n(t)}= \partial_+\mathcal{O}_T(t)$;
-  \item if $n(t-p_x)\geq i+1$, then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
-    = w_{n(t-p_x)}\geq w_{n(t)}=\partial_+\mathcal{O}_T(t)$.
-\end{enumerate}
-
-Let us now consider $b$ and $c$ such that $b\leq c$. Then, using the integral
-representation of $\mathcal{O}(T\cup\{x\},\cdot)$ and $\mathcal{O}(T,\cdot)$, we get:
+The proof of this lemma can be found in the full version of the paper~\cite{full}. The main
+idea is to write:
\begin{multline*}
- \mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{T\cup\{x\}}(t)dt\\
- \geq\int_b^c\partial_+\mathcal{O}_T(t)dt=\mathcal{O}(T,c)-\mathcal{O}(T,b)
+ \mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{T\cup\{x\}}(t)dt
+\end{multline*}
+where $\partial_+\mathcal{O}_T$ denotes the right derivative of
+$\mathcal{O}(T,\cdot)$. For a fixed $T$ and $b$, $\mathcal{O}(T,b)$ defines
+a fractional Knapsack problem over the set $T$. Knowing the form of the optimal
+fractional solution, we can verify that
+$\partial_+\mathcal{O}_{T\cup\{x\}}\geq\partial_+\mathcal{O}_T$ and obtain:
+\begin{multline*}
+ \mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T\cup\{x\},b)\geq
+ \mathcal{O}(T,c)-\mathcal{O}(T,b)
\end{multline*}
-Re-ordering the terms, $\mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T,c)\geq
-\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)$
-which concludes the proof of the lemma.
-\end{proof}
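Lemma~\ref{lemma:nd} can also be sanity-checked numerically: the sketch below (ours) draws random instances and verifies that the increment from adding $x$ grows with the budget, reusing the frac_knapsack function from the earlier sketch:

    import random

    rng = random.Random(1)
    for _ in range(1000):
        n = 6
        p = {u: rng.uniform(0.1, 1.0) for u in range(n)}
        w = {u: rng.uniform(0.0, 10.0) for u in range(n)}
        T, x = list(range(n - 1)), n - 1
        b = rng.uniform(0.0, 3.0)
        c = b + rng.uniform(0.0, 3.0)
        lhs = frac_knapsack(T + [x], c, p, w) - frac_knapsack(T + [x], b, p, w)
        rhs = frac_knapsack(T, c, p, w) - frac_knapsack(T, b, p, w)
        assert lhs >= rhs - 1e-9   # increment of adding x grows with budget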
\begin{lemma}\label{lemma:sub}
For any $b\in\reals^+$, $\mathcal{O}(T,b)$ is submodular in $T$, $T\subseteq\neigh{X}$.
\end{lemma}
-\begin{proof}
- Let $T\subseteq\neigh{X}$ and $x, y\in\neigh{X}\setminus T$. Using the
- second-order characterization of submodular functions, it suffices to show
- that:
- \begin{displaymath}\label{eq:so}
+The proof of this lemma is more technical. For $T\subseteq\neigh{X}$ and $x,
+y\in\neigh{X}\setminus T$, we need to show that:
+ \begin{displaymath}
\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)\geq
\mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
\end{displaymath}
+ This can be done by partitioning the set $T$ into ``high value
+ items'' (those with weight greater than $w_x$) and ``low value items'' and
+ carefully applying Lemma~\ref{lemma:nd} to the associated subproblems.
+ The proof is in the full version of the paper~\cite{full}.
-  We distinguish two cases based on the relative position of $w_x$ and $w_y$.
-  The following notations will be useful: $S_T^x \defeq \big\{u\in
-  T\text{ s.t. }w_u\leq w_x\big\}$ (the elements of value at most $w_x$) and
-  $P_T^x\defeq T\setminus S_T^x$ (those of value greater than $w_x$).
-
- \textbf{Case 1:} If $w_y\geq w_x$, then one can
- write:
- \begin{gather*}
- \mathcal{O}(T\cup\{y,x\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)+
- \mathcal{O}(S_T^y\cup\{x\},b_2)\\
- \mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)
- + \mathcal{O}(S_T^y,b_2)
- \end{gather*}
- where $b_1$ is the fraction of the budget $b$ spent on $P_T^y\cup\{y\}$ and
- $b_2=b-b_1$.
-
- Similarly:
- \begin{gather*}
- \mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y\cup\{x\},c_2)\\
- \mathcal{O}(T, b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y,c_2)
- \end{gather*}
- where $c_1$ is the fraction of the budget $b$ spent on $P_T^y$ and $c_2
- = b - c_1$.
-
-  Note that $b_1\geq c_1$: an optimal solution will first spend as much
-  budget as possible on $P_T^y\cup\{y\}$ before adding elements in
-  $S_T^y\cup\{x\}$.
-
-  In this case:
-  \begin{displaymath}
-    \begin{split}
-      \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)&=
-      \mathcal{O}(S_T^y\cup\{x\},c_2)-\mathcal{O}(S_T^y,c_2)\\
-      &\geq \mathcal{O}(S_T^y\cup\{x\},b_2)-\mathcal{O}(S_T^y,b_2)\\
-      & = \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
-    \end{split}
-  \end{displaymath}
- where the inequality comes from Lemma~\ref{lemma:nd} and
- $c_2\geq b_2$.
-
- \textbf{Case 2:} If $w_x > w_y$, we now decompose
- the solution on $P_T^x$ and $S_T^x$:
- \begin{gather*}
- \mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1)
- + \mathcal{O}(S_T^x,b_2)\\
- \mathcal{O}(T,b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x,c_2)\\
- %\intertext{and}
- \mathcal{O}(T\cup\{y, x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1)
- + \mathcal{O}(S_T^x\cup\{y\},b_2)\\
- \mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x\cup\{y\},c_2)
- \end{gather*}
- with $b_1+b_2=b$, $c_1+c_2=b$ and $b_2\leq c_2$.
-
-  In this case again:
-  \begin{multline*}
-    \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)
-    -\big(\mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)\big)\\
-    =\big(\mathcal{O}(S_T^x,b_2)-\mathcal{O}(S_T^x,c_2)\big)\\
-    -\big(\mathcal{O}(S_T^x\cup\{y\},b_2)-\mathcal{O}(S_T^x\cup\{y\},c_2)\big)
-    \geq 0
-  \end{multline*}
-  where the common term $\mathcal{O}(P_T^x\cup\{x\},b_1)-\mathcal{O}(P_T^x,c_1)$ cancels in the difference, and the inequality uses Lemma~\ref{lemma:nd} and $c_2\geq b_2$.
-
- In both cases, we were able to obtain the second-order characterization of submodularity. This concludes the proof of the lemma.
-\end{proof}
+Finally, Lemma~\ref{lemma:sub} can be used to show Proposition~\ref{prop:sub},
+whose proof can be found in the full version~\cite{full}.
\begin{proposition}\label{prop:sub}
- Let $b\in\mathbf{R}^+$, then $\mathcal{O}(\neigh{S},b)$ is submodular in $S$, $S\subseteq X$.
+ Let $b\in\mathbf{R}^+$, then $\mathcal{O}(\neigh{S},b)$ is monotone and
+ submodular in $S$, $S\subseteq X$.
\end{proposition}
-\begin{proof}
- Let us consider $S$ and $T$ such that $S\subseteq T\subseteq X$ and $x\in
- X\setminus T$. In particular, note that $\neigh{S}\subseteq\neigh{T}$.
-
- Let us write $\neigh{S\cup\{x\}}=\neigh{S}\cup R$ with $\neigh{S}\cap
- R=\emptyset$ and similarly, $\neigh{T\cup\{x\}}=\neigh{T}\cup R'$ with
- $\neigh{T}\cap R'=\emptyset$. It is clear that $R'\subseteq R$. Writing $R'=\{u_1,\ldots,u_k\}$:
- \begin{multline*}
- \mathcal{O}(\neigh{T\cup\{x\}},b)- \mathcal{O}(\neigh{T},b)\\
-    =\sum_{i=1}^k\mathcal{O}(\neigh{T}\cup\{u_1,\ldots, u_i\},b)
-    -\mathcal{O}(\neigh{T}\cup\{u_1,\ldots, u_{i-1}\},b)\\
-    \leq \sum_{i=1}^k\mathcal{O}(\neigh{S}\cup\{u_1,\ldots, u_i\},b)
-    -\mathcal{O}(\neigh{S}\cup\{u_1,\ldots, u_{i-1}\},b)\\
-    =\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
- \end{multline*}
-  where the inequality comes from the submodularity of $\mathcal{O}(\cdot,b)$ proved in Lemma~\ref{lemma:sub}. The function $\mathcal{O}(\cdot,b)$ is also clearly monotone non-decreasing, hence:
- \begin{multline*}
- \mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)\\
- \leq \mathcal{O}(\neigh{S}\cup R,b)-\mathcal{O}(\neigh{S},b)\\
- =\mathcal{O}(\neigh{S\cup\{x\}},b)-\mathcal{O}(\neigh{S},b)
- \end{multline*}
- This concludes the proof of the proposition.
-\end{proof}
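A quick numerical check of Proposition~\ref{prop:sub} on random bipartite instances (our sketch; it reuses the frac_knapsack evaluation of $\mathcal{O}$ from above):

    import random

    def value(S, neigh_X, b, p, w):
        # g(S) = O(N(S), b), with frac_knapsack from the earlier sketch.
        N_S = set().union(*(neigh_X[v] for v in S)) if S else set()
        return frac_knapsack(N_S, b, p, w)

    rng = random.Random(2)
    for _ in range(500):
        U = range(6)
        neigh_X = {v: {u for u in U if rng.random() < 0.5} for v in range(4)}
        p = {u: rng.uniform(0.1, 1.0) for u in U}
        w = {u: rng.uniform(0.0, 10.0) for u in U}
        b = rng.uniform(0.0, 3.0)
        S, T, x = {0}, {0, 1, 2}, 3        # S subset of T, x not in T
        dS = value(S | {x}, neigh_X, b, p, w) - value(S, neigh_X, b, p, w)
        dT = value(T | {x}, neigh_X, b, p, w) - value(T, neigh_X, b, p, w)
        assert dS >= dT - 1e-9             # submodular: diminishing returns
        assert dT >= -1e-9                 # monotone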
-
We can now use Proposition~\ref{prop:sub} to reduce \eqref{eq:sub} to a monotone submodular maximization problem. First, we note that~\eqref{eq:sub} can be rewritten as:
\begin{equation}\label{eq:sub-mod}
- \begin{split}
- &\max_{t}\max_{S\subseteq X} \; \mathcal{O}\big(\neigh{S},t\big)\\
- &\text{s.t. } |S| + t\leq k
- \end{split}
+ \max_{\substack{S\subseteq X\\ t \in \mathbb{N}}} \; \mathcal{O}\big(\neigh{S},t\big)
+ \quad\text{s.t. } |S| + t\leq k
\end{equation}
-Intuitively, we fix $t$ arbitrarily so that the inner $\max$ in \eqref{eq:sub-mod} becomes a submodular maximization problem with fixed budget $t$. We then optimize over the value of $t$. Combining this observation with the greedy algorithm for monotone submodular maximization~\cite{nemhauser}, we obtain Algorithm~\ref{alg:comb}, whose performance guarantee is summarized in Proposition~\ref{prop:main_result}.
+Intuitively, we fix $t$ arbitrarily so that the maximization above becomes a submodular maximization problem with fixed budget $t$. We then optimize over the value of $t$. Combining this observation with the greedy algorithm for monotone submodular maximization~\cite{nemhauser}, we obtain Algorithm~\ref{alg:comb}, whose performance guarantee is summarized in Proposition~\ref{prop:main_result}.
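A direct, unoptimized Python transcription of this scheme (our sketch; the speedups discussed at the end of the section are omitted, and frac_knapsack is the fractional-knapsack evaluation of $\mathcal{O}$ from above):

    def greedy_for_split(X, neigh_X, p, w, t, size1):
        """Greedily build S with |S| = size1 maximizing O(N(S), t), which
        is monotone submodular by Proposition (prop:sub)."""
        S, N_S = set(), set()
        for _ in range(size1):
            x = max((v for v in X if v not in S),
                    key=lambda v: frac_knapsack(N_S | neigh_X[v], t, p, w))
            S.add(x)
            N_S |= neigh_X[x]
        return S, frac_knapsack(N_S, t, p, w)

    def combinatorial_algorithm(X, neigh_X, p, w, k):
        """Try every split: k - t seeds in the first stage, budget t in
        the second; return the best greedy solution over all splits."""
        best = (set(), float("-inf"), 0)
        for t in range(1, k):
            S, val = greedy_for_split(X, neigh_X, p, w, t, k - t)
            if val > best[1]:
                best = (S, val, t)
        return best   # (seed set S, non-adaptive value, stage-two budget t)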
\begin{algorithm}
\caption{Combinatorial algorithm}
@@ -477,32 +183,44 @@ Intuitively, we fix $t$ arbitrarily so that the inner $\max$ in \eqref{eq:sub-mo
stage. Then $\mathrm{A}(S) \geq (1-1/e)\mathrm{OPT}_A$.
\end{proposition}
-\begin{proof}
- We simply note that the content of the outer \textsf{for} loop on line 2 of Algorithm~\ref{alg:comb} is the greedy submodular maximization algorithm of \cite{nemhauser}. Since $\mathcal{O}(\neigh{\cdot}, t)$ is submodular (Proposition~\ref{prop:sub}), this solves the inner $\max$ in \eqref{eq:sub-mod} with an approximation ratio of $(1-1/e)$. The outer \textsf{for} loop then computes the outer $\max$ of \eqref{eq:sub-mod}.
- As a consequence, Algorithm~\ref{alg:comb} computes a $(1-1/e)$-approximate non-adaptive solution. We conclude by applying Proposition~\ref{prop:cr}.
-\end{proof}
-\subsection{Running time}
-The algorithm described above considers all possible ways to split the seeding budget between the first and the second stage. For each possible split $\{(t,k-t)\}_{t=1\ldots,k-1}$, the algorithm computes an approximation to the optimal non adaptive solution that uses $k-t$ nodes in the first stage and $t$ nodes in the second stage, and returns the solution for the split with the highest value (breaking ties arbitrarily). This process can be trivially parallelized across $k-1$ machines, each performing a computation of a single split.
+%\subsubsection{Scalability}
+
+\noindent\textbf{Parallelization.} The algorithm described above considers all
+possible ways to split the seeding budget between the first and the second
+stage. For each possible split $\{(t,k-t)\}_{t=1\ldots,k-1}$, the algorithm
+computes an approximation to the optimal non-adaptive solution that uses $k-t$
+nodes in the first stage and $t$ nodes in the second stage, and returns the
+solution for the split with the highest value (breaking ties arbitrarily).
+This process can be trivially parallelized across $k-1$ machines, each
+performing the computation for a single split. With slightly more effort, for
+any $\epsilon>0$ one can parallelize over $\log_{1+\epsilon}n$ machines at the
+cost of losing a factor of $(1+\epsilon)$ in the approximation guarantee (see
+the full version of the paper~\cite{full} for details).\newline
+
+\noindent \textbf{Implementation in MapReduce.} While the previous paragraph
+describes how to parallelize the outer \textsf{for} loop of
+Algorithm~\ref{alg:comb}, we note that its inner loop can also be parallelized
+in the MapReduce framework. Indeed, it corresponds to the greedy algorithm
+applied to the function $\mathcal{O}\left(\neigh{\cdot}, t\right)$. The
+\textsc{Sample\&Prune} approach successfully applied in \cite{mr} to obtain
+MapReduce algorithms for various submodular maximization problems can also be applied
+to Algorithm~\ref{alg:comb} to cast it in the MapReduce framework. The details
+of the algorithm can be found in the full version of the paper~\cite{full}.
+\newline
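A minimal sketch of the split-level parallelization on a single machine with a process pool (assuming greedy_for_split from the previous sketch is defined at module level; the MapReduce variant is the one deferred to the full version):

    from concurrent.futures import ProcessPoolExecutor
    from functools import partial

    def run_split(X, neigh_X, p, w, k, t):
        # One independent task per budget split, exactly as described above.
        S, val = greedy_for_split(X, neigh_X, p, w, t, k - t)
        return S, val, t

    def parallel_combinatorial(X, neigh_X, p, w, k, workers=8):
        run = partial(run_split, X, neigh_X, p, w, k)
        with ProcessPoolExecutor(max_workers=workers) as pool:
            return max(pool.map(run, range(1, k)), key=lambda r: r[1])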
-A slightly more sophisticated approach is to consider only $\log n$ splits: $(1,k-1),(2,k-2),\ldots,(2^{\lfloor \log n \rfloor},1)$ and then select the best solution from this set. It is not hard to see that in comparison to the previous approach, this would reduce the approximation guarantee by a factor of at most 2: if the optimal solution is obtained by spending $t$ on the first stage and $k-t$ in the second stage, then since $t \leq 2\cdot2^{\lfloor \log t \rfloor}$ the solution computed for $(2^{\lfloor \log t \rfloor}, k - 2^{\lfloor \log t \rfloor})$ will have at least half that value.
-More generally, for any $\epsilon>0$ one can parallelize over $\log_{1+\epsilon}n$ machines at the cost of losing a factor of $(1+\epsilon)$ in the approximation guarantee.
+%A slightly more sophisticated approach is to consider only $\log n$ splits: $(1,k-1),(2,k-2),\ldots,(2^{\lfloor \log n \rfloor},1)$ and then select the best solution from this set. It is not hard to see that in comparison to the previous approach, this would reduce the approximation guarantee by a factor of at most 2: if the optimal solution is obtained by spending $t$ on the first stage and $k-t$ in the second stage, then since $t \leq 2\cdot2^{\lfloor \log t \rfloor}$ the solution computed for $(2^{\lfloor \log t \rfloor}, k - 2^{\lfloor \log t \rfloor})$ will have at least half that value.
+%More generally, for any $\epsilon>0$ one can parallelize over $\log_{1+\epsilon}n$ machines at the cost of losing a factor of $(1+\epsilon)$ in the approximation guarantee.
-To implement Algorithm~\ref{alg:comb} efficiently, the computation of the $\argmax$ on line 5 must be dealt with carefully. $\mathcal{O}(\neigh{S_t\cup\{x\}},t)$ is the optimal solution to the fractional Knapsack problem~\eqref{eq:knap} with budget $t$ and can be computed in time $\min(\frac{t}{p_\text{min}},n)$ by iterating over the list of nodes in $\neigh{S_t\cup\{x\}}$ by decreasing order of the degrees. This decreasing order of $\neigh{S_t}$ can be maintained throughout the greedy construction of $S_t$ by:
+\noindent \textbf{Algorithmic speedups.} To implement Algorithm~\ref{alg:comb} efficiently, the computation of the $\argmax$ on line 5 must be dealt with carefully. $\mathcal{O}(\neigh{S_t\cup\{x\}},t)$ is the optimal solution to the fractional Knapsack problem~\eqref{eq:knap} with budget $t$ and can be computed in time $O\big(\min(\frac{t}{p_\text{min}},n)\big)$ by iterating over the list of nodes in $\neigh{S_t\cup\{x\}}$ in decreasing order of the degrees. This decreasing order of $\neigh{S_t}$ can be maintained throughout the greedy construction of $S_t$ by:
\begin{itemize}
-\item ordering the list of neighbors of nodes in $X$ by decreasing order of the degrees when initially constructing the graph. This is responsible for the $n\log n$ additive factor.
+ \item ordering the list of neighbors of nodes in $X$ by decreasing order of the degrees when initially constructing the graph. This is responsible for an $O(n\log n)$ pre-processing time.
\item when adding node $x$ to $S_t$, observe that $\neigh{S_t\cup\{x\}} = \neigh{S_t}\cup\neigh{\{x\}}$. Hence, if $\neigh{S_t}$ and $\neigh{\{x\}}$ are sorted lists, then $\mathcal{O}(\neigh{S_t\cup\{x\}},t)$ can be computed in a single iteration of length $\min(\frac{t}{p_\text{min}},n)$ where the two sorted lists are merged on the fly.
\end{itemize}
As a consequence, the running time of line 5 is bounded from above by $O\big(m\min(\frac{t}{p_\text{min}},n)\big)$. The two nested \textsf{for} loops are responsible for the additional $k^2$ factor. The running time of Algorithm~\ref{alg:comb} is summarized in Proposition~\ref{prop:running_time}.
\begin{proposition}\label{prop:running_time}
- Algorithm~\ref{alg:comb} runs in time $O\big(n\log
- n + k^2 m \min(\frac{k}{p_\text{min}},n)\big)$, with $p_\text{min} =\min\{p_u,u\in\neigh{X}\}$.
+ Let $p_\text{min} =\min\{p_u,u\in\neigh{X}\}$, then Algorithm~\ref{alg:comb} runs in time
+ ${O\big(n\log n + k^2 m \min(\frac{k}{p_\text{min}},n)\big)}$.
\end{proposition}
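The merge-on-the-fly evaluation described in the itemized list above can be sketched as follows (our illustration), keeping $\neigh{S_t}$ and $\neigh{\{x\}}$ as lists already sorted by decreasing weight:

    from heapq import merge

    def eval_merged(sorted_NS, sorted_Nx, t, p, w):
        """Evaluate O(N(S_t) union N({x}), t) in one linear pass by
        merging two lists already sorted by decreasing weight."""
        b, value, seen = t, 0.0, set()
        for u in merge(sorted_NS, sorted_Nx, key=lambda u: -w[u]):
            if u in seen:             # the two neighborhoods may overlap
                continue
            seen.add(u)
            take = min(p[u], b)
            value += take * w[u]
            b -= take
            if b <= 0:
                break
        return value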