summaryrefslogtreecommitdiffstats
path: root/paper/sections/appendix.tex
diff options
context:
space:
mode:
Diffstat (limited to 'paper/sections/appendix.tex')
-rw-r--r--paper/sections/appendix.tex349
1 files changed, 291 insertions, 58 deletions
diff --git a/paper/sections/appendix.tex b/paper/sections/appendix.tex
index 64ca77e..aa0ef1d 100644
--- a/paper/sections/appendix.tex
+++ b/paper/sections/appendix.tex
@@ -1,76 +1,309 @@
-
-\noindent \textbf{From expectation to high probability.}
-
-%\begin{lemma}\label{lemma:nd}
-% Let $A\subseteq\mathcal{U}$ and $x=(v,c)\in\mathcal{U}\setminus A$, then
-% the function $b\mapsto \mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)$ is
-% non-decreasing over $\reals^+$.
-%\end{lemma}
-
-
-
-
-
-
-\begin{proof}[of Proposition~\ref{main_result}]
- For each realization of the neighbors $R$, let us define by $X_R$ the
- random set which includes each $u\in R$ with probability $q_u$ on the
- second day and:
+\section{Adaptivity proofs}
+\begin{proof}[of Proposition~\ref{prop:gap}]
+ We will first show that the optimal adaptive policy can be interpreted as
+ a non-adaptive policy. Let $S$ be the optimal adaptive
+    solution and define $\delta_R\colon\neigh{X}\to \{0,1\}$:
\begin{displaymath}
- \tilde{X}_R=
- \begin{cases}
- X_R&\text{if } |X_R|\leq t\\
- \emptyset&\text{otherwise}
- \end{cases}
+ \delta_R(u) \defeq \begin{cases}
+ 1&\text{if } u\in\argmax\big\{f(T);\; T\subseteq R,\; |T|\leq
+ k-|S|\big\} \\
+ 0&\text{otherwise}
+ \end{cases},
\end{displaymath}
- where $t = k-|S|$. $\tilde{X}_R$ is a random variable over the feasible
- solutions on the second day. As a consequence:
+ one can write
\begin{displaymath}
- A(S) \geq \sum_{R\subseteq\neigh{S}} p_R\mathbb{E}\big[f(\tilde{X}_R)\big]
+ \begin{split}
+ \sum_{R\subseteq\neigh{S}} p_R
+ \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
+ &=
+ \sum_{R\subseteq\neigh{S}} p_R
+ \sum_{u\in\neigh{X}}\delta_R(u)w_u\\
+ &=
+ \sum_{u\in\neigh{X}}w_u\sum_{R\subseteq\neigh{S}}p_R\delta_R(u).
+ \end{split}
\end{displaymath}
- Let us define by $Y$ the random set which includes each $u\in\neigh{X}$
- with probability $p_uq_u$ and:
+
+ Let us now define for $u\in\neigh{X}$:
\begin{displaymath}
- \tilde{Y}=
- \begin{cases}
- Y&\text{if } |Y|\leq t\\
- \emptyset&\text{otherwise}
+ q_u \defeq \begin{cases}
+ \sum_{R\subseteq\neigh{S}}\frac{p_R}{p_u}\delta_R(u)
+ &\text{if }p_u\neq 0\\
+ 0&\text{otherwise}
\end{cases}.
\end{displaymath}
- It is easy to see that $\tilde{X}_R$ is the conditional expectation of
- $\tilde{Y}$ given
- $R$. Hence:
+ This allows us to write:
\begin{displaymath}
- \sum_{R\subseteq\neigh{S}} p_R\mathbb{E}\big[f(\tilde{X}_R)\big]
- = \mathbb{E}\big[f(\tilde{Y})\big]
+ \sum_{R\subseteq\neigh{S}} p_R
+ \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
+ = \sum_{u\in\neigh{X}}p_uq_uw_u = F(\mathbf{p}\circ\mathbf{q})
\end{displaymath}
- But:
- \begin{align*}
- \mathbb{E}\big[f(\tilde{Y})\big]
- &=\sum_{T\subseteq\neigh{X}}\mathbb{P}\big[\tilde{Y}=T\big]f(T)
- = \sum_{\substack{T\subseteq\neigh{X}\\|T|\leq
- t}}\mathbb{P}\big[Y=T\big]f(T)\\
- &= \mathbb{E}\big[f(Y)\big]-\sum_{\substack{T\subseteq\neigh{X}\\|T|>
- t}}\mathbb{P}\big[Y=T\big]f(T)
- \end{align*}
- Roughly:
+ where the last equality is obtained from \eqref{eq:multi} by successively using the linearity of the expectation and the linearity of $f$.
+
+ Furthermore, observe that $q_u\in[0,1]$, $q_u=0$ if $u\notin\neigh{S}$ and:
\begin{displaymath}
- \sum_{\substack{T\subseteq\neigh{X}\\|T|>
- t}}\mathbb{P}\big[Y=T\big]f(T)\leq
- \frac{1}{2}\mathbb{E}\big[f(Y)\big]
+ \begin{split}
+ |S|+\sum_{u\in\neigh{X}}p_uq_u
+ &= |S|+\sum_{R\subseteq\neigh{S}}p_R\sum_{u\in\neigh{X}}\delta_R(u)\\
+ &\leq |S| + \sum_{R\subseteq\neigh{S}}p_R(k-|S|)\leq k
+ \end{split}
\end{displaymath}
- Combining the above inequalities we get:
+
+ Hence, $(S,\mathbf{q})\in\mathcal{F}_{NA}$. In other words, we have written the optimal adaptive solution as a relaxed
+    non-adaptive solution. This concludes the proof of the proposition.
+\end{proof}
+
+\vspace{0.5em}
+
+\begin{proof}[of Proposition~\ref{prop:cr}]
+ Using the definition of $\mathrm{A}(S)$, one can write:
\begin{displaymath}
- A(S)\geq \frac{1}{2} \mathbb{E}\big[f(Y)\big]
+ \mathrm{A}(S) = \sum_{R\subseteq\neigh{S}} p_R
+ \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
+ \geq \sum_{R\subseteq\neigh{S}} p_R \mathbf{E}\big[f(I)\big]
\end{displaymath}
- But $\mathbb{E}\big[f(Y)\big]$ is precisely the value of the non-adaptive
- solution computed by Algorithm~\ref{alg:comb} which is
- a $\left(1-\frac{1}{e}\right)$ approximation of the
- optimal non adaptive solution. Hence:
+ where the inequality comes from the fact that $I$ is a feasible random set: $|I|\leq k-|S|$, hence the expected value of $f(I)$ is bounded by the maximum of $f$ over feasible sets.
+
+Equation~\eqref{eq:cr} then implies:
+\begin{equation}\label{eq:tmp}
+ \mathrm{A}(S)
+ \geq (1-\varepsilon)\sum_{R\subseteq\neigh{S}} p_R F(\mathbf{q})
+ = (1-\varepsilon)F(\mathbf{p}\circ\mathbf{q}).
+\end{equation}
+
+Equation~\eqref{eq:tmp} holds for any $\varepsilon\geq 0$. In particular, for $\varepsilon$ smaller than $\inf_{S\neq T} |A(S)-A(T)|$, we obtain that $\mathrm{A}(S)\geq F(\mathbf{p}\circ\mathbf{q})$. Note that such an $\varepsilon$ is at most polynomially small in the size of the instance.
+$(S, \mathbf{q})$ is an $\alpha$-approximate non-adaptive solution, hence $F(\mathbf{p}\circ\mathbf{q}) \geq \alpha\mathrm{OPT}_{NA}$. We can then conclude by applying Proposition~\ref{prop:gap}.
+\end{proof}
+
+\section{Algorithms Proofs}
+We first discuss the NP-hardness of the problem.
+
+\noindent \textbf{\textsc{NP}-Hardness.} In contrast to standard influence maximization, adaptive seeding is already \textsc{NP}-Hard even for the simplest cases. In the case when $f(S)=|S|$ and all probabilities equal one, the decision problem is whether given a budget $k$ and target value $\ell$ there exists a subset of $X$ of size $k-t$ which yields a solution with expected value of $\ell$ using $t$ nodes in $\mathcal{N}(X)$. This is equivalent to deciding whether there are $k-t$ nodes in $X$ that have $t$ neighbors in $\mathcal{N}(X)$. To see this is \textsc{NP}-hard, consider reducing from \textsc{Set-Cover} where there is one node $i$ for each input set $T_i$, $1\leq i\leq n$, with $\neigh{i}= T_i$ and integers $k,\ell$, and the output is ``yes'' if there is a family of $k$ sets in the input which cover at least $\ell$ elements, and ``no'' otherwise.
+
+
+\subsection{LP-based approach}
+In the LP-based approach we rounded the solution using the pipage rounding method. We discuss this with greater detail here.
+
+\noindent \textbf{Pipage Rounding.}
+The pipage rounding method~\cite{pipage} is a deterministic rounding method that can be applied to a variety of problems. In particular, it can be applied to LP-relaxations of the \textsc{Max-K-Cover} problem where we are given a family of sets that cover elements of a universe and the goal is to find $k$ subsets whose union has the maximal cardinality. The LP-relaxation is a fractional solution over subsets, and the pipage rounding procedure then rounds the allocation in linear time, and the integral solution is guaranteed to be within a factor of $(1-1/e)$ of the fractional solution.
+We make the following key observation: for any given $\textbf{q}$, one can remove all elements in $\mathcal{N}(X)$ for which $q_{u}=0$, without changing the value of any solution $(\boldsymbol\lambda,\textbf{q})$.
+Our rounding procedure can therefore be described as follows: given a solution $(\boldsymbol\lambda,\textbf{q})$ we remove all nodes $u \in \mathcal{N}(X)$ for which $q_{u}=0$, which leaves us with a fractional solution to a (weighted) version of the \textsc{Max-K-Cover} problem where nodes in $X$ are the sets and the universe is the set of weighted nodes in $\mathcal{N}(X)$ that were not removed. We can therefore apply pipage rounding and lose only a factor of $(1-1/e)$ in quality of the solution.
+
+\subsection{Combinatorial Algorithm}
+We include the missing proofs from the combinatorial algorithm section. The scalability and implementation in MapReduce are discussed in this section as well.
+
+\begin{proof}[of Lemma~\ref{lemma:nd}]
+\emph{W.l.o.g.}\ we can rename and order the pairs in $T$ so that $w_1\geq w_2\geq\ldots\geq w_m$.
+Then, $\mathcal{O}(T,b)$ has the following simple piecewise linear expression:
+\begin{displaymath}
+    \mathcal{O}(T,b) =
+    \begin{cases}
+        b w_1&\text{if }0\leq b<p_1\\
+        \displaystyle
+        \sum_{k=1}^{i-1}p_k(w_k-w_i)
+        + b w_i
+        &\displaystyle\text{if } 0\leq b - \sum_{k=1}^{i-1}p_k< p_i\\
+        \displaystyle
+        \sum_{k=1}^m p_kw_k
+        &\displaystyle\text{if } b\geq\sum_{k=1}^m p_k
+    \end{cases}
+\end{displaymath}
+
+Let us define for $t\in\reals^+$, $n(t)\defeq \inf\Big\{i\text{ s.t.
+} \sum_{k=1}^i p_k > t\Big \}$ with $n(t)=+\infty$ when the set is empty. In
+particular, note that $t\mapsto n(t)$ is non-decreasing. Denoting
+$\partial_+\mathcal{O}_T$ the right derivative of $\mathcal{O}(T,\cdot)$, one
+can write $\partial_+\mathcal{O}_T(t)=w_{n(t)}$, with the convention that
+$w_\infty = 0$.
+
+Writing $i \defeq \sup\Big\{j\text{ s.t.
+} w_j\geq w_x\Big\}$, it is easy to see that
+$\partial_+\mathcal{O}_{T\cup\{x\}}\geq\partial_+\mathcal{O}_T$. Indeed:
+\begin{enumerate}
+ \item if $n(t)\leq i$ then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
+ = \partial_+\mathcal{O}_T(t)= w_{n(t)}$.
+ \item if $n(t)\geq i+1$ and $n(t-c)\leq i$ then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
+ = w_x\geq w_{n(t)}= \partial_+\mathcal{O}_T(t)$.
+    \item if $n(t-c)\geq i+1$, then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
+        = w_{n(t-c)}\geq w_{n(t)}=\partial_+\mathcal{O}_T(t)$.
+\end{enumerate}
+
+Let us now consider $b$ and $c$ such that $b\leq c$. Then, using the integral
+representation of $\mathcal{O}(T\cup\{x\},\cdot)$ and $\mathcal{O}(T,\cdot)$, we get:
+\begin{multline*}
+ \mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{T\cup\{x\}}(t)dt\\
+ \geq\int_b^c\partial_+\mathcal{O}_T(t)dt=\mathcal{O}(T,c)-\mathcal{O}(T,b)
+\end{multline*}
+Re-ordering the terms, $\mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T,c)\geq
+\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)$
+which concludes the proof of the lemma.
+\end{proof}
+
+\vspace{0.5em}
+
+\begin{proof}[of Lemma~\ref{lemma:sub}]
+ Let $T\subseteq\neigh{X}$ and $x, y\in\neigh{X}\setminus T$. Using the
+ second-order characterization of submodular functions, it suffices to show
+ that:
+ \begin{displaymath}\label{eq:so}
+ \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)\geq
+ \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
+ \end{displaymath}
+
+ We distinguish two cases based on the relative position of $w_x$ and $w_y$.
+ The following notations will be useful: $S_T^x \defeq \big\{u\in
+ T\text{ s.t. }w_x\leq w_u\big\}$ and $P_T^x\defeq
+ T\setminus S_T^x$.
+
+ \textbf{Case 1:} If $w_y\geq w_x$, then one can
+ write:
+ \begin{gather*}
+ \mathcal{O}(T\cup\{y,x\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)+
+ \mathcal{O}(S_T^y\cup\{x\},b_2)\\
+ \mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)
+ + \mathcal{O}(S_T^y,b_2)
+ \end{gather*}
+ where $b_1$ is the fraction of the budget $b$ spent on $P_T^y\cup\{y\}$ and
+ $b_2=b-b_1$.
+
+ Similarly:
+ \begin{gather*}
+ \mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y\cup\{x\},c_2)\\
+ \mathcal{O}(T, b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y,c_2)
+ \end{gather*}
+ where $c_1$ is the fraction of the budget $b$ spent on $P_T^y$ and $c_2
+ = b - c_1$.
+
+    Note that $b_1\geq c_1$: an optimal solution will first spend as much
+ budget as possible on $P_T^y\cup\{y\}$ before adding elements in
+ $S_T^y\cup\{x\}$.
+
+ In this case:
\begin{displaymath}
- A(S) \geq \frac{1}{2}\left(1-\frac{1}{e}\right) OPT_{NA}
+ \begin{split}
+ \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)&=
+    \mathcal{O}(S_T^y\cup\{x\},c_2)-\mathcal{O}(S_T^y,c_2)\\
+    &\geq \mathcal{O}(S_T^y\cup\{x\},b_2)-\mathcal{O}(S_T^y,b_2)\\
+ & = \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
+ \end{split}
\end{displaymath}
- Finally, we conclude with Proposition~\ref{prop:gap}
+ where the inequality comes from Lemma~\ref{lemma:nd} and
+ $c_2\geq b_2$.
+
+ \textbf{Case 2:} If $w_x > w_y$, we now decompose
+ the solution on $P_T^x$ and $S_T^x$:
+ \begin{gather*}
+ \mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1)
+ + \mathcal{O}(S_T^x,b_2)\\
+ \mathcal{O}(T,b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x,c_2)\\
+ %\intertext{and}
+ \mathcal{O}(T\cup\{y, x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1)
+ + \mathcal{O}(S_T^x\cup\{y\},b_2)\\
+ \mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x\cup\{y\},c_2)
+ \end{gather*}
+ with $b_1+b_2=b$, $c_1+c_2=b$ and $b_2\leq c_2$.
+
+ In this case again:
+ \begin{multline*}
+ \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)=
+ \mathcal{O}(S_T^x,b_2)-\mathcal{O}(S_T^x,c_2)\\
+ \geq \mathcal{O}(S_T^x\cup\{y\},b_2)-\mathcal{O}(S_T^x\cup\{y\},c_2)\\
+ = \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
+ \end{multline*}
+ where the inequality uses Lemma~\ref{lemma:nd} and $c_2\geq b_2$.
+
+ In both cases, we were able to obtain the second-order characterization of submodularity. This concludes the proof of the lemma.
\end{proof}
+\vspace{0.5em}
+
+\begin{proof}[of Proposition~\ref{prop:sub}]
+ Let us consider $S$ and $T$ such that $S\subseteq T\subseteq X$ and $x\in
+ X\setminus T$. In particular, note that $\neigh{S}\subseteq\neigh{T}$.
+
+ Let us write $\neigh{S\cup\{x\}}=\neigh{S}\cup R$ with $\neigh{S}\cap
+ R=\emptyset$ and similarly, $\neigh{T\cup\{x\}}=\neigh{T}\cup R'$ with
+ $\neigh{T}\cap R'=\emptyset$. It is clear that $R'\subseteq R$. Writing $R'=\{u_1,\ldots,u_k\}$:
+ \begin{multline*}
+ \mathcal{O}(\neigh{T\cup\{x\}},b)- \mathcal{O}(\neigh{T},b)\\
+ =\sum_{i=1}^k\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_i\},b)
+ -\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_{i-1}\},b)\\
+ \leq \sum_{i=1}^k\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_i\},b)
+ -\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_{i-1}\},b)\\
+ =\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
+ \end{multline*}
+ where the inequality comes from the submodularity of $\mathcal{O}(\cdot,b)$ proved in Lemma~\ref{lemma:sub}. This same function is also obviously set-increasing, hence:
+ \begin{multline*}
+ \mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)\\
+ \leq \mathcal{O}(\neigh{S}\cup R,b)-\mathcal{O}(\neigh{S},b)\\
+ =\mathcal{O}(\neigh{S\cup\{x\}},b)-\mathcal{O}(\neigh{S},b)
+ \end{multline*}
+ This concludes the proof of the proposition.
+\end{proof}
+
+
+\begin{proof}[of Proposition~\ref{prop:main_result}]
+ We simply note that the content of the outer \textsf{for} loop on line 2 of Algorithm~\ref{alg:comb} is the greedy submodular maximization algorithm of \cite{nemhauser}. Since $\mathcal{O}(\neigh{\cdot}, t)$ is submodular (Proposition~\ref{prop:sub}), this solves the inner $\max$ in \eqref{eq:sub-mod} with an approximation ratio of $(1-1/e)$. The outer \textsf{for} loop then computes the outer $\max$ of \eqref{eq:sub-mod}.
+
+ As a consequence, Algorithm~\ref{alg:comb} computes a $(1-1/e)$-approximate non-adaptive solution. We conclude by applying Proposition~\ref{prop:cr}.
+\end{proof}
+
+\subsection{Parallelization}
+As discussed in the body of the paper, the algorithm can be parallelized across $k$ different machines, each one computing an approximation for a fixed budget $k-t$ in the first stage and $t$ in the second.
+A slightly more sophisticated approach is to consider only $\log n$ splits: $(1,k-1),(2,k-2),\ldots,(2^{\lfloor \log n \rfloor},1)$ and then select the best solution from this set. It is not hard to see that in comparison to the previous approach, this would reduce the approximation guarantee by a factor of at most 2: if the optimal solution is obtained by spending $t$ on the first stage and $k-t$ in the second stage, then since $t \leq 2\cdot2^{\lfloor \log t \rfloor}$ the solution computed for $(2^{\lfloor \log t \rfloor}, k - 2^{\lfloor \log t \rfloor})$ will have at least half that value.
+More generally, for any $\epsilon>0$ one can parallelize over $\log_{1+\epsilon}n$ machines at the cost of losing a factor of $(1+\epsilon)$ in the approximation guarantee.
+
+\subsection{Implementation in MapReduce}
+
+As noted in Section~\ref{sec:comb}, lines 4 to 7 of Algorithm~\ref{alg:comb}
+correspond to the greedy heuristic of \cite{nemhauser} applied to the
+submodular function $f_t(S) \defeq \mathcal{O}\big(\neigh{S}, t\big)$.
+A variant of this heuristic, namely the $\varepsilon$-greedy heuristic,
+combined with the \textsc{Sample\&Prune} method of \cite{mr} allows us to write
+a MapReduce version of Algorithm~\ref{alg:comb}. The resulting algorithm is
+described in Algorithm~\ref{alg:combmr}.
+
+\begin{algorithm}
+ \caption{Combinatorial algorithm, MapReduce}
+ \label{alg:combmr}
+ \algsetup{indent=2em}
+ \begin{algorithmic}[1]
+ \STATE $S\leftarrow \emptyset$
+ \FOR{$t=1$ \TO $k-1$}
+ \STATE $S_t\leftarrow \emptyset$
+ \FOR{$i=1$ \TO $\log_{1+\varepsilon}\Delta$}
+ \STATE $U\leftarrow X$, $S'\leftarrow \emptyset$
+ \WHILE{$|U|>0$}
+ \STATE $R\leftarrow$ sample from $U$ w.p. $\min\left(1,
+ \frac{\ell}{|U|}\right)$
+            \WHILE{$|R|>0$ \AND $|S_t\cup S'|< k$}
+ \STATE $x\leftarrow$ some element from $R$
+ \IF{$\nabla f_t(S_t\cup S', x)\geq\frac{\Delta}{(1+\varepsilon)^i}$}
+ \STATE $S'\leftarrow S'\cup\{x\}$
+ \ENDIF
+ \STATE $R\leftarrow R\setminus\{x\}$
+ \ENDWHILE
+ \STATE $S_t\leftarrow S_t\cup S'$
+ \STATE $U\leftarrow\{x\in U\,|\, \nabla f_t(S_t,
+ x)\geq\frac{\Delta}{(1+\varepsilon)^i}\}$
+ \ENDWHILE
+ \ENDFOR
+ \IF{$\mathcal{O}(\neigh{S_t},t)>\mathcal{O}(\neigh{S},k-|S|)$}
+ \STATE $S\leftarrow S_t$
+ \ENDIF
+ \ENDFOR
+ \RETURN $S$
+ \end{algorithmic}
+\end{algorithm}
+
+We denoted by $\nabla f_t(S, x)$ the marginal increment of $x$ to the set $S$
+for the function $f_t$, $\nabla f_t(S, x) = f_t(S\cup\{x\}) - f_t(S)$.
+$\Delta$ is an upper bound on the marginal contribution of any element. In our
+case, $\Delta = \max_{u\in\neigh{X}} w_u$ provides such an upper bound. The
+sampling in line 7 selects a small enough number of elements that the
+\texttt{while} loop from lines 8 to 14 can be executed on a single machine.
+Furthermore, lines 7 and 16 can be implemented in one round of MapReduce each.
+The approximation ratio of Algorithm~\ref{alg:combmr} is
+$1-\frac{1}{e}-\varepsilon$. The proof of this result as well as the optimal
+choice of $\ell$ follows from Theorem 10 in \cite{mr}.