\section{Adaptivity proofs}
\label{sec:ad-proofs}

\begin{proof}[of Proposition~\ref{prop:gap}]
We will first show that the optimal adaptive policy can be interpreted as a non-adaptive policy. Let $S$ be the first-stage set selected by the optimal adaptive solution and define $\delta_R:\neigh{X}\to \{0,1\}$ by:
\begin{displaymath}
\delta_R(u) \defeq
\begin{cases}
1&\text{if } u\in\argmax\big\{f(T);\; T\subseteq R,\; |T|\leq k-|S|\big\} \\
0&\text{otherwise}
\end{cases},
\end{displaymath}
where ties in the $\argmax$ are broken arbitrarily so that, for each $R$, $\delta_R$ is the indicator of a single maximizing set. One can then write:
\begin{displaymath}
\begin{split}
\sum_{R\subseteq\neigh{S}} p_R \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
&= \sum_{R\subseteq\neigh{S}} p_R \sum_{u\in\neigh{X}}\delta_R(u)w_u\\
&= \sum_{u\in\neigh{X}}w_u\sum_{R\subseteq\neigh{S}}p_R\delta_R(u).
\end{split}
\end{displaymath}
Let us now define for $u\in\neigh{X}$:
\begin{displaymath}
q_u \defeq
\begin{cases}
\sum_{R\subseteq\neigh{S}}\frac{p_R}{p_u}\delta_R(u) &\text{if }p_u\neq 0\\
0&\text{otherwise}
\end{cases}.
\end{displaymath}
This allows us to write:
\begin{displaymath}
\sum_{R\subseteq\neigh{S}} p_R \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
= \sum_{u\in\neigh{X}}p_uq_uw_u = F(\mathbf{p}\circ\mathbf{q})
\end{displaymath}
where the last equality is obtained from \eqref{eq:multi} by successively using the linearity of the expectation and the linearity of $f$. Furthermore, observe that $q_u\in[0,1]$ (a realization $R$ contributes to the sum defining $q_u$ only if $u\in R$, and these realizations have total probability $p_u$), that $q_u=0$ if $u\notin\neigh{S}$, and that:
\begin{displaymath}
\begin{split}
|S|+\sum_{u\in\neigh{X}}p_uq_u
&= |S|+\sum_{R\subseteq\neigh{S}}p_R\sum_{u\in\neigh{X}}\delta_R(u)\\
&\leq |S| + \sum_{R\subseteq\neigh{S}}p_R(k-|S|)\leq k.
\end{split}
\end{displaymath}
Hence, $(S,\mathbf{q})\in\mathcal{F}_{NA}$. In other words, we have written the optimal adaptive solution as a relaxed non-adaptive solution. This concludes the proof of the proposition.
\end{proof}

\vspace{0.5em}

\begin{proof}[of Proposition~\ref{prop:cr}]
Using the definition of $\mathrm{A}(S)$, one can write:
\begin{displaymath}
\mathrm{A}(S) = \sum_{R\subseteq\neigh{S}} p_R \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
\geq \sum_{R\subseteq\neigh{S}} p_R \mathbf{E}\big[f(I)\big]
\end{displaymath}
where the inequality comes from the fact that $I$ is a feasible random set: $|I|\leq k-|S|$, hence the expected value of $f(I)$ is bounded by the maximum of $f$ over feasible sets. Equation~\eqref{eq:cr} then implies:
\begin{equation}\label{eq:tmp}
\mathrm{A}(S) \geq (1-\varepsilon)\sum_{R\subseteq\neigh{S}} p_R F(\mathbf{q})
= (1-\varepsilon)F(\mathbf{p}\circ\mathbf{q}).
\end{equation}
Equation~\eqref{eq:tmp} holds for any $\varepsilon\geq 0$. In particular, for $\varepsilon$ smaller than $\inf_{S\neq T} |\mathrm{A}(S)-\mathrm{A}(T)|$, we obtain that $\mathrm{A}(S)\geq F(\mathbf{p}\circ\mathbf{q})$. Note that such an $\varepsilon$ is at most polynomially small in the size of the instance. Since $(S, \mathbf{q})$ is an $\alpha$-approximate non-adaptive solution, $F(\mathbf{p}\circ\mathbf{q}) \geq \alpha\,\mathrm{OPT}_{NA}$. We can then conclude by applying Proposition~\ref{prop:gap}.
\end{proof}
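\medskip\noindent\textbf{Remark.} For concreteness, the following Python snippet is a minimal brute-force sanity check of the identity used in the proof of Proposition~\ref{prop:gap}, namely $\sum_{R\subseteq\neigh{S}} p_R \max_{T\subseteq R,\,|T|\leq k-|S|} f(T) = F(\mathbf{p}\circ\mathbf{q})$. It assumes independent realizations of the neighbors and a linear $f$ given by weights $w_u$; the toy instance and variable names are purely illustrative.
\begin{verbatim}
from itertools import chain, combinations

# Brute-force check of the identity on a toy instance.
# Assumptions (ours): neighbors of S realize independently
# with probability p[u], f is linear with weights w[u], and
# the second stage keeps at most `budget` = k - |S| nodes.
w = [5.0, 3.0, 2.0, 1.0]
p = [0.5, 0.8, 0.3, 0.9]
budget = 2
n = len(w)

q = [0.0] * n
adaptive = 0.0
for R in chain.from_iterable(
        combinations(range(n), r) for r in range(n + 1)):
    pR = 1.0
    for u in range(n):
        pR *= p[u] if u in R else 1.0 - p[u]
    # f linear: the best feasible T keeps the top weights of R.
    T = sorted(R, key=lambda u: -w[u])[:budget]
    adaptive += pR * sum(w[u] for u in T)
    for u in T:  # q_u = sum_R (p_R / p_u) * delta_R(u)
        q[u] += pR / p[u]

relaxed = sum(p[u] * q[u] * w[u] for u in range(n))
assert abs(adaptive - relaxed) < 1e-9   # equals F(p o q)
assert all(0.0 <= qu <= 1.0 for qu in q)
\end{verbatim}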
\section{Algorithms Proofs}
\label{sec:alg-proofs}

We first discuss the \textsc{NP}-hardness of the problem.

\noindent \textbf{\textsc{NP}-Hardness.} In contrast to standard influence maximization, adaptive seeding is already \textsc{NP}-hard even in the simplest cases. When $f(S)=|S|$ and all probabilities equal one, the decision problem asks whether, given a budget $k$ and a target value $\ell$, there exists a subset of $X$ of size $k-t$ which, together with $t$ nodes in $\mathcal{N}(X)$, yields a solution of value $\ell$. This is equivalent to deciding whether there are $k-t$ nodes in $X$ that have $t$ neighbors in $\mathcal{N}(X)$. To see that this is \textsc{NP}-hard, consider a reduction from \textsc{Set-Cover}: there is one node $i$ for each input set $T_i$, $1\leq i\leq n$, with $\neigh{i}= T_i$; given integers $k$ and $\ell$, the output is ``yes'' if there is a family of $k$ input sets which covers at least $\ell$ elements, and ``no'' otherwise.

\subsection{LP-based approach}
\label{sec:lp-proofs}

In the LP-based approach, we rounded the solution using the pipage rounding method. We discuss this in greater detail here.

\noindent \textbf{Pipage Rounding.} The pipage rounding method~\cite{pipage} is a deterministic rounding method that can be applied to a variety of problems. In particular, it can be applied to LP-relaxations of the \textsc{Max-K-Cover} problem, where we are given a family of sets covering elements of a universe and the goal is to find $k$ subsets whose union has maximum cardinality. The LP-relaxation is a fractional solution over subsets; the pipage rounding procedure rounds this allocation in linear time, and the resulting integral solution is guaranteed to be within a factor of $(1-1/e)$ of the fractional solution. We make the following key observation: for any given $\textbf{q}$, one can remove all elements in $\mathcal{N}(X)$ for which $q_{u}=0$ without changing the value of any solution $(\boldsymbol\lambda,\textbf{q})$. Our rounding procedure can therefore be described as follows: given a solution $(\boldsymbol\lambda,\textbf{q})$, we remove all nodes $u \in \mathcal{N}(X)$ for which $q_{u}=0$. This leaves us with a fractional solution to a weighted version of the \textsc{Max-K-Cover} problem in which nodes in $X$ are the sets and the universe is the set of weighted nodes in $\mathcal{N}(X)$ that were not removed. We can therefore apply pipage rounding and lose only a factor of $(1-1/e)$ in the quality of the solution.
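\medskip\noindent\textbf{Remark.} The following Python snippet is a minimal single-machine sketch of the mechanics of pipage rounding on weighted \textsc{Max-K-Cover}, assuming the usual coverage relaxation $F(x)=\sum_u w_u\big(1-\prod_{i:\,u\in S_i}(1-x_i)\big)$; since $F$ is convex along the directions $e_i-e_j$, moving to the better of the two extreme points never decreases its value. The instance and names are illustrative and do not reproduce our exact implementation.
\begin{verbatim}
# Sketch of pipage rounding for weighted Max-K-Cover.
# Assumption (ours): x is fractional with integral sum(x) = k.
def F(x, sets, w):
    """F(x) = sum_u w_u * (1 - prod_{i: u in S_i} (1 - x_i))."""
    total = 0.0
    for u, wu in w.items():
        miss = 1.0
        for i, S in enumerate(sets):
            if u in S:
                miss *= 1.0 - x[i]
        total += wu * (1.0 - miss)
    return total

def pipage_round(x, sets, w, tol=1e-9):
    x = list(x)
    while True:
        frac = [i for i, v in enumerate(x) if tol < v < 1 - tol]
        if len(frac) < 2:
            break
        i, j = frac[0], frac[1]
        # Move mass between x_i and x_j, keeping the sum fixed.
        # F is convex along this direction, so one of the two
        # extreme points is at least as good as x itself.
        e1 = min(1.0 - x[i], x[j])   # push x_i up, x_j down
        e2 = min(x[i], 1.0 - x[j])   # push x_i down, x_j up
        c1 = list(x); c1[i] += e1; c1[j] -= e1
        c2 = list(x); c2[i] -= e2; c2[j] += e2
        x = c1 if F(c1, sets, w) >= F(c2, sets, w) else c2
    return [round(v) for v in x]

sets = [{1, 2}, {2, 3}, {3, 4}, {4, 5}]
w = {1: 1.0, 2: 2.0, 3: 1.0, 4: 2.0, 5: 1.0}
print(pipage_round([0.5, 0.5, 0.5, 0.5], sets, w))
\end{verbatim}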
\subsection{Combinatorial Algorithm}
\label{sec:comb-proofs}

We include the missing proofs from the combinatorial algorithm section. The scalability of the algorithm and its implementation in MapReduce are discussed in this section as well.

\begin{proof}[of Lemma~\ref{lemma:nd}]
\emph{W.l.o.g.} we can rename and order the pairs in $T$ so that $w_1\geq w_2\geq\ldots\geq w_m$, denoting by $c_j$ the capacity associated with weight $w_j$ and writing $P_i\defeq\sum_{j=1}^{i}c_j$ (with $P_0=0$). Then, $\mathcal{O}(T,b)$ has the following simple piecewise linear expression:
\begin{displaymath}\label{eq:pw}
\mathcal{O}(T,b) =
\begin{cases}
b\, w_1&\text{if }0\leq b\leq P_1\\
\sum_{j=1}^{i}c_jw_j + (b-P_i)\,w_{i+1}&\text{if }P_i\leq b\leq P_{i+1},\ 1\leq i<m\\
\sum_{j=1}^{m}c_jw_j&\text{if }b\geq P_m
\end{cases}
\end{displaymath}
For $t\geq 0$, let us define $n(t)\defeq\inf\Big\{j\text{ s.t. }P_j > t\Big \}$ with $n(t)=+\infty$ when the set is empty. In particular, note that $t\mapsto n(t)$ is non-decreasing. Denoting $\partial_+\mathcal{O}_T$ the right derivative of $\mathcal{O}(T,\cdot)$, one can write $\partial_+\mathcal{O}_T(t)=w_{n(t)}$, with the convention that $w_\infty = 0$. Let $x\notin T$ be a pair of weight $w_x$ and capacity $c_x$. Writing $i \defeq \sup\Big\{j\text{ s.t. } w_j\geq w_x\Big\}$, it is easy to see that $\partial_+\mathcal{O}_{T\cup\{x\}}\geq\partial_+\mathcal{O}_T$. Indeed:
\begin{enumerate}
\item if $n(t)\leq i$ then $\partial_+\mathcal{O}_{T\cup\{x\}}(t) = \partial_+\mathcal{O}_T(t)= w_{n(t)}$.
\item if $n(t)\geq i+1$ and $n(t-c_x)\leq i$ then $\partial_+\mathcal{O}_{T\cup\{x\}}(t) = w_x\geq w_{n(t)}= \partial_+\mathcal{O}_T(t)$.
\item if $n(t-c_x)\geq i+1$, then $\partial_+\mathcal{O}_{T\cup\{x\}}(t) = w_{n(t-c_x)}\geq w_{n(t)}=\partial_+\mathcal{O}_T(t)$.
\end{enumerate}
Let us now consider $b$ and $c$ such that $b\leq c$. Then, using the integral representation of $\mathcal{O}(T\cup\{x\},\cdot)$ and $\mathcal{O}(T,\cdot)$, we get:
\begin{multline*}
\mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{T\cup\{x\}}(t)\,dt\\
\geq\int_b^c\partial_+\mathcal{O}_T(t)\,dt=\mathcal{O}(T,c)-\mathcal{O}(T,b).
\end{multline*}
Rearranging the terms, $\mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T,c)\geq \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)$, which concludes the proof of the lemma.
\end{proof}

\vspace{0.5em}

\begin{proof}[of Lemma~\ref{lemma:sub}]
Let $T\subseteq\neigh{X}$ and $x, y\in\neigh{X}\setminus T$. Using the second-order characterization of submodular functions, it suffices to show that:
\begin{displaymath}
\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)\geq \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b).
\end{displaymath}
We distinguish two cases based on the relative position of $w_x$ and $w_y$. The following notations will be useful: $S_T^x \defeq \big\{u\in T\text{ s.t. }w_u\leq w_x\big\}$ and $P_T^x\defeq T\setminus S_T^x$.

\textbf{Case 1:} If $w_y\geq w_x$, then one can write:
\begin{gather*}
\mathcal{O}(T\cup\{y,x\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)+ \mathcal{O}(S_T^y\cup\{x\},b_2)\\
\mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1) + \mathcal{O}(S_T^y,b_2)
\end{gather*}
where $b_1$ is the fraction of the budget $b$ spent on $P_T^y\cup\{y\}$ and $b_2=b-b_1$. Similarly:
\begin{gather*}
\mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y\cup\{x\},c_2)\\
\mathcal{O}(T, b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y,c_2)
\end{gather*}
where $c_1$ is the fraction of the budget $b$ spent on $P_T^y$ and $c_2 = b - c_1$. Note that $b_1\geq c_1$: an optimal solution will first spend as much budget as possible on $P_T^y\cup\{y\}$ before adding elements of $S_T^y\cup\{x\}$. In particular, $c_2 = b-c_1 \geq b-b_1 = b_2$. In this case:
\begin{displaymath}
\begin{split}
\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)&= \mathcal{O}(S_T^y\cup\{x\},c_2)-\mathcal{O}(S_T^y,c_2)\\
&\geq \mathcal{O}(S_T^y\cup\{x\},b_2)-\mathcal{O}(S_T^y,b_2)\\
& = \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
\end{split}
\end{displaymath}
where the inequality comes from Lemma~\ref{lemma:nd} and $c_2\geq b_2$.

\textbf{Case 2:} If $w_x > w_y$, we now decompose the solution on $P_T^x$ and $S_T^x$:
\begin{gather*}
\mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1) + \mathcal{O}(S_T^x,b_2)\\
\mathcal{O}(T,b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x,c_2)\\
%\intertext{and}
\mathcal{O}(T\cup\{y, x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1) + \mathcal{O}(S_T^x\cup\{y\},b_2)\\
\mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x\cup\{y\},c_2)
\end{gather*}
with $b_1+b_2=b$, $c_1+c_2=b$ and, as before, $b_2\leq c_2$. The term $\mathcal{O}(P_T^x\cup\{x\},b_1)-\mathcal{O}(P_T^x,c_1)$ is common to the two differences of interest, so it cancels out and:
\begin{multline*}
\big(\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)\big)-\big(\mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)\big)\\
= \big(\mathcal{O}(S_T^x,b_2)-\mathcal{O}(S_T^x,c_2)\big)-\big(\mathcal{O}(S_T^x\cup\{y\},b_2)-\mathcal{O}(S_T^x\cup\{y\},c_2)\big)\\
\geq 0
\end{multline*}
where the inequality uses Lemma~\ref{lemma:nd} applied to $S_T^x$ and $y$, together with $c_2\geq b_2$.

In both cases, we obtain the second-order characterization of submodularity. This concludes the proof of the lemma.
\end{proof}

\vspace{0.5em}
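\medskip\noindent\textbf{Remark.} The piecewise linear expression of $\mathcal{O}(T,\cdot)$ is the value of a fractional knapsack that fills the largest weights first, each pair contributing at most its capacity. The following Python snippet, a minimal sketch under this reading with an illustrative instance, computes $\mathcal{O}(T,b)$ and numerically checks the increasing-differences property of Lemma~\ref{lemma:nd}.
\begin{verbatim}
# O(T, b): fractional knapsack value -- fill highest weights
# first, pair j contributing at most its capacity c_j.
# The pairs (w_j, c_j) below are illustrative only.
def O(T, b):
    value = 0.0
    for w, c in sorted(T, key=lambda p: -p[0]):
        take = min(c, b)
        value += take * w
        b -= take
        if b <= 0:
            break
    return value

T = [(3.0, 0.5), (2.0, 1.0), (1.0, 0.8)]
x = (2.5, 0.7)

# Lemma lemma:nd: for b <= c,
# O(T+{x}, c) - O(T, c) >= O(T+{x}, b) - O(T, b).
for b in [0.0, 0.4, 0.9, 1.5]:
    for c in [b, b + 0.3, b + 1.0]:
        gain_c = O(T + [x], c) - O(T, c)
        gain_b = O(T + [x], b) - O(T, b)
        assert gain_c >= gain_b - 1e-12
print("increasing differences verified on the toy instance")
\end{verbatim}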
\begin{proof}[of Proposition~\ref{prop:sub}]
Let us consider $S$ and $T$ such that $S\subseteq T\subseteq X$ and $x\in X\setminus T$. In particular, note that $\neigh{S}\subseteq\neigh{T}$. Let us write $\neigh{S\cup\{x\}}=\neigh{S}\cup R$ with $\neigh{S}\cap R=\emptyset$ and similarly, $\neigh{T\cup\{x\}}=\neigh{T}\cup R'$ with $\neigh{T}\cap R'=\emptyset$. Since $\neigh{S}\subseteq\neigh{T}$, it is clear that $R'\subseteq R$. Writing $R'=\{u_1,\ldots,u_r\}$:
\begin{multline*}
\mathcal{O}(\neigh{T\cup\{x\}},b)- \mathcal{O}(\neigh{T},b)\\
=\sum_{i=1}^r\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_i\},b) -\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_{i-1}\},b)\\
\leq \sum_{i=1}^r\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_i\},b) -\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_{i-1}\},b)\\
=\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
\end{multline*}
where the inequality comes from the submodularity of $\mathcal{O}(\cdot,b)$ proved in Lemma~\ref{lemma:sub}. The function $\mathcal{O}(\cdot,b)$ is also clearly non-decreasing with respect to set inclusion, hence:
\begin{multline*}
\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)\\
\leq \mathcal{O}(\neigh{S}\cup R,b)-\mathcal{O}(\neigh{S},b)\\
=\mathcal{O}(\neigh{S\cup\{x\}},b)-\mathcal{O}(\neigh{S},b).
\end{multline*}
This concludes the proof of the proposition.
\end{proof}

\begin{proof}[of Proposition~\ref{prop:main_result}]
We simply note that the body of the outer \textsf{for} loop on line 2 of Algorithm~\ref{alg:comb} is the greedy submodular maximization algorithm of \cite{nemhauser}. Since $\mathcal{O}(\neigh{\cdot}, t)$ is submodular (Proposition~\ref{prop:sub}), this solves the inner $\max$ in \eqref{eq:sub-mod} with an approximation ratio of $(1-1/e)$. The outer \textsf{for} loop then computes the outer $\max$ of \eqref{eq:sub-mod}. As a consequence, Algorithm~\ref{alg:comb} computes a $(1-1/e)$-approximate non-adaptive solution. We conclude by applying Proposition~\ref{prop:cr}.
\end{proof}

\subsection{Parallelization}
\label{sec:para}

As discussed in the body of the paper, the algorithm can be parallelized across $k$ different machines, each one computing an approximation for a fixed budget of $k-t$ in the first stage and $t$ in the second. A slightly more sophisticated approach is to consider only $\lceil\log k\rceil$ splits: $(1,k-1),(2,k-2),(4,k-4),\ldots,(2^{\lfloor \log k \rfloor},k-2^{\lfloor \log k \rfloor})$, and then select the best solution from this set. It is not hard to see that, in comparison to the previous approach, this reduces the approximation guarantee by a factor of at most 2: if the optimal solution is obtained by spending $t$ on the first stage and $k-t$ on the second stage, then since $t \leq 2\cdot2^{\lfloor \log t \rfloor}$, the solution computed for the split $(2^{\lfloor \log t \rfloor}, k - 2^{\lfloor \log t \rfloor})$ has at least half its value. More generally, for any $\epsilon>0$, one can parallelize over $\log_{1+\epsilon}k$ machines at the cost of losing a factor of $(1+\epsilon)$ in the approximation guarantee.
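\medskip\noindent\textbf{Remark.} A minimal Python sketch of this split-selection scheme is given below; \texttt{solve\_split} is a hypothetical per-machine solver (e.g., one run of Algorithm~\ref{alg:comb} restricted to a fixed split) and the helper names are ours, purely for illustration.
\begin{verbatim}
# Geometric budget splits: instead of one machine per split
# (t, k - t), try only geometrically spaced first-stage
# budgets and keep the best solution found.
def geometric_splits(k, eps=1.0):
    splits, t = [], 1.0
    while t < k:
        splits.append((int(t), k - int(t)))
        t *= 1.0 + eps    # eps = 1 gives the powers of two
    return sorted(set(splits))

def best_value(k, solve_split, eps=1.0):
    # Each split can be handed to a different machine;
    # here we simply loop over them sequentially.
    return max(solve_split(t1, t2)
               for t1, t2 in geometric_splits(k, eps))
\end{verbatim}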
\subsection{Implementation in MapReduce}
\label{sec:mr}

As noted in Section~\ref{sec:comb}, lines 4 to 7 of Algorithm~\ref{alg:comb} correspond to the greedy heuristic of \cite{nemhauser} applied to the submodular function $f_t(S) \defeq \mathcal{O}\big(\neigh{S}, t\big)$. A variant of this heuristic, namely the $\varepsilon$-greedy heuristic, combined with the \textsc{Sample\&Prune} method of \cite{mr}, allows us to write a MapReduce version of Algorithm~\ref{alg:comb}. The resulting algorithm is described in Algorithm~\ref{alg:combmr}.
\begin{algorithm}
\caption{Combinatorial algorithm, MapReduce}
\label{alg:combmr}
\algsetup{indent=2em}
\begin{algorithmic}[1]
\STATE $S\leftarrow \emptyset$
\FOR{$t=1$ \TO $k-1$}
\STATE $S_t\leftarrow \emptyset$
\FOR{$i=1$ \TO $\log_{1+\varepsilon}\Delta$}
\STATE $U\leftarrow X$, $S'\leftarrow \emptyset$
\WHILE{$|U|>0$}
\STATE $R\leftarrow$ sample from $U$ w.p. $\min\left(1, \frac{\ell}{|U|}\right)$
\WHILE{$|R|>0$ \AND $|S_t\cup S'|< k-t$}
\STATE $x\leftarrow$ some element from $R$
\IF{$\nabla f_t(S_t\cup S', x)\geq\frac{\Delta}{(1+\varepsilon)^i}$}
\STATE $S'\leftarrow S'\cup\{x\}$
\ENDIF
\STATE $R\leftarrow R\setminus\{x\}$
\ENDWHILE
\STATE $S_t\leftarrow S_t\cup S'$
\STATE $U\leftarrow\{x\in U\,|\, \nabla f_t(S_t, x)\geq\frac{\Delta}{(1+\varepsilon)^i}\}$
\ENDWHILE
\ENDFOR
\IF{$\mathcal{O}(\neigh{S_t},t)>\mathcal{O}(\neigh{S},k-|S|)$}
\STATE $S\leftarrow S_t$
\ENDIF
\ENDFOR
\RETURN $S$
\end{algorithmic}
\end{algorithm}
We denote by $\nabla f_t(S, x)$ the marginal increment of $x$ with respect to the set $S$ for the function $f_t$: $\nabla f_t(S, x) = f_t(S\cup\{x\}) - f_t(S)$. $\Delta$ is an upper bound on the marginal contribution of any element; in our case, $\Delta = \max_{u\in\neigh{X}} w_u$ provides such an upper bound. The sampling on line 7 selects a small enough number of elements that the \textsf{while} loop of lines 8 to 14 can be executed on a single machine. Furthermore, lines 7 and 16 can each be implemented in one round of MapReduce. The approximation ratio of Algorithm~\ref{alg:combmr} is $1-\frac{1}{e}-\varepsilon$. The proof of this result, as well as the optimal choice of $\ell$, follows from Theorem 10 in \cite{mr}.
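\medskip\noindent\textbf{Remark.} The following Python snippet mirrors the control flow of Algorithm~\ref{alg:combmr} on a single machine. It is a minimal sketch, not a MapReduce implementation (lines 7 and 16 of the listing are the steps that become MapReduce rounds); the oracle \texttt{marginal} stands in for $\nabla f_t$, and the instance at the end is illustrative.
\begin{verbatim}
import math, random

def threshold_greedy(X, marginal, k, delta, eps=0.1, ell=64):
    # Single-machine mirror of the inner loops of the listing.
    # marginal(S, x) plays the role of nabla f_t(S, x);
    # delta upper-bounds any element's marginal contribution.
    S = set()
    n_thr = max(1, math.ceil(
        math.log(max(delta, 2.0)) / math.log(1.0 + eps)))
    for i in range(1, n_thr + 1):
        if len(S) >= k:
            break
        thr = delta / (1.0 + eps) ** i
        U = {x for x in X
             if x not in S and marginal(S, x) >= thr}
        while U and len(S) < k:
            # Sample a set small enough for a single machine.
            prob = min(1.0, ell / len(U))
            R = {x for x in U if random.random() < prob}
            for x in R:
                if len(S) < k and marginal(S, x) >= thr:
                    S.add(x)
            # Prune elements whose gain no longer clears thr.
            U = {x for x in U - S if marginal(S, x) >= thr}
    return S

# Toy usage with a modular objective (gain of x = its weight):
w = {"a": 3.0, "b": 2.0, "c": 1.0, "d": 0.5}
print(threshold_greedy(w, lambda S, x: w[x], k=2, delta=3.0))
\end{verbatim}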