\section{Adaptivity Proofs}
\begin{proof}[of Proposition~\ref{prop:gap}]
We first show that the optimal adaptive policy can be interpreted as
a non-adaptive policy. Let $S$ be the optimal adaptive solution. For each
realization $R\subseteq\neigh{S}$, fix a maximizer
$T_R^*\in\argmax\big\{f(T);\; T\subseteq R,\; |T|\leq k-|S|\big\}$
and define $\delta_R:\neigh{X}\to \{0,1\}$ by:
\begin{displaymath}
\delta_R(u) \defeq \begin{cases}
1&\text{if } u\in T_R^*\\
0&\text{otherwise}
\end{cases}.
\end{displaymath}
Since $f$ is linear, $f(T_R^*)=\sum_{u\in\neigh{X}}\delta_R(u)w_u$, and one can write
\begin{displaymath}
\begin{split}
\sum_{R\subseteq\neigh{S}} p_R
\max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
&=
\sum_{R\subseteq\neigh{S}} p_R
\sum_{u\in\neigh{X}}\delta_R(u)w_u\\
&=
\sum_{u\in\neigh{X}}w_u\sum_{R\subseteq\neigh{S}}p_R\delta_R(u).
\end{split}
\end{displaymath}
Let us now define for $u\in\neigh{X}$:
\begin{displaymath}
q_u \defeq \begin{cases}
\sum_{R\subseteq\neigh{S}}\frac{p_R}{p_u}\delta_R(u)
&\text{if }p_u\neq 0\\
0&\text{otherwise}
\end{cases}.
\end{displaymath}
This allows us to write:
\begin{displaymath}
\sum_{R\subseteq\neigh{S}} p_R
\max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
= \sum_{u\in\neigh{X}}p_uq_uw_u = F(\mathbf{p}\circ\mathbf{q})
\end{displaymath}
where the last equality is obtained from \eqref{eq:multi} by successively using the linearity of the expectation and the linearity of $f$.
Furthermore, observe that $q_u\in[0,1]$, $q_u=0$ if $u\notin\neigh{S}$ and:
\begin{displaymath}
\begin{split}
|S|+\sum_{u\in\neigh{X}}p_uq_u
&= |S|+\sum_{R\subseteq\neigh{S}}p_R\sum_{u\in\neigh{X}}\delta_R(u)\\
&\leq |S| + \sum_{R\subseteq\neigh{S}}p_R(k-|S|)\leq k
\end{split}
\end{displaymath}
Hence, $(S,\mathbf{q})\in\mathcal{F}_{NA}$. In other words, we have written the optimal adaptive solution as a relaxed
non-adaptive solution. This concludes the proof of the proposition.
\end{proof}
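The rewriting above can be checked numerically. The following sketch (Python; the toy weights, probabilities, and budget are our own illustrative choices, and $f$ is additive as in the proof) enumerates all realizations $R$, computes $\delta_R$ and $\mathbf{q}$, and verifies that the adaptive value equals $\sum_u p_uq_uw_u$:
\begin{verbatim}
from itertools import chain, combinations

# Toy instance (our own illustrative data): additive f, independent nodes.
neighbors = ["a", "b", "c"]            # N(S)
p = {"a": 0.5, "b": 0.3, "c": 0.9}     # realization probabilities p_u
w = {"a": 2.0, "b": 5.0, "c": 1.0}     # weights, f(T) = sum of w_u, u in T
budget = 2                             # k - |S|

def powerset(xs):
    return chain.from_iterable(combinations(xs, r)
                               for r in range(len(xs) + 1))

lhs, q = 0.0, {u: 0.0 for u in neighbors}
for R in powerset(neighbors):
    p_R = 1.0                          # probability of realization R
    for u in neighbors:
        p_R *= p[u] if u in R else 1 - p[u]
    T_star = sorted(R, key=lambda u: -w[u])[:budget]   # maximizer T_R^*
    lhs += p_R * sum(w[u] for u in T_star)
    for u in T_star:                   # accumulate sum_R p_R delta_R(u)
        q[u] += p_R
for u in neighbors:
    q[u] = q[u] / p[u] if p[u] > 0 else 0.0            # q_u

rhs = sum(p[u] * q[u] * w[u] for u in neighbors)
assert abs(lhs - rhs) < 1e-9           # the two expressions coincide
\end{verbatim}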
\vspace{0.5em}
\begin{proof}[of Proposition~\ref{prop:cr}]
Using the definition of $\mathrm{A}(S)$, one can write:
\begin{displaymath}
\mathrm{A}(S) = \sum_{R\subseteq\neigh{S}} p_R
\max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
\geq \sum_{R\subseteq\neigh{S}} p_R \mathbf{E}\big[f(I)\big]
\end{displaymath}
where the inequality comes from the fact that $I$ is a feasible random set ($I\subseteq R$ and $|I|\leq k-|S|$), so the expected value of $f(I)$ is bounded by the maximum of $f$ over feasible sets.
Equation~\eqref{eq:cr} then implies:
\begin{equation}\label{eq:tmp}
\mathrm{A}(S)
\geq (1-\varepsilon)\sum_{R\subseteq\neigh{S}} p_R F(\mathbf{q})
= (1-\varepsilon)F(\mathbf{p}\circ\mathbf{q}).
\end{equation}
Equation~\eqref{eq:tmp} holds for any $\varepsilon\geq 0$. In particular, for $\varepsilon$ smaller than $\inf_{S\neq T} |\mathrm{A}(S)-\mathrm{A}(T)|$, we obtain that $\mathrm{A}(S)\geq F(\mathbf{p}\circ\mathbf{q})$. Note that such an $\varepsilon$ is at most polynomially small in the size of the instance.
Since $(S, \mathbf{q})$ is an $\alpha$-approximate non-adaptive solution, $F(\mathbf{p}\circ\mathbf{q}) \geq \alpha\,\mathrm{OPT}_{NA}$. We can then conclude by applying Proposition~\ref{prop:gap}.
\end{proof}
\section{Algorithm Proofs}
We first discuss the NP-hardness of the problem.
\noindent \textbf{\textsc{NP}-Hardness.} In contrast to standard influence maximization, adaptive seeding is \textsc{NP}-hard even in the simplest cases. When $f(S)=|S|$ and all probabilities equal one, the decision problem is the following: given a budget $k$ and a target value $\ell$, is there a subset of $X$ of size $k-t$ which yields a solution of expected value $\ell$ using $t$ nodes in $\mathcal{N}(X)$? This is equivalent to deciding whether there are $k-t$ nodes in $X$ that have $t$ neighbors in $\mathcal{N}(X)$. To see that this is \textsc{NP}-hard, one can reduce from the decision version of \textsc{Max-Cover}: there is one node $i$ for each input set $T_i$, $1\leq i\leq n$, with $\neigh{i}= T_i$; given integers $k$ and $\ell$, the answer is ``yes'' if there is a family of $k$ sets in the input which cover at least $\ell$ elements, and ``no'' otherwise.
\subsection{LP-Based Approach}
In the LP-based approach, we round the solution using the pipage rounding method. We discuss this in greater detail here.
\noindent \textbf{Pipage Rounding.}
The pipage rounding method~\cite{pipage} is a deterministic rounding method that applies to a variety of problems. In particular, it applies to LP-relaxations of the \textsc{Max-K-Cover} problem, in which we are given a family of sets covering elements of a universe and the goal is to find $k$ subsets whose union has maximal cardinality. The LP-relaxation yields a fractional solution over subsets; the pipage rounding procedure rounds this allocation in linear time, and the resulting integral solution is guaranteed to be within a factor of $(1-1/e)$ of the fractional one.
We make the following key observation: for any given $\textbf{q}$, one can remove all elements in $\mathcal{N}(X)$ for which $q_{u}=0$, without changing the value of any solution $(\boldsymbol\lambda,\textbf{q})$.
Our rounding procedure can therefore be described as follows: given a solution $(\boldsymbol\lambda,\textbf{q})$, we remove all nodes $u \in \mathcal{N}(X)$ for which $q_{u}=0$. This leaves us with a fractional solution to a weighted version of the \textsc{Max-K-Cover} problem in which the nodes in $X$ are the sets and the universe is the set of weighted nodes in $\mathcal{N}(X)$ that were not removed. We can therefore apply pipage rounding, as sketched below, and lose only a factor of $(1-1/e)$ in the quality of the solution.
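For concreteness, here is a minimal sketch of the pipage step on the weighted coverage objective (Python; the instance encoding, function names, and the endpoint-selection rule spelled out below are our own, following the generic scheme of \cite{pipage}):
\begin{verbatim}
# Minimal pipage-rounding sketch for weighted Max-K-Cover (illustrative).
# x[i] in [0,1] is the fractional extent to which set i is picked, with
# sum(x) = k; sets[i] is the set of universe elements covered by set i.
def coverage(x, sets, weight):
    # F(x) = sum_u w_u * (1 - prod over {i : u in sets[i]} of (1 - x_i))
    total = 0.0
    for u, w_u in weight.items():
        miss = 1.0
        for i, s in enumerate(sets):
            if u in s:
                miss *= 1.0 - x[i]
        total += w_u * (1.0 - miss)
    return total

def pipage_round(x, sets, weight, eps=1e-9):
    x = list(x)
    while True:
        frac = [i for i, v in enumerate(x) if eps < v < 1 - eps]
        if len(frac) < 2:
            break
        i, j = frac[0], frac[1]
        # move along x_i + x_j = const; F is convex on this segment, so
        # the better endpoint is at least as good as the current point
        lo = -min(x[i], 1 - x[j])
        hi = min(1 - x[i], x[j])
        cands = []
        for t in (lo, hi):
            y = list(x)
            y[i], y[j] = y[i] + t, y[j] - t
            cands.append((coverage(y, sets, weight), y))
        x = max(cands, key=lambda c: c[0])[1]
    return [round(v) for v in x]
\end{verbatim}
Each iteration fixes at least one coordinate at $0$ or $1$, and since the coverage objective is convex along the direction $e_i-e_j$, moving to the better endpoint never decreases its value.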
\subsection{Combinatorial Algorithm}
We include the missing proofs from the combinatorial algorithm section. Scalability and the MapReduce implementation are discussed here as well.
\begin{proof}[of Lemma~\ref{lemma:nd}]
\emph{W.l.o.g.}, we can rename and order the pairs in $T$ so that $w_1\geq w_2\geq\cdots\geq w_m$.
Then, $\mathcal{O}(T,b)$ has the following simple piecewise linear expression:
\begin{displaymath}\label{eq:pw}
\mathcal{O}(T,b) =
\begin{cases}
b w_1&\text{if }0\leq b<p_1\\
\displaystyle
\sum_{k=1}^{i-1}p_k(w_k-w_i)
+ b w_i
&\displaystyle\text{if } 0\leq b - \sum_{k=1}^{i-1}p_k< p_i\\
\displaystyle
\sum_{k=1}^m p_kw_k
&\displaystyle\text{if } b\geq\sum_{k=1}^m p_k
\end{cases}
\end{displaymath}
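In other words, $\mathcal{O}(T,b)$ is evaluated by spending the budget greedily on the pairs of largest weight. A minimal sketch (Python; the representation of $T$ as (probability, weight) pairs is our own):
\begin{verbatim}
# Evaluate O(T, b): fill the budget greedily by decreasing weight.
# T is a list of (probability, weight) pairs; the encoding is our own.
def O(T, b):
    total = 0.0
    for p, w in sorted(T, key=lambda pair: -pair[1]):
        take = min(b, p)           # spend at most p on this pair
        total += take * w
        b -= take
        if b <= 0:
            break
    return total

# The three regimes of the piecewise expression above:
T = [(0.5, 3.0), (0.2, 5.0), (0.8, 1.0)]   # sorted order: w = 5, 3, 1
print(O(T, 0.1))   # 0.5  = 0.1 * 5         (b < p_1)
print(O(T, 0.6))   # 2.2  = 0.2 * 5 + 0.4 * 3
print(O(T, 2.0))   # 3.3  = full mass       (b >= sum of p_k)
\end{verbatim}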
Let us define, for $t\in\reals^+$, $n(t)\defeq \inf\Big\{i\text{ s.t.
} \sum_{k=1}^i p_k > t\Big \}$ with $n(t)=+\infty$ when the set is empty. In
particular, note that $t\mapsto n(t)$ is non-decreasing. Denoting by
$\partial_+\mathcal{O}_T$ the right derivative of $\mathcal{O}(T,\cdot)$, one
can write $\partial_+\mathcal{O}_T(t)=w_{n(t)}$, with the convention that
$w_\infty = 0$.
Writing $i \defeq \sup\Big\{j\text{ s.t.
} w_j\geq w_x\Big\}$, so that the new pair $x$ (with probability $p_x$ and
weight $w_x$) is inserted at position $i+1$ in the ordering, it is easy to see that
$\partial_+\mathcal{O}_{T\cup\{x\}}\geq\partial_+\mathcal{O}_T$ pointwise. Indeed:
\begin{enumerate}
\item if $n(t)\leq i$ then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
= \partial_+\mathcal{O}_T(t)= w_{n(t)}$.
\item if $n(t)\geq i+1$ and $n(t-p_x)\leq i$ then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
= w_x\geq w_{n(t)}= \partial_+\mathcal{O}_T(t)$.
\item if $n(t-p_x)\geq i+1$, then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
= w_{n(t-p_x)}\geq w_{n(t)}=\partial_+\mathcal{O}_T(t)$.
\end{enumerate}
Let us now consider $b$ and $c$ such that $b\leq c$. Then, using the integral
representation of $\mathcal{O}(T\cup\{x\},\cdot)$ and $\mathcal{O}(T,\cdot)$, we get:
\begin{multline*}
\mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{T\cup\{x\}}(t)dt\\
\geq\int_b^c\partial_+\mathcal{O}_T(t)dt=\mathcal{O}(T,c)-\mathcal{O}(T,b)
\end{multline*}
Rearranging the terms, $\mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T,c)\geq
\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)$,
which concludes the proof of the lemma.
\end{proof}
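Lemma~\ref{lemma:nd} itself is easy to spot-check numerically with the greedy evaluation above (Python; random toy instances of our own making):
\begin{verbatim}
import random

def O(T, b):  # greedy evaluation of O(T, b), as in the earlier sketch
    total = 0.0
    for p, w in sorted(T, key=lambda pair: -pair[1]):
        take = min(b, p); total += take * w; b -= take
        if b <= 0: break
    return total

random.seed(0)
for _ in range(1000):
    T = [(random.random(), random.random()) for _ in range(5)]
    x = (random.random(), random.random())
    b = random.uniform(0, 3)
    c = random.uniform(b, 3)                 # b <= c
    inc_b = O(T + [x], b) - O(T, b)          # increment of x at budget b
    inc_c = O(T + [x], c) - O(T, c)          # increment of x at budget c
    assert inc_c >= inc_b - 1e-9             # Lemma: non-decreasing in b
\end{verbatim}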
\vspace{0.5em}
\begin{proof}[of Lemma~\ref{lemma:sub}]
Let $T\subseteq\neigh{X}$ and $x, y\in\neigh{X}\setminus T$. Using the
second-order characterization of submodular functions, it suffices to show
that:
\begin{displaymath}\label{eq:so}
\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)\geq
\mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
\end{displaymath}
We distinguish two cases based on the relative order of $w_x$ and $w_y$.
The following notation will be useful: for a pair $x$, let $S_T^x \defeq \big\{u\in
T\text{ s.t. }w_u\leq w_x\big\}$ denote the pairs of $T$ ranked after $x$ in the
weight ordering, and $P_T^x\defeq T\setminus S_T^x$ those ranked before $x$. An
optimal solution spends its budget on $P_T^x$ first, since its pairs have the
larger weights.
\textbf{Case 1:} If $w_y\geq w_x$, then one can
write:
\begin{gather*}
\mathcal{O}(T\cup\{y,x\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)+
\mathcal{O}(S_T^y\cup\{x\},b_2)\\
\mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)
+ \mathcal{O}(S_T^y,b_2)
\end{gather*}
where $b_1$ is the fraction of the budget $b$ spent on $P_T^y\cup\{y\}$ and
$b_2=b-b_1$.
Similarly:
\begin{gather*}
\mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y\cup\{x\},c_2)\\
\mathcal{O}(T, b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y,c_2)
\end{gather*}
where $c_1$ is the fraction of the budget $b$ spent on $P_T^y$ and $c_2
= b - c_1$.
Note that $b_1\geq c_1$: an optimal solution will first spend as much
budget as possible on $P_T^y\cup\{y\}$ before adding elements of
$S_T^y\cup\{x\}$.
In this case:
\begin{displaymath}
\begin{split}
\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)&=
\mathcal{O}(S_T^y\cup\{x\},c_2)-\mathcal{O}(S_T^y,c_2)\\
&\geq \mathcal{O}(S_T^y\cup\{x\},b_2)-\mathcal{O}(S_T^y,b_2)\\
& = \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
\end{split}
\end{displaymath}
where the inequality comes from Lemma~\ref{lemma:nd} and
$c_2\geq b_2$.
\textbf{Case 2:} If $w_x > w_y$, we now decompose
the solution on $P_T^x$ and $S_T^x$:
\begin{gather*}
\mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1)
+ \mathcal{O}(S_T^x,b_2)\\
\mathcal{O}(T,b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x,c_2)\\
%\intertext{and}
\mathcal{O}(T\cup\{y, x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1)
+ \mathcal{O}(S_T^x\cup\{y\},b_2)\\
\mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x\cup\{y\},c_2)
\end{gather*}
with $b_1+b_2=b$, $c_1+c_2=b$ and $b_2\leq c_2$.
In this case again:
\begin{multline*}
\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)=
\Delta_P + \mathcal{O}(S_T^x,b_2)-\mathcal{O}(S_T^x,c_2)\\
\geq \Delta_P + \mathcal{O}(S_T^x\cup\{y\},b_2)-\mathcal{O}(S_T^x\cup\{y\},c_2)\\
= \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
\end{multline*}
where $\Delta_P\defeq\mathcal{O}(P_T^x\cup\{x\},b_1)-\mathcal{O}(P_T^x,c_1)$ appears in both the first and the last differences, and the inequality uses Lemma~\ref{lemma:nd} with budgets $b_2\leq c_2$.
In both cases, we obtain the second-order characterization of submodularity, which concludes the proof of the lemma.
\end{proof}
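The second-order inequality can likewise be spot-checked numerically (Python; random toy instances, reusing the greedy evaluation of $\mathcal{O}$):
\begin{verbatim}
import random

def O(T, b):  # greedy evaluation of O(T, b), as in the earlier sketch
    total = 0.0
    for p, w in sorted(T, key=lambda pair: -pair[1]):
        take = min(b, p); total += take * w; b -= take
        if b <= 0: break
    return total

random.seed(1)
for _ in range(1000):
    T = [(random.random(), random.random()) for _ in range(4)]
    x = (random.random(), random.random())
    y = (random.random(), random.random())
    b = random.uniform(0, 3)
    lhs = O(T + [x], b) - O(T, b)            # marginal of x given T
    rhs = O(T + [x, y], b) - O(T + [y], b)   # marginal of x given T + {y}
    assert lhs >= rhs - 1e-9                 # second-order submodularity
\end{verbatim}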
\vspace{0.5em}
\begin{proof}[of Proposition~\ref{prop:sub}]
Let us consider $S$ and $T$ such that $S\subseteq T\subseteq X$ and $x\in
X\setminus T$. In particular, note that $\neigh{S}\subseteq\neigh{T}$.
Let us write $\neigh{S\cup\{x\}}=\neigh{S}\cup R$ with $\neigh{S}\cap
R=\emptyset$ and similarly, $\neigh{T\cup\{x\}}=\neigh{T}\cup R'$ with
$\neigh{T}\cap R'=\emptyset$. It is clear that $R'\subseteq R$. Writing $R'=\{u_1,\ldots,u_r\}$:
\begin{multline*}
\mathcal{O}(\neigh{T\cup\{x\}},b)- \mathcal{O}(\neigh{T},b)\\
=\sum_{i=1}^r\mathcal{O}(\neigh{T}\cup\{u_1,\ldots, u_i\},b)
-\mathcal{O}(\neigh{T}\cup\{u_1,\ldots, u_{i-1}\},b)\\
\leq \sum_{i=1}^r\mathcal{O}(\neigh{S}\cup\{u_1,\ldots, u_i\},b)
-\mathcal{O}(\neigh{S}\cup\{u_1,\ldots, u_{i-1}\},b)\\
=\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
\end{multline*}
where the inequality comes from the submodularity of $\mathcal{O}(\cdot,b)$ proved in Lemma~\ref{lemma:sub}. This function is also non-decreasing with respect to set inclusion, hence:
\begin{multline*}
\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)\\
\leq \mathcal{O}(\neigh{S}\cup R,b)-\mathcal{O}(\neigh{S},b)\\
=\mathcal{O}(\neigh{S\cup\{x\}},b)-\mathcal{O}(\neigh{S},b)
\end{multline*}
This concludes the proof of the proposition.
\end{proof}
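Proposition~\ref{prop:sub} can be spot-checked in the same way on random bipartite instances (Python; the encoding of $X$, $\neigh{\cdot}$, and the pairs is our own):
\begin{verbatim}
import random

def O(T, b):  # greedy evaluation of O(T, b), as in the earlier sketch
    total = 0.0
    for p, w in sorted(T, key=lambda pair: -pair[1]):
        take = min(b, p); total += take * w; b -= take
        if b <= 0: break
    return total

random.seed(2)
for _ in range(500):
    # random bipartite instance: X = {0,...,3}, N(X) = {0,...,5}
    neigh = {x: {u for u in range(6) if random.random() < 0.5}
             for x in range(4)}
    pw = {u: (random.random(), random.random()) for u in range(6)}
    b = random.uniform(0, 3)

    def g(S):  # S -> O(N(S), b)
        NS = set().union(*(neigh[x] for x in S)) if S else set()
        return O([pw[u] for u in NS], b)

    S, T, x = {0}, {0, 1}, 2                 # S subset of T, x outside T
    assert g(S | {x}) - g(S) >= g(T | {x}) - g(T) - 1e-9
\end{verbatim}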
\begin{proof}[of Proposition~\ref{prop:main_result}]
We simply note that the body of the outer \textsf{for} loop on line 2 of Algorithm~\ref{alg:comb} is the greedy submodular maximization algorithm of \cite{nemhauser}. Since $\mathcal{O}(\neigh{\cdot}, t)$ is submodular (Proposition~\ref{prop:sub}), this greedy solves the inner $\max$ in \eqref{eq:sub-mod} with an approximation ratio of $(1-1/e)$. The outer \textsf{for} loop then computes the outer $\max$ of \eqref{eq:sub-mod}.
As a consequence, Algorithm~\ref{alg:comb} computes a $(1-1/e)$-approximate non-adaptive solution. We conclude by applying Proposition~\ref{prop:cr}.
\end{proof}
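For reference, here is a compact sequential sketch of the structure this proof refers to: an outer loop over budget splits and, inside it, the greedy of \cite{nemhauser} on $f_t(S)=\mathcal{O}(\neigh{S},t)$ (Python; the instance encoding is our own, and ties are broken arbitrarily):
\begin{verbatim}
# Sketch of the combinatorial algorithm: for each split (k - t seeds,
# t second-stage budget), run the greedy on f_t(S) = O(N(S), t), and
# keep the best split. The encoding (X, neigh, pw) is illustrative.
def O(T, b):                       # greedy evaluation of O(T, b)
    total = 0.0
    for p, w in sorted(T, key=lambda pair: -pair[1]):
        take = min(b, p); total += take * w; b -= take
        if b <= 0: break
    return total

def combinatorial(X, neigh, pw, k):
    # X: first-stage nodes; neigh[x]: set of neighbors; pw[u] = (p_u, w_u)
    def f_t(S, t):
        NS = set().union(*(neigh[x] for x in S)) if S else set()
        return O([pw[u] for u in NS], t)

    best_val, best_S, best_t = -1.0, set(), None
    for t in range(1, k):          # second-stage budget t, first stage k - t
        S = set()
        for _ in range(k - t):     # the (1 - 1/e) greedy
            gains = {x: f_t(S | {x}, t) - f_t(S, t) for x in set(X) - S}
            if not gains:
                break
            S.add(max(gains, key=gains.get))
        if f_t(S, t) > best_val:
            best_val, best_S, best_t = f_t(S, t), S, t
    return best_S, best_t, best_val
\end{verbatim}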
\subsection{Parallelization}
As discussed in the body of the paper, the algorithm can be parallelized across $k-1$ machines, each one computing an approximation for a fixed budget of $k-t$ in the first stage and $t$ in the second.
A slightly more sophisticated approach is to consider only logarithmically many splits, $(1,k-1),(2,k-2),(4,k-4),\ldots,(2^{\lfloor \log k \rfloor},k-2^{\lfloor \log k \rfloor})$, and then select the best solution from this set. It is not hard to see that, in comparison to the previous approach, this reduces the approximation guarantee by a factor of at most 2: if the optimal solution is obtained by spending $t$ on the first stage and $k-t$ on the second, then since $t \leq 2\cdot2^{\lfloor \log t \rfloor}$, the solution computed for the split $(2^{\lfloor \log t \rfloor}, k - 2^{\lfloor \log t \rfloor})$ has at least half that value.
More generally, for any $\varepsilon>0$ one can parallelize over $\log_{1+\varepsilon}k$ machines at the cost of losing a factor of $(1+\varepsilon)$ in the approximation guarantee; a sketch of the split generation follows.
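\begin{verbatim}
# Geometric budget splits: O(log_{1+eps} k) machines instead of k - 1,
# at the cost of a (1 + eps) factor in the guarantee. Names are ours.
def splits(k, eps):
    out, t = [], 1.0
    while int(t) < k:
        out.append((int(t), k - int(t)))   # (first-stage, second-stage)
        t *= 1.0 + eps
    return sorted(set(out))

print(splits(100, 1.0))  # doubling: (1,99), (2,98), (4,96), ..., (64,36)
\end{verbatim}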
\subsection{Implementation in MapReduce}
As noted in Section~\ref{sec:comb}, lines 4 to 7 of Algorithm~\ref{alg:comb}
correspond to the greedy heuristic of \cite{nemhauser} applied to the
submodular function $f_t(S) \defeq \mathcal{O}\big(\neigh{S}, t\big)$.
A variant of this heuristic, namely the $\varepsilon$-greedy heuristic,
combined with the \textsc{Sample\&Prune} method of \cite{mr} allows us to write
a MapReduce version of Algorithm~\ref{alg:comb}. The resulting algorithm is
described in Algorithm~\ref{alg:combmr}.
\begin{algorithm}
\caption{Combinatorial algorithm, MapReduce}
\label{alg:combmr}
\algsetup{indent=2em}
\begin{algorithmic}[1]
\STATE $S\leftarrow \emptyset$
\FOR{$t=1$ \TO $k-1$}
\STATE $S_t\leftarrow \emptyset$
\FOR{$i=1$ \TO $\log_{1+\varepsilon}\Delta$}
\STATE $U\leftarrow X$, $S'\leftarrow \emptyset$
\WHILE{$|U|>0$}
\STATE $R\leftarrow$ sample from $U$ w.p. $\min\left(1,
\frac{\ell}{|U|}\right)$
\WHILE{$|R|>0$ \AND $|S_t\cup S'|< k$}
\STATE $x\leftarrow$ some element from $R$
\IF{$\nabla f_t(S_t\cup S', x)\geq\frac{\Delta}{(1+\varepsilon)^i}$}
\STATE $S'\leftarrow S'\cup\{x\}$
\ENDIF
\STATE $R\leftarrow R\setminus\{x\}$
\ENDWHILE
\STATE $S_t\leftarrow S_t\cup S'$
\STATE $U\leftarrow\{x\in U\,|\, \nabla f_t(S_t,
x)\geq\frac{\Delta}{(1+\varepsilon)^i}\}$
\ENDWHILE
\ENDFOR
\IF{$\mathcal{O}(\neigh{S_t},t)>\mathcal{O}(\neigh{S},k-|S|)$}
\STATE $S\leftarrow S_t$
\ENDIF
\ENDFOR
\RETURN $S$
\end{algorithmic}
\end{algorithm}
We denote by $\nabla f_t(S, x)$ the marginal increment of $x$ to the set $S$
for the function $f_t$: $\nabla f_t(S, x) = f_t(S\cup\{x\}) - f_t(S)$.
$\Delta$ is an upper bound on the marginal contribution of any element. In our
case, $\Delta = \max_{u\in\neigh{X}} w_u$ provides such an upper bound. The
sampling in line 7 selects a small enough number of elements that the
\texttt{while} loop from lines 8 to 14 can be executed on a single machine.
Furthermore, lines 7 and 16 can be implemented in one round of MapReduce each.
The approximation ratio of Algorithm~\ref{alg:combmr} is
$1-\frac{1}{e}-\varepsilon$. The proof of this result as well as the optimal
choice of $\ell$ follow from Theorem 10 in \cite{mr}.
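A sequential simulation of this scheme may help follow the pseudocode (Python; \texttt{f} is any monotone submodular set function, the normalization assumption on the range of the marginals is ours, and the sampling and pruning steps stand in for one MapReduce round each):
\begin{verbatim}
import math, random

# Sequential sketch of the Sample&Prune epsilon-greedy (the inner loops
# of Algorithm combmr, for one fixed t). f is a monotone submodular
# function; in the MapReduce version, the sampling (line 7) and the
# pruning of U (line 16) are one round each. We assume marginal gains
# lie in [delta / |X|, delta], a normalization assumption of this sketch.
def sample_and_prune_greedy(f, X, k, eps, ell=8):
    grad = lambda S, x: f(S | {x}) - f(S)        # marginal increment
    delta = max(grad(set(), x) for x in X)       # upper bound on marginals
    S = set()
    levels = math.ceil(math.log(len(X)) / math.log(1 + eps)) + 1
    for i in range(1, levels + 1):
        thresh = delta / (1 + eps) ** i
        U = set(X) - S
        while U and len(S) < k:
            # sample a machine-sized batch R from the surviving pool U
            R = {x for x in U if random.random() < min(1.0, ell / len(U))}
            for x in R:                          # fits on a single machine
                if len(S) < k and grad(S, x) >= thresh:
                    S.add(x)
            # prune: keep only elements whose marginal clears the bar
            U = {x for x in U - S if grad(S, x) >= thresh}
    return S
\end{verbatim}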