We start by introducing \emph{non-adaptive policies} for the adaptive seeding problem. These policies are used as a tool to obtain adaptive solutions, as will be discussed in Sections~\ref{sec:gap} and~\ref{sec:round}.
We say that a policy is non-adaptive if it selects a set of nodes $S
\subseteq X$ to be seeded in the first stage and a vector of probabilities
$\mathbf{q}\in[0,1]^n$ such that each neighbor $u$ of $S$ that realizes
is included in the solution independently with probability $q_u$. The
budget constraint is now only respected in expectation, i.e.
$|S| + \textbf{p}^T\textbf{q} \leq k$. Formally, the optimization problem for non-adaptive policies can be written as:
\begin{equation}\label{eq:relaxed}
    \begin{split}
    \max_{\substack{S\subseteq X\\q\in[0,1]^n} }& \;
    \sum_{R\subseteq\neigh{X}} \Big (\prod_{u\in R} p_uq_u\prod_{u\in\neigh{X}\setminus
    R}(1-p_uq_u) \Big )
 f(R)\\
    \text{s.t. } & \; |S|+\textbf{p}^T\textbf{q} \leq k,\;
q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}
\end{split}
\end{equation}

Note that because of the condition $q_u\leq \mathbf{1}_{\{u\in\neigh{S}\}}$,
the summand associated with $R$ in \eqref{eq:relaxed} vanishes whenever $R$
contains $u\in\neigh{X}\setminus\neigh{S}$. Hence, the summation is restricted
to $R\subseteq\neigh{S}$ as in \eqref{eq:problem}.
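
The objective in \eqref{eq:relaxed} is an expectation over independent inclusions, so it can be estimated by straightforward sampling. The following Python sketch is ours, for illustration only; the function and variable names are not from the paper:
\begin{verbatim}
import random

def estimate_objective(q, p, neigh_S, f, samples=10000):
    # Monte Carlo estimate of the objective of eq. (relaxed)
    # for a fixed pair (S, q).
    #   q, p    -- dicts mapping each u in N(X) to q_u and p_u
    #   neigh_S -- the set N(S); q_u is 0 outside of it
    #   f       -- the influence function, e.g. an additive one
    total = 0.0
    for _ in range(samples):
        # u is realized w.p. p_u and then kept w.p. q_u, i.e. it
        # is included independently with probability p_u * q_u.
        R = {u for u in neigh_S if random.random() < p[u] * q[u]}
        total += f(R)
    return total / samples
\end{verbatim}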

\subsection{Adaptivity gap for additive functions}\label{sec:gap}

We now justify the use of non-adaptive strategies by showing that the optimal non-adaptive policy achieves a value at least as high as that of the optimal adaptive policy.
For brevity, given a probability vector $\mathbf{p}$ we will write:
\begin{equation}\label{eq:multi}
        F(\textbf{p}) \defeq\mathbb{E}_{\mathbf{p}}\big[f(R)\big]
        =\sum_{R\subseteq\neigh{X}}p_R f(R)
\end{equation}
as well as $\textbf{p}\circ \textbf{q}$ to denote the component-wise multiplication of the vectors $\textbf{p}$ and $\textbf{q}$.  Finally, we will use $\mathcal{F}_{A} := \{S \subseteq X : |S|\leq k\}$ and $\mathcal{F}_{NA} :=\{(S,\textbf{q}) : |S|+\textbf{p}^T\textbf{q} \leq k,\; q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}\}$ to denote the feasible regions of the adaptive and non-adaptive problems, respectively.

\begin{proposition}\label{prop:gap}
For additive functions given by \eqref{eq:voter}, non-adaptive
    policies are stronger than adaptive policies:
    \begin{displaymath}
        \begin{aligned}[t]
            &\max_{S\subseteq X} \sum_{R\subseteq\neigh{S}} p_R
            \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)\\
            &\text{s.t. }S \in \mathcal{F}_{A}
        \end{aligned}
        \leq
        \begin{aligned}[t]
            &\max_{\substack{S\subseteq X\\q\in[0,1]^n}}
            F(\mathbf{p}\circ\mathbf{q})\\
            &\text{s.t. } (S,q) \in \mathcal{F}_{NA}
                    \end{aligned}
    \end{displaymath}
\end{proposition}


\begin{proof}
    We will first show that the optimal adaptive policy can be interpreted as
    a non-adaptive policy. Let $S$ be the optimal adaptive
    solution and, for each realization $R\subseteq\neigh{S}$, fix an optimal
    second-stage set $T_R^*\in\argmax\big\{f(T):\; T\subseteq R,\; |T|\leq
    k-|S|\big\}$. Define $\delta_R:\neigh{X}\to \{0,1\}$ by:
    \begin{displaymath}
        \delta_R(u) \defeq \begin{cases}
            1&\text{if } u\in T_R^* \\
            0&\text{otherwise}
        \end{cases},
    \end{displaymath}
    so that one can write
    \begin{displaymath}
    \begin{split}
        \sum_{R\subseteq\neigh{S}} p_R
        \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
        &=
        \sum_{R\subseteq\neigh{S}} p_R
        \sum_{u\in\neigh{X}}\delta_R(u)w_u\\
        &=
        \sum_{u\in\neigh{X}}w_u\sum_{R\subseteq\neigh{S}}p_R\delta_R(u).
    \end{split}
    \end{displaymath}

    Let us now define for $u\in\neigh{X}$:
    \begin{displaymath}
        q_u \defeq \begin{cases}
            \sum_{R\subseteq\neigh{S}}\frac{p_R}{p_u}\delta_R(u)
            &\text{if }p_u\neq 0\\
            0&\text{otherwise}
        \end{cases}.
    \end{displaymath}
    This allows us to write:
    \begin{displaymath}
            \sum_{R\subseteq\neigh{S}} p_R
            \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
            = \sum_{u\in\neigh{X}}p_uq_uw_u = F(\mathbf{p}\circ\mathbf{q})
    \end{displaymath}
    where the last equality is obtained from \eqref{eq:multi} by successively using the linearity of the expectation and the linearity of $f$.

    Furthermore, observe that $q_u\in[0,1]$, $q_u=0$ if $u\notin\neigh{S}$ and:
    \begin{displaymath}
        \begin{split}
        |S|+\sum_{u\in\neigh{X}}p_uq_u
        &= |S|+\sum_{R\subseteq\neigh{S}}p_R\sum_{u\in\neigh{X}}\delta_R(u)\\
        &\leq |S| + \sum_{R\subseteq\neigh{S}}p_R(k-|S|)\leq k
    \end{split}
    \end{displaymath}

    Hence, $(S,\mathbf{q})\in\mathcal{F}_{NA}$. In other words, we have expressed the optimal adaptive solution as a feasible
    non-adaptive solution of equal value, which proves the claimed inequality. This concludes the proof of the proposition.
\end{proof}

\subsection{From non-adaptive to adaptive solutions}\label{sec:round}
From the above proposition, we know that optimal non-adaptive solutions achieve
values at least as high as adaptive ones.  A priori, this does not imply that non-adaptive solutions are good adaptive solutions. More precisely, a non-adaptive solution is a pair $(S, \mathbf{q})$ such that seeding $S$ in the first stage and sampling nodes with probabilities $\mathbf{q}$ in the second stage results in a solution which is, in expectation, as good as $\mathrm{A}(S)$, where $\mathrm{A}$ denotes the objective function of the adaptive problem~\eqref{eq:problem}. However, the set resulting from the sampling might exceed the budget of the second stage, hence preventing us from directly obtaining a feasible adaptive solution.

Fortunately, the probability of exceeding the budget is small: with high probability the sampled set is feasible. This property is leveraged in \cite{vondrak} to design randomized rounding methods with approximation guarantees, called \emph{contention resolution schemes}. More precisely, we note that once the set $S$ is fixed, the feasibility constraint of problem~\eqref{eq:relaxed} becomes a single knapsack constraint, for which \cite{vondrak} constructs a $(1-\varepsilon, 1-\varepsilon)$-balanced contention resolution scheme. Applying Theorem~1.3 of the same paper, this contention resolution scheme computes from $\mathbf{q}$ a \emph{feasible} random set $I$ such that:
\begin{equation}\label{eq:cr}
\mathbb{E}\big[f(I)\big] 
\geq (1-\varepsilon) F(\mathbf{q}) 
\end{equation}
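
To convey the flavor of such a scheme, here is a simplified Python sketch; it is ours and is \emph{not} the exact $(1-\varepsilon,1-\varepsilon)$-balanced construction of \cite{vondrak}. It scales the sampling probabilities down by $(1-\varepsilon)$ and, because $f$ is additive and monotone here, restores feasibility by keeping only the heaviest sampled elements:
\begin{verbatim}
import random

def contention_resolution(q, p, w, budget, eps=0.1):
    # Illustrative rounding for the second-stage cardinality
    # constraint |I| <= budget (budget is the integer k - |S|).
    # A real contention resolution scheme is more careful in
    # order to preserve per-element guarantees.
    I = [u for u in q
         if random.random() < p[u] * (1 - eps) * q[u]]
    I.sort(key=lambda u: w[u], reverse=True)  # keep heavy nodes
    return set(I[:budget])
\end{verbatim}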

Equation~\eqref{eq:cr} allows us to prove the following proposition, where $\mathrm{OPT}_{NA}$ denotes the optimal value of problem~\eqref{eq:relaxed} and $\mathrm{OPT}_A$ denotes the optimal value of problem~\eqref{eq:problem}.

\begin{proposition}\label{prop:cr}
    If $(S,\textbf{q})$ is an $\alpha$-approximate solution to the non-adaptive problem~\eqref{eq:relaxed}, then $\mathrm{A}(S) \geq \alpha\, \mathrm{OPT}_A$.
\end{proposition}  

\begin{proof}
    Using the definition of $\mathrm{A}(S)$, one can write:
    \begin{displaymath}
        \mathrm{A}(S) = \sum_{R\subseteq\neigh{S}} p_R
        \max_{\substack{T\subseteq R\\|T|\leq k-|S|}}f(T)
        \geq \sum_{R\subseteq\neigh{S}} p_R \mathbb{E}\big[f(I)\big]
    \end{displaymath}
    where the inequality comes from the fact that $I$ is a feasible random set: $|I|\leq k-|S|$, hence the expected value of $f(I)$ is bounded by the maximum of $f$ over feasible sets.

Equation~\eqref{eq:cr} then implies:
\begin{equation}\label{eq:tmp}
    \mathrm{A}(S) 
    \geq (1-\varepsilon)\sum_{R\subseteq\neigh{S}} p_R F(\mathbf{q})
    = (1-\varepsilon)F(\mathbf{p}\circ\mathbf{q}).
\end{equation}

Equation~\eqref{eq:tmp} holds for any $\varepsilon\geq 0$. In particular, for $\varepsilon$ smaller than $\inf_{S\neq T} |A(S)-A(T)|$, we obtain $\mathrm{A}(S)\geq F(\mathbf{p}\circ\mathbf{q})$. Note that such an $\varepsilon$ is at most polynomially small in the size of the instance.
Since $(S, \mathbf{q})$ is an $\alpha$-approximate non-adaptive solution, $F(\mathbf{p}\circ\mathbf{q}) \geq \alpha\,\mathrm{OPT}_{NA}$. We conclude by applying Proposition~\ref{prop:gap}, which gives $\mathrm{OPT}_{NA}\geq\mathrm{OPT}_{A}$.
\end{proof}

The adaptive seeding problem therefore reduces to the non-adaptive problem.  We now discuss two approaches for constructing non-adaptive solutions: an LP-based approach and a combinatorial algorithm. Both achieve a $(1-1/e)$ approximation ratio, which translates into a $(1-1/e)$ approximation ratio for the adaptive seeding problem~\eqref{eq:problem} via Proposition~\ref{prop:cr}.

\subsection{An LP-Based Approach}
Note that, by linearity of expectation, for an additive function $f$ of the form given by \eqref{eq:voter} we have:
\begin{equation}\label{eq:multi-voter}
    \begin{split}
        F(\textbf{p}) 
        &=\mathbb{E}_{\mathbf{p}}\big[f(R)\big]
        =\mathbb{E}_{\mathbf{p}}\Bigg[\sum_{u\in\neigh{X}}w_u\mathbf{1}_{\{u\in
        R\}}\Bigg]\\
        &=\sum_{u\in\neigh{X}}w_u\mathbb{P}[u\in R]
        =\sum_{u\in\neigh{X}}p_uw_u
    \end{split}
\end{equation}

Thus, the non-adaptive optimization problem \eqref{eq:relaxed} can be written as:
\begin{displaymath}
    \begin{split}
        \max_{\substack{S\subseteq X\\q\in[0,1]^n} } 
        & \sum_{u\in\neigh{X}}p_uq_uw_u\\
        \text{s.t. } & |S|+ \textbf{p}^T\textbf{q} \leq k,\;
                     q_u \leq \mathbf{1}_{\{u\in\neigh{S}\}}
    \end{split}
\end{displaymath}

The choice of the set $S$ can be relaxed by introducing a variable
$\lambda_v\in[0,1]$ for each $v\in X$. We obtain the following
LP relaxation of the non-adaptive problem, \textsc{AdaptiveSeeding-LP}:
\begin{equation}\label{eq:lp}
    \begin{split}
        \max_{\substack{q\in[0,1]^n\\\lambda\in[0,1]^m}}
        & \;\sum_{u\in\neigh{X}}p_uq_uw_u\\
        \text{s.t. } & \sum_{v\in X}\lambda_v+\textbf{p}^T\textbf{q} \leq k,\;
        q_u \leq \sum_{v\in\neigh{u}} \lambda_v
\end{split}
\end{equation}
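
For concreteness, the following Python sketch (ours) assembles and solves \eqref{eq:lp} with an off-the-shelf solver, \texttt{scipy.optimize.linprog}; the data layout and function name are our own choices:
\begin{verbatim}
import numpy as np
from scipy.optimize import linprog

def solve_seeding_lp(neigh, p, w, k):
    # Variables are [lambda_v for v in X] + [q_u for u in N(X)].
    #   neigh -- dict v -> iterable of neighbors u of v in N(X)
    #   p, w  -- dicts u -> p_u, w_u;  k -- total budget
    X = sorted(neigh)
    U = sorted({u for v in X for u in neigh[v]})
    m, n = len(X), len(U)
    # Maximize sum_u p_u w_u q_u, i.e. minimize its negation.
    c = np.concatenate([np.zeros(m), [-p[u] * w[u] for u in U]])
    # Budget constraint: sum_v lambda_v + p^T q <= k.
    A = [np.concatenate([np.ones(m), [p[u] for u in U]])]
    b = [float(k)]
    # Coverage: q_u <= sum of lambda_v over v with u in neigh[v].
    for j, u in enumerate(U):
        row = np.zeros(m + n)
        for i, v in enumerate(X):
            if u in neigh[v]:
                row[i] = -1.0
        row[m + j] = 1.0
        A.append(row)
        b.append(0.0)
    res = linprog(c, A_ub=np.array(A), b_ub=np.array(b),
                  bounds=[(0, 1)] * (m + n))
    return dict(zip(X, res.x[:m])), dict(zip(U, res.x[m:]))
\end{verbatim}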

An optimal solution to the above problem can be found using standard LP-solvers in polynomial time.  The solution returned by the LP is \emph{fractional} and requires a rounding procedure to obtain a feasible solution to our problem, in which $S$ is integral.  We briefly describe our approach, which uses the \emph{pipage rounding} method.\newline 

\noindent \textbf{Pipage Rounding.}
The pipage rounding method~\cite{pipage} is a deterministic rounding method that can be applied to a variety of problems.  In particular, it applies to LP-relaxations of the \textsc{Max-K-Cover} problem, in which we are given a family of sets covering elements of a universe and the goal is to find $k$ subsets whose union has maximum cardinality.  The LP-relaxation is a fractional solution over subsets; the pipage rounding procedure rounds this allocation in linear time, and the integral solution is guaranteed to be within a factor of $(1-1/e)$ of the fractional solution. 
We make the following key observation: for any given $\textbf{q}$, one can remove all elements in $\mathcal{N}(X)$ for which $q_{u}=0$, without changing the value of any solution $(\lambda,\textbf{q})$.
Our rounding procedure can therefore be described as follows: given a solution $(\lambda,\textbf{q})$ we remove all nodes $u \in \mathcal{N}(X)$ for which $q_{u}=0$, which leaves us with a fractional solution to a (weighted) version of the \textsc{Max-K-Cover} problem where nodes in $X$ are the sets and the universe is the set of weighted nodes in $\mathcal{N}(X)$ that were not removed.  We can therefore apply pipage rounding and lose only a factor of $(1-1/e)$ in quality of the solution.
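
The core pipage step is short enough to sketch. The following Python illustration (ours, simplified) rounds a fractional $\lambda$ while preserving $\sum_v\lambda_v$: along a direction $e_i-e_j$ the coverage objective is convex, so moving to the better of the two extreme points never decreases it, and each step makes at least one more coordinate integral.
\begin{verbatim}
def coverage(lam, neigh, wt):
    # F(lambda) =
    #   sum_u wt_u * (1 - prod_{v: u in neigh[v]} (1 - lambda_v))
    val = 0.0
    for u, w_u in wt.items():
        miss = 1.0
        for v in neigh:
            if u in neigh[v]:
                miss *= 1.0 - lam[v]
        val += w_u * (1.0 - miss)
    return val

def pipage_round(lam, neigh, wt, tol=1e-9):
    # Simplified pipage rounding for weighted Max-k-Cover.
    lam = dict(lam)
    while True:
        frac = [v for v in lam if tol < lam[v] < 1 - tol]
        if len(frac) < 2:
            break
        i, j = frac[0], frac[1]
        # Range of eps for the move lam_i += eps, lam_j -= eps.
        lo, hi = -min(lam[i], 1 - lam[j]), min(1 - lam[i], lam[j])
        best, best_val = None, float("-inf")
        for eps in (lo, hi):   # the optimum lies at an endpoint
            cand = dict(lam)
            cand[i] += eps
            cand[j] -= eps
            v = coverage(cand, neigh, wt)
            if v > best_val:
                best, best_val = cand, v
        lam = best
    return {v: int(round(x)) for v, x in lam.items()}
\end{verbatim}
In our setting, the element weights would be $p_uq_uw_u$ for the nodes $u$ with $q_u>0$, and the rounded $\bar\lambda$ selects the first-stage set $S$.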

\begin{lemma}
    For \mbox{\textsc{AdaptiveSeeding-LP}} as defined in \eqref{eq:lp}, any fractional solution $(\lambda, \mathbf{q})\in[0,1]^m\times[0,1]^n$ can be rounded to an integral solution $\bar{\lambda} \in \{0,1\}^{m}$ such that $(1-1/e) F(\mathbf{p}\circ\mathbf{q}) \leq A(\bar{\lambda})$ in $O(m + n)$ steps.
\end{lemma}

\subsection{A Combinatorial Algorithm}
In this section, we introduce a combinatorial algorithm with the same approximation guarantee as the LP-based approach.  The main idea is to reduce the problem to a monotone submodular maximization problem and apply a variant of the celebrated greedy algorithm~\cite{nemhauser}.
The existence of such a reduction is a remarkable feature of linear influence models; for influence models such as independent cascade and linear threshold, the adaptive seeding problem does not reduce to submodular maximization, and a completely different approach is required~\cite{singer}. In contrast with standard influence maximization, the submodularity of the non-adaptive seeding problem is not simply a consequence of properties of the influence function; it also relies strongly on the combinatorial structure of the two-stage optimization. 

Intuitively, we can think of our problem as trying to find a set $S$ in the first stage for which the nodes that can be seeded in the second stage have the largest possible value.  To formalize this, for a budget $b\in\reals^+$ used in the second stage and a set of neighbors $T\subseteq\mathcal{N}(X)$, we will use 
$\mathcal{O}(T,b)$ to denote the optimal value of:
\begin{equation}\label{eq:knap}
    \begin{split}
        \mathcal{O}(T,b)\defeq
        \max_{q\in[0,1]^n} & \sum_{u\in\neigh{X}} p_uq_uw_u\\
                            \text{s.t. } & \mathbf{p}^T\mathbf{q}\leq b\text{ and } q_u=0\text{ if
    }u\notin T
\end{split}
\end{equation}
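
Problem~\eqref{eq:knap} is a fractional knapsack, solved greedily: iterate over the nodes of $T$ in decreasing order of weight and saturate each $q_u$ until the budget is exhausted. A minimal Python sketch (ours):
\begin{verbatim}
def oracle(T, b, p, w):
    # O(T, b): greedy solution of the fractional knapsack
    # of eq. (knap).
    #   T    -- admissible nodes (q_u = 0 outside of T)
    #   b    -- second-stage budget
    #   p, w -- dicts u -> p_u, w_u
    value = 0.0
    for u in sorted(T, key=lambda u: w[u], reverse=True):
        if b <= 0:
            break
        if p[u] == 0:
            continue  # contributes neither value nor cost
        q_u = min(1.0, b / p[u])  # fractional at the break point
        value += p[u] * q_u * w[u]
        b -= p[u] * q_u
    return value
\end{verbatim}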

The optimization problem \eqref{eq:relaxed} for
non-adaptive policies can now be written as:
\begin{equation}\label{eq:sub}
    \begin{split}
        \max_{S\subseteq X} &\; \mathcal{O}\big(\neigh{S},k-|S|\big)\\
        \text{s.t. } & |S|\leq k
    \end{split}
\end{equation}

We start by proving in Proposition~\ref{prop:sub} that for fixed $b$, $\mathcal{O}(\neigh{\cdot}, b)$ is submodular. This proposition relies on Lemmas~\ref{lemma:nd} and~\ref{lemma:sub}, which study the properties of $\mathcal{O}(T,b)$ first as a function of $b$, then as a function of $T$.

\begin{lemma}\label{lemma:nd}
    Let $T \subseteq \mathcal{N}(X)$ and $x \in \mathcal{N}(X)$, then
    $\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)$ is
    non-decreasing in $b$.
\end{lemma}

\begin{proof}
\emph{W.l.o.g.} we can rename the elements of $T$ so that $w_1\geq w_2\geq\ldots\geq w_{|T|}$.
Then, $\mathcal{O}(T,b)$ has the following simple piecewise linear expression:
\begin{displaymath}\label{eq:pw}
    \mathcal{O}(T,b) = 
    \begin{cases}
        b w_1&\text{if }0\leq b<p_1\\
        \displaystyle
        \sum_{k=1}^{i-1}p_k(w_k-w_i)
        + b w_i
        &\displaystyle\text{if } 0\leq b - \sum_{k=1}^{i-1}p_k< p_i\\
        \displaystyle
        \sum_{k=1}^{|T|} p_kw_k
        &\displaystyle\text{if } b\geq\sum_{k=1}^{|T|} p_k
    \end{cases}
\end{displaymath}

Let us define for $t\in\reals^+$, $n(t)\defeq \inf\Big\{i\text{ s.t.
} \sum_{k=1}^i p_k > t\Big \}$ with $n(t)=+\infty$ when the set is empty. In
particular, note that $t\mapsto n(t)$ is non-decreasing. Denoting
$\partial_+\mathcal{O}_T$ the right derivative of $\mathcal{O}(T,\cdot)$, one
can write $\partial_+\mathcal{O}_T(t)=w_{n(t)}$, with the convention that
$w_\infty = 0$.

Writing $i \defeq \sup\Big\{j\text{ s.t.
} w_j\geq w_x\Big\}$, it is easy to see that
$\partial_+\mathcal{O}_{T\cup\{x\}}\geq\partial_+\mathcal{O}_T$, where
$n(\cdot)$ is computed with respect to the ordering of $T$ and inserting $x$
shifts the budget by $p_x$ beyond its insertion point. Indeed:
\begin{enumerate}
    \item if $n(t)\leq i$ then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
        = \partial_+\mathcal{O}_T(t)= w_{n(t)}$.
    \item if $n(t)\geq i+1$ and $n(t-p_x)\leq i$ then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
        = w_x\geq w_{n(t)}= \partial_+\mathcal{O}_T(t)$.
    \item if $n(t-p_x)\geq i+1$, then $\partial_+\mathcal{O}_{T\cup\{x\}}(t)
        = w_{n(t-p_x)}\geq w_{n(t)}=\partial_+\mathcal{O}_T(t)$.
\end{enumerate}

Let us now consider $b$ and $c$ such that $b\leq c$. Then, using the integral
representation of $\mathcal{O}(T\cup\{x\},\cdot)$ and $\mathcal{O}(T,\cdot)$, we get:
\begin{multline*}
    \mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{T\cup\{x\}}(t)dt\\
    \geq\int_b^c\partial_+\mathcal{O}_T(t)dt=\mathcal{O}(T,c)-\mathcal{O}(T,b)
\end{multline*}
Rearranging the terms, $\mathcal{O}(T\cup\{x\},c)-\mathcal{O}(T,c)\geq
\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)$,
which concludes the proof of the lemma.
\end{proof}

\begin{lemma}\label{lemma:sub}
    For any $b\in\reals^+$, $\mathcal{O}(T,b)$ is submodular in $T$, $T\subseteq\neigh{X}$.
\end{lemma}

\begin{proof}
    Let $T\subseteq\neigh{X}$ and $x, y\in\neigh{X}\setminus T$. Using the
    second-order characterization of submodular functions, it suffices to show
    that:
    \begin{displaymath}\label{eq:so}
        \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)\geq
        \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
    \end{displaymath}

    We distinguish two cases based on the relative position of $w_x$ and $w_y$.
    The following notations will be useful: $P_T^x \defeq \big\{u\in
    T\text{ s.t. }w_u\geq w_x\big\}$ and $S_T^x\defeq
    T\setminus P_T^x$.

    \textbf{Case 1:} If $w_y\geq w_x$, then one can
    write:
    \begin{gather*}
        \mathcal{O}(T\cup\{y,x\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)+
        \mathcal{O}(S_T^y\cup\{x\},b_2)\\
        \mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^y\cup\{y\},b_1)
        + \mathcal{O}(S_T^y,b_2)
    \end{gather*}
    where $b_1$ is the fraction of the budget $b$ spent on $P_T^y\cup\{y\}$ and
    $b_2=b-b_1$.
    
    Similarly:
    \begin{gather*}
        \mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y\cup\{x\},c_2)\\
        \mathcal{O}(T, b) = \mathcal{O}(P_T^y, c_1) + \mathcal{O}(S_T^y,c_2)
    \end{gather*}
    where $c_1$ is the fraction of the budget $b$ spent on $P_T^y$ and $c_2
    = b - c_1$. 

    Note that $b_1\geq c_1$: an optimal solution will first spend as much
    of the budget as possible on $P_T^y\cup\{y\}$ before adding elements of
    $S_T^y\cup\{x\}$.

    In this case:
    \begin{displaymath}
        \begin{split}
            \mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)&=
        \mathcal{O}(S_T^y\cup\{x\},c_2)-\mathcal{O}(S_T^y,c_2)\\
        &\geq \mathcal{O}(S_T^y\cup\{x\},b_2)-\mathcal{O}(S_T^y,b_2)\\
        & = \mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)
    \end{split}
    \end{displaymath}
    where the inequality comes from Lemma~\ref{lemma:nd} and 
    $c_2\geq b_2$.

    \textbf{Case 2:} If $w_x > w_y$, we now decompose
    the solution on $P_T^x$ and $S_T^x$:
    \begin{gather*}
        \mathcal{O}(T\cup\{x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1)
        + \mathcal{O}(S_T^x,b_2)\\
        \mathcal{O}(T,b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x,c_2)\\
        \mathcal{O}(T\cup\{y, x\},b) = \mathcal{O}(P_T^x\cup\{x\},b_1)
        + \mathcal{O}(S_T^x\cup\{y\},b_2)\\
        \mathcal{O}(T\cup\{y\},b) = \mathcal{O}(P_T^x,c_1)+\mathcal{O}(S_T^x\cup\{y\},c_2)
    \end{gather*}
    with $b_1+b_2=b$, $c_1+c_2=b$ and $b_2\leq c_2$. 

    In this case again:
        \begin{multline*}
            \big(\mathcal{O}(T\cup\{x\},b)-\mathcal{O}(T,b)\big)
            -\big(\mathcal{O}(T\cup\{y, x\},b)-\mathcal{O}(T\cup\{y\},b)\big)\\
        =\big(\mathcal{O}(S_T^x\cup\{y\},c_2)-\mathcal{O}(S_T^x,c_2)\big)\\
        -\big(\mathcal{O}(S_T^x\cup\{y\},b_2)-\mathcal{O}(S_T^x,b_2)\big)
        \geq 0
    \end{multline*}
    where the inequality uses Lemma~\ref{lemma:nd} and $c_2\geq b_2$.

    In both cases, we were able to obtain the second-order characterization of submodularity. This concludes the proof of the lemma.
\end{proof}

\begin{proposition}\label{prop:sub}
    Let $b\in\reals^+$, then $\mathcal{O}(\neigh{S},b)$ is submodular in $S$, $S\subseteq X$.
\end{proposition}

\begin{proof}
    Let us consider $S$ and $T$ such that $S\subseteq T\subseteq X$ and $x\in
    X\setminus T$. In particular, note that $\neigh{S}\subseteq\neigh{T}$. 

    Let us write $\neigh{S\cup\{x\}}=\neigh{S}\cup R$ with $\neigh{S}\cap
    R=\emptyset$ and similarly, $\neigh{T\cup\{x\}}=\neigh{T}\cup R'$ with
    $\neigh{T}\cap R'=\emptyset$. It is clear that $R'\subseteq R$. Writing $R'=\{u_1,\ldots,u_\ell\}$:
    \begin{multline*}
            \mathcal{O}(\neigh{T\cup\{x\}},b)- \mathcal{O}(\neigh{T},b)\\
            =\sum_{i=1}^\ell\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_i\},b)
            -\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_{i-1}\},b)\\
            \leq \sum_{i=1}^\ell\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_i\},b)
            -\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_{i-1}\},b)\\
            =\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
    \end{multline*}
    where the inequality comes from the submodularity of $\mathcal{O}(\cdot,b)$ proved in Lemma~\ref{lemma:sub}. The function $\mathcal{O}(\cdot,b)$ is also clearly monotone (set-increasing), hence:
    \begin{multline*}
            \mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)\\
            \leq \mathcal{O}(\neigh{S}\cup R,b)-\mathcal{O}(\neigh{S},b)\\
            =\mathcal{O}(\neigh{S\cup\{x\}},b)-\mathcal{O}(\neigh{S},b)
    \end{multline*}
    This concludes the proof of the proposition.
\end{proof}

We can now use Proposition~\ref{prop:sub} to reduce \eqref{eq:sub} to a monotone submodular maximization problem. First, we note that~\eqref{eq:sub} can be rewritten as:
\begin{equation}\label{eq:sub-mod}
    \begin{split}
        &\max_{t}\max_{S\subseteq X} \; \mathcal{O}\big(\neigh{S},t\big)\\
        &\text{s.t. } |S| + t\leq k
    \end{split}
\end{equation}

Intuitively, we fix $t$ arbitrarily so that the inner $\max$ in \eqref{eq:sub-mod} becomes a monotone submodular maximization problem under the cardinality constraint $|S|\leq k-t$. We then optimize over the value of $t$. Combining this observation with the greedy algorithm for monotone submodular maximization~\cite{nemhauser}, we obtain Algorithm~\ref{alg:comb}, whose performance guarantee is summarized in Proposition~\ref{prop:main_result}.

\begin{algorithm}
    \caption{Combinatorial algorithm}
    \label{alg:comb}
    \algsetup{indent=2em}
    \begin{algorithmic}[1]
        \STATE $S\leftarrow \emptyset$
        \FOR{$t=1$ \TO $k-1$}
            \STATE $S_t\leftarrow \emptyset$
            \FOR{$i=1$ \TO $k-t$}
                \STATE $x^*\leftarrow\argmax_{x\in
                X\setminus S_t}\mathcal{O}(\neigh{S_t\cup\{x\}},t)
                -\mathcal{O}(\neigh{S_t},t)$\label{line:argmax}
                \STATE $S_t\leftarrow S_t\cup\{x^*\}$
            \ENDFOR
            \IF{$\mathcal{O}(\neigh{S_t},t)>\mathcal{O}(\neigh{S},k-|S|)$}
                \STATE $S\leftarrow S_t$
            \ENDIF
        \ENDFOR
        \RETURN $S$
    \end{algorithmic}
\end{algorithm}
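
For reference, the following Python transcription of Algorithm~\ref{alg:comb} is ours; it reuses the \texttt{oracle} sketch given after \eqref{eq:knap} and favors readability over the optimizations discussed in the running-time analysis below.
\begin{verbatim}
def combinatorial_algorithm(X, neigh, p, w, k):
    #   X     -- set of first-stage nodes
    #   neigh -- dict v -> set of neighbors of v in N(X)
    #   p, w  -- dicts u -> p_u, w_u;  k -- total budget
    def N(S):  # N(S): union of the neighborhoods of S
        return set().union(*(neigh[v] for v in S)) if S else set()

    S = set()
    for t in range(1, k):          # t: second-stage budget
        S_t = set()
        for _ in range(k - t):     # k - t: first-stage budget
            # Greedy step: the marginal gain is maximized by
            # maximizing O(N(S_t + {x}), t) itself.
            x = max(X - S_t,
                    key=lambda v: oracle(N(S_t | {v}), t, p, w))
            S_t.add(x)
        if oracle(N(S_t), t, p, w) > oracle(N(S), k - len(S), p, w):
            S = S_t
    return S
\end{verbatim}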

\begin{proposition}\label{prop:main_result}
    Let $S$ be the set computed by Algorithm~\ref{alg:comb} and let us denote
    by $\mathrm{A}(S)$ the value of the adaptive policy selecting $S$ on the first
    stage. Then $\mathrm{A}(S) \geq (1-1/e)\mathrm{OPT}_A$.
\end{proposition}

\begin{proof}
    We simply note that the body of the outer \textsf{for} loop on line 2 of Algorithm~\ref{alg:comb} is the greedy submodular maximization algorithm of \cite{nemhauser}. Since $\mathcal{O}(\neigh{\cdot}, t)$ is submodular (Proposition~\ref{prop:sub}), it solves the inner $\max$ in \eqref{eq:sub-mod} with an approximation ratio of $(1-1/e)$. The outer \textsf{for} loop then computes the outer $\max$ of \eqref{eq:sub-mod}.

    As a consequence, Algorithm~\ref{alg:comb} computes a $(1-1/e)$-approximate non-adaptive solution. We conclude by applying Proposition~\ref{prop:cr}.
\end{proof}
\subsection{Running time}
The algorithm described above considers all possible ways to split the seeding budget between the first and the second stage.  For each possible split $\{(t,k-t)\}_{t=1,\ldots,k-1}$, the algorithm computes an approximation to the optimal non-adaptive solution that uses $k-t$ nodes in the first stage and a budget of $t$ in the second stage, and returns the solution of the split with the highest value (breaking ties arbitrarily).  This process can be trivially parallelized across $k-1$ machines, each performing the computation for a single split.  

A slightly more sophisticated approach is to consider only $\log k$ splits: $(1,k-1),(2,k-2),(4,k-4),\ldots,(2^{\lfloor \log k \rfloor},k-2^{\lfloor \log k \rfloor})$, and then select the best solution from this set.  It is not hard to see that in comparison to the previous approach, this reduces the approximation guarantee by a factor of at most 2: if the optimal solution is obtained by spending $t$ on the first stage and $k-t$ on the second stage, then since $t \leq 2\cdot2^{\lfloor \log t \rfloor}$, the solution computed for the split $(2^{\lfloor \log t \rfloor}, k - 2^{\lfloor \log t \rfloor})$ has at least half that value.  
More generally, for any $\epsilon>0$ one can parallelize over $\log_{1+\epsilon}k$ machines at the cost of losing a factor of $(1+\epsilon)$ in the approximation guarantee.


To implement Algorithm~\ref{alg:comb} efficiently, the computation of the $\argmax$ on line 5 must be handled carefully. $\mathcal{O}(\neigh{S_t\cup\{x\}},t)$ is the optimal solution to the fractional knapsack problem~\eqref{eq:knap} with budget $t$ and can be computed in time $O\big(\min(\frac{t}{p_\text{min}},n)\big)$ by iterating over the list of nodes in $\neigh{S_t\cup\{x\}}$ in decreasing order of the degrees. This decreasing order of $\neigh{S_t}$ can be maintained throughout the greedy construction of $S_t$ by:
\begin{itemize}
\item ordering the list of neighbors of each node in $X$ in decreasing order of the degrees when initially constructing the graph. This accounts for the $n\log n$ additive term.
\item when adding node $x$ to $S_t$, observe that $\neigh{S_t\cup\{x\}} = \neigh{S_t}\cup\neigh{\{x\}}$. Hence, if $\neigh{S_t}$ and $\neigh{\{x\}}$ are sorted lists, then $\mathcal{O}(\neigh{S_t\cup\{x\}},t)$ can be computed in a single iteration of length $\min(\frac{t}{p_\text{min}},n)$ where the two sorted lists are merged on the fly.
\end{itemize}
As a consequence, the running time of line 5 is bounded from above by $O\big(m\min(\frac{t}{p_\text{min}},n)\big)$. The two nested \textsf{for} loops contribute the additional $k^2$ factor. The running time of Algorithm~\ref{alg:comb} is summarized in Proposition~\ref{prop:running_time}.
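
A sketch of this merge-based evaluation (ours; \texttt{heapq.merge} stands in for the on-the-fly merge of the two sorted neighbor lists):
\begin{verbatim}
import heapq

def oracle_merged(sorted_NS, sorted_Nx, t, p, w):
    # Evaluate O(N(S_t) u N({x}), t) in a single pass over two
    # lists already sorted by decreasing weight.
    value, seen = 0.0, set()
    merged = heapq.merge(sorted_NS, sorted_Nx,
                         key=lambda u: -w[u])
    for u in merged:
        if u in seen:       # the two lists may share neighbors
            continue
        seen.add(u)
        if t <= 0:
            break
        if p[u] == 0:
            continue
        q_u = min(1.0, t / p[u])
        value += p[u] * q_u * w[u]
        t -= p[u] * q_u
    return value
\end{verbatim}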

\begin{proposition}\label{prop:running_time}
    Algorithm~\ref{alg:comb} runs in time $O\big(n\log
    n + k^2 m \min(\frac{k}{p_\text{min}},n)\big)$, with $p_\text{min} =\min\{p_u,u\in\neigh{X}\}$.
\end{proposition}