diff options
Diffstat (limited to 'paper/sections/combinatorial.tex')
| -rw-r--r-- | paper/sections/combinatorial.tex | 299 |
1 files changed, 299 insertions, 0 deletions
diff --git a/paper/sections/combinatorial.tex b/paper/sections/combinatorial.tex new file mode 100644 index 0000000..b744564 --- /dev/null +++ b/paper/sections/combinatorial.tex @@ -0,0 +1,299 @@ +\subsection{Submodularity of the problem} +Let $\mathcal{U} = \{(v_u,c_u);\, u\in\neigh{X}\}$ with $v_u=p_uw_u$ and +$c_u=p_u$. Let $b\in\reals^+$ and $A\subseteq\mathcal{U}$, we denote by +$\mathcal{O}(A,b)$ the optimal solution to: +\begin{displaymath} + \begin{split} + &\max_{q\in[0,1]^n}\sum_{u\in\neigh{X}}q_uv_u\\ + &\text{s.t. }\sum_{u\in\neigh{X}}q_uc_u\leq b\text{ and } q_u=0\text{ if + }u\notin A +\end{split} +\end{displaymath} +so that the optimization problem \eqref{eq:relaxed-multi} for relaxed +non-adaptive policies can be written: +\begin{equation}\label{eq:sub} + \begin{split} + &\max_{S\subseteq X}\mathcal{O}\big(\neigh{S},k-|S|\big)\\ + &\text{s.t. }|S|\leq k + \end{split} +\end{equation} + +In this section, we will show how \eqref{eq:sub} can be reduced to a submodular +maximization problem, leading to a fast combinatorial algorithm for the +adaptive seeding model in the voter model. + +\begin{lemma}\label{lemma:nd} + Let $A\subseteq\mathcal{U}$ and $x=(v,c)\in\mathcal{U}\setminus A$, then + the function $b\mapsto \mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)$ is + non-decreasing over $\reals^+$. +\end{lemma} + +\begin{proof} +\emph{W.l.o.g} we can rename and order the pairs in $A$ so that: +\begin{displaymath} +\frac{v_1}{c_1}\geq \frac{v_2}{c_2}\geq\ldots\geq\frac{v_m}{c_m}. +\end{displaymath} +Then, $\mathcal{O}(A,b)$ has the following simple piecewise linear expression: +\begin{equation}\label{eq:pw} + \mathcal{O}(A,b) = + \begin{cases} + b\frac{v_1}{c_1}&\text{if }0\leq b<c_1\\ + \sum_{k=1}^{i-1}v_k + \left(b-\sum_{k=1}^{i-1}c_k\right)\frac{v_i}{c_i} + &\text{if } \sum_{k=1}^{i-1}c_k\leq b< \sum_{k=1}^ic_k\\ + \sum_{k=1}^m v_k&\text{if } b\geq\sum_{i=1}^m c_k + + \end{cases} +\end{equation} + +Let us define for $t\in\reals^+$, $n(t)\defeq \inf\Big\{i\text{ s.t. +} \sum_{k=1}^i c_k > t\Big \}$ with $n(t)=+\infty$ when the set is empty. In +particular, note that $x\mapsto n(t)$ is non-decreasing. Denoting +$\partial_+\mathcal{O}_A$ the right derivative of $\mathcal{O}(A,\cdot)$, one +can write $\partial_+\mathcal{O}_A(t) +=\frac{v_{n(t)}}{c_{n(t)}}$, with the convention that +$\frac{v_\infty}{c_\infty} = 0$. + +Writing $i \defeq \sup\Big\{j\text{ s.t. +} \frac{v_j}{c_j}\geq\frac{v}{c}\Big\}$, it is easy to see that +$\partial_+\mathcal{O}_{A\cup\{x\}}\geq\partial_+\mathcal{O}_A$. Indeed: +\begin{enumerate} + \item if $n(t)\leq i$ then $\partial_+\mathcal{O}_{A\cup\{x\}}(t) + = \partial_+\mathcal{O}_A(t)= \frac{v_{n(t)}}{c_{n(t)}}$. + \item if $n(t)\geq i+1$ and $n(t-c)\leq i$ then $\partial_+\mathcal{O}_{A\cup\{x\}}(t) + = \frac{v}{c}\geq\frac{v_{n(t)}}{c_{n(t)}}= \partial_+\mathcal{O}_A(t)$. + \item if $n(t-c)\geq i+1$, then $\partial_+\mathcal{O}_{A\cup\{x\}} + = \frac{v_{n(t-c)}}{c_{n(t-c)}}\geq + \frac{v_{n(t)}}{c_{n(t)}}=\partial_+\mathcal{O}_A(t)$. +\end{enumerate} + +Let us now consider $b$ and $c$ such that $b\leq c$. Then, using the integral +representation of $\mathcal{O}(A\cup\{x\},\cdot)$ and $\mathcal{O}(A,\cdot)$, we get: +\begin{multline*} + \mathcal{O}(A\cup\{x\},c)-\mathcal{O}(A\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{A\cup\{x\}}(t)dt\\ + \geq\int_b^c\partial_+\mathcal{O}_A(t)dt=\mathcal{O}(A,c)-\mathcal{O}(A,b) +\end{multline*} +Re-ordering the terms, $\mathcal{O}(A\cup\{x\},c)-\mathcal{O}(A,c)\geq +\mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)$ +which concludes the proof of the lemma. +\end{proof} + +\begin{lemma}\label{lemma:sub} + Let $b\in\reals^+$, then the function $A\mapsto \mathcal{O}(A,b)$ is submodular. +\end{lemma} + +\begin{proof} + Let $A\subseteq\mathcal{U}$ and $x, y\in\mathcal{U}\setminus A$. Using the + second-order characterization of submodular functions, it suffices to show + that: + \begin{equation}\label{eq:so} + \mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)\geq + \mathcal{O}(A\cup\{y\}\cup\{x\},b)-\mathcal{O}(A\cup\{y\},b) + \end{equation} + + Let us write $x=(v_x, c_x)$ and $y=(v_y,c_y)$. We distinguish two cases + based on the relative position of $\frac{v_x}{c_x}$ and $\frac{v_y}{c_y}$. + The following notations will be useful: $S_A^x \defeq \big\{(v,c)\in + A\text{ s.t. }\frac{v_x}{c_x}\leq\frac{v}{c}\big\}$ and $P_A^x\defeq + A\setminus S_A^x$. + + \textbf{Case 1:} If $\frac{v_y}{c_y}\geq \frac{v_x}{c_x}$, then one can + write: + \begin{gather*} + \mathcal{O}(A\cup\{y\}\cup\{x\},b) = \mathcal{O}(P_A^y\cup\{y\},b_1)+ + \mathcal{O}(S_A^y\cup\{x\},b_2)\\ + \mathcal{O}(A\cup\{y\},b) = \mathcal{O}(P_A^y\cup\{y\},b_1) + + \mathcal{O}(S_A^y,b_2) + \end{gather*} + where $b_1$ is the fraction of the budget $b$ spent on $P_A^y\cup\{y\}$ and + $b_2=b-b_1$. + + Similarly: + \begin{gather*} + \mathcal{O}(A\cup\{x\},b) = \mathcal{O}(P_A^y, c_1) + \mathcal{O}(S_A^y\cup\{x\},c_2)\\ + \mathcal{O}(A, b) = \mathcal{O}(P_A^y, c_1) + \mathcal{O}(S_A^y,c_2) + \end{gather*} + where $c_1$ is the fraction of the budget $b$ spent on $P_A^y$ and $c_2 + = b - c_1$. + + Note that $b_1\geq c_1$: an optimal solution will first spent as much + budget as possible on $P_A^y\cup\{y\}$ before adding elements in + $S_A^y\cup\{x\}$. We can now rewrite \eqref{eq:so}: + \begin{displaymath} + \mathcal{O}(S_A^y\cup\{x\},c_2)+\mathcal{O}(S_A^y,c_2)\geq + \mathcal{O}(S_A^y\cup\{x\},b_2)+\mathcal{O}(S_A^y,b_2) + \end{displaymath} + with $c_2\geq b_2$. This inequality is true by Lemma~\ref{lemma:nd}. + + \textbf{Case 2:} If $\frac{v_x}{c_x}\geq \frac{v_y}{c_y}$, we now decompose + the solution on $P_A^x$ and $S_A^x$: + \begin{gather*} + \mathcal{O}(A\cup\{x\},b) = \mathcal{O}(P_A^x\cup\{x\},b_1) + + \mathcal{O}(S_A^x,b_2)\\ + \mathcal{O}(A,b) = \mathcal{O}(P_A^x,c_1)+\mathcal{O}(S_A^x,c_2) + \intertext{and} + \mathcal{O}(A\cup\{y\}\cup\{x\},b) = \mathcal{O}(P_A^x\cup\{x\},b_1) + + \mathcal{O}(S_A^x\cup\{y\},b_2)\\ + \mathcal{O}(A\cup\{y\},b) = \mathcal{O}(P_A^x,c_1)+\mathcal{O}(S_A^x\cup\{y\},c_2) + \end{gather*} + with $b_1+b_2=b$, $c_1+c_2=b$ and $b_2\leq c_2$. The equation \eqref{eq:so} + can be rewritten: + \begin{displaymath} + \mathcal{O}(S_A^x,b_2)-\mathcal{O}(S_A^x,c_2)\geq + \mathcal{O}(S_A^x\cup\{u\},b_2)-\mathcal{O}(S_A^x\cup\{y\},c_2) + \end{displaymath} + Reordering the terms, we get: + \begin{displaymath} + \mathcal{O}(S_A^x\cup\{y\},c_2) -\mathcal{O}(S_A^x,c_2)\geq + \mathcal{O}(S_A^x\cup\{u\},b_2)-\mathcal{O}(S_A^x,b_2) + \end{displaymath} + with $c_2\geq b_2$. This is again true by Lemma~\ref{lemma:nd}. +\end{proof} + +\begin{proposition}\label{prop:sub} + Let $b\in\mathbf{R}^+$, then the function + $S\mapsto\mathcal{O}(\neigh{S},b)$ is submodular over $2^X$. +\end{proposition} + +\begin{proof} + Let us consider $S$ and $T$ such that $S\subseteq T\subseteq X$ and $x\in + X\setminus T$. In particular, note that $\neigh{S}\subseteq\neigh{T}$. + + Let us write $\neigh{S\cup\{x\}}=\neigh{S}\cup R$ with $\neigh{S}\cap + R=\emptyset$ and similarly, $\neigh{T\cup\{x\}}=\neigh{T}\cup R'$ with + $\neigh{T}\cap R'=\emptyset$. Using these notations, it is clear that + $R'\subseteq R$. Let us enumerate the elements in $R'$ using an arbitrary + order: $R'=\{u_1,\ldots,u_k\}$. Then one can write: + \begin{displaymath} + \begin{split} + \mathcal{O}(\neigh{T\cup\{x\}},b)&=\mathcal{O}(\neigh{T}\cup R',b)\\ + &=\sum_{i=1}^k\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_i\},b) + -\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_{i-1}\},b)\\ + &\leq \sum_{i=1}^k\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_i\},b) + -\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_{i-1}\},b)\\ + &=\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b) + \end{split} + \end{displaymath} + where the inequality comes from the submodularity of + $A\mapsto\mathcal{O}(A,b)$ proved in Lemma~\ref{lemma:sub}. This same + function is also obviously set-increasing, hence: + \begin{displaymath} + \begin{split} + \mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b) + &\leq \mathcal{O}(\neigh{S}\cup R,b)-\mathcal{O}(\neigh{S},b)\\ + &=\mathcal{O}(\neigh{S\cup\{x\}},b)-\mathcal{O}(\neigh{S},b) + \end{split} + \end{displaymath} + This concludes the proof of the proposition. +\end{proof} + +Using Proposition~\ref{prop:sub}, we can finally reduce \eqref{eq:sub} to +a submodular optimization problem by treating $|S|$ as a variable to optimize. +Specifically, \eqref{eq:sub} is equivalent to the following problem: +\begin{equation}\label{eq:sub2} +\begin{split} + \max_{1\leq t\leq k}\max_{|S|\leq k-t}\mathcal{O}(\neigh{S},t) +\end{split} +\end{equation} + +\begin{algorithm} + \caption{Combinatorial algorithm} + \label{alg:comb} + \algsetup{indent=2em} + \begin{algorithmic}[1] + \STATE $S\leftarrow \emptyset$ + \FOR{$t=1$ \TO $k-1$} + \STATE $S_t\leftarrow \emptyset$ + \FOR{$i=1$ \TO $k-t$} + \STATE $x^*\leftarrow\argmax_{x\in + X\setminus S_t}\mathcal{O}(\neigh{S_t\cup\{x\}},t) + -\mathcal{O}(\neigh{S_t},t)$\label{line:argmax} + \STATE $S_t\leftarrow S_t\cup\{x^*\}$ + \ENDFOR + \IF{$\mathcal{O}(\neigh{S_t},t)>\mathcal{O}(\neigh{S},k-|S|)$} + \STATE $S\leftarrow S_t$ + \ENDIF + \ENDFOR + \RETURN $S$ + \end{algorithmic} +\end{algorithm} + +\begin{proposition} + Let $S$ be the set computed by Algorithm~\ref{alg:comb} and let us denote + by $A(S)$ the value of the adaptive policy which selects $S$ on the first + day. Then: + \begin{displaymath} + A(S) \geq \frac{1}{2}\left(1-\frac{1}{e}\right)OPT. + \end{displaymath} + Furthermore, Algorithm~\ref{alg:comb} runs in time $O\left(n\log + n+k^2m\min(\frac{k}{p_\text{min}},n)\right)$, where $m=|X|$, $n=|\neigh{X}|$ + and $p_\text{min} =\min\{p_u,u\in\neigh{X}\}$. +\end{proposition} + +\begin{proof} + For each realization of the neighbors $R$, let us define by $X_R$ the + random set which includes each $u\in R$ with probability $q_u$ on the + second day and: + \begin{displaymath} + \tilde{X}_R= + \begin{cases} + X_R&\text{if } |X_R|\leq t\\ + \emptyset&\text{otherwise} + \end{cases} + \end{displaymath} + where $t = k-|S|$. $\tilde{X}_R$ is a random variable over the feasible + solutions on the second day. As a consequence: + \begin{displaymath} + A(S) \geq \sum_{R\subseteq\neigh{S}} p_R\mathbb{E}\big[f(\tilde{X}_R)\big] + \end{displaymath} + Let us define by $Y$ the random set which includes each $u\in\neigh{X}$ + with probability $p_uq_u$ and: + \begin{displaymath} + \tilde{Y}= + \begin{cases} + Y&\text{if } |Y|\leq t\\ + \emptyset&\text{otherwise} + \end{cases}. + \end{displaymath} + It is easy to see that $\tilde{X}_R$ is the conditional expectation of + $\tilde{Y}$ given + $R$. Hence: + \begin{displaymath} + \sum_{R\subseteq\neigh{S}} p_R\mathbb{E}\big[f(\tilde{X}_R)\big] + = \mathbb{E}\big[f(\tilde{Y})\big] + \end{displaymath} + But: + \begin{align*} + \mathbb{E}\big[f(\tilde{Y})\big] + &=\sum_{T\subseteq\neigh{X}}\mathbb{P}\big[\tilde{Y}=T\big]f(T) + = \sum_{\substack{T\subseteq\neigh{X}\\|T|\leq + t}}\mathbb{P}\big[Y=T\big]f(T)\\ + &= \mathbb{E}\big[f(Y)\big]-\sum_{\substack{T\subseteq\neigh{X}\\|T|> + t}}\mathbb{P}\big[Y=T\big]f(T) + \end{align*} + Roughly: + \begin{displaymath} + \sum_{\substack{T\subseteq\neigh{X}\\|T|> + t}}\mathbb{P}\big[Y=T\big]f(T)\leq + \frac{1}{2}\mathbb{E}\big[f(Y)\big] + \end{displaymath} + Combining the above inequalities we get: + \begin{displaymath} + A(S)\geq \frac{1}{2} \mathbb{E}\big[f(Y)\big] + \end{displaymath} + But $\mathbb{E}\big[f(Y)\big]$ is precisely the value of the non-adaptive + solution computed by Algorithm~\ref{alg:comb} which is + a $\left(1-\frac{1}{e}\right)$ approximation of the + optimal non adaptive solution. Hence: + \begin{displaymath} + A(S) \geq \frac{1}{2}\left(1-\frac{1}{e}\right) OPT_{NA} + \end{displaymath} + Finally, we conclude with Proposition~\ref{prop:gap} +\end{proof} + +\subsection{From expectation to high probability} + +\subsection{Questions} + +\begin{itemize} + \item Can it be parallelized (map-reduce)? +\end{itemize} |
