summaryrefslogtreecommitdiffstats
path: root/paper/sections/combinatorial.tex
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2014-10-24 12:32:08 -0400
committerThibaut Horel <thibaut.horel@gmail.com>2014-10-24 12:32:08 -0400
commit4f7d4804234f5515a4dded8b05d9568653b7ae3c (patch)
tree98d3bbb27692a861d602d52d1650e6d60c2b045c /paper/sections/combinatorial.tex
parentece1d828d53d6123fcecb5ea8bf9b126d1728ccc (diff)
downloadfast-seeding-4f7d4804234f5515a4dded8b05d9568653b7ae3c.tar.gz
Add paper
Diffstat (limited to 'paper/sections/combinatorial.tex')
-rw-r--r--paper/sections/combinatorial.tex299
1 files changed, 299 insertions, 0 deletions
diff --git a/paper/sections/combinatorial.tex b/paper/sections/combinatorial.tex
new file mode 100644
index 0000000..b744564
--- /dev/null
+++ b/paper/sections/combinatorial.tex
@@ -0,0 +1,299 @@
+\subsection{Submodularity of the problem}
+Let $\mathcal{U} = \{(v_u,c_u);\, u\in\neigh{X}\}$ with $v_u=p_uw_u$ and
+$c_u=p_u$. Let $b\in\reals^+$ and $A\subseteq\mathcal{U}$, we denote by
+$\mathcal{O}(A,b)$ the optimal solution to:
+\begin{displaymath}
+ \begin{split}
+ &\max_{q\in[0,1]^n}\sum_{u\in\neigh{X}}q_uv_u\\
+ &\text{s.t. }\sum_{u\in\neigh{X}}q_uc_u\leq b\text{ and } q_u=0\text{ if
+ }u\notin A
+\end{split}
+\end{displaymath}
+so that the optimization problem \eqref{eq:relaxed-multi} for relaxed
+non-adaptive policies can be written:
+\begin{equation}\label{eq:sub}
+ \begin{split}
+ &\max_{S\subseteq X}\mathcal{O}\big(\neigh{S},k-|S|\big)\\
+ &\text{s.t. }|S|\leq k
+ \end{split}
+\end{equation}
+
+In this section, we will show how \eqref{eq:sub} can be reduced to a submodular
+maximization problem, leading to a fast combinatorial algorithm for the
+adaptive seeding model in the voter model.
+
+\begin{lemma}\label{lemma:nd}
+ Let $A\subseteq\mathcal{U}$ and $x=(v,c)\in\mathcal{U}\setminus A$, then
+ the function $b\mapsto \mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)$ is
+ non-decreasing over $\reals^+$.
+\end{lemma}
+
+\begin{proof}
+\emph{W.l.o.g} we can rename and order the pairs in $A$ so that:
+\begin{displaymath}
+\frac{v_1}{c_1}\geq \frac{v_2}{c_2}\geq\ldots\geq\frac{v_m}{c_m}.
+\end{displaymath}
+Then, $\mathcal{O}(A,b)$ has the following simple piecewise linear expression:
+\begin{equation}\label{eq:pw}
+ \mathcal{O}(A,b) =
+ \begin{cases}
+ b\frac{v_1}{c_1}&\text{if }0\leq b<c_1\\
+ \sum_{k=1}^{i-1}v_k + \left(b-\sum_{k=1}^{i-1}c_k\right)\frac{v_i}{c_i}
+ &\text{if } \sum_{k=1}^{i-1}c_k\leq b< \sum_{k=1}^ic_k\\
+ \sum_{k=1}^m v_k&\text{if } b\geq\sum_{i=1}^m c_k
+
+ \end{cases}
+\end{equation}
+
+Let us define for $t\in\reals^+$, $n(t)\defeq \inf\Big\{i\text{ s.t.
+} \sum_{k=1}^i c_k > t\Big \}$ with $n(t)=+\infty$ when the set is empty. In
+particular, note that $x\mapsto n(t)$ is non-decreasing. Denoting
+$\partial_+\mathcal{O}_A$ the right derivative of $\mathcal{O}(A,\cdot)$, one
+can write $\partial_+\mathcal{O}_A(t)
+=\frac{v_{n(t)}}{c_{n(t)}}$, with the convention that
+$\frac{v_\infty}{c_\infty} = 0$.
+
+Writing $i \defeq \sup\Big\{j\text{ s.t.
+} \frac{v_j}{c_j}\geq\frac{v}{c}\Big\}$, it is easy to see that
+$\partial_+\mathcal{O}_{A\cup\{x\}}\geq\partial_+\mathcal{O}_A$. Indeed:
+\begin{enumerate}
+ \item if $n(t)\leq i$ then $\partial_+\mathcal{O}_{A\cup\{x\}}(t)
+ = \partial_+\mathcal{O}_A(t)= \frac{v_{n(t)}}{c_{n(t)}}$.
+ \item if $n(t)\geq i+1$ and $n(t-c)\leq i$ then $\partial_+\mathcal{O}_{A\cup\{x\}}(t)
+ = \frac{v}{c}\geq\frac{v_{n(t)}}{c_{n(t)}}= \partial_+\mathcal{O}_A(t)$.
+ \item if $n(t-c)\geq i+1$, then $\partial_+\mathcal{O}_{A\cup\{x\}}
+ = \frac{v_{n(t-c)}}{c_{n(t-c)}}\geq
+ \frac{v_{n(t)}}{c_{n(t)}}=\partial_+\mathcal{O}_A(t)$.
+\end{enumerate}
+
+Let us now consider $b$ and $c$ such that $b\leq c$. Then, using the integral
+representation of $\mathcal{O}(A\cup\{x\},\cdot)$ and $\mathcal{O}(A,\cdot)$, we get:
+\begin{multline*}
+ \mathcal{O}(A\cup\{x\},c)-\mathcal{O}(A\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{A\cup\{x\}}(t)dt\\
+ \geq\int_b^c\partial_+\mathcal{O}_A(t)dt=\mathcal{O}(A,c)-\mathcal{O}(A,b)
+\end{multline*}
+Re-ordering the terms, $\mathcal{O}(A\cup\{x\},c)-\mathcal{O}(A,c)\geq
+\mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)$
+which concludes the proof of the lemma.
+\end{proof}
+
+\begin{lemma}\label{lemma:sub}
+ Let $b\in\reals^+$, then the function $A\mapsto \mathcal{O}(A,b)$ is submodular.
+\end{lemma}
+
+\begin{proof}
+ Let $A\subseteq\mathcal{U}$ and $x, y\in\mathcal{U}\setminus A$. Using the
+ second-order characterization of submodular functions, it suffices to show
+ that:
+ \begin{equation}\label{eq:so}
+ \mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)\geq
+ \mathcal{O}(A\cup\{y\}\cup\{x\},b)-\mathcal{O}(A\cup\{y\},b)
+ \end{equation}
+
+ Let us write $x=(v_x, c_x)$ and $y=(v_y,c_y)$. We distinguish two cases
+ based on the relative position of $\frac{v_x}{c_x}$ and $\frac{v_y}{c_y}$.
+ The following notations will be useful: $S_A^x \defeq \big\{(v,c)\in
+ A\text{ s.t. }\frac{v_x}{c_x}\leq\frac{v}{c}\big\}$ and $P_A^x\defeq
+ A\setminus S_A^x$.
+
+ \textbf{Case 1:} If $\frac{v_y}{c_y}\geq \frac{v_x}{c_x}$, then one can
+ write:
+ \begin{gather*}
+ \mathcal{O}(A\cup\{y\}\cup\{x\},b) = \mathcal{O}(P_A^y\cup\{y\},b_1)+
+ \mathcal{O}(S_A^y\cup\{x\},b_2)\\
+ \mathcal{O}(A\cup\{y\},b) = \mathcal{O}(P_A^y\cup\{y\},b_1)
+ + \mathcal{O}(S_A^y,b_2)
+ \end{gather*}
+ where $b_1$ is the fraction of the budget $b$ spent on $P_A^y\cup\{y\}$ and
+ $b_2=b-b_1$.
+
+ Similarly:
+ \begin{gather*}
+ \mathcal{O}(A\cup\{x\},b) = \mathcal{O}(P_A^y, c_1) + \mathcal{O}(S_A^y\cup\{x\},c_2)\\
+ \mathcal{O}(A, b) = \mathcal{O}(P_A^y, c_1) + \mathcal{O}(S_A^y,c_2)
+ \end{gather*}
+ where $c_1$ is the fraction of the budget $b$ spent on $P_A^y$ and $c_2
+ = b - c_1$.
+
+ Note that $b_1\geq c_1$: an optimal solution will first spent as much
+ budget as possible on $P_A^y\cup\{y\}$ before adding elements in
+ $S_A^y\cup\{x\}$. We can now rewrite \eqref{eq:so}:
+ \begin{displaymath}
+ \mathcal{O}(S_A^y\cup\{x\},c_2)+\mathcal{O}(S_A^y,c_2)\geq
+ \mathcal{O}(S_A^y\cup\{x\},b_2)+\mathcal{O}(S_A^y,b_2)
+ \end{displaymath}
+ with $c_2\geq b_2$. This inequality is true by Lemma~\ref{lemma:nd}.
+
+ \textbf{Case 2:} If $\frac{v_x}{c_x}\geq \frac{v_y}{c_y}$, we now decompose
+ the solution on $P_A^x$ and $S_A^x$:
+ \begin{gather*}
+ \mathcal{O}(A\cup\{x\},b) = \mathcal{O}(P_A^x\cup\{x\},b_1)
+ + \mathcal{O}(S_A^x,b_2)\\
+ \mathcal{O}(A,b) = \mathcal{O}(P_A^x,c_1)+\mathcal{O}(S_A^x,c_2)
+ \intertext{and}
+ \mathcal{O}(A\cup\{y\}\cup\{x\},b) = \mathcal{O}(P_A^x\cup\{x\},b_1)
+ + \mathcal{O}(S_A^x\cup\{y\},b_2)\\
+ \mathcal{O}(A\cup\{y\},b) = \mathcal{O}(P_A^x,c_1)+\mathcal{O}(S_A^x\cup\{y\},c_2)
+ \end{gather*}
+ with $b_1+b_2=b$, $c_1+c_2=b$ and $b_2\leq c_2$. The equation \eqref{eq:so}
+ can be rewritten:
+ \begin{displaymath}
+ \mathcal{O}(S_A^x,b_2)-\mathcal{O}(S_A^x,c_2)\geq
+ \mathcal{O}(S_A^x\cup\{u\},b_2)-\mathcal{O}(S_A^x\cup\{y\},c_2)
+ \end{displaymath}
+ Reordering the terms, we get:
+ \begin{displaymath}
+ \mathcal{O}(S_A^x\cup\{y\},c_2) -\mathcal{O}(S_A^x,c_2)\geq
+ \mathcal{O}(S_A^x\cup\{u\},b_2)-\mathcal{O}(S_A^x,b_2)
+ \end{displaymath}
+ with $c_2\geq b_2$. This is again true by Lemma~\ref{lemma:nd}.
+\end{proof}
+
+\begin{proposition}\label{prop:sub}
+ Let $b\in\mathbf{R}^+$, then the function
+ $S\mapsto\mathcal{O}(\neigh{S},b)$ is submodular over $2^X$.
+\end{proposition}
+
+\begin{proof}
+ Let us consider $S$ and $T$ such that $S\subseteq T\subseteq X$ and $x\in
+ X\setminus T$. In particular, note that $\neigh{S}\subseteq\neigh{T}$.
+
+ Let us write $\neigh{S\cup\{x\}}=\neigh{S}\cup R$ with $\neigh{S}\cap
+ R=\emptyset$ and similarly, $\neigh{T\cup\{x\}}=\neigh{T}\cup R'$ with
+ $\neigh{T}\cap R'=\emptyset$. Using these notations, it is clear that
+ $R'\subseteq R$. Let us enumerate the elements in $R'$ using an arbitrary
+ order: $R'=\{u_1,\ldots,u_k\}$. Then one can write:
+ \begin{displaymath}
+ \begin{split}
+ \mathcal{O}(\neigh{T\cup\{x\}},b)&=\mathcal{O}(\neigh{T}\cup R',b)\\
+ &=\sum_{i=1}^k\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_i\},b)
+ -\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_{i-1}\},b)\\
+ &\leq \sum_{i=1}^k\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_i\},b)
+ -\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_{i-1}\},b)\\
+ &=\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
+ \end{split}
+ \end{displaymath}
+ where the inequality comes from the submodularity of
+ $A\mapsto\mathcal{O}(A,b)$ proved in Lemma~\ref{lemma:sub}. This same
+ function is also obviously set-increasing, hence:
+ \begin{displaymath}
+ \begin{split}
+ \mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
+ &\leq \mathcal{O}(\neigh{S}\cup R,b)-\mathcal{O}(\neigh{S},b)\\
+ &=\mathcal{O}(\neigh{S\cup\{x\}},b)-\mathcal{O}(\neigh{S},b)
+ \end{split}
+ \end{displaymath}
+ This concludes the proof of the proposition.
+\end{proof}
+
+Using Proposition~\ref{prop:sub}, we can finally reduce \eqref{eq:sub} to
+a submodular optimization problem by treating $|S|$ as a variable to optimize.
+Specifically, \eqref{eq:sub} is equivalent to the following problem:
+\begin{equation}\label{eq:sub2}
+\begin{split}
+ \max_{1\leq t\leq k}\max_{|S|\leq k-t}\mathcal{O}(\neigh{S},t)
+\end{split}
+\end{equation}
+
+\begin{algorithm}
+ \caption{Combinatorial algorithm}
+ \label{alg:comb}
+ \algsetup{indent=2em}
+ \begin{algorithmic}[1]
+ \STATE $S\leftarrow \emptyset$
+ \FOR{$t=1$ \TO $k-1$}
+ \STATE $S_t\leftarrow \emptyset$
+ \FOR{$i=1$ \TO $k-t$}
+ \STATE $x^*\leftarrow\argmax_{x\in
+ X\setminus S_t}\mathcal{O}(\neigh{S_t\cup\{x\}},t)
+ -\mathcal{O}(\neigh{S_t},t)$\label{line:argmax}
+ \STATE $S_t\leftarrow S_t\cup\{x^*\}$
+ \ENDFOR
+ \IF{$\mathcal{O}(\neigh{S_t},t)>\mathcal{O}(\neigh{S},k-|S|)$}
+ \STATE $S\leftarrow S_t$
+ \ENDIF
+ \ENDFOR
+ \RETURN $S$
+ \end{algorithmic}
+\end{algorithm}
+
+\begin{proposition}
+ Let $S$ be the set computed by Algorithm~\ref{alg:comb} and let us denote
+ by $A(S)$ the value of the adaptive policy which selects $S$ on the first
+ day. Then:
+ \begin{displaymath}
+ A(S) \geq \frac{1}{2}\left(1-\frac{1}{e}\right)OPT.
+ \end{displaymath}
+ Furthermore, Algorithm~\ref{alg:comb} runs in time $O\left(n\log
+ n+k^2m\min(\frac{k}{p_\text{min}},n)\right)$, where $m=|X|$, $n=|\neigh{X}|$
+ and $p_\text{min} =\min\{p_u,u\in\neigh{X}\}$.
+\end{proposition}
+
+\begin{proof}
+ For each realization of the neighbors $R$, let us define by $X_R$ the
+ random set which includes each $u\in R$ with probability $q_u$ on the
+ second day and:
+ \begin{displaymath}
+ \tilde{X}_R=
+ \begin{cases}
+ X_R&\text{if } |X_R|\leq t\\
+ \emptyset&\text{otherwise}
+ \end{cases}
+ \end{displaymath}
+ where $t = k-|S|$. $\tilde{X}_R$ is a random variable over the feasible
+ solutions on the second day. As a consequence:
+ \begin{displaymath}
+ A(S) \geq \sum_{R\subseteq\neigh{S}} p_R\mathbb{E}\big[f(\tilde{X}_R)\big]
+ \end{displaymath}
+ Let us define by $Y$ the random set which includes each $u\in\neigh{X}$
+ with probability $p_uq_u$ and:
+ \begin{displaymath}
+ \tilde{Y}=
+ \begin{cases}
+ Y&\text{if } |Y|\leq t\\
+ \emptyset&\text{otherwise}
+ \end{cases}.
+ \end{displaymath}
+ It is easy to see that $\tilde{X}_R$ is the conditional expectation of
+ $\tilde{Y}$ given
+ $R$. Hence:
+ \begin{displaymath}
+ \sum_{R\subseteq\neigh{S}} p_R\mathbb{E}\big[f(\tilde{X}_R)\big]
+ = \mathbb{E}\big[f(\tilde{Y})\big]
+ \end{displaymath}
+ But:
+ \begin{align*}
+ \mathbb{E}\big[f(\tilde{Y})\big]
+ &=\sum_{T\subseteq\neigh{X}}\mathbb{P}\big[\tilde{Y}=T\big]f(T)
+ = \sum_{\substack{T\subseteq\neigh{X}\\|T|\leq
+ t}}\mathbb{P}\big[Y=T\big]f(T)\\
+ &= \mathbb{E}\big[f(Y)\big]-\sum_{\substack{T\subseteq\neigh{X}\\|T|>
+ t}}\mathbb{P}\big[Y=T\big]f(T)
+ \end{align*}
+ Roughly:
+ \begin{displaymath}
+ \sum_{\substack{T\subseteq\neigh{X}\\|T|>
+ t}}\mathbb{P}\big[Y=T\big]f(T)\leq
+ \frac{1}{2}\mathbb{E}\big[f(Y)\big]
+ \end{displaymath}
+ Combining the above inequalities we get:
+ \begin{displaymath}
+ A(S)\geq \frac{1}{2} \mathbb{E}\big[f(Y)\big]
+ \end{displaymath}
+ But $\mathbb{E}\big[f(Y)\big]$ is precisely the value of the non-adaptive
+ solution computed by Algorithm~\ref{alg:comb} which is
+ a $\left(1-\frac{1}{e}\right)$ approximation of the
+ optimal non adaptive solution. Hence:
+ \begin{displaymath}
+ A(S) \geq \frac{1}{2}\left(1-\frac{1}{e}\right) OPT_{NA}
+ \end{displaymath}
+ Finally, we conclude with Proposition~\ref{prop:gap}
+\end{proof}
+
+\subsection{From expectation to high probability}
+
+\subsection{Questions}
+
+\begin{itemize}
+ \item Can it be parallelized (map-reduce)?
+\end{itemize}