1 files changed, 299 insertions, 0 deletions
diff --git a/paper/sections/combinatorial.tex b/paper/sections/combinatorial.tex
new file mode 100644
index 0000000..b744564
--- /dev/null
+++ b/paper/sections/combinatorial.tex
@@ -0,0 +1,299 @@
+\subsection{Submodularity of the problem}
+Let $\mathcal{U} = \{(v_u,c_u);\, u\in\neigh{X}\}$ with $v_u=p_uw_u$ and
+$c_u=p_u$.  Let $b\in\reals^+$ and $A\subseteq\mathcal{U}$, we denote by
+$\mathcal{O}(A,b)$ the optimal solution to:
+\begin{displaymath}
+    \begin{split}
+        &\max_{q\in[0,1]^n}\sum_{u\in\neigh{X}}q_uv_u\\
+        &\text{s.t. }\sum_{u\in\neigh{X}}q_uc_u\leq b\text{ and } q_u=0\text{ if
+    }u\notin A
+\end{split}
+\end{displaymath}
+so that the optimization problem \eqref{eq:relaxed-multi} for relaxed
+non-adaptive policies can be written:
+\begin{equation}\label{eq:sub}
+    \begin{split}
+        &\max_{S\subseteq X}\mathcal{O}\big(\neigh{S},k-|S|\big)\\
+        &\text{s.t. }|S|\leq k
+    \end{split}
+\end{equation}
+
+In this section, we will show how \eqref{eq:sub} can be reduced to a submodular
+maximization problem, leading to a fast combinatorial algorithm for the
+adaptive seeding model in the voter model.
+
+\begin{lemma}\label{lemma:nd}
+    Let $A\subseteq\mathcal{U}$ and $x=(v,c)\in\mathcal{U}\setminus A$, then
+    the function $b\mapsto \mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)$ is
+    non-decreasing over $\reals^+$.
+\end{lemma}
+
+\begin{proof}
+\emph{W.l.o.g} we can rename and order the pairs in $A$ so that:
+\begin{displaymath}
+\frac{v_1}{c_1}\geq \frac{v_2}{c_2}\geq\ldots\geq\frac{v_m}{c_m}.
+\end{displaymath}
+Then, $\mathcal{O}(A,b)$ has the following simple piecewise linear expression:
+\begin{equation}\label{eq:pw}
+    \mathcal{O}(A,b) = 
+    \begin{cases}
+        b\frac{v_1}{c_1}&\text{if }0\leq b<c_1\\
+        \sum_{k=1}^{i-1}v_k + \left(b-\sum_{k=1}^{i-1}c_k\right)\frac{v_i}{c_i}
+        &\text{if } \sum_{k=1}^{i-1}c_k\leq b< \sum_{k=1}^ic_k\\
+        \sum_{k=1}^m v_k&\text{if } b\geq\sum_{i=1}^m c_k
+
+    \end{cases}
+\end{equation}
+
+Let us define for $t\in\reals^+$, $n(t)\defeq \inf\Big\{i\text{ s.t.
+} \sum_{k=1}^i c_k > t\Big \}$ with $n(t)=+\infty$ when the set is empty. In
+particular, note that $x\mapsto n(t)$ is non-decreasing. Denoting
+$\partial_+\mathcal{O}_A$ the right derivative of $\mathcal{O}(A,\cdot)$, one
+can write $\partial_+\mathcal{O}_A(t)
+=\frac{v_{n(t)}}{c_{n(t)}}$, with the convention that
+$\frac{v_\infty}{c_\infty} = 0$.
+
+Writing $i \defeq \sup\Big\{j\text{ s.t.
+} \frac{v_j}{c_j}\geq\frac{v}{c}\Big\}$, it is easy to see that
+$\partial_+\mathcal{O}_{A\cup\{x\}}\geq\partial_+\mathcal{O}_A$. Indeed:
+\begin{enumerate}
+    \item if $n(t)\leq i$ then $\partial_+\mathcal{O}_{A\cup\{x\}}(t)
+        = \partial_+\mathcal{O}_A(t)= \frac{v_{n(t)}}{c_{n(t)}}$.
+    \item if $n(t)\geq i+1$ and $n(t-c)\leq i$ then $\partial_+\mathcal{O}_{A\cup\{x\}}(t)
+        = \frac{v}{c}\geq\frac{v_{n(t)}}{c_{n(t)}}= \partial_+\mathcal{O}_A(t)$.
+    \item if $n(t-c)\geq i+1$, then $\partial_+\mathcal{O}_{A\cup\{x\}}
+    = \frac{v_{n(t-c)}}{c_{n(t-c)}}\geq
+    \frac{v_{n(t)}}{c_{n(t)}}=\partial_+\mathcal{O}_A(t)$.
+\end{enumerate}
+
+Let us now consider $b$ and $c$ such that $b\leq c$. Then, using the integral
+representation of $\mathcal{O}(A\cup\{x\},\cdot)$ and $\mathcal{O}(A,\cdot)$, we get:
+\begin{multline*}
+    \mathcal{O}(A\cup\{x\},c)-\mathcal{O}(A\cup\{x\},b)=\int_b^c\partial_+\mathcal{O}_{A\cup\{x\}}(t)dt\\
+    \geq\int_b^c\partial_+\mathcal{O}_A(t)dt=\mathcal{O}(A,c)-\mathcal{O}(A,b)
+\end{multline*}
+Re-ordering the terms, $\mathcal{O}(A\cup\{x\},c)-\mathcal{O}(A,c)\geq
+\mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)$
+which concludes the proof of the lemma.
+\end{proof}
+
+\begin{lemma}\label{lemma:sub}
+    Let $b\in\reals^+$, then the function $A\mapsto \mathcal{O}(A,b)$ is submodular.
+\end{lemma}
+
+\begin{proof}
+    Let $A\subseteq\mathcal{U}$ and $x, y\in\mathcal{U}\setminus A$. Using the
+    second-order characterization of submodular functions, it suffices to show
+    that:
+    \begin{equation}\label{eq:so}
+        \mathcal{O}(A\cup\{x\},b)-\mathcal{O}(A,b)\geq
+        \mathcal{O}(A\cup\{y\}\cup\{x\},b)-\mathcal{O}(A\cup\{y\},b)
+    \end{equation}
+
+    Let us write $x=(v_x, c_x)$ and $y=(v_y,c_y)$. We distinguish two cases
+    based on the relative position of $\frac{v_x}{c_x}$ and $\frac{v_y}{c_y}$.
+    The following notations will be useful: $S_A^x \defeq \big\{(v,c)\in
+    A\text{ s.t.  }\frac{v_x}{c_x}\leq\frac{v}{c}\big\}$ and $P_A^x\defeq
+    A\setminus S_A^x$.
+
+    \textbf{Case 1:} If $\frac{v_y}{c_y}\geq \frac{v_x}{c_x}$, then one can
+    write:
+    \begin{gather*}
+        \mathcal{O}(A\cup\{y\}\cup\{x\},b) = \mathcal{O}(P_A^y\cup\{y\},b_1)+
+        \mathcal{O}(S_A^y\cup\{x\},b_2)\\
+        \mathcal{O}(A\cup\{y\},b) = \mathcal{O}(P_A^y\cup\{y\},b_1)
+        + \mathcal{O}(S_A^y,b_2)
+    \end{gather*}
+    where $b_1$ is the fraction of the budget $b$ spent on $P_A^y\cup\{y\}$ and
+    $b_2=b-b_1$.
+    
+    Similarly:
+    \begin{gather*}
+        \mathcal{O}(A\cup\{x\},b) = \mathcal{O}(P_A^y, c_1) + \mathcal{O}(S_A^y\cup\{x\},c_2)\\
+        \mathcal{O}(A, b) = \mathcal{O}(P_A^y, c_1) + \mathcal{O}(S_A^y,c_2)
+    \end{gather*}
+    where $c_1$ is the fraction of the budget $b$ spent on $P_A^y$ and $c_2
+    = b - c_1$. 
+
+    Note that $b_1\geq c_1$: an optimal solution will first spent as much
+    budget as possible on $P_A^y\cup\{y\}$ before adding elements in
+    $S_A^y\cup\{x\}$. We can now rewrite \eqref{eq:so}:
+    \begin{displaymath}
+        \mathcal{O}(S_A^y\cup\{x\},c_2)+\mathcal{O}(S_A^y,c_2)\geq
+        \mathcal{O}(S_A^y\cup\{x\},b_2)+\mathcal{O}(S_A^y,b_2)
+    \end{displaymath}
+    with $c_2\geq b_2$. This inequality is true by Lemma~\ref{lemma:nd}.
+
+    \textbf{Case 2:} If $\frac{v_x}{c_x}\geq \frac{v_y}{c_y}$, we now decompose
+    the solution on $P_A^x$ and $S_A^x$:
+    \begin{gather*}
+        \mathcal{O}(A\cup\{x\},b) = \mathcal{O}(P_A^x\cup\{x\},b_1)
+        + \mathcal{O}(S_A^x,b_2)\\
+        \mathcal{O}(A,b) = \mathcal{O}(P_A^x,c_1)+\mathcal{O}(S_A^x,c_2)
+        \intertext{and}
+        \mathcal{O}(A\cup\{y\}\cup\{x\},b) = \mathcal{O}(P_A^x\cup\{x\},b_1)
+        + \mathcal{O}(S_A^x\cup\{y\},b_2)\\
+        \mathcal{O}(A\cup\{y\},b) = \mathcal{O}(P_A^x,c_1)+\mathcal{O}(S_A^x\cup\{y\},c_2)
+    \end{gather*}
+    with $b_1+b_2=b$, $c_1+c_2=b$ and $b_2\leq c_2$. The equation \eqref{eq:so}
+    can be rewritten:
+    \begin{displaymath}
+        \mathcal{O}(S_A^x,b_2)-\mathcal{O}(S_A^x,c_2)\geq
+        \mathcal{O}(S_A^x\cup\{u\},b_2)-\mathcal{O}(S_A^x\cup\{y\},c_2)
+    \end{displaymath}
+    Reordering the terms, we get:
+    \begin{displaymath}
+        \mathcal{O}(S_A^x\cup\{y\},c_2) -\mathcal{O}(S_A^x,c_2)\geq
+        \mathcal{O}(S_A^x\cup\{u\},b_2)-\mathcal{O}(S_A^x,b_2)
+    \end{displaymath}
+    with $c_2\geq b_2$. This is again true by Lemma~\ref{lemma:nd}.
+\end{proof}
+
+\begin{proposition}\label{prop:sub}
+    Let $b\in\mathbf{R}^+$, then the function
+    $S\mapsto\mathcal{O}(\neigh{S},b)$ is submodular over $2^X$.
+\end{proposition}
+
+\begin{proof}
+    Let us consider $S$ and $T$ such that $S\subseteq T\subseteq X$ and $x\in
+    X\setminus T$. In particular, note that $\neigh{S}\subseteq\neigh{T}$. 
+
+    Let us write $\neigh{S\cup\{x\}}=\neigh{S}\cup R$ with $\neigh{S}\cap
+    R=\emptyset$ and similarly, $\neigh{T\cup\{x\}}=\neigh{T}\cup R'$ with
+    $\neigh{T}\cap R'=\emptyset$. Using these notations, it is clear that
+    $R'\subseteq R$. Let us enumerate the elements in $R'$ using an arbitrary
+    order: $R'=\{u_1,\ldots,u_k\}$. Then one can write:
+    \begin{displaymath}
+        \begin{split}
+            \mathcal{O}(\neigh{T\cup\{x\}},b)&=\mathcal{O}(\neigh{T}\cup R',b)\\
+            &=\sum_{i=1}^k\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_i\},b)
+            -\mathcal{O}(\neigh{T}\cup\{u_1,\ldots u_{i-1}\},b)\\
+            &\leq \sum_{i=1}^k\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_i\},b)
+            -\mathcal{O}(\neigh{S}\cup\{u_1,\ldots u_{i-1}\},b)\\
+            &=\mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
+        \end{split}
+    \end{displaymath}
+    where the inequality comes from the submodularity of
+    $A\mapsto\mathcal{O}(A,b)$ proved in Lemma~\ref{lemma:sub}. This same
+    function is also obviously set-increasing, hence:
+    \begin{displaymath}
+        \begin{split}
+            \mathcal{O}(\neigh{S}\cup R',b)-\mathcal{O}(\neigh{S},b)
+            &\leq \mathcal{O}(\neigh{S}\cup R,b)-\mathcal{O}(\neigh{S},b)\\
+            &=\mathcal{O}(\neigh{S\cup\{x\}},b)-\mathcal{O}(\neigh{S},b)
+        \end{split}
+    \end{displaymath}
+    This concludes the proof of the proposition.
+\end{proof}
+
+Using Proposition~\ref{prop:sub}, we can finally reduce \eqref{eq:sub} to
+a submodular optimization problem by treating $|S|$ as a variable to optimize.
+Specifically, \eqref{eq:sub} is equivalent to the following problem:
+\begin{equation}\label{eq:sub2}
+\begin{split}
+    \max_{1\leq t\leq k}\max_{|S|\leq k-t}\mathcal{O}(\neigh{S},t)
+\end{split}
+\end{equation}
+
+\begin{algorithm}
+    \caption{Combinatorial algorithm}
+    \label{alg:comb}
+    \algsetup{indent=2em}
+    \begin{algorithmic}[1]
+        \STATE $S\leftarrow \emptyset$
+        \FOR{$t=1$ \TO $k-1$}
+            \STATE $S_t\leftarrow \emptyset$
+            \FOR{$i=1$ \TO $k-t$}
+                \STATE $x^*\leftarrow\argmax_{x\in
+                X\setminus S_t}\mathcal{O}(\neigh{S_t\cup\{x\}},t)
+                -\mathcal{O}(\neigh{S_t},t)$\label{line:argmax}
+                \STATE $S_t\leftarrow S_t\cup\{x^*\}$
+            \ENDFOR
+            \IF{$\mathcal{O}(\neigh{S_t},t)>\mathcal{O}(\neigh{S},k-|S|)$}
+                \STATE $S\leftarrow S_t$
+            \ENDIF
+        \ENDFOR
+        \RETURN $S$
+    \end{algorithmic}
+\end{algorithm}
+
+\begin{proposition}
+    Let $S$ be the set computed by Algorithm~\ref{alg:comb} and let us denote
+    by $A(S)$ the value of the adaptive policy which selects $S$ on the first
+    day. Then:
+    \begin{displaymath}
+        A(S) \geq \frac{1}{2}\left(1-\frac{1}{e}\right)OPT.
+    \end{displaymath}
+    Furthermore, Algorithm~\ref{alg:comb} runs in time $O\left(n\log
+    n+k^2m\min(\frac{k}{p_\text{min}},n)\right)$, where $m=|X|$, $n=|\neigh{X}|$
+    and $p_\text{min} =\min\{p_u,u\in\neigh{X}\}$.
+\end{proposition}
+
+\begin{proof}
+    For each realization of the neighbors $R$, let us define by $X_R$ the
+    random set which includes each $u\in R$ with probability $q_u$ on the
+    second day and:
+    \begin{displaymath}
+        \tilde{X}_R=
+    \begin{cases}
+        X_R&\text{if } |X_R|\leq t\\
+        \emptyset&\text{otherwise}
+    \end{cases}
+    \end{displaymath}
+    where $t = k-|S|$. $\tilde{X}_R$ is a random variable over the feasible
+    solutions on the second day. As a consequence:
+    \begin{displaymath}
+        A(S) \geq \sum_{R\subseteq\neigh{S}} p_R\mathbb{E}\big[f(\tilde{X}_R)\big]
+    \end{displaymath}
+    Let us define by $Y$ the random set which includes each $u\in\neigh{X}$
+    with probability $p_uq_u$ and:
+    \begin{displaymath}
+        \tilde{Y}=
+        \begin{cases}
+            Y&\text{if } |Y|\leq t\\
+            \emptyset&\text{otherwise}
+        \end{cases}.
+    \end{displaymath}
+    It is easy to see that $\tilde{X}_R$ is the conditional expectation of
+    $\tilde{Y}$ given
+    $R$. Hence:
+    \begin{displaymath}
+        \sum_{R\subseteq\neigh{S}} p_R\mathbb{E}\big[f(\tilde{X}_R)\big]
+        = \mathbb{E}\big[f(\tilde{Y})\big]
+    \end{displaymath}
+    But:
+    \begin{align*}
+        \mathbb{E}\big[f(\tilde{Y})\big]
+        &=\sum_{T\subseteq\neigh{X}}\mathbb{P}\big[\tilde{Y}=T\big]f(T)
+    = \sum_{\substack{T\subseteq\neigh{X}\\|T|\leq
+    t}}\mathbb{P}\big[Y=T\big]f(T)\\
+    &= \mathbb{E}\big[f(Y)\big]-\sum_{\substack{T\subseteq\neigh{X}\\|T|>
+    t}}\mathbb{P}\big[Y=T\big]f(T)
+    \end{align*}
+    Roughly:
+    \begin{displaymath}
+        \sum_{\substack{T\subseteq\neigh{X}\\|T|>
+        t}}\mathbb{P}\big[Y=T\big]f(T)\leq
+        \frac{1}{2}\mathbb{E}\big[f(Y)\big]
+    \end{displaymath}
+    Combining the above inequalities we get:
+    \begin{displaymath}
+        A(S)\geq \frac{1}{2} \mathbb{E}\big[f(Y)\big]
+    \end{displaymath}
+    But $\mathbb{E}\big[f(Y)\big]$ is precisely the value of the non-adaptive
+    solution computed by Algorithm~\ref{alg:comb} which is
+    a $\left(1-\frac{1}{e}\right)$ approximation of the
+    optimal non adaptive solution. Hence:
+    \begin{displaymath}
+        A(S) \geq \frac{1}{2}\left(1-\frac{1}{e}\right) OPT_{NA}
+    \end{displaymath}
+    Finally, we conclude with Proposition~\ref{prop:gap}
+\end{proof}
+
+\subsection{From expectation to high probability}
+
+\subsection{Questions}
+
+\begin{itemize}
+    \item Can it be parallelized (map-reduce)?
+\end{itemize}