| field | value | date |
|---|---|---|
| author | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-03-20 11:45:52 -0400 |
| committer | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-03-20 11:45:52 -0400 |
| commit | 3dd861100ecbcef42e8e92758cd0e2bdd6b6d623 (patch) | |
| tree | df7b7ffac06b9a80fc9ed904f49e488c94c65085 | |
| parent | da096d3c4353efae4f9003e5567f0fe9ded30150 (diff) | |
| download | learn-optimize-3dd861100ecbcef42e8e92758cd0e2bdd6b6d623.tar.gz | |
adding influence function
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | results.tex | 108 |

1 file changed, 101 insertions, 7 deletions
diff --git a/results.tex b/results.tex
index d5fc5cb..abb5f8e 100644
--- a/results.tex
+++ b/results.tex
@@ -1,11 +1,19 @@
 \documentclass[10pt]{article}
-\usepackage{fullpage, amsmath, amssymb, amsthm}
+\usepackage{fullpage, amsmath, amssymb, amsthm, bbm}
 \usepackage[utf8x]{inputenc}
+\DeclareMathOperator{\E}{\mathbb{E}}
+\let\P\relax
+\DeclareMathOperator{\P}{\mathbb{P}}
+\newcommand{\ex}[1]{\E\left[#1\right]}
+\newcommand{\prob}[1]{\P\left[#1\right]}
+
+
 \newtheorem{proposition}{Proposition}
 \newtheorem{corollary}{Corollary}
 \newtheorem{problem}{Problem}
 \newtheorem{theorem}{Theorem}
+\newtheorem{claim}{Claim}
 
 \title{Learn and/or Optimize}
 \author{}
@@ -225,13 +233,13 @@ fraction of the non-zero sets is a big problem
 
 \section{Passive Optimization vs. Passive Learning}
 
-\subsection{Returning max of observations}
+\subsection{Failed Attempt: returning max of observations}
 
 This doesn't work. Give examples as to why! Remember that there are strong
 concentration results for submodular functions -> look at expected value of
 observed sets
 
-\subsection{Passive Optimization can be easier than Passive Learning}
+\subsection{Example where optimization possible, learning impossible}
 
 Recall the matroid construction from~\cite{balcan2011learning}:
 \begin{theorem}
@@ -259,10 +267,10 @@ optimize $f$ exactly!
 Indeed, the largest possible value for $f$ under cardinality constraint
 $n^{1/3}$ is $\max_{A\in 2^{[n]}} |A| = n^{1/3}$.
 One example of a distribution under which this occurs with probability at least
-a constant is $D_u$, the uniform distribution over all sets of ${\cal A}$.
-For $c>0$, after $n^c$ observations $O \equiv (S_i, f(S_i))_i$ for $S_i \sim D_u$, we will
-observe at least one element from $\cal{A}\backslash\cal{B}$ with constant
-probability:
+a constant is $D_u$, the uniform distribution over all sets of ${\cal A}$. For
+$c>0$, after $n^c$ observations $O \equiv (S_i, f(S_i))_i$ for $S_i \sim D_u$,
+we will observe at least one element from $\cal{A}\backslash\cal{B}$ with
+constant probability:
 \begin{equation}
 \nonumber
 \mathbb{P}(\bigcup_{S_i} S_i \in {\cal A}\backslash{\cal B}) \geq 1 - (1 - \epsilon)^{n^c} \approx \epsilon n^c
@@ -278,6 +286,92 @@ A}\backslash {\cal B}$.
 
 {\bf TODO:} A cleaner simpler example would be nice.
 
+\section{The case of parametric submodular functions}
+
+\subsection{Influence Functions}
+
+Let $G = (V, E)$ be a weighted directed graph. Without loss of generality, we
+can assign a weight $p_{u,v} \in [0,1]$ to every possible edge $(u,v) \in V^2$.
+Let $m$ be the number of edges of $G$ with non-zero weight. Let
+$\sigma_G(S, p)$ be the influence of the set $S \subseteq V$ in $G$ under the
+IC model with parameters $p$.
+
+Recall from \cite{Kempe:03} that:
+\begin{equation}
+    \sigma_G(S, p) = \sum_{v\in V} \P_{G_p}\big[r_{G_p}(S\leadsto v)\big]
+\end{equation}
+where $G_p$ is a random graph in which each edge $(u,v)\in E$ appears with
+probability $p_{u,v}$, and $r_{G_p}(S\leadsto v)$ is the indicator variable of
+the event \emph{there exists a path from $S$ to $v$ in $G_p$}.
+
+\begin{claim}
+\label{cla:oracle}
+If for all $(u,v) \in V^2$, $p_{u,v} \geq p'_{u,v} \geq p_{u,v}
+- \frac{1}{\alpha m}$, then:
+\begin{displaymath}
+    \forall S \subseteq V,\, \sigma_{G}(S, p) \geq \sigma_{G}(S,p') \geq (1
+    - 1/\alpha) \sigma_{G}(S,p)
+\end{displaymath}
+\end{claim}
+
+\begin{proof}
+    We define two coupled random graphs $G_p$ and $G_{p'}$ as follows: for each
+    edge $(u,v)\in E$, draw a uniform random variable $\mathcal{U}_{u,v}\in
+    [0,1]$. Include $(u,v)$ in $G_p$ (resp. $G_{p'}$) iff
+    $\mathcal{U}_{u,v}\leq p_{u,v}$ (resp. $\mathcal{U}_{u,v}\leq p'_{u,v}$).
+    It is clear from the construction that each edge $(u,v)$ will be present in
+    $G_p$ (resp. $G_{p'}$) with probability $p_{u,v}$ (resp. $p'_{u,v}$). Hence:
+    \begin{displaymath}
+        \sigma_G(S, p) = \sum_{v\in V} \P_{G_p}\big[r_{G_p}(S\leadsto v)\big]
+        \text{ and }
+        \sigma_G(S, p') = \sum_{v\in V} \P_{G_{p'}}\big[r_{G_{p'}}(S\leadsto v)\big]
+    \end{displaymath}
+
+    By construction, $G_{p'}$ is always a subgraph of $G_p$, i.e.
+    $r_{G_{p'}}(S\leadsto v)\leq r_{G_p}(S\leadsto v)$. This proves the
+    left-hand side of the Claim's inequality.
+
+    The probability that an edge is present in $G_p$ but not in $G_{p'}$ is at
+    most $\frac{1}{\alpha m}$, so with probability at least
+    $\left(1-\frac{1}{\alpha m}\right)^m$, $G_p$ and $G_{p'}$ have the same set
+    of edges. This implies that:
+    \begin{displaymath}
+        \P_{G_{p'}}\big[r_{G_{p'}}(S\leadsto v)\big]\geq
+        \left(1-\frac{1}{\alpha m}\right)^m\P_{G_{p}}\big[r_{G_p}(S\leadsto v)\big]
+    \end{displaymath}
+    which proves the right-hand side of the claim after observing that
+    $\left(1-\frac{1}{\alpha m}\right)^m\geq 1-\frac{1}{\alpha}$, with equality
+    iff $m=1$.
+\end{proof}
+
+We can use Claim~\ref{cla:oracle} to obtain a constant-factor approximation
+algorithm for maximising influence on $G$ by maximising influence on a known
+surrogate graph. For $k \in \mathbb{N}^*$, let $O_k \in \arg\max_{|S| \leq k}
+\sigma_G(S)$ and $\sigma_{G}^* = \sigma_G(O_k)$, where we omit the $k$ when it
+is unambiguous.
+
+\begin{proposition}
+\label{prop:approx_optim}
+Suppose we have an unknown graph $G$ and a known graph $G'$ such that $V = V'$
+and for all $(u,v) \in V^2$, $|p_{u,v} - p'_{u,v}| \leq \frac{1}{\alpha m}$.
+Then for all $k \in \mathbb{N}^*$, we can find a set $\hat O_k$ such that
+$\sigma_{G}(\hat O_k) \geq (1 - e^{\frac{2}{\alpha} - 1})\, \sigma^*_{G}$.
+\end{proposition}
+
+\begin{proof}
+    For every edge $(u,v) \in V^2$, let $\hat p_{u,v} = p'_{u,v} -
+    \frac{1}{\alpha m}$. We are now in the conditions of Claim~\ref{cla:oracle}
+    with $\alpha \leftarrow \alpha/2$. We return the set $\hat O_k$ obtained by
+    greedy maximisation on $\hat G$, the graph with edge probabilities $\hat p$.
+    It is a classic exercise to show then that
+    $\sigma_G(\hat O_k) \geq (1 - e^{\frac{2}{\alpha} - 1})\,\sigma^*_G$
+    (see Pset 1, CS284r).
+\end{proof}
+
+A small note on the approximation factor: it is only $>0$ for $\alpha > 2$.
+Note that $\alpha \geq 7 \implies 1 - e^{\frac{2}{\alpha} - 1} \geq
+\frac{1}{2}$, and that it converges to $1 - 1/e$, which is the best possible
+polynomial-time approximation ratio for influence maximisation unless $P = NP$.
+Also note that in the limit of large $m$,
+$\left(1 -\frac{1}{\alpha m}\right)^m \rightarrow \exp(-1/\alpha)$ and the
+approximation ratio goes to $1 - \exp(-\exp(-2/\alpha))$.
+
+\subsection{Active set selection of stationary Gaussian Processes}
+
 \bibliography{optimize}
 \bibliographystyle{apalike}
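
The random-graph characterization of $\sigma_G(S, p)$ quoted in the new section (from \cite{Kempe:03}) translates directly into a Monte Carlo estimator, and the coupling used in the proof of the Claim (one shared uniform $\mathcal{U}_{u,v}$ deciding membership in both $G_p$ and $G_{p'}$) is equally mechanical. Below is a minimal Python sketch of both, not part of the commit; the graph representation (a node list plus a dict of edge probabilities keyed by `(u, v)` tuples) and all function names are illustrative assumptions.

```python
import random
from collections import deque

def sample_live_edges(edges_p, rng):
    # Live-edge graph G_p: keep edge (u, v) independently with probability p[u, v].
    return {e for e, p in edges_p.items() if rng.random() < p}

def sample_coupled_live_edges(edges_p, edges_p_prime, rng):
    # Coupling from the Claim's proof: one uniform per edge decides membership
    # in both graphs, so p' <= p (edge-wise) implies G_{p'} is a subgraph of G_p.
    live_p, live_pp = set(), set()
    for e, p in edges_p.items():
        u = rng.random()
        if u < p:
            live_p.add(e)
        if u < edges_p_prime[e]:
            live_pp.add(e)
    return live_p, live_pp

def reachable_count(nodes, live_edges, seeds):
    # Number of nodes reachable from the seed set S via live edges (BFS).
    adj = {v: [] for v in nodes}
    for u, v in live_edges:
        adj[u].append(v)
    seen, queue = set(seeds), deque(seeds)
    while queue:
        u = queue.popleft()
        for v in adj[u]:
            if v not in seen:
                seen.add(v)
                queue.append(v)
    return len(seen)

def estimate_influence(nodes, edges_p, seeds, n_samples=1000, seed=0):
    # Monte Carlo estimate of sigma_G(S, p) = E[# nodes reachable from S in G_p].
    rng = random.Random(seed)
    return sum(
        reachable_count(nodes, sample_live_edges(edges_p, rng), seeds)
        for _ in range(n_samples)
    ) / n_samples

# Example (illustrative): a path a -> b -> c with probability 0.5 per edge.
nodes = ["a", "b", "c"]
edges_p = {("a", "b"): 0.5, ("b", "c"): 0.5}
print(estimate_influence(nodes, edges_p, {"a"}))  # roughly 1 + 0.5 + 0.25 = 1.75
```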

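The recipe of the Proposition added in this commit (shift the known probabilities down by $1/(\alpha m)$ and run greedy maximisation on the resulting surrogate graph $\hat G$) could be sketched as below. This is illustrative code, not the authors' implementation: `influence` stands for any estimator of $\sigma_{\hat G}$, for example the Monte Carlo sketch above evaluated on the shifted probabilities, and the parameter values in the usage comment are made up. Plain greedy re-estimates the objective from scratch at every step; a practical implementation would use lazy (CELF-style) evaluations, which keep the same guarantee.

```python
def shifted_probabilities(edges_p_prime, alpha, m):
    # Surrogate probabilities p_hat = p' - 1/(alpha*m), clipped at 0, so that
    # p >= p_hat >= p - 2/(alpha*m) as in the proof of the Proposition.
    return {e: max(0.0, q - 1.0 / (alpha * m)) for e, q in edges_p_prime.items()}

def greedy_seed_set(nodes, k, influence):
    # Greedy maximisation of a monotone submodular set function under a
    # cardinality constraint: repeatedly add the node with the largest
    # estimated marginal gain. `influence` maps a set of nodes to a value.
    selected = set()
    for _ in range(k):
        base = influence(selected)
        best, best_gain = None, float("-inf")
        for v in nodes:
            if v in selected:
                continue
            gain = influence(selected | {v}) - base
            if gain > best_gain:
                best, best_gain = v, gain
        if best is None:
            break
        selected.add(best)
    return selected

# Usage sketch (with the estimator from the previous block; values illustrative):
# p_hat = shifted_probabilities(edges_p_prime, alpha=8, m=len(edges_p_prime))
# seeds = greedy_seed_set(nodes, k=2,
#                         influence=lambda S: estimate_influence(nodes, p_hat, S))
```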