-rw-r--r--  results.tex  108
1 files changed, 101 insertions, 7 deletions
diff --git a/results.tex b/results.tex
index d5fc5cb..abb5f8e 100644
--- a/results.tex
+++ b/results.tex
@@ -1,11 +1,19 @@
\documentclass[10pt]{article}
-\usepackage{fullpage, amsmath, amssymb, amsthm}
+\usepackage{fullpage, amsmath, amssymb, amsthm, bbm}
\usepackage[utf8x]{inputenc}
+\DeclareMathOperator{\E}{\mathbb{E}}
+\let\P\relax
+\DeclareMathOperator{\P}{\mathbb{P}}
+\newcommand{\ex}[1]{\E\left[#1\right]}
+\newcommand{\prob}[1]{\P\left[#1\right]}
+
+
\newtheorem{proposition}{Proposition}
\newtheorem{corollary}{Corollary}
\newtheorem{problem}{Problem}
\newtheorem{theorem}{Theorem}
+\newtheorem{claim}{Claim}
\title{Learn and/or Optimize}
\author{}
@@ -225,13 +233,13 @@ fraction of the non-zero sets is a big problem
\section{Passive Optimization vs. Passive Learning}
-\subsection{Returning max of observations}
+\subsection{Failed attempt: returning the max of observations}
This doesn't work. Give examples as to why! Remember that there are strong
concentration results for submodular functions: look at the expected value of
the observed sets.
-\subsection{Passive Optimization can be easier than Passive Learning}
+\subsection{Example where optimization is possible but learning is impossible}
Recall the matroid construction from~\cite{balcan2011learning}:
\begin{theorem}
@@ -259,10 +267,10 @@ optimize $f$ exactly! Indeed, the largest possible value for $f$ under
cardinality constraint $n^{1/3}$ is $\max_{A\in 2^{[n]},\, |A|\leq n^{1/3}} |A| = n^{1/3}$.
One example of a distribution under which this occurs with probability at least
-a constant is $D_u$, the uniform distribution over all sets of ${\cal A}$.
-For $c>0$, after $n^c$ observations $O \equiv (S_i, f(S_i))_i$ for $S_i \sim D_u$, we will
-observe at least one element from $\cal{A}\backslash\cal{B}$ with constant
-probability:
+a constant is $D_u$, the uniform distribution over all sets of ${\cal A}$. For
+$c>0$, after $n^c$ observations $O \equiv (S_i, f(S_i))_i$ for $S_i \sim D_u$,
+we will observe at least one element from ${\cal A}\backslash{\cal B}$ with
+constant probability:
\begin{equation} \nonumber \mathbb{P}\Big(\bigcup_{i} \big\{S_i \in {\cal
A}\backslash{\cal B}\big\}\Big) \geq 1 - (1 - \epsilon)^{n^c} \approx \epsilon n^c
@@ -278,6 +286,92 @@ A}\backslash {\cal B}$.
{\bf TODO:} A cleaner, simpler example would be nice.
+\section{The case of parametric submodular functions}
+
+\subsection{Influence Functions}
+
+Let $G = (V, E)$ be a weighted directed graph. Without loss of generality, we
+can assign a weight $p_{u,v} \in [0,1]$ to every possible edge $(u,v) \in V^2$,
+setting $p_{u,v} = 0$ when $(u,v) \notin E$. Let $m$ be the number of edges of
+$G$ with non-zero weight. Let $\sigma_G(S, p)$ be the influence of the set $S
+\subseteq V$ in $G$ under the IC model with parameters $p$.
+
+Recall from \cite{Kempe:03} that:
+\begin{equation}
+ \sigma_G(S, p) = \sum_{v\in V} \P_{G_p}\big[r_{G_p}(S\leadsto v)\big]
+\end{equation}
+where $G_p$ is a random graph in which each edge $(u,v)\in E$ appears
+independently with probability $p_{u,v}$, and $r_{G_p}(S\leadsto v)$ is the
+indicator variable of the event \emph{there exists a path from $S$ to $v$ in
+$G_p$}.
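+
+As a minimal sanity check of this formula, consider the graph on two nodes
+$\{s, v\}$ with a single edge $(s, v)$ of weight $p_{s,v}$, and take $S =
+\{s\}$; since $s$ is trivially reachable from $S$, its term contributes $1$:
+\begin{displaymath}
+  \sigma_G(\{s\}, p) = \P_{G_p}\big[r_{G_p}(\{s\}\leadsto s)\big]
+  + \P_{G_p}\big[r_{G_p}(\{s\}\leadsto v)\big] = 1 + p_{s,v}.
+\end{displaymath}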
+
+\begin{claim}
+\label{cla:oracle}
+If for all $(u,v) \in V^2$, $p_{u,v} \geq p'_{u,v} \geq p_{u,v}
+- \frac{1}{\alpha m}$, then:
+\begin{displaymath}
+ \forall S \subseteq V,\, \sigma_{G}(S, p) \geq \sigma_{G}(S,p') \geq (1
+ - 1/\alpha) \sigma_{G}(S,p)
+\end{displaymath}
+\end{claim}
+
+\begin{proof}
+  We define two coupled random graphs $G_p$ and $G_{p'}$ as follows: for each
+  edge $(u,v)\in E$, draw a uniform random variable $\mathcal{U}_{u,v}\in
+  [0,1]$. Include $(u,v)$ in $G_p$ (resp. $G_{p'}$) iff
+  $\mathcal{U}_{u,v}\leq p_{u,v}$ (resp. $\mathcal{U}_{u,v}\leq p'_{u,v}$).
+  It is clear from the construction that each edge $(u,v)$ will be present in
+  $G_p$ (resp. $G_{p'}$) with probability $p_{u,v}$ (resp. $p'_{u,v}$). Hence:
+  \begin{displaymath}
+      \sigma_G(S, p) = \sum_{v\in V} \P_{G_p}\big[r_{G_p}(S\leadsto v)\big]
+      \text{ and }
+      \sigma_G(S, p') = \sum_{v\in V} \P_{G_{p'}}\big[r_{G_{p'}}(S\leadsto v)\big]
+  \end{displaymath}
+
+  By construction, $G_{p'}$ is always a subgraph of $G_p$, so that
+  $r_{G_{p'}}(S\leadsto v)\leq r_{G_p}(S\leadsto v)$. This proves the first
+  inequality of the claim.
+
+  The probability that an edge is present in $G_p$ but not in $G_{p'}$ is at
+  most $\frac{1}{\alpha m}$, so with probability at least $\left(1-\frac{1}{\alpha
+  m}\right)^m$, $G_p$ and $G_{p'}$ have the same set of edges. This implies that:
+  \begin{displaymath} \P_{G_{p'}}\big[r_{G_{p'}}(S\leadsto v)\big]\geq
+      \left(1-\frac{1}{\alpha m}\right)^m\P_{G_{p}}\big[r_{G_p}(S\leadsto v)\big]
+  \end{displaymath}
+  which proves the second inequality of the claim after observing that, by
+  Bernoulli's inequality, $\left(1-\frac{1}{\alpha m}\right)^m\geq
+  1-\frac{1}{\alpha}$, with equality iff $m=1$.
+\end{proof}
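+
+As a quick numerical illustration of Claim~\ref{cla:oracle} (values chosen
+here purely for concreteness), take $m = 10$ and $\alpha = 5$: the coupling
+argument gives
+\begin{displaymath}
+  \left(1 - \tfrac{1}{50}\right)^{10} \approx 0.817 \geq 1 - \tfrac{1}{5} = 0.8,
+\end{displaymath}
+so decreasing every edge probability by at most $\frac{1}{50}$ preserves at
+least a $0.8$ fraction of the influence of any set.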
+
+ We can use Claim~\ref{cla:oracle} to obtain a constant-factor approximation
+ algorithm for maximising influence on $G$ by maximising influence on an
+ approximating graph $G'$ (see Proposition~\ref{prop:approx_optim} below). For
+ $k \in \mathbb{N}^*$, let $O_k \in \arg\max_{|S| \leq k} \sigma_G(S)$ and
+ $\sigma_{G}^* = \sigma_G(O_k)$, where we omit the $k$ when it is unambiguous.
+
+
+\begin{proposition}
+\label{prop:approx_optim}
+Suppose we have an unknown graph $G$ and a known graph $G'$ such that $V = V'$
+and for all $(u,v) \in V^2$, $|p_{u,v} - p'_{u,v}| \leq \frac{1}{\alpha m}$.
+Then for all $k \in \mathbb{N}^*$, we can find a set $\hat O_k$ such that
+$\sigma_{G}(\hat O_k) \geq (1 - e^{\frac{2}{\alpha} - 1})\, \sigma^*_{G}$.
+\end{proposition}
+
+\begin{proof} For every edge $(u,v) \in V^2$, let $\hat p_{u,v} = p'_{u,v} -
+  \frac{1}{\alpha m}$, and let $\hat G$ be the graph on $V$ with parameters
+  $\hat p$. Then $p_{u,v} \geq \hat p_{u,v} \geq p_{u,v} - \frac{2}{\alpha m}$,
+  so we are in the conditions of Claim~\ref{cla:oracle} with $\alpha \leftarrow
+  \alpha/2$. We return the set $\hat O_k$ obtained by greedy maximisation on
+  $\hat G$. It is a classic exercise to show then that $\sigma_G(\hat O_k) \geq
+  (1 - e^{\frac{2}{\alpha} - 1})\, \sigma^*_G$ (see Pset 1, CS284r).
+\end{proof}
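+
+For reference, the greedy maximisation invoked above is the standard rule for
+cardinality-constrained maximisation, stated here in our notation and assuming
+value-oracle access to $\sigma_G(\cdot, \hat p)$: start from $\hat O^{(0)}
+= \emptyset$ and, for $i = 0, \dots, k-1$, set
+\begin{displaymath}
+  \hat O^{(i+1)} = \hat O^{(i)} \cup \Big\{\arg\max_{v \in V \setminus \hat
+  O^{(i)}} \sigma_G\big(\hat O^{(i)} \cup \{v\}, \hat p\big)\Big\},
+\end{displaymath}
+returning $\hat O_k = \hat O^{(k)}$.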
+
+A small note on the approximation factor: it is only positive for $\alpha > 2$.
+Note that $\alpha \geq 7 \implies 1 - e^{\frac{2}{\alpha} - 1} \geq
+\frac{1}{2}$, and that the factor converges to $1 - 1/e$ as $\alpha \to \infty$,
+which is the best possible polynomial-time approximation ratio for influence
+maximisation unless $P = NP$. Also note that in the limit of large $m$, $(1
+-\frac{1}{\alpha m})^m \rightarrow \exp(-1/\alpha)$ and the approximation ratio
+goes to $1 - \exp(-\exp(-2/\alpha))$.
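+
+As a quick numerical check (values chosen here for illustration): for $\alpha
+= 7$ the guarantee is $1 - e^{2/7 - 1} \approx 0.51 \geq \tfrac{1}{2}$; for
+$\alpha = 3$ it is only $1 - e^{-1/3} \approx 0.28$; and at $\alpha = 2$ it
+vanishes, consistent with the observation above.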
+
+\subsection{Active set selection of stationary Gaussian Processes}
\bibliography{optimize} \bibliographystyle{apalike}