aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2015-02-24 15:48:03 -0500
committerThibaut Horel <thibaut.horel@gmail.com>2015-02-24 15:48:03 -0500
commitac88475d00cbc54ad68ce1a9c3ce90c36cfde62a (patch)
treeae6d11e2dd30a60b1f995fa160484eb171c121d6
parent132e429a3a642bed57975056a8496ebdd6511976 (diff)
downloadcascades-ac88475d00cbc54ad68ce1a9c3ce90c36cfde62a.tar.gz
Clean coupling proof
-rw-r--r--paper/supplementary.tex68
1 files changed, 54 insertions, 14 deletions
diff --git a/paper/supplementary.tex b/paper/supplementary.tex
index cc7a82f..762ad0c 100644
--- a/paper/supplementary.tex
+++ b/paper/supplementary.tex
@@ -1,6 +1,11 @@
\documentclass[10pt]{article}
-
+\usepackage[utf8x]{inputenc}
\usepackage{amsmath, amssymb, amsthm, bbm, fullpage}
+\DeclareMathOperator{\E}{\mathbb{E}}
+\let\P\relax
+\DeclareMathOperator{\P}{\mathbb{P}}
+\newcommand{\ex}[1]{\E\left[#1\right]}
+\newcommand{\prob}[1]{\P\left[#1\right]}
\date{}
\author{}
\newtheorem{claim}{Claim}
@@ -16,29 +21,64 @@
\subsection*{Learnability and Optimization}
-\paragraph{Notation}
+Let $G = (V, E)$ be a weighted directed graph. Without loss of generality, we
+can assign a weight $p_{u,v} \in [0,1]$ to every possible edge $(u,v) \in V^2$.
+Let $m$ be the number of non-zero edges of $G$. Let $\sigma_G(S, p)$ be the
+influence of the set $S \subseteq V$ in $G$ under the IC model with parameters
+$p$.
-Let $G = (V, E)$ be a weighted directed graph. Without loss of generality, we can assign a weight $p_{u,v} \in [0,1]$ to every possible edge $(u,v) \in V^2$. Let $m$ be the number of non-zero edges of $G$. Let $\sigma_G(S)$ be the influence of the set $S \subseteq V$ in graph $G$ under the independent cascade model. For $k \in \mathbb{N}^*$, let $O_k \in \arg\max_{|S| \leq k} \sigma_G(S)$ and $\sigma_{G}^* = \sigma_G(O_k)$ where we omit the $k$ when it is unambiguous.
+Recall from \cite{Kempe:03} that:
+\begin{equation}
+ \sigma_G(S, p) = \sum_{v\in V} \P_{G_p}\big[r_{G_p}(S\leadsto v)\big]
+\end{equation}
+where $G_p$ is a random graph where each edge $(u,v)\in E$ appears with
+probability $p_{u,v}$ and $r_{G_p}(S\leadsto v)$ is the indicator variable of
+\emph{there exists a path from $S$ to $v$ in $G_p$}.
\begin{claim}
\label{cla:oracle}
-For two graphs $G$ and $G'$ such that $V = V'$. If for all $(u,v) \in V^2$, $p_{u,v} \geq p'_{u,v} \geq p_{u,v} - \frac{1}{\alpha m}$, then $\forall S \subseteq V, \sigma_{G}(S) \geq \sigma_{G'}(S) \geq (1 - 1/\alpha) \sigma_{G}(S)$
+If for all $(u,v) \in V^2$, $p_{u,v} \geq p'_{u,v} \geq p_{u,v}
+- \frac{1}{\alpha m}$, then:
+\begin{displaymath}
+ \forall S \subseteq V,\, \sigma_{G}(S, p) \geq \sigma_{G}(S,p') \geq (1
+ - 1/\alpha) \sigma_{G}(S,p)
+\end{displaymath}
\end{claim}
\begin{proof}
-Let $S \subseteq V$ be any seed set. From \cite{Kempe:03}, by sampling unweighted subgraph $H \sim G$, where $(u,v)$ is an edge in $H$ with probability $p_{u,v}$, we can write:
+ We define two coupled random graphs $G_p$ and $G_p'$ as follows: for each
+ edge $(u,v)\in E$, draw a uniform random variable $\mathcal{U}_{u,v}\in
+ [0,1]$. Include $(u,v)$ in $G_p$ (resp. $G_{p'}$) iff
+ $\mathcal{U}_{u,v}\leq p_{u,v}$ (resp. $\mathcal{U}_{u,v}\leq p'_{u,v}$).
+ It is clear from the construction that each edge $(u,v)$ will be present in
+ $G_p$ (resp. $G_p'$) with probability $p_{u,v}$ (resp. $p'_{u,v}$). Hence:
+ \begin{displaymath}
+ \sigma_G(S, p) = \sum_{v\in V} \P_{G_p}\big[r_{G_p}(S\leadsto v)\big]
+ \text{ and }
+ \sigma_G(S, p') = \sum_{v\in V} \P_{G_p'}\big[r_{G_p'}(S\leadsto v)\big]
+ \end{displaymath}
-\begin{equation}\sigma_G(S) = \sum_{v\in V} \sigma_G(S)[v] = \sum_{v \in V} \mathbb{E}_{H \sim G} \left[ \mathbbm{1}_{v \text{ reachable from S in H}}\right]
-\end{equation}
+ By construction $G_{p'}$ is always a subgraph of $G_p$, i.e
+ $r_{G_p'}(S\leadsto v)\leq r_{G_p}(S\leadsto v)$. This proves the left-hand
+ side of the Claim's inequality.
-where $\sigma_G(S)[v]$ is the influence of $S$ on $v\in V$ in $G$. A similar equation can be written for $H' \sim G'$. Note that if $v \in S$, then $\sigma_G(S)[v] = \sigma_G'(S)[v]$ trivially. Suppose $v \in V \backslash S$. We can couple the sampling of $H'$ and $H$ such that $H'$ is always a subgraph of $H$ and such that that any edge $(u,v)$ is in $H$ but not in $H'$ with probability $p_{u,v} - p'_{u,v} \leq \frac{1}{\alpha m}$. Since any non-zero edge of $H'$ is always an edge in $H$, we have $\sigma_{G}(S)[v] \geq \sigma_G'(S)[v]$. The probability that $H$ and $H'$ have the same non-zero edge set is $(1 -\frac{1}{\alpha m})^m \geq 1 - 1/\alpha$, with equality when $m=1$. By adopting the convention $0/0 = 1$,
-
-$$\frac{\sigma_{G'}(S)[v]}{\sigma_{G}(S)[v]} = \mathbb{E} \left[ \frac{\mathbbm{1}_{v \text{ reachable from S in H'}}}{\mathbbm{1}_{v \text{ reachable from S in H}}}\right] \geq \left(1 - \frac{1}{\alpha}\right) \mathbb{E} \left[ \frac{\mathbbm{1}_{v \text{ reachable from S in H'}}}{\mathbbm{1}_{v \text{ reachable from S in H}}} \middle| H = H'\right] = (1 - 1/\alpha)$$
-
-It follows that $\sigma_{G'}(S)[v] \geq (1 -1/\alpha) \sigma_{G}(S)[v]$.
+ The probability that an edge is present in $G_p$ but not in $G_p'$ is at
+ most $\frac{1}{\alpha m}$, so with probability $\left(1-\frac{1}{\alpha
+ m}\right)^m$, $G_p$ and $G_p'$ have the same set of edges. This implies that:
+ \begin{displaymath}
+ \P_{G_{p'}}\big[r_{G_p'}(S\leadsto v)\big]\geq
+ \left(1-\frac{1}{\alpha m}\right)^m\P_{G_{p}}\big[r_{G_p}(S\leadsto v)\big]
+ \end{displaymath}
+ which proves the right-hand side of the claim after observing that
+ $\left(1-\frac{1}{\alpha m}\right)^m\geq 1-\frac{1}{\alpha}$ with equality
+ iff $m=1$.
\end{proof}
- We can use Claim~\ref{cla:oracle} to find a constant factor approximation algorithm to maximising influence on $G$ by maximising influence on $G'$:
+ We can use Claim~\ref{cla:oracle} to find a constant factor approximation
+ algorithm to maximising influence on $G$ by maximising influence on $G'$. For
+ $k \in \mathbb{N}^*$, let $O_k \in \arg\max_{|S| \leq k} \sigma_G(S)$ and
+ $\sigma_{G}^* = \sigma_G(O_k)$ where we omit the $k$ when it is unambiguous.
+
\begin{proposition}
\label{prop:approx_optim}
@@ -114,4 +154,4 @@ Since $\det(D(A + (I-D))) \geq det(D)det(A) + det(D)det(I-D) \geq \min(p_i (1-p_
\bibliography{./sparse}
\bibliographystyle{plain}
-\end{document} \ No newline at end of file
+\end{document}