-rw-r--r--  paper/supplementary.tex | 78
1 file changed, 76 insertions(+), 2 deletions(-)
diff --git a/paper/supplementary.tex b/paper/supplementary.tex
index 541c6fb..8839b7c 100644
--- a/paper/supplementary.tex
+++ b/paper/supplementary.tex
@@ -1,8 +1,11 @@
-\documentclass[11pt]{article}
+\documentclass[10pt]{article}
-\usepackage{amsmath, amssymb, amsthm}
+\usepackage{amsmath, amssymb, amsthm, bbm, fullpage}
\date{}
\author{}
+\newtheorem{claim}{Claim}
+\newtheorem{proposition}{Proposition}
+\newtheorem{lemma}{Lemma}
\title{Supplementary Material \\ {\small Inferring Graphs from Cascades: A Sparse Recovery Framework}}
@@ -10,6 +13,62 @@
\maketitle
+
+\subsection*{Learnability and Optimization}
+
+\paragraph{Notation}
+
+Let $G = (V, E)$ be a weighted directed graph. Without loss of generality, we assign a weight $p_{u,v} \in [0,1]$ to every possible edge $(u,v) \in V^2$. Let $m$ be the number of edges of $G$ with non-zero weight. Let $\sigma_G(S)$ be the influence of the set $S \subseteq V$ in graph $G$ under the independent cascade model. For $k \in \mathbb{N}^*$, let $O_k \in \arg\max_{|S| \leq k} \sigma_G(S)$ and $\sigma_{k,G}^* = \sigma_G(O_k)$.
+
+\begin{claim}
+\label{cla:oracle}
+Let $G$ and $G'$ be two graphs such that $V = V'$. If for all $(u,v) \in V^2$, $p_{u,v} \geq p'_{u,v} \geq p_{u,v} - \frac{1}{\alpha m}$, then for all $S \subseteq V$, $\sigma_{G}(S) \geq \sigma_{G'}(S) \geq (1 - 1/\alpha) \sigma_{G}(S)$.
+\end{claim}
+
+\begin{proof}
+Let $S \subseteq V$ be any seed set. From \cite{Kempe:03}, by sampling an unweighted subgraph $H \sim G$, in which $(u,v)$ is an edge with probability $p_{u,v}$ independently, we can write:
+
+\begin{equation}
+\sigma_G(S) = \sum_{v\in V} \sigma_G(S)[v] = \sum_{v \in V} \mathbb{E}_{H \sim G} \left[ \mathbbm{1}_{\{v \text{ reachable from } S \text{ in } H\}}\right]
+\end{equation}
+
+where $\sigma_G(S)[v]$ is the influence of $S$ on $v \in V$ in $G$. A similar equation holds for $H' \sim G'$. Note that if $v \in S$, then $\sigma_G(S)[v] = \sigma_{G'}(S)[v]$ trivially. Suppose $v \in V \setminus S$. We can couple the sampling of $H'$ and $H$ such that $H'$ is always a subgraph of $H$ and such that any edge $(u,v)$ is in $H$ but not in $H'$ with probability $p_{u,v} - p'_{u,v} \leq \frac{1}{\alpha m}$. Since any edge of $H'$ is also an edge of $H$, we have $\sigma_{G}(S)[v] \geq \sigma_{G'}(S)[v]$. The probability that $H$ and $H'$ have the same edge set is at least $(1 -\frac{1}{\alpha m})^m \geq 1 - 1/\alpha$, with equality in the second inequality when $m=1$. Adopting the convention $0/0 = 1$,
+
+$$\frac{\sigma_{G'}(S)[v]}{\sigma_{G}(S)[v]} \geq \mathbb{E} \left[ \frac{\mathbbm{1}_{\{v \text{ reachable from } S \text{ in } H'\}}}{\mathbbm{1}_{\{v \text{ reachable from } S \text{ in } H\}}}\right] \geq \left(1 - \frac{1}{\alpha}\right) \mathbb{E} \left[ \frac{\mathbbm{1}_{\{v \text{ reachable from } S \text{ in } H'\}}}{\mathbbm{1}_{\{v \text{ reachable from } S \text{ in } H\}}} \,\middle|\, H = H'\right] \geq (1 - 1/\alpha)$$
+
+It follows that $\sigma_{G'}(S)[v] \geq (1 -1/\alpha) \sigma_{G}(S)[v]$.
+\end{proof}
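+
+For completeness, the probability bound used above can be checked directly: each of the at most $m$ edges on which $H$ and $H'$ may disagree does so with probability at most $\frac{1}{\alpha m}$, so Bernoulli's inequality (equivalently, a union bound over these edges) gives
+$$\mathbb{P}\left[H = H'\right] \geq \left(1 - \frac{1}{\alpha m}\right)^m \geq 1 - m \cdot \frac{1}{\alpha m} = 1 - \frac{1}{\alpha}.$$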
+
+Note that in the limit of large $m$, $(1 -\frac{1}{\alpha m})^m \rightarrow \exp(-1/\alpha)$, so the approximation ratio tends to $\exp(-1/\alpha)$. We can use Claim~\ref{cla:oracle} to obtain a constant-factor approximation algorithm for maximising influence on $G$ by maximising influence on $G'$:
+
+\begin{proposition}
+\label{prop:approx_optim}
+Suppose we have an unknown graph $G$ and a known graph $G'$ such that $V = V'$ and for all $(u,v) \in V^2$, $|p_{u,v} - p'_{u,v}| \leq \frac{1}{\alpha m}$. Then for all $k \in \mathbb{N}^*$, we can find a set $\hat O_k$ such that $\sigma_{G}(\hat O_k) \geq (1 - e^{\frac{2}{\alpha} - 1}) \sigma^*_{k,G}$.
+\end{proposition}
+
+\begin{proof}
+For every edge $(u,v) \in V^2$, let $\hat p_{u,v} = p'_{u,v} - \frac{1}{\alpha m}$ (truncated at $0$ if negative), and let $\hat G$ be the graph with these weights. Since $|p_{u,v} - p'_{u,v}| \leq \frac{1}{\alpha m}$, we have $p_{u,v} \geq \hat p_{u,v} \geq p_{u,v} - \frac{2}{\alpha m}$, so we are in the conditions of Claim~\ref{cla:oracle} with $\alpha \leftarrow \alpha/2$. We return the set $\hat O_k$ obtained by greedy maximisation on $\hat G$. It is then a classic exercise to show that $\sigma_G(\hat O_k) \geq (1 - e^{\frac{2}{\alpha} - 1}) \sigma^*_{k,G}$ (see Pset 1, CS284r).
+\end{proof}
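+
+For intuition, a simpler (if slightly weaker) chain of inequalities, using only the standard $(1-1/e)$ greedy guarantee for monotone submodular influence functions \cite{Kempe:03} together with Claim~\ref{cla:oracle} applied to $\hat G$ (with $\alpha \leftarrow \alpha/2$), already yields a constant factor:
+$$\sigma_G(\hat O_k) \;\geq\; \sigma_{\hat G}(\hat O_k) \;\geq\; \left(1 - \frac{1}{e}\right) \sigma_{\hat G}(O_k) \;\geq\; \left(1 - \frac{1}{e}\right)\left(1 - \frac{2}{\alpha}\right) \sigma^*_{k,G}.$$
+The sharper constant $1 - e^{\frac{2}{\alpha} - 1}$ stated in the proposition matches the $1 - e^{-\beta}$ guarantee for greedy with a $\beta$-approximate selection rule, here with $\beta = 1 - \frac{2}{\alpha}$.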
+
+A small note on the approximation factor: it is positive only for $\alpha > 2$. Note that $\alpha \geq 7 \implies 1 - e^{\frac{2}{\alpha} - 1} \geq \frac{1}{2}$, and that the factor converges to $1 - 1/e$ as $\alpha \rightarrow \infty$.
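+
+These claims can be checked directly:
+$$1 - e^{\frac{2}{\alpha} - 1} > 0 \iff \frac{2}{\alpha} - 1 < 0 \iff \alpha > 2, \qquad 1 - e^{\frac{2}{7} - 1} = 1 - e^{-5/7} \approx 0.51 \geq \frac{1}{2},$$
+and since the factor is increasing in $\alpha$, it tends to $1 - e^{-1} \approx 0.63$ as $\alpha \rightarrow \infty$.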
+
+
+\subsection*{Obtaining $\| p - p^*\|_{\infty} \leq \frac{1}{2m}$}
+
+Note that with $n \geq C \frac{s^2}{\gamma \alpha} \log m$ measurements, where $C$ is an absolute constant, we have for every node $i$, $$\|\theta_i - \theta_i^*\|_2 \leq \frac{6}{\gamma}\sqrt{\frac{s \log m}{\alpha n^{1-\delta}}}$$ with probability $1 - 2e^{-n^{\delta} \log m}$.
+
+Suppose we want this bound for every node. Using, for each node, the first measurement of every cascade in which that node is not infected at $t=0$ (which happens with probability $1 - p_{\text{init}}$; we ignore this factor for now), we have by a union bound, for $n \geq C \frac{\max(s)^2}{\gamma \alpha} \log m$ measurements:
+$$\|\theta - \theta^*\|_{2} \leq \frac{6}{\gamma}\sqrt{\frac{|E| \log m}{\alpha n^{1-\delta}}}$$
+with probability $1 - 2e^{-(n^{\delta} - 1) \log m}$. Note that we could also have reasoned with the $\|\cdot\|_{\infty}$-norm, in which case, with $\Delta := \max(s)$, the same number of measurements and the same probability give:
+$$\|\theta - \theta^*\|_{\infty} \leq \frac{6}{\gamma}\sqrt{\frac{\Delta\log m}{\alpha n^{1-\delta}}}$$
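+
+To spell out the aggregation step (assuming here that $s_i$ denotes the sparsity of $\theta_i^*$, so that $\sum_{i \in V} s_i = |E|$), the per-node $\ell_2$ bounds combine as
+$$\|\theta - \theta^*\|_2^2 = \sum_{i \in V} \|\theta_i - \theta_i^*\|_2^2 \leq \sum_{i \in V} \frac{36}{\gamma^2} \frac{s_i \log m}{\alpha n^{1-\delta}} = \frac{36}{\gamma^2} \frac{|E| \log m}{\alpha n^{1-\delta}},$$
+which is the square of the stated bound; the $\|\cdot\|_{\infty}$ case follows by taking a maximum over $i$ instead of a sum and bounding $s_i \leq \Delta$.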
+
+Suppose $n^{1-\delta} \geq \frac{36 C \Delta |E|}{\gamma^2 \alpha} \log m$. Then, with high probability:
+$$|\theta_{ji} - \theta^*_{ji}| \leq \| \theta - \theta^* \|_{\infty} \leq \frac{1}{\sqrt{|E|C}}$$
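+
+Indeed, substituting the assumed lower bound on $n^{1-\delta}$ into the $\|\cdot\|_{\infty}$ bound above gives
+$$\frac{6}{\gamma}\sqrt{\frac{\Delta \log m}{\alpha n^{1-\delta}}} \;\leq\; \frac{6}{\gamma}\sqrt{\frac{\Delta \log m \cdot \gamma^2 \alpha}{\alpha \cdot 36 C \Delta |E| \log m}} \;=\; \frac{6}{\gamma} \cdot \frac{\gamma}{6\sqrt{C|E|}} \;=\; \frac{1}{\sqrt{|E| C}}.$$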
+
+We can now show that $f_{\theta^*} \geq \frac{1}{\sqrt{C}} f_{\theta}$ in the independent cascade model.
+
+
+
+
\subsection*{(RE) with high probability}
\subsubsection*{(RE) for the gram matrix}
@@ -37,8 +96,23 @@ c_j &\leq p_i + \sum_{j \in S} w_{ji} c_j \tag{PageRank}\\
c_j &\geq p_i + \max_{j \in S} w_{ji} c_j \tag{?}
\end{align}
+\paragraph{Simple claim}
+
+If we assume that every node has an independent probability $p_i \in ]0,1[$ of being a seed node, then by taking the first measurement of every cascade, we get a matrix of the form
+
+$$\begin{pmatrix} p_1 & p_1 p_2 & p_1 p_3 & \cdots \\ p_1 p_2 & p_2 & p_2 p_3 & \cdots \\ p_1 p_3 & p_2 p_3 & p_3 & \cdots \\ \vdots & \vdots & \vdots & \ddots \end{pmatrix}$$
+
+which can be written as $D(A + (I - D))$, where $D = \operatorname{diag}(p_i)$ and $A_{i,j} = p_j$ for all $i$.
+
+Since $\det(D(A + (I-D))) \geq \det(D)\det(A) + \det(D)\det(I-D) \geq \left(\min_i p_i (1-p_i)\right)^m > 0$, we have our $\gamma > 0$.
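+
+As a sanity check of this decomposition, write $x_i \in \{0,1\}$ for the indicator that node $i$ is a seed, assume the matrix above is the second-moment matrix $\mathbb{E}[x x^{\top}]$ of the first measurements, and let $\delta_{i,j}$ denote the Kronecker delta. Then, entrywise,
+$$\left(D(A + (I - D))\right)_{i,j} = p_i \left(A_{i,j} + \delta_{i,j}(1 - p_i)\right) = \begin{cases} p_i & \text{if } i = j,\\ p_i p_j & \text{if } i \neq j,\end{cases}$$
+which matches $\mathbb{E}[x_i^2] = p_i$ and, by independence, $\mathbb{E}[x_i x_j] = p_i p_j$ for $i \neq j$.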
+
+
\subsection*{Relaxed (RE) with high probability}
\subsection*{Lower Bound}
+
+\bibliography{./sparse}
+\bibliographystyle{plain}
+
\end{document} \ No newline at end of file