aboutsummaryrefslogtreecommitdiffstats
path: root/notes/cracking_cascades_classposter.tex
diff options
context:
space:
mode:
Diffstat (limited to 'notes/cracking_cascades_classposter.tex')
-rw-r--r--notes/cracking_cascades_classposter.tex239
1 files changed, 125 insertions, 114 deletions
diff --git a/notes/cracking_cascades_classposter.tex b/notes/cracking_cascades_classposter.tex
index 2b29109..52fd78d 100644
--- a/notes/cracking_cascades_classposter.tex
+++ b/notes/cracking_cascades_classposter.tex
@@ -75,25 +75,30 @@
%\vspace{4.6 cm}
-\begin{block}{Introduction}
+\begin{block}{Problem Statement}
+\begin{center}
+\textbf{How can we reconstruct a graph on which observed cascades spread?}
+\end{center}
+\end{block}
-{\bf Graph Reconstruction}:
-\begin{itemize}
-\item \{${\cal G}, \vec p$\}: directed graph, edge probabilities
-\item $F$: cascade generating model
-\item ${\cal M} := F\{{\cal G}, \vec p\}$: cascade
-\end{itemize}
+%{\bf Graph Reconstruction}:
-{\bf Objective}:
-\begin{itemize}
-\item Find algorithm which computes $F^{-1}({\cal M}) = \{{\cal G}, \vec p\}$ w.h.p., i.e. recovers graph from cascades.
-\end{itemize}
+%\begin{itemize}
+%\item \{${\cal G}, \vec p$\}: directed graph, edge probabilities
+%\item $F$: cascade generating model
+%\item ${\cal M} := F\{{\cal G}, \vec p\}$: cascade
+%\end{itemize}
-{\bf Approach}
-\begin{itemize}
-\item Frame graph reconstruction as a {\it Sparse Recovery} problem for two cascade generating models.
-\end{itemize}
+%{\bf Objective}:
+%\begin{itemize}
+%\item Find algorithm which computes $F^{-1}({\cal M}) = \{{\cal G}, \vec p\}$ w.h.p., i.e. recovers graph from cascades.
+%\end{itemize}
+
+%{\bf Approach}
+%\begin{itemize}
+%\item Frame graph reconstruction as a {\it Sparse Recovery} problem for two cascade generating models.
+%\end{itemize}
%Given a set of observed cascades, the \textbf{graph reconstruction problem} consists of finding the underlying graph on which these cascades spread. We assume that these cascades come from the classical \textbf{Independent Cascade Model} where at each time step, newly infected nodes infect each of their neighbors with some probability.
@@ -101,47 +106,65 @@
%We formulate a novel approach to this problem in which we use \textbf{Sparse Recovery} to find the edges in the unknown underlying network. Sparse Recovery is the problem of finding the sparsest vector $x$ such that $\mathbf{M x =b}$. In our case, for each node $i$, we wish to recover the vector $x = p_i$ where $p_{i_j}$ is the probability that node $j$ infects node $i$ if $j$ is active. To recover this vector, we are given $M$, where row $M_{t,k}$ indicates which nodes are infected at time $t$ in observed cascade $k$, and $b$, where $b_{t+1,k}$ indicates if node $i$ is infected at time $t+1$ in cascade $k$. Since most nodes have a small number of neighbors in large networks, we can assume that these vectors are sparse. Sparse Recovery is a well studied problem which can be solved efficiently and with small error if $M$ satisfies certain properties. In this project, we empirically study to what extent $M$ satisfies the Restricted Isometry Property.
-\end{block}
%---------------------------------------------------------------------------------
%---------------------------------------------------------------------------------
\begin{block}{Voter Model}
+
+\begin{figure}
+\centering
+\includegraphics[width=0.6\textwidth]{voter.png}
+\end{figure}
+
+
+
+\vspace{0.5 cm}
{\bf Description}
+\vspace{0.5 cm}
+
\begin{itemize}
\item Two types of nodes, {\color{red} red} and {\color{blue} blue}.
\item $\mathbb{P}$(node is {\color{blue} blue} at $t=0) = p_{\text{init}}$
\item $\mathbb{P}$(node is {\color{blue} blue} at t+1) = $\frac{\text{Number of {\color{blue}blue} neighbors}}{\text{Total number of neighbors}}$
-\item Observe until horizon T
\end{itemize}
+\vspace{0.5 cm}
+
{\bf Sparse Recovery Formulation}
-\begin{itemize}
-\item Definitions:
-\end{itemize}
-\begin{align}
-(\vec m^t_k)_j &:= \left[\text{node j is {\color{blue} blue} at t in cascade k}\right] \nonumber \\
-(\vec x_{i})_j &:= \frac{\text{1}}{\text{deg}(i)} \cdot \left[\text{j parent of i in }{\cal G}\right] \nonumber \\
-b^t_{k,i} &:= \text{node i is {\color{blue} blue} at t+1 in cascade k} \nonumber
-\end{align}
-\begin{itemize}
-\item Observation:
-\end{itemize}
-\begin{align}
-\langle \vec m^t_k, \vec x_i \rangle &= \mathbb{P}(\text{node i is {\color{blue} blue} at t+1 in cascade k}) \nonumber \\
-&=: w^{t+1}_{k,i} \nonumber
-\end{align}
+\vspace{0.5 cm}
+
+
+To recover the neighbors of $v_1$:
+
+Observe which nodes are {\color{red} red} (1) or {\color{blue} blue} (0) in the first two iterations:
+\begin{align*}
+&v_2 \hspace{0.2 cm} v_3 \hspace{0.2 cm} v_4 \hspace{0.2 cm} v_5 \\
+\vspace{1 cm}
+M = & \left( \begin{array}{cccc}
+0 & 0 & 1 & 1 \\
+1 & 1 & 0 & 0 \\
+\end{array} \right)
+\end{align*}
+
+and which color $v_1$ is in the last two iterations:
+
+\begin{align*}
+b_1 = & \left( \begin{array}{c}
+1 \\
+1 \\
+\end{array} \right)
+\end{align*}
+
+If $(\vec x_{1})_j := \frac{1}{\text{deg}(1)} \cdot \left[\text{j parent of 1 in }{\cal G}\right] $, then our goal is to solve:
-\begin{itemize}
-\item We observe $M := \text{vstack}(\vec m^t_k)$
-\item We observe $b_i := \text{vstack}(b^t_{k,i})$ where $$b^t_{k,i} \sim {\cal B}(w^t_{k,i}) = w^t_{k,i} - \epsilon$$
-\item For each node i, solve for $\vec x_i$:
\begin{equation}
-\boxed{M \vec x_i = \vec b_i + \epsilon \nonumber}
+\boxed{M \vec x_1 = \vec b_1 + \epsilon \nonumber}
\end{equation}
-\end{itemize}
+
+This problem is a {\bf Noisy Sparse Recovery} problem, which has been studied extensively. Here, the vectors $\vec x_i$ are deg(i)-sparse.
\end{block}
@@ -155,37 +178,6 @@ b^t_{k,i} &:= \text{node i is {\color{blue} blue} at t+1 in cascade k} \nonumber
%---------------------------------------------------------------------------------
%---------------------------------------------------------------------------------
-\begin{block}{Independent Cascade Model}
-
-{\bf Description}
-\begin{itemize}
-\item Three possible states: susceptible, infected, inactive
-\item Infected node $j$ infects its susceptible neighbors $i$ with probability $p_{j,i}$ independently
-\item $\mathbb{P}$(inactive at t+1| infected at t) = 1
-\item $\mathbb{P}$(infected at t=0)$=p_{\text{init}}$
-\end{itemize}
-
-
-{\bf Sparse Recovery Formulation}
-\begin{itemize}
-
-\item Definitions:
-\begin{align}
-(\vec \theta_i)_j &:= \log ( 1 - p_{j,i}) \nonumber \\
-(\vec m^t_k)_j &= \left[\text{node j is infected at t in cascade k}\right] \nonumber
-\end{align}
-
-\item Observation:
-\begin{equation}
-\langle \vec m^t_k, \vec x_i \rangle = \log \mathbb{P}(\text{node i {\it not} infect. at t+1 in casc. k}) \nonumber
-\end{equation}
-\item With same notations, we solve for each node i:
-\begin{equation}
-\boxed{e^{M \vec \theta_i} = (1 - \vec b_i) + \epsilon} \nonumber
-\end{equation}
-where: $e^{M \vec \theta_i}$ is taken element-wise
-\end{itemize}
-\end{block}
@@ -205,34 +197,60 @@ where: $e^{M \vec \theta_i}$ is taken element-wise
%----------------------------------------------------------------------------------------
% CONSTRAINT SATISFACTION - BACKTRACKING
%----------------------------------------------------------------------------------------
-\begin{block}{Example}
-
+\begin{block}{Independent Cascades Model}
\begin{figure}
\centering
-\includegraphics[width=0.6\textwidth]{cascades.png}
-\caption{Two cascades with two time steps each. Red nodes are the infected and active nodes.}
+\includegraphics[width=0.6\textwidth]{icc.png}
\end{figure}
-Figure 1 illustrates two cascades. In this case, if we wish to recover $p_A$, then our problem is given by the formula
-\[ \left( \begin{array}{cccc}
+{\bf Description}
+
+
+
+\begin{itemize}
+\item Three possible states: susceptible {\color{blue} blue}, infected {\color{red} red}, inactive {\color{yellow} yellow}
+\item $\mathbb{P}$(infected at t=0)$=p_{\text{init}}$
+\item Infected node $j$ infects its susceptible neighbors $i$ with probability $p_{j,i}$ independently
+\end{itemize}
+
+
+{\bf Sparse Recovery Formulation}
+
+To recover the neighbors of $v_5$:
+
+Observe which nodes are {\color{red} red} (1), {\color{blue} blue} (0), or {\color{yellow} yellow} (0) in the first two iterations:
+\begin{align*}
+&v_1 \hspace{0.2 cm} v_2 \hspace{0.2 cm} v_3 \hspace{0.2 cm} v_4 \\
+\vspace{1 cm}
+M = & \left( \begin{array}{cccc}
1 & 0 & 0 & 0 \\
-0 & 1 & 0 & 0 \\
-\end{array} \right) x_A = \left( \begin{array}{c}
+0 & 1 & 1 & 0 \\
+\end{array} \right)
+\end{align*}
+
+and if $v_5$ is infected in the last two iterations:
+
+\begin{align*}
+b_5 = & \left( \begin{array}{c}
0 \\
-1 \\\end{array} \right)\]
+1 \\
+\end{array} \right)
+\end{align*}
+
+If $(\vec \theta_5)_j := \log ( 1 - p_{j,5}) $, then our goal is to solve:
+
+\begin{equation}
+\boxed{e^{M \vec \theta_5} = (1 - \vec b_5) + \epsilon} \nonumber
+\end{equation}
+
+
+
\end{block}
%----------------------------------------------------------------------------------------
-\begin{block}{Motivation}
-\begin{itemize}
-\item Vectors $\vec x_i$ are deg(i)-sparse.
-\item Matrix $M$ is well-conditioned (see experimental section)
-\item Voter model: quasi-textbook noisy sparse recovery
-\item Independent cascade: if $p_{j,i} < 1 - \eta$, exponential can be approximated by affine function
-\end{itemize}
-\end{block}
+
\begin{block}{Algorithms}
@@ -252,7 +270,6 @@ Figure 1 illustrates two cascades. In this case, if we wish to recover $p_A$, th
\begin{equation}
\min_{\vec \theta_i} \|\vec \theta_i\|_1 + \lambda \|e^{M \vec \theta_i} - \vec b_i \|_2 \nonumber
\end{equation}
-where: $e^{M \vec \theta_i}$ is taken element-wise
\end{itemize}
\end{block}
@@ -291,35 +308,9 @@ where: $e^{M \vec \theta_i}$ is taken element-wise
%----------------------------------------------------------------------------------------
-\begin{block}{Theoretical Guarantees}
-{\bf Assumptions}
-\begin{itemize}
-\item Suppose that each row of $M$ is taken from a distribution $F$, such that
-\begin{itemize}
- \item $\mathbb{E}_{a \in F}(a a^T) = I_n$
- \item $\|a\|_{\infty} \leq \mu$
-\end{itemize}
-\item Suppose that w.h.p $\| M^T \epsilon \|_{\infty} \leq 2.5 \sqrt{\log n} $
-\end{itemize}
-
-{\bf Theorem \cite{candes}}
-
-If node $i$ has degree $\Delta$ and $n_{\text{rows}}(M) \geq C \mu \Delta \log n$, then, w.h.p.,
-
-$$\| \hat x - x^* \|_2 \leq C (1 + \log^{3/2}(n))\sqrt{\frac{\Delta \log n}{m}}$$
-
-{\bf Discussion}
-
-\begin{itemize}
-\item If we consider $M/p_{init}$, then $\mathbb{E}_{a \in F}(a a^T) \approx I_n$
-\item $\mathbb{E}(\epsilon) = 0$, hence $\| M^T \epsilon \|_{\infty} \leq 2.5 \sqrt{\log n}$ should hold w.h.p
-\end{itemize}
-
-\end{block}
-
-\begin{block}{Restriced Isometry Property (RIP)}
+\begin{block}{Restricted Isometry Property (RIP)}
{\bf Definition}
\begin{itemize}
\item The Restricted Isometry Property (RIP) characterizes a quasi-orthonormality of the measurement matrix M on sparse vectors.
@@ -343,6 +334,20 @@ $$\| \hat x - x^* \|_2 \leq C (1 + \log^{3/2}(n))\sqrt{\frac{\Delta \log n}{m}}$
\end{block}
+\begin{block}{Theoretical Guarantees}
+
+In the case of small RIP constants $(\delta \leq 0.25)$ for $M$ and some assumption on the noise $\epsilon$:
+
+{\bf Theorem \cite{candes}}
+
+If node $i$ has degree $\Delta$ and $n_{\text{rows}}(M) \geq C_1 \mu \Delta \log n$, then, w.h.p.,
+
+$$\| \hat x - x^* \|_2 \leq C (1 + \log^{3/2}(n))\sqrt{\frac{\Delta \log n}{m}}$$
+
+
+\end{block}
+
+
%----------------------------------------------------------------------------------------
@@ -351,6 +356,12 @@ $$\| \hat x - x^* \|_2 \leq C (1 + \log^{3/2}(n))\sqrt{\frac{\Delta \log n}{m}}$
\begin{block}{Description of Experiments}
+\begin{tabular}{ c | c | c | c | c }
+& c = 100, &c = 1000,&c = 100, &c = 1000,\\
+& i = 0.1&i = 0.1& i = 0.05& i = 0.05\\
+ \hline
+ $\delta_4$ & 0.54 & 0.37 &0.43&0.23 \\
+\end{tabular}
\end{block}