Diffstat (limited to 'notes/presentation/beamer_2.tex')
| -rw-r--r-- | notes/presentation/beamer_2.tex | 113 |
1 file changed, 78 insertions, 35 deletions
diff --git a/notes/presentation/beamer_2.tex b/notes/presentation/beamer_2.tex
index 0066511..ecc66ed 100644
--- a/notes/presentation/beamer_2.tex
+++ b/notes/presentation/beamer_2.tex
@@ -62,11 +62,17 @@ What do we know? What do we want to know?
 \begin{itemize}
 \item At $t=0$, nodes are in three possible states: susceptible, {\color{blue} infected}, {\color{red} dead}
-\item Each {\color{blue} infected} node $i$ has a ``one-shot'' probability $p_{i,j}$ of infecting each of his susceptible neighbors $j$ at $t+1$.
+\pause
+\item At time step $t$, each {\color{blue} infected} node $i$ has a ``one-shot'' probability $p_{i,j}$ of infecting each of its susceptible neighbors $j$ at $t+1$.
+\pause
 \item A node stays {\color{blue} infected} for one round, then it {\color{red} dies}
+\pause
 \item At $t=0$, each node is {\color{blue} infected} with probability $p_{\text{init}}$
+\pause
 \item The process continues until a random time $T$, when no more nodes can become infected.
+\pause
 \item $X_t$: set of {\color{blue} infected} nodes at time $t$
+\pause
 \item A {\bf cascade} is an instance of the ICC model: $(X_t)_{t=0,\dots,T}$
 \end{itemize}
@@ -158,7 +164,7 @@ $$(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$$
 \begin{block}{Decomposability}
 \begin{itemize}
-\item Conditioned on $X_t$, the state of node $j$ is sampled independently from node $j+1$
+\item Conditioned on $X_t$, the states of the nodes at the next time step are mutually independent
 \item We can learn the parents of each node independently
 \end{itemize}
 \end{block}
@@ -173,14 +179,17 @@ $$(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$$
-%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
-\frametitle{Sparse Recovery}
-\begin{figure}
-\includegraphics[scale=.6]{../images/sparse_recovery_illustration_copy.pdf}
-\caption{$\mathbb{P}(j \in X_{t+1}| X_t) = f(X_t\cdot \theta)$}
-\end{figure}
+\frametitle{Learning from Diffusion Processes}
+\begin{block}{Problem Statement}
+\begin{itemize}
+\item We are given a graph ${\cal G}$ and a diffusion process $f$ parameterized by $\left((\theta_j)_j, p_{\text{init}}\right)$.
+\item Suppose we {\bf only} observe $(X_t)$ from the diffusion process.
+\item Under what conditions can we learn $\theta_{i,j}$ for all $(i,j)$?
+\end{itemize}
+\end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%
@@ -188,24 +197,12 @@ $$(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$$
 \begin{frame}
 \frametitle{Sparse Recovery}
 \begin{figure}
-\includegraphics[scale=.6]{../images/sparse_recovery_illustration.pdf}
+\includegraphics[scale=.6]{../images/sparse_recovery_illustration_copy.pdf}
 \caption{$\mathbb{P}(j \in X_{t+1}| X_t) = f(X_t\cdot \theta)$}
 \end{figure}
 \end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Learning from Diffusion Processes}
-\begin{block}{Problem Statement}
-\begin{itemize}
-\item We are given a graph ${\cal G}$, and a diffusion process $f$ parameterized by $\left((\theta_j)_j, p_{\text{init}}\right)$.
-\item Suppose we {\bf only} observe $(X_t)$ from the diffusion process.
-\item Under what conditions can we learn $\theta_{i,j}$ for all $(i,j)$?
-\end{itemize}
-\end{block}
-\end{frame}
 %%%%%%%%%%%%%%%%%%%%%
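As a companion to the ICC frames in the hunks above, here is a minimal Python sketch of the dynamics they describe: nodes start infected with probability $p_{\text{init}}$, each infected node gets a single one-shot attempt at each susceptible neighbor, then dies, and the cascade stops at the random time $T$ when no new infections are possible. The function name `simulate_icc` and the nested-dict representation `p[i][j]` are illustrative assumptions, not code from the slides.

import random

def simulate_icc(nodes, p, p_init, rng=None):
    """One cascade (X_0, ..., X_T) of the ICC model.

    nodes:  iterable of node ids (assumed representation)
    p:      nested dict, p[i][j] = one-shot probability that infected
            node i infects its susceptible neighbor j at the next step
    p_init: probability that a node is infected at t = 0
    """
    rng = rng or random.Random()
    X_t = {i for i in nodes if rng.random() < p_init}   # initial infections
    susceptible = set(nodes) - X_t
    cascade = [set(X_t)]
    while X_t:
        # each infected node gets one shot at each susceptible neighbor,
        # then dies (it never returns to the susceptible set)
        X_next = {j for i in X_t for j, p_ij in p.get(i, {}).items()
                  if j in susceptible and rng.random() < p_ij}
        if not X_next:
            break   # the random stopping time T: no more nodes can be infected
        susceptible -= X_next
        cascade.append(set(X_next))
        X_t = X_next
    return cascade

For example, simulate_icc(range(5), {0: {1: 0.9, 2: 0.5}, 1: {3: 0.7}}, 0.3) returns one observed sequence $(X_t)$ of the kind the problem statement assumes is the only data available.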
@@ -225,7 +222,7 @@ $$(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$$
 \end{block}
 \begin{block}{MLE}
-For each node $i$, $$\theta_i \in \arg \max {\cal {L}}_i(\theta_i | X_1, X_2, \dots, X_n) - \lambda \|\theta_i\|_1$$
+For each node $i$, $$\theta_i \in \arg\max \frac{1}{n_i}{\cal L}_i(\theta_i | X_1, X_2, \dots, X_{n_i}) - \lambda \|\theta_i\|_1$$
 \end{block}
 \end{frame}
@@ -243,7 +240,7 @@ For each node $i$, $$\theta_i \in \arg \max {\cal {L}}_i(\theta_i | X_1, X_2, \d
 \begin{block}{On $(X_t)$}
 \begin{itemize}
-\item Want ${\cal H}$ be the hessian of ${\cal L}$ with respect to $\theta$ to be ``inversible''
+\item Want ${\cal H}$, the Hessian of ${\cal L}$ with respect to $\theta$, to be well-conditioned.
 \item $n < \dim(\theta) \implies {\cal H}$ is degenerate.
 \item {\bf Restricted Eigenvalue condition} = ``almost invertible'' on sparse vectors.
 \end{itemize}
@@ -257,7 +254,7 @@ For each node $i$, $$\theta_i \in \arg \max {\cal {L}}_i(\theta_i | X_1, X_2, \d
 \frametitle{Restricted Eigenvalue Condition}
 \begin{definition}
-Let $S$ be the set of parents of node $i$.
+For a set $S$, let
 $${\cal C} := \{ \Delta : \|\Delta\|_2 = 1, \|\Delta_{\bar S}\|_1 \leq 3 \| \Delta_S\|_1 \}$$
 ${\cal H}$ verifies the $(S, \gamma)$-RE condition if:
 $$\forall \Delta \in {\cal C}, \quad \Delta^T {\cal H} \Delta \geq \gamma$$
@@ -270,9 +267,9 @@ $$\forall \Delta \in {\cal C}, \quad \Delta^T {\cal H} \Delta \geq \gamma$$
 Adapting a result from \cite{Negahban:2009}, we have the following theorem:
 \begin{theorem}
-Assume
+For node $i$, assume
 \begin{itemize}
-\item the Hessian verifies the $(S,\gamma)$-RE condition
+\item the Hessian verifies the $(S,\gamma)$-RE condition, where $S$ is the set of parents of node $i$ (the support of $\theta_i$)
 \item $f$ and $1-f$ are log-concave
 \item $|(\log f)'| < \frac{1}{\alpha}$ and $|(\log(1-f))'| < \frac{1}{\alpha}$
 \end{itemize}
 then with high probability:
@@ -311,37 +308,83 @@ By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of
 \begin{frame}
 \frametitle{Restricted Eigenvalue Condition}
-\begin{block}{From Hessian to Expected Hessian}
+\begin{block}{From Hessian to Gram Matrix}
 \begin{itemize}
-\item If $n > C' s^2 \log m$ and $(S, \gamma)$-RE holds for $\mathbb{E}({\cal H})$, then $(S, \gamma/2)$-RE holds for ${\cal H}$
-\item $\mathbb{E}({\cal H})$ only depends on $p_{\text{init}}$ and $(\theta_j)_j$
+\item If $f$ and $1-f$ are strictly log-concave with constant $c$, then $(S, \gamma)$-RE for the Gram matrix $\frac{1}{n}X X^T$ implies $(S, c\gamma)$-RE for ${\cal H}$
 \end{itemize}
 \end{block}
-\begin{block}{From Hessian to Gram Matrix}
+\begin{block}{From Gram Matrix to Expected Gram Matrix}
 \begin{itemize}
-\item If $\log f$ and $\log 1 -f$ are strictly log-concave with constant $c$, then if $(S, \gamma)$-RE holds for $\mathbb{E}({\cal H})$, then $(S, c \gamma)$-RE holds for the gram matrix $X X^T$
-\item Gram Matrix has natural interpretation:
+\item If $n > C' s^2 \log m$ and $(S, \gamma)$-RE holds for $\mathbb{E}(\frac{1}{n}XX^T)$, then $(S, \gamma/2)$-RE holds for $\frac{1}{n}XX^T$ w.h.p.
+\item $\mathbb{E}(\frac{1}{n}XX^T)$ only depends on $p_{\text{init}}$ and $(\theta_j)_j$
+\end{itemize}
+\end{block}
+
+\begin{block}{Expected Gram Matrix}
 \begin{itemize}
 \item Diagonal: average number of times a node is infected
 \item Off-diagonal: average number of times a pair of nodes is infected \emph{together}
 \end{itemize}
-\end{itemize}
 \end{block}
 \end{frame}
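The MLE hunk above decomposes into one sparse estimation problem per node. A hedged sketch of that per-node objective follows, under the assumption that $f$ is the logistic function, in which case each subproblem is exactly an $\ell_1$-penalized logistic regression; scikit-learn's LogisticRegression is used purely for illustration, and the helper name `estimate_theta` is hypothetical.

import numpy as np
from sklearn.linear_model import LogisticRegression

def estimate_theta(cascades, m, j, lam=0.1):
    """l1-regularized MLE for theta_j, the incoming weights of node j.

    cascades: list of cascades, each a list of sets X_t of infected nodes
    m:        total number of nodes
    Only transitions where node j is still susceptible count as
    measurements (the n_j of the normalized objective on the slide).
    """
    rows, labels = [], []
    for cascade in cascades:
        hit = set()                    # nodes infected so far (infected or dead)
        for X_t, X_next in zip(cascade, cascade[1:]):
            hit |= X_t
            if j in hit:
                break                  # j is no longer susceptible
            x = np.zeros(m)
            x[list(X_t)] = 1.0         # indicator vector of X_t
            rows.append(x)
            labels.append(int(j in X_next))
    X, y = np.array(rows), np.array(labels)
    # sklearn minimizes ||w||_1 + C * sum of log-losses, so C = 1/(lam * n_j)
    # matches (1/n_j) L_j(theta_j) - lam * ||theta_j||_1 up to scaling
    clf = LogisticRegression(penalty="l1", solver="liblinear",
                             C=1.0 / (lam * len(y)))
    clf.fit(X, y)   # assumes both infected and non-infected outcomes occur
    return clf.coef_.ravel()

Thresholding the returned vector then recovers the support of $\theta_j$ (the parents of node $j$) under the sample-size conditions stated in the theorem.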
+%%%%%%%%%%%%%%%%%%%
+\begin{frame}
+\frametitle{Approximate Sparsity}
+\begin{itemize}
+\item $\theta^*_{\lceil s \rceil}$: the best $s$-sparse approximation to $\theta^*$
+\item $\|\theta^* - \theta^*_{\lceil s \rceil}\|_1$: the `tail' of $\theta^*$
+\end{itemize}
+\begin{theorem}
+Under similar conditions on $f$ and a relaxed RE condition, there exist $C_1, C_2 > 0$ such that with high probability:
+\begin{equation}
+\|\hat \theta_i - \theta^*_i\|_2 \leq C_1 \sqrt{\frac{s\log m}{n}} + C_2 \sqrt[4]{\frac{s\log m}{n}} \|\theta^* - \theta^*_{\lceil s \rceil}\|_1
+\end{equation}
+\end{theorem}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Lower Bound}
+\begin{itemize}
+\item Under a correlation decay assumption for the IC model, $\Omega(s \log (N/s))$ cascades are necessary for graph reconstruction (Netrapalli \& Sanghavi, SIGMETRICS'12)
+\item Adapting (Price \& Woodruff, STOC'12): in the approximately sparse case, any algorithm for any generalized linear cascade model such that
+$$\|\hat \theta - \theta^*\|_2 \leq C \|\theta^* - \theta^*_{\lceil s \rceil}\|_2$$
+requires $\Omega(s \log (n/s)/\log C)$ measurements.
+\end{itemize}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Voter Model}
+\begin{itemize}
+\pause
+\item {\color{red} Red} and {\color{blue} Blue} nodes. At every step, each node $i$ chooses one of its neighbors $j$ with probability $p_{j,i}$ and adopts $j$'s color at $t+1$
+\pause
+\item If {\color{blue} Blue} is the `contagious' state:
+\pause
+\begin{equation}
+\nonumber
+\mathbb{P}(i \in X^{t+1}|X^t) = \sum_{j \in {\cal N}(i)\cap X^t} p_{ji} = X^t \cdot \theta_i
+\end{equation}
+\end{itemize}
+\end{frame}
+
 %%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Future Work}
 \begin{itemize}
-\item Better lower bounds
-\item Active Learning
 \item Lower bound the restricted eigenvalues of the expected Gram matrix
 \item Confidence Intervals
 \item Show that $n > C' s \log m$ measurements are necessary w.r.t. the expected Hessian.
 \item Linear Threshold model $\rightarrow$ 1-bit compressed sensing formulation
+\item Better lower bounds
+\item Active Learning
 \end{itemize}
 \end{frame}
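The Voter Model frame added above reduces its dynamics to the same generalized linear form $X^t \cdot \theta_i$ as the cascade models. A minimal sketch of one step, again with an assumed nested dict where `p[i][j]` plays the role of the slide's $p_{j,i}$ (the probability that $i$ copies neighbor $j$, rows summing to one); the name `voter_step` is hypothetical.

import random

def voter_step(nodes, p, blue, rng=None):
    """One voter-model step: each node i picks a neighbor j with
    probability p[i][j] and adopts j's color at t+1.

    blue: set of Blue nodes (the `contagious' state).  Marginally,
    P(i Blue at t+1 | X^t) = sum of p[i][j] over Blue neighbors j,
    i.e. the linear form X^t . theta_i from the slide.
    """
    rng = rng or random.Random()
    new_blue = set()
    for i in nodes:
        neighbors = list(p[i])
        weights = [p[i][j] for j in neighbors]
        chosen = rng.choices(neighbors, weights=weights)[0]  # copied neighbor
        if chosen in blue:
            new_blue.add(i)
    return new_blue

Because the conditional infection probability is linear in the indicator of $X^t$, the same per-node sparse recovery machinery applies to this model as well.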
