Diffstat (limited to 'notes/presentation/beamer.tex')
| -rw-r--r-- | notes/presentation/beamer.tex | 381 |
1 file changed, 0 insertions, 381 deletions
diff --git a/notes/presentation/beamer.tex b/notes/presentation/beamer.tex
deleted file mode 100644
index 7d4b5c1..0000000
--- a/notes/presentation/beamer.tex
+++ /dev/null
@@ -1,381 +0,0 @@
-\documentclass[10pt]{beamer}
-
-\usepackage{amssymb, amsmath, graphicx, amsfonts, color}
-
-\title{Estimating a Graph's Parameters from Cascades}
-\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel}
-\date{}
-
-\begin{document}
-
-\begin{frame}
-\titlepage
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/drawing.pdf}
-\caption{A network}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/noedges_step1.pdf}
-\caption{Cascade 1: $t=0$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/noedges_step2.pdf}
-\caption{Cascade 1: $t=1$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/noedges_step3.pdf}
-\caption{Cascade 1: $t=2$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/noedges_step1_cascade2.pdf}
-\caption{Cascade 2: $t=0$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/noedges_step2_cascade2.pdf}
-\caption{Cascade 2: $t=1$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/noedges_step3_cascade2.pdf}
-\caption{Cascade 2: $t=2$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Context}
-
-Notation:
-\begin{itemize}
-\item $({\cal G}, \theta)$: graph, parameters
-\item Cascade: diffusion process of a `behavior' on $({\cal G}, \theta)$
-\item $(X_t)_c$: set of `active' nodes at time $t$ for cascade $c$
-\end{itemize}
-
-\begin{table}
-\begin{tabular}{c c}
-Graph $\implies$ Cascades & Cascades $\implies$ Graph \\ \hline
-$({\cal G}, \theta)$ is known & $(X_t)_c$ is observed \\
-Predict $(X_t) \mid X_0$ & Recover $({\cal G}, \theta)$ \\
-\end{tabular}
-\end{table}
-
-Summary:
-\begin{itemize}
-\item Many algorithms \emph{require} knowledge of $({\cal G}, \theta)$
-\item {\bf Graph Inference} is the problem of \emph{learning} $({\cal G}, \theta)$
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\begin{block}{Decomposability}
-Learning the graph $\Leftrightarrow$ learning the parents of a single node
-\end{block}
-\end{frame}
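The decomposability claim rests on the conditional independence of node activations given the current state (noted again on the model-comparison slide further down). A minimal sketch of the underlying factorization, using the notation of the later slides and not part of the original deck:

% Hedged sketch: why learning decomposes node by node.
% Conditionally on x^t, the activations x_i^{t+1} are independent, and
% node i's term involves only its own parameter vector theta_i, so
\begin{equation*}
\log \mathbb{P}\big(x^{t+1} \mid x^t; \theta\big)
  = \sum_{i=1}^{m} \log \mathbb{P}\big(x_i^{t+1} \mid x^t; \theta_i\big).
\end{equation*}
% Maximizing over theta therefore splits into m independent problems,
% one per node: recovering theta_i recovers the parents of node i.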
-\begin{frame}
-\frametitle{Problem Statement}
-\begin{itemize}
-\pause
-\item Can we learn ${\cal G}$ from $(X_t)_c$?
-\pause
-\item How many cascades? How many steps in each cascade?
-\pause
-\item Can we learn $\theta$ from $(X_t)_c$?
-\pause
-\item How does the error decrease with $n_{\text{cascades}}$?
-\pause
-\item Are there graphs which are easy to learn? Harder to learn?
-\pause
-\item What kind of diffusion processes can we consider?
-\pause
-\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Notation}
-\begin{itemize}
-\item $n$: number of measurements
-\item $N$: number of cascades
-\item $m$: number of nodes
-\item $s$: degree of the node considered
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Related Work}
-
-\begin{itemize}
-\pause
-\item Can we learn ${\cal G}$ from $(X_t)_c$?
-\pause
-\\ {\color{blue} Yes}
-\pause
-\item How many cascades? How many steps in each cascade?
-\pause
-\\ {\color{blue} $\mathrm{poly}(s) \log m$ cascades}
-\pause
-\item Can we learn $\theta$ from $(X_t)_c$?
-\pause
-\\ {\color{blue} (?)}
-\pause
-\item How does the error decrease with $n_{\text{cascades}}$?
-\pause
-\\ {\color{blue} (?)}
-\pause
-\item Are there graphs which are easy to learn? Harder to learn?
-\pause
-\\ {\color{blue} Sparse graphs are easy}
-\pause
-\item What kind of diffusion processes can we consider?
-\pause
-\\ {\color{blue} IC Model (discrete and continuous)}
-\pause
-\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
-\pause
-\\ {\color{blue} (?)\dots $s \log(m/s)$ in a specific setting}
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Our Work}
-\begin{itemize}
-\pause
-\item Can we learn ${\cal G}$ from $(X_t)_c$?
-\pause
-\\ {\color{blue} Yes} $\rightarrow$ {\color{red} Yes}
-\pause
-\item How many cascades? How many steps in each cascade?
-\pause
-\\ {\color{blue} $\mathrm{poly}(s) \log m$ cascades} $\rightarrow$ {\color{red} $s\log m$ measurements}
-\pause
-\item Can we learn $\theta$ from $(X_t)_c$?
-\pause
-\\ {\color{blue} (?)} $\rightarrow$ {\color{red} Yes!}
-\pause
-\item How does the error decrease with $n_{\text{cascades}}$?
-\pause
-\\ {\color{blue} (?)} $\rightarrow$ {\color{red} ${\cal O}(\sqrt{s\log m/n})$}
-\pause
-\item Are there graphs which are easy to learn? Harder to learn?
-\pause
-\\ {\color{blue} Sparse graphs are easy} $\rightarrow$ {\color{red} Approx.\ sparsity is also easy}
-\pause
-\item What kind of diffusion processes can we consider?
-\pause
-\\ {\color{blue} IC Model (discrete and continuous)} $\rightarrow$ {\color{red} Large class of cascade models}
-\pause
-\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
-\pause
-\\ {\color{blue} $s \log(m/s)$ in a specific setting} $\rightarrow$ {\color{red} $s \log(m/s)$ for approx.\ sparse graphs}
-\end{itemize}
-
-\end{frame}
-
-\begin{frame}
-\frametitle{Voter Model}
-\begin{itemize}
-\pause
-\item {\color{red} Red} and {\color{blue} Blue} nodes. At every step, each node $i$ chooses one of its neighbors $j$ with probability $p_{j,i}$ and adopts $j$'s color at $t+1$
-\pause
-\item If {\color{blue} Blue} is the `contagious' state:
-\pause
-\begin{equation}
-\nonumber
-\mathbb{P}(i \in X^{t+1} \mid X^t) = \sum_{j \in {\cal N}(i)\cap X^t} p_{j,i} = X^t \cdot \theta_i
-\end{equation}
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-\begin{itemize}
-\pause
-\item Each `infected' node $i$ has a probability $p_{i,j}$ of infecting each of its neighbors $j$.
-\pause
-\item A node stays `infected' for a single turn. Then it becomes `inactive'.
-$$\mathbb{P}(j \text{ becomes infected at } t+1 \mid X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-\begin{align}
-\mathbb{P}(j\in X_{t+1} \mid X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\
-& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\
-& = 1 - \exp \left[ X_{t} \cdot \theta_{j} \right]
-\end{align}
-
-where $\theta_{i,j} := \log (1 - p_{i,j})$ and $\theta_j := (\theta_{i,j})_i$
-\\[5ex]
-\begin{itemize}
-\item Support of $\vec \theta$ $\Leftrightarrow$ support of $\vec p$
-\end{itemize}
-\end{frame}
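A quick numerical check of the rewriting above (an illustration, not from the original slides): take a node $j$ with two currently active neighbors and $p_{1,j} = p_{2,j} = 1/2$.

% Check: product form vs. exponential (GLM) form of the IC model,
% with two active neighbors and p_{1,j} = p_{2,j} = 1/2.
\begin{align*}
\mathbb{P}(j \in X_{t+1} \mid X_t)
  &= 1 - \Big(1 - \tfrac{1}{2}\Big)\Big(1 - \tfrac{1}{2}\Big) = \tfrac{3}{4}, \\
X_t \cdot \theta_j
  &= \log\tfrac{1}{2} + \log\tfrac{1}{2} = \log\tfrac{1}{4},
  \qquad 1 - \exp\big[ X_t \cdot \theta_j \big] = \tfrac{3}{4}.
\end{align*}

Both forms agree, as the derivation requires.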
-\begin{frame}
-\frametitle{Model Comparison}
-\begin{table}
-\centering
-\begin{tabular}{c | c}
-Voter Model & Indep. Casc. Model \\[1ex]
-\hline \\[.1ex]
-Markov & Markov \\[3ex]
-Indep.\ activation prob.\ given $X^t$ & Indep.\ activation prob.\ given $X^t$ \\[3ex]
-$\mathbb{P}(j\in X_{t+1} \mid X_{t}) = X_{t} \cdot \theta_{j}$ & $\mathbb{P}(j\in X_{t+1} \mid X_{t}) = 1 - \exp(X_{t} \cdot \theta_{j})$ \\[3ex]
-Always susceptible & Susceptible until infected \\
-\includegraphics[scale=.4]{../images/voter_model.pdf} & \includegraphics[scale=.3]{../images/icc_model.pdf} \\
-\end{tabular}
-\end{table}
-\end{frame}
-
-\begin{frame}
-\frametitle{Generalized Linear Cascade Models}
-\begin{definition}
-{\bf Generalized Linear Cascade Model} with inverse link function $f : \mathbb{R} \rightarrow [0,1]$:
-\begin{itemize}
-\item for each \emph{susceptible} node $j$ at time $t$, the indicator of $j \in X^{t+1}$ is, conditionally on $X^t$, a Bernoulli random variable with parameter $f(\theta_j \cdot X^t)$
-\end{itemize}
-\end{definition}
-\end{frame}
-
-\begin{frame}
-\frametitle{Sparse Recovery}
-\begin{figure}
-\includegraphics[scale=.6]{../images/sparse_recovery_illustration.pdf}
-\caption{$f(X\cdot\theta) = b$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{$\ell_1$-penalized Maximum Likelihood}
-\begin{itemize}
-\item Decomposable node by node
-\item Sum over susceptible steps ${\cal T}_i$
-\end{itemize}
-
-\begin{block}{Likelihood function}
-\begin{equation}
-\nonumber
-{\cal L}(\theta_i \mid x^1, \dots, x^n) = \frac{1}{|{\cal T}_i|} \sum_{t \in {\cal T}_i} x^{t+1}_i \log f(\theta_i \cdot x^t) + (1 - x^{t+1}_i) \log(1 - f(\theta_i \cdot x^t))
-\end{equation}
-\end{block}
-
-\begin{block}{Algorithm}
-\begin{equation}
-\nonumber
-\hat \theta_i \in \arg \max_{\theta_i} {\cal L}(\theta_i \mid x^1, \dots, x^n) - \lambda \|\theta_i\|_1
-\end{equation}
-\end{block}
-
-\end{frame}
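For intuition on how this program is solved in practice: assuming the inverse link $f$ is differentiable (true for the voter and IC links, not for the linear threshold link discussed at the end), the gradient of the per-node log-likelihood has the usual GLM form. A hedged sketch, not taken from the original deck:

% Gradient of the per-node log-likelihood, assuming f differentiable;
% by the chain rule applied to the likelihood on the slide above:
\begin{equation*}
\nabla_{\theta_i} {\cal L}(\theta_i \mid x^1, \dots, x^n)
  = \frac{1}{|{\cal T}_i|} \sum_{t \in {\cal T}_i}
    \left( \frac{x_i^{t+1}}{f(\theta_i \cdot x^t)}
         - \frac{1 - x_i^{t+1}}{1 - f(\theta_i \cdot x^t)} \right)
    f'(\theta_i \cdot x^t)\, x^t.
\end{equation*}
% With this gradient, the l1-penalized objective can be handled by
% standard proximal (soft-thresholding) methods.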
-\begin{frame}
-\frametitle{Main Result}
-\begin{theorem}
-Assume the restricted eigenvalue condition on the Hessian and certain regularity properties of $f$. Then $\exists C>0$, depending only on the properties of ${\cal G}$, such that with high probability:
-$$\| \theta^*_i - \hat \theta_i \|_2 \leq C\sqrt{\frac{s\log m}{n}}$$
-\end{theorem}
-
-\begin{corollary}
-By thresholding $\hat \theta_i$: if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$.
-\end{corollary}
-
-\end{frame}
-
-\begin{frame}
-\frametitle{Approximate Sparsity}
-\begin{itemize}
-\item $\theta^*_{\lceil s \rceil}$: best $s$-sparse approximation of $\theta^*$
-\item $\|\theta^* - \theta^*_{\lceil s \rceil} \|_1$: `tail' of $\theta^*$
-\end{itemize}
-\begin{theorem}
-Assume the restricted eigenvalue condition on the Hessian and certain regularity properties of $f$. Then $\exists C_1, C_2>0$, depending only on the properties of ${\cal G}$, such that with high probability:
-\begin{equation}
-\nonumber
-\|\hat \theta_i - \theta^*_i\|_2 \leq C_1 \sqrt{\frac{s\log m}{n}} + C_2 \sqrt[4]{\frac{s\log m}{n}}\|\theta^* - \theta^*_{\lceil s \rceil} \|_1
-\end{equation}
-\end{theorem}
-\end{frame}
-
-\begin{frame}
-\frametitle{Lower Bound}
-\begin{itemize}
-\item Under a correlation decay assumption for the IC model, ${\Omega}(s \log (m/s))$ cascades are necessary for graph reconstruction (Netrapalli and Sanghavi, SIGMETRICS'12)
-\item Adapting (Price \& Woodruff, STOC'12): in the approximately sparse case, any algorithm for any generalized linear cascade model such that
-$$\|\hat \theta - \theta^*\|_2 \leq C \|\theta^* - \theta^*_{\lceil s \rceil}\|_2$$
-requires ${\Omega}(s \log (m/s)/\log C)$ measurements
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{(RE) Assumptions}
-\begin{block}{Assumption on the Hessian}
-\begin{itemize}
-\item The Hessian must satisfy a `restricted eigenvalue' property, i.e.\ its smallest eigenvalue over sparse vectors is bounded away from $0$.
-\end{itemize}
-\end{block}
-
-\begin{block}{From Hessian to Gram Matrix}
-\begin{itemize}
-\item $\mathbb{E}[X X^T]$: `expected' Gram matrix of the observations
-\item $\mathbb{E}[X X^T]_{i,i}$: probability that node $i$ is infected
-\item $\mathbb{E}[X X^T]_{i,j}$: probability that nodes $i$ and $j$ are infected simultaneously
-\end{itemize}
-\end{block}
-\end{frame}
-
-\begin{frame}
-\frametitle{Future Work}
-
-\begin{block}{Linear Threshold Model}
-\begin{itemize}
-\item The linear threshold model is a generalized linear cascade with a non-differentiable inverse link function: $$\mathbb{P}(j \in X^{t+1} \mid X^t) = \mathbf{1}\{\theta_j \cdot X^t \geq t_j\}$$
-\end{itemize}
-\end{block}
-
-\begin{block}{Noisy Influence Maximization}
-\end{block}
-
-\begin{block}{Confidence Intervals}
-\end{block}
-
-\begin{block}{Active Learning}
-\end{block}
-\end{frame}
-
-\end{document}
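For reference, the inverse link functions that place each model discussed above inside the generalized linear cascade template; a summary assembled from the slides, not a verbatim part of the deleted file (the linear threshold entry uses the indicator form from the Future Work frame):

% Summary of inverse links, assembled from the slides above:
\begin{align*}
\text{Voter model:} \quad & f(z) = z, & \theta_{j,i} &= p_{j,i}, \\
\text{IC model:} \quad & f(z) = 1 - e^{z}, & \theta_{i,j} &= \log(1 - p_{i,j}), \\
\text{Linear threshold:} \quad & f(z) = \mathbf{1}\{z \geq t_j\} & & \text{(non-differentiable)}.
\end{align*}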
