Diffstat (limited to 'notes/presentation/beamer_2.tex')
-rw-r--r--   notes/presentation/beamer_2.tex   240
1 file changed, 240 insertions, 0 deletions
diff --git a/notes/presentation/beamer_2.tex b/notes/presentation/beamer_2.tex
new file mode 100644
index 0000000..fda5b8d
--- /dev/null
+++ b/notes/presentation/beamer_2.tex
@@ -0,0 +1,240 @@
+\documentclass[10pt]{beamer}
+
+\usepackage{amssymb, amsmath, graphicx, amsfonts, color, amsthm}
+
+\newtheorem{proposition}{Proposition}
+
+\title{Learning from Diffusion Processes}
+\subtitle{What cascades really teach us about networks}
+\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel}
+
+\begin{document}
+
+\begin{frame}
+\titlepage
+\end{frame}
+
+\begin{frame}
+\frametitle{Introduction}
+
+%notes: Learn what? The network, the parameters of the diffusion process.
+
+\begin{table}
+\centering
+\begin{tabular}{c | c}
+Network & Diffusion process \\[1ex]
+\hline
+\\
+Airports & Infectious diseases (SARS) \\
+ & Delays (Eyjafjallajökull) \\[3ex]
+Social Network & Infectious diseases (flu) \\
+ & Behaviors (Ice Bucket Challenge) \\[3ex]
+Internet/WWW & Information diffusion (Memes, Pirated content, \dots)
+\end{tabular}
+\end{table}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Introduction}
+
+What do we know? What do we want to know?
+
+\begin{itemize}
+\item We know the {\bf airport network} structure. We observe delays. Can we learn how delays propagate?
+\item We (sometimes) know the {\bf social network}. We observe behaviors. Can we learn who influences whom?
+\item We rarely know the {\bf blog network}. We observe discussions. Can we learn who learns from whom?
+\end{itemize}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+
+\begin{figure}
+\includegraphics[scale=.5]{figures/weighted_graph.png}
+\caption{Weighted, directed graph}
+\end{figure}
+
+\begin{itemize}
+\item Three states: susceptible, {\color{blue} infected}, {\color{red} dead}
+\item Each {\color{blue} infected} node $i$ has a probability $p_{i,j}$ of infecting each of its neighbors $j$.
+\item A node stays {\color{blue} infected} for one round, then it {\color{red} dies}.
+\item At $t=0$, each node is {\color{blue} infected} with probability $p_{\text{init}}$.
+\end{itemize}
+
+%Notes: Revisit the celebrated independent cascade model -> influence maximization is tractable, but requires knowledge of the weights
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+
+\begin{figure}
+\includegraphics[scale=.5]{figures/weighted_graph.png}
+\caption{Weighted, directed graph}
+\end{figure}
+
+\begin{itemize}
+\item If $3$ and $4$ are {\color{blue} infected} at time $t$, what is the probability that node $0$ is infected at $t+1$?
+$$1 - \mathbb{P}(\text{not infected}) = 1 - (1 - .45)(1 - .04)$$
+\item In general, with $X_t$ the set of {\color{blue} infected} nodes at time $t$:
+$$\mathbb{P}(j \text{ becomes infected at } t+1 \mid X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$
+\end{itemize}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+\begin{proposition}
+The independent cascade model, conditioned on the previous time step, can be cast as a {\bf generalized linear model}:
+$$\mathbb{P}(j \in X_{t+1} \mid X_t) = f(X_t \cdot \theta_j)$$
+\end{proposition}
+
+\begin{proof}
+\begin{align}
+\mathbb{P}(j \in X_{t+1} \mid X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\
+& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\
+& = 1 - \exp \left[ X_{t} \cdot \theta_{j} \right]
+\end{align}
+\end{proof}
+
+\end{frame}
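+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% A possible way to make the GLM explicit (sketch; it follows directly from the proof above,
+% reading X_t as the 0/1 indicator vector of the infected set).
+\begin{frame}
+\frametitle{Independent Cascade Model}
+\begin{block}{The GLM made explicit (sketch)}
+Viewing $X_t$ as its $0/1$ indicator vector, the proof gives
+$$\theta_{i,j} =
+\begin{cases}
+\log(1 - p_{i,j}) & \text{if } i \in {\cal N}(j), \\
+0 & \text{otherwise,}
+\end{cases}
+\qquad
+f(z) = 1 - e^{z},$$
+so that $\mathbb{P}(j \in X_{t+1} \mid X_t) = f(X_t \cdot \theta_j)$, and $p_{i,j} = 1 - e^{\theta_{i,j}}$ recovers the edge weights.
+\end{block}
+\end{frame}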
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+\begin{block}{Decomposability}
+\begin{itemize}
+\item Conditioned on $X_t$, the state of each node is sampled independently.
+\item We can focus on learning the vector $\theta_{j}$ for each node $j$ separately.
+\end{itemize}
+\end{block}
+
+\begin{block}{Sparsity}
+\begin{itemize}
+\item $\theta_{i,j} = 0 \Leftrightarrow p_{i,j} = 0$
+\item If the graph is ``sparse'', then $\theta_j$ is sparse.
+\end{itemize}
+\end{block}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Sparse Recovery}
+\begin{figure}
+\includegraphics[scale=.6]{../images/sparse_recovery_illustration.pdf}
+\caption{$f(X_t \cdot \theta) = \mathbb{P}(j \in X_{t+1} \mid X_t)$}
+\end{figure}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Learning from Diffusion Processes}
+\begin{block}{Problem Statement}
+\begin{itemize}
+\item We are given a graph ${\cal G}$ and a diffusion process parameterized by $\left((\theta_{i,j})_{i,j}, f, p_{\text{init}}\right)$.
+\item Suppose we {\bf only} observe $(X_t)$ from the diffusion process.
+\item Under what conditions can we learn $\theta_{i,j}$ for all $(i,j)$? How many observations $(X_t)$ are necessary?
+\end{itemize}
+\end{block}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Learning from Diffusion Processes}
+
+\begin{figure}
+\includegraphics[scale=.4]{../images/sparse_recovery_illustration.pdf}
+\caption{Generalized Linear Model for node $i$}
+\end{figure}
+
+\begin{block}{Likelihood Function}
+$${\cal L}(\theta \mid X_1, \dots, X_N) = \frac{1}{|{\cal T}_i|} \sum_{t \in {\cal T}_i} x^{t+1}_i \log f(\theta_i \cdot x^t) + (1 - x^{t+1}_i) \log\left(1 - f(\theta_i \cdot x^t)\right)$$
+\end{block}
+
+\end{frame}
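+
+%%%%%%%%%%%%%%%%%%%%%
+
+% Sketch: presumably the estimator analyzed below is the l1-penalized maximum likelihood
+% estimator (cf. the closing comment about penalized log-likelihood); the regularization
+% parameter \lambda is introduced here for illustration only.
+\begin{frame}
+\frametitle{Learning from Diffusion Processes}
+\begin{block}{Penalized Maximum Likelihood (sketch)}
+$$\hat \theta_i \in \arg\max_{\theta} \; {\cal L}(\theta \mid X_1, \dots, X_N) - \lambda \|\theta\|_1$$
+The $\ell_1$ penalty favors sparse $\hat \theta_i$, matching the sparsity of $\theta_j$ noted earlier.
+\end{block}
+\end{frame}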
+
+%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Conditions}
+\begin{block}{On $f$}
+\begin{itemize}
+\item $\log f$ and $\log (1-f)$ have to be concave
+\item $\log f$ and $\log (1-f)$ have bounded gradients
+\end{itemize}
+\end{block}
+
+\begin{block}{On $(X_t)$}
+\begin{itemize}
+\item We want ${\cal H}$, the Hessian of ${\cal L}$ with respect to $\theta$, to be ``invertible''.
+\item $n < \dim(\theta) \implies {\cal H}$ is degenerate.
+\item {\bf Restricted Eigenvalue condition} = ``almost invertible'' on sparse vectors.
+\end{itemize}
+\end{block}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Restricted Eigenvalue Condition}
+
+\begin{definition}
+Let $S$ be the set of parents of node $i$ and let
+$${\cal C} := \{ \Delta : \|\Delta\|_2 = 1, \ \|\Delta_{\bar S}\|_1 \leq 3 \|\Delta_S\|_1 \}.$$
+${\cal H}$ verifies the $(S, \gamma)$-RE condition if:
+$$\forall \Delta \in {\cal C}, \quad \Delta^\top {\cal H} \Delta \geq \gamma$$
+\end{definition}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Main Result}
+Adapting a result from \cite{Negahban:2009}, we have the following theorem:
+
+\begin{theorem}
+Assume
+\begin{itemize}
+\item the Hessian verifies the $(S,\gamma)$-RE condition,
+\item $|(\log f)'| < \frac{1}{\alpha}$ and $|(\log (1-f))'| < \frac{1}{\alpha}$,
+\end{itemize}
+then with high probability:
+$$\| \theta^*_i - \hat \theta_i \|_2 \leq \frac{6}{\gamma}\sqrt{\frac{s \log m}{\alpha n}}$$
+\end{theorem}
+
+\begin{corollary}
+By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$.
+\end{corollary}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+Comments: correlated measurements.
+TODO: still need to mention somewhere that we are doing penalized log-likelihood.
+The condition on $(X_t)$ is not great; we would like a condition on the parameters $\theta, p_{\text{init}}$ $\rightarrow$ slides about the expected Hessian.
+TODO: slide about the Gram matrix!
+\end{frame}
+
+\bibliography{../../paper/sparse}
+\bibliographystyle{apalike}
+
+\end{document}
\ No newline at end of file
