Diffstat (limited to 'notes/presentation/beamer_2.tex')
-rw-r--r--  notes/presentation/beamer_2.tex | 240
 1 file changed, 240 insertions(+), 0 deletions(-)
diff --git a/notes/presentation/beamer_2.tex b/notes/presentation/beamer_2.tex
new file mode 100644
index 0000000..fda5b8d
--- /dev/null
+++ b/notes/presentation/beamer_2.tex
@@ -0,0 +1,240 @@
+\documentclass[10pt]{beamer}
+
+\usepackage{amssymb, amsmath, graphicx, amsfonts, color, amsthm}
+
+\newtheorem{proposition}{Proposition}
+
+\title{Learning from Diffusion Processes}
+\subtitle{What cascades really teach us about networks}
+\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel}
+
+\begin{document}
+
+\begin{frame}
+\titlepage
+\end{frame}
+
+\begin{frame}
+\frametitle{Introduction}
+
+%notes: Learn what? the network, the parameters of the diffusion process.
+
+\begin{table}
+\centering
+\begin{tabular}{c | c}
+Network & Diffusion process \\[1ex]
+\hline
+\\
+Airports & Infectious diseases (SARS) \\
+ & Delays (Eyjafjallajökull) \\[3ex]
+Social Network & Infectious diseases (flu) \\
+ & Behaviors (Ice Bucket Challenge) \\[3ex]
+Internet/WWW & Information diffusion (Memes, Pirated content \dots)
+\end{tabular}
+\end{table}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Introduction}
+
+What do we know? What do we want to know?
+
+\begin{itemize}
+\item We know the {\bf airport network} structure. We observe delays. Can we learn how delays propagate?
+\item We (sometimes) know the {\bf social network}. We observe behaviors. Can we learn who influences whom?
+\item We rarely know the {\bf blog network} structure. We observe discussions. Can we learn who learns from whom?
+\end{itemize}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+
+\begin{figure}
+\includegraphics[scale=.5]{figures/weighted_graph.png}
+\caption{Weighted, directed graph}
+\end{figure}
+
+\begin{itemize}
+\item Three states: susceptible, {\color{blue} infected}, {\color{red} dead}
+\item Each {\color{blue} infected} node $i$ has a probability $p_{i,j}$ of infecting each of its neighbors $j$.
+\item A node stays {\color{blue} infected} for one round, then it {\color{red} dies}.
+\item At $t=0$, each node is {\color{blue} infected} with probability $p_{\text{init}}$.
+\end{itemize}
+
+%Notes: Revisit the celebrated independent cascade model -> Influence maximisation is tractable, requires knowledge of weights
+
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+
+\begin{figure}
+\includegraphics[scale=.5]{figures/weighted_graph.png}
+\caption{Weighted, directed graph}
+\end{figure}
+
+\begin{itemize}
+\item If $3$ and $4$ are {\color{blue} infected} at time $t$, what is the probability that node $0$ is infected at $t+1$?
+$$1 - \mathbb{P}(\text{not infected}) = 1 - (1 - .45)(1-.04)$$
+\item In general, if $X_t$ is the set of {\color{blue} infected} nodes at time $t$, each infected neighbor attempts infection independently, so:
+$$\mathbb{P}(j \text{ becomes infected at } t+1 \mid X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$
+\end{itemize}
+
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+\begin{proposition}
+The Independent Cascade Model, conditioned on the previous time step, can be cast as a {\bf generalized linear model}:
+$$\mathbb{P}(j \in X_{t+1} | X_t) = f(X_t \cdot \theta_j)$$
+\end{proposition}
+
+\begin{proof}
+\begin{align*}
+\mathbb{P}(j\in X_{t+1}|X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\
+& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\
+& = 1 - \exp \left[ X_{t} \cdot \theta_{j}\right]
+\end{align*}
+where $X_t$ is identified with its indicator vector, $(\theta_{j})_i := \log(1 - p_{i,j})$ for $i \in {\cal N}(j)$ ($0$ otherwise), and $f(z) = 1 - e^{z}$.
+\end{proof}
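+
+% Illustrative check (assumes, as on the earlier slide, that 3 and 4 are node 0's only infected in-neighbors):
+For instance, with $(\theta_0)_3 = \log(1-.45)$ and $(\theta_0)_4 = \log(1-.04)$,
+$$f(X_t \cdot \theta_0) = 1 - e^{\log(.55) + \log(.96)} = 1 - (.55)(.96),$$
+recovering the direct computation from the earlier example.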
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+\begin{block}{Decomposability}
+\begin{itemize}
+\item Conditioned on $X_t$, the state of each node is sampled independently
+\item We can focus on learning vector $\theta_{j}$ for each node
+\end{itemize}
+\end{block}
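+
+% Sketch of the factorization behind decomposability (assumes only currently susceptible nodes can become infected at $t+1$):
+Conditioned on $X_t$, the joint update factorizes as
+$$\mathbb{P}(X_{t+1} = x \mid X_t) = \prod_{j \text{ susceptible}} f(X_t \cdot \theta_j)^{x_j} \bigl(1 - f(X_t \cdot \theta_j)\bigr)^{1 - x_j},$$
+so the log-likelihood splits into one term per node, each involving only $\theta_j$.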
+
+\begin{block}{Sparsity}
+\begin{itemize}
+\item $\theta_{i,j} = 0 \Leftrightarrow p_{i,j} = 0$
+\item If the graph is ``sparse'', then $\theta_j$ is sparse.
+\end{itemize}
+\end{block}
+\end{frame}
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Sparse Recovery}
+\begin{figure}
+\includegraphics[scale=.6]{../images/sparse_recovery_illustration.pdf}
+\caption{$f(X_t\cdot \theta_j) = \mathbb{P}(j \in X_{t+1}| X_t)$}
+\end{figure}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Learning from Diffusion Processes}
+\begin{block}{Problem Statement}
+\begin{itemize}
+\item We are given a graph ${\cal G}$ and a diffusion process parameterized by $\left((\theta_{i,j})_{i,j}, f, p_{\text{init}}\right)$.
+\item Suppose we {\bf only} observe $(X_t)$ from the diffusion process.
+\item Under what conditions can we learn $\theta_{i,j}$ for all $(i,j)$? How many observations $(X_t)$ are necessary?
+\end{itemize}
+\end{block}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Learning from Diffusion Processes}
+
+\begin{figure}
+\includegraphics[scale=.4]{../images/sparse_recovery_illustration.pdf}
+\caption{Generalized Linear Model for node $i$}
+\end{figure}
+
+\begin{block}{Likelihood Function}
+$${\cal L}(\theta_i| X_1, \dots, X_N) = \frac{1}{|{\cal T}_i|} \sum_{t \in {\cal T}_i} x^{t+1}_i \log f(\theta_i \cdot x^t) + (1 - x^{t+1}_i) \log(1 - f(\theta_i \cdot x^t))$$
+\end{block}
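+
+% Estimator sketch: a penalized log-likelihood is mentioned in the closing notes; the $\ell_1$ penalty weight $\lambda$ below is an assumed parameter.
+\begin{block}{Estimator (sketch)}
+$$\hat \theta_i \in \arg\max_{\theta_i} \; {\cal L}(\theta_i | X_1, \dots, X_N) - \lambda \|\theta_i\|_1$$
+\end{block}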
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Conditions}
+\begin{block}{On $f$}
+\begin{itemize}
+\item $\log f$ and $\log (1-f)$ must be concave
+\item $\log f$ and $\log (1-f)$ must have bounded gradients
+\end{itemize}
+\end{block}
+
+\begin{block}{On $(X_t)$}
+\begin{itemize}
+\item We want ${\cal H}$, the Hessian of ${\cal L}$ with respect to $\theta$, to be ``invertible''
+\item $ n < \dim(\theta) \implies {\cal H}$ is degenerate.
+\item {\bf Restricted Eigenvalue condition}: ${\cal H}$ is ``almost invertible'' on sparse vectors.
+\end{itemize}
+\end{block}
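+
+% Sanity check on the conditions for $f$, assuming nonzero weights are bounded below by some $p_{\min} > 0$:
+For the reparameterization $f(z) = 1 - e^{z}$ ($z \leq 0$) from the ICM proposition: $\log(1-f)(z) = z$ is linear, $\log f(z) = \log(1 - e^{z})$ is concave, and its gradient $e^{z}/(1 - e^{z})$ stays below $(1-p_{\min})/p_{\min}$ whenever $z \leq \log(1 - p_{\min})$, i.e.\ whenever at least one infected neighbor has weight at least $p_{\min}$.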
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Restricted Eigenvalue Condition}
+
+\begin{definition}
+Let $S$ be the set of parents of node $i$.
+$${\cal C} := \{ \Delta : \|\Delta\|_2 = 1, \|\Delta_{\bar S}\|_1 \leq 3 \| \Delta_S\|_1 \}$$
+${\cal H}$ verifies the $(S, \gamma)$-RE condition if:
+$$\forall \Delta \in {\cal C}, \quad \Delta^{T} {\cal H} \Delta \geq \gamma$$
+\end{definition}
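+
+% Intuition sketch, following the standard argument of \cite{Negahban:2009}:
+With $n < \dim(\theta)$ observations, ${\cal H}$ has rank at most $n$ and cannot be positive definite in every direction; the RE condition only asks for curvature $\gamma$ along the cone ${\cal C}$ of approximately $S$-sparse directions, which is where the error $\hat \theta_i - \theta^*_i$ of the $\ell_1$-penalized estimator lies with high probability.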
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Main Result}
+Adapting a result from \cite{Negahban:2009}, we have the following theorem:
+
+\begin{theorem}
+Assume
+\begin{itemize}
+\item the Hessian verifies the $(S,\gamma)$-RE condition
+\item $|(\log f)'| < \frac{1}{\alpha}$ and $|(\log (1- f))'| < \frac{1}{\alpha}$
+\end{itemize} then with high probability:
+$$\| \theta^*_i - \hat \theta_i \|_2 \leq \frac{6}{\gamma}\sqrt{\frac{s\log m}{\alpha n}}$$
+\end{theorem}
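+
+% Reading the bound (simple rearrangement of the theorem's inequality):
+Equivalently, $\| \theta^*_i - \hat \theta_i \|_2 \leq \epsilon$ is guaranteed once $n \geq \frac{36}{\gamma^2} \cdot \frac{s \log m}{\alpha \epsilon^2}$.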
+
+\begin{corollary}
+By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$.
+\end{corollary}
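+
+% Thresholding sketch (the threshold $\tau$ below is an assumed choice, not given on the slide):
+For instance, declaring an edge whenever $|\hat \theta_{i,j}| > \tau$ with $\tau$ equal to half the smallest nonzero $|\theta^*_{i,j}|$ recovers the support as soon as $\| \hat \theta_i - \theta^*_i \|_2 < \tau$.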
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+Comments: correlated measurements.
+TODO: still need to mention somewhere that we are using a penalized log-likelihood.
+The condition on $(X_t)$ is not great; we would like a condition on the parameters $\theta, p_{\text{init}}$ $\rightarrow$ slides about the expected Hessian.
+TODO: slide about the Gram matrix!
+\end{frame}
+
+\bibliography{../../paper/sparse}
+\bibliographystyle{apalike}
+
+\end{document}
\ No newline at end of file