Diffstat (limited to 'notes/presentation/beamer.tex')
| -rw-r--r-- | notes/presentation/beamer.tex | 381 |
1 file changed, 0 insertions, 381 deletions
diff --git a/notes/presentation/beamer.tex b/notes/presentation/beamer.tex
deleted file mode 100644
index 7d4b5c1..0000000
--- a/notes/presentation/beamer.tex
+++ /dev/null
@@ -1,381 +0,0 @@
-\documentclass[10pt]{beamer}
-
-\usepackage{amssymb, amsmath, graphicx, amsfonts, color}
-
-\title{Estimating a Graph's Parameters from Cascades}
-\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel}
-\date{}
-
-\begin{document}
-
-\begin{frame}
-\titlepage
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/drawing.pdf}
-\caption{A network}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/noedges_step1.pdf}
-\caption{Cascade 1: $t=0$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/noedges_step2.pdf}
-\caption{Cascade 1: $t=1$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/noedges_step3.pdf}
-\caption{Cascade 1: $t=2$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/noedges_step1_cascade2.pdf}
-\caption{Cascade 2: $t=0$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/noedges_step2_cascade2.pdf}
-\caption{Cascade 2: $t=1$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Example}
-\begin{figure}
-\includegraphics[scale=.25]{../images/noedges_step3_cascade2.pdf}
-\caption{Cascade 2: $t=2$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{Context}
-
-Notation:
-\begin{itemize}
-\item $({\cal G}, \theta)$: graph, parameters
-\item Cascade: diffusion process of a `behavior' on $({\cal G}, \theta)$
-\item $(X_t)_c$: set of `active' nodes at time $t$ for cascade $c$
-\end{itemize}
-
-\begin{table}
-\begin{tabular}{c c}
-Graph $\implies$ Cascades & Cascades $\implies$ Graph \\ \hline
-$({\cal G}, \theta)$ is known & $(X_t)_c$ is observed \\
-Predict $(X_t) \mid X_0$ & Recover $({\cal G}, \theta)$ \\
-\end{tabular}
-\end{table}
-
-Summary:
-\begin{itemize}
-\item Many algorithms \emph{require} knowledge of $({\cal G}, \theta)$
-\item {\bf Graph Inference} is the problem of \emph{learning} $({\cal G}, \theta)$
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\begin{block}{Decomposability}
-Learning the graph $\Leftrightarrow$ learning the parents of a single node
-\end{block}
-\end{frame}
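The decomposability claim rests on the conditional independence of node activations given the current state (noted again on the model-comparison slide further down). A minimal sketch of the underlying factorization, using the notation of the later slides and not part of the original deck:

% Hedged sketch: why learning decomposes node by node.
% Conditionally on x^t, the activations x_i^{t+1} are independent, and
% node i's term involves only its own parameter vector theta_i, so
\begin{equation*}
\log \mathbb{P}\big(x^{t+1} \mid x^t; \theta\big)
  = \sum_{i=1}^{m} \log \mathbb{P}\big(x_i^{t+1} \mid x^t; \theta_i\big).
\end{equation*}
% Maximizing over theta therefore splits into m independent problems,
% one per node: recovering theta_i recovers the parents of node i.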
-\begin{frame}
-\frametitle{Problem Statement}
-\begin{itemize}
-\pause
-\item Can we learn ${\cal G}$ from $(X_t)_c$?
-\pause
-\item How many cascades? How many steps in each cascade?
-\pause
-\item Can we learn $\theta$ from $(X_t)_c$?
-\pause
-\item How does the error decrease with $n_{\text{cascades}}$?
-\pause
-\item Are there graphs which are easy to learn? Harder to learn?
-\pause
-\item What kind of diffusion processes can we consider?
-\pause
-\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Notation}
-\begin{itemize}
-\item $n$: number of measurements
-\item $N$: number of cascades
-\item $m$: number of nodes
-\item $s$: degree of the node considered
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Related Work}
-
-\begin{itemize}
-\pause
-\item Can we learn ${\cal G}$ from $(X_t)_c$?
-\pause
-\\ {\color{blue} Yes}
-\pause
-\item How many cascades? How many steps in each cascade?
-\pause
-\\ {\color{blue} $\mathrm{poly}(s) \log m$ cascades}
-\pause
-\item Can we learn $\theta$ from $(X_t)_c$?
-\pause
-\\ {\color{blue} (?)}
-\pause
-\item How does the error decrease with $n_{\text{cascades}}$?
-\pause
-\\ {\color{blue} (?)}
-\pause
-\item Are there graphs which are easy to learn? Harder to learn?
-\pause
-\\ {\color{blue} Sparse graphs are easy}
-\pause
-\item What kind of diffusion processes can we consider?
-\pause
-\\ {\color{blue} IC Model (discrete and continuous)}
-\pause
-\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
-\pause
-\\ {\color{blue} (?)\dots $s \log(m/s)$ in a specific setting}
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Our Work}
-\begin{itemize}
-\pause
-\item Can we learn ${\cal G}$ from $(X_t)_c$?
-\pause
-\\ {\color{blue} Yes} $\rightarrow$ {\color{red} Yes}
-\pause
-\item How many cascades? How many steps in each cascade?
-\pause
-\\ {\color{blue} $\mathrm{poly}(s) \log m$ cascades} $\rightarrow$ {\color{red} $s\log m$ measurements}
-\pause
-\item Can we learn $\theta$ from $(X_t)_c$?
-\pause
-\\ {\color{blue} (?)} $\rightarrow$ {\color{red} Yes!}
-\pause
-\item How does the error decrease with $n_{\text{cascades}}$?
-\pause
-\\ {\color{blue} (?)} $\rightarrow$ {\color{red} ${\cal O}(\sqrt{s\log m/n})$}
-\pause
-\item Are there graphs which are easy to learn? Harder to learn?
-\pause
-\\ {\color{blue} Sparse graphs are easy} $\rightarrow$ {\color{red} Approx.\ sparsity is also easy}
-\pause
-\item What kind of diffusion processes can we consider?
-\pause
-\\ {\color{blue} IC Model (discrete and continuous)} $\rightarrow$ {\color{red} Large class of cascade models}
-\pause
-\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
-\pause
-\\ {\color{blue} $s \log(m/s)$ in a specific setting} $\rightarrow$ {\color{red} $s \log(m/s)$ for approx.\ sparse graphs}
-\end{itemize}
-
-\end{frame}
-
-\begin{frame}
-\frametitle{Voter Model}
-\begin{itemize}
-\pause
-\item {\color{red} Red} and {\color{blue} Blue} nodes. At every step, each node $i$ chooses one of its neighbors $j$ with probability $p_{j,i}$ and adopts $j$'s color at $t+1$
-\pause
-\item If {\color{blue} Blue} is the `contagious' state:
-\pause
-\begin{equation}
-\nonumber
-\mathbb{P}(i \in X^{t+1} \mid X^t) = \sum_{j \in {\cal N}(i)\cap X^t} p_{j,i} = X^t \cdot \theta_i
-\end{equation}
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-\begin{itemize}
-\pause
-\item Each `infected' node $i$ has a probability $p_{i,j}$ of infecting each of its neighbors $j$.
-\pause
-\item A node stays `infected' for a single turn. Then it becomes `inactive'.
-$$\mathbb{P}(j \text{ becomes infected at } t+1 \mid X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-\begin{align}
-\mathbb{P}(j\in X_{t+1} \mid X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\
-& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\
-& = 1 - \exp \left[ X_{t} \cdot \theta_{j} \right]
-\end{align}
-
-where $\theta_{i,j} := \log (1 - p_{i,j})$ and $\theta_j := (\theta_{i,j})_i$
-\\[5ex]
-\begin{itemize}
-\item Support of $\vec \theta$ $\Leftrightarrow$ support of $\vec p$
-\end{itemize}
-\end{frame}
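A quick numerical check of the rewriting above (an illustration, not from the original slides): take a node $j$ with two currently active neighbors and $p_{1,j} = p_{2,j} = 1/2$.

% Check: product form vs. exponential (GLM) form of the IC model,
% with two active neighbors and p_{1,j} = p_{2,j} = 1/2.
\begin{align*}
\mathbb{P}(j \in X_{t+1} \mid X_t)
  &= 1 - \Big(1 - \tfrac{1}{2}\Big)\Big(1 - \tfrac{1}{2}\Big) = \tfrac{3}{4}, \\
X_t \cdot \theta_j
  &= \log\tfrac{1}{2} + \log\tfrac{1}{2} = \log\tfrac{1}{4},
  \qquad 1 - \exp\big[ X_t \cdot \theta_j \big] = \tfrac{3}{4}.
\end{align*}

Both forms agree, as the derivation requires.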
-\begin{frame}
-\frametitle{Model Comparison}
-\begin{table}
-\centering
-\begin{tabular}{c | c}
-Voter Model & Indep. Casc. Model \\[1ex]
-\hline \\[.1ex]
-Markov & Markov \\[3ex]
-Indep.\ activation prob.\ given $X^t$ & Indep.\ activation prob.\ given $X^t$ \\[3ex]
-$\mathbb{P}(j\in X_{t+1} \mid X_{t}) = X_{t} \cdot \theta_{j}$ & $\mathbb{P}(j\in X_{t+1} \mid X_{t}) = 1 - \exp(X_{t} \cdot \theta_{j})$ \\[3ex]
-Always susceptible & Susceptible until infected \\
-\includegraphics[scale=.4]{../images/voter_model.pdf} & \includegraphics[scale=.3]{../images/icc_model.pdf} \\
-\end{tabular}
-\end{table}
-\end{frame}
-
-\begin{frame}
-\frametitle{Generalized Linear Cascade Models}
-\begin{definition}
-{\bf Generalized Linear Cascade Model} with inverse link function $f : \mathbb{R} \rightarrow [0,1]$:
-\begin{itemize}
-\item for each \emph{susceptible} node $j$ at time $t$, the indicator of $j \in X^{t+1}$ is, conditionally on $X^t$, a Bernoulli random variable with parameter $f(\theta_j \cdot X^t)$
-\end{itemize}
-\end{definition}
-\end{frame}
-
-\begin{frame}
-\frametitle{Sparse Recovery}
-\begin{figure}
-\includegraphics[scale=.6]{../images/sparse_recovery_illustration.pdf}
-\caption{$f(X\cdot\theta) = b$}
-\end{figure}
-\end{frame}
-
-\begin{frame}
-\frametitle{$\ell_1$-penalized Maximum Likelihood}
-\begin{itemize}
-\item Decomposable node by node
-\item Sum over susceptible steps ${\cal T}_i$
-\end{itemize}
-
-\begin{block}{Likelihood function}
-\begin{equation}
-\nonumber
-{\cal L}(\theta_i \mid x^1, \dots, x^n) = \frac{1}{|{\cal T}_i|} \sum_{t \in {\cal T}_i} x^{t+1}_i \log f(\theta_i \cdot x^t) + (1 - x^{t+1}_i) \log(1 - f(\theta_i \cdot x^t))
-\end{equation}
-\end{block}
-
-\begin{block}{Algorithm}
-\begin{equation}
-\nonumber
-\hat \theta_i \in \arg \max_{\theta_i} {\cal L}(\theta_i \mid x^1, \dots, x^n) - \lambda \|\theta_i\|_1
-\end{equation}
-\end{block}
-
-\end{frame}
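For intuition on how this program is solved in practice: assuming the inverse link $f$ is differentiable (true for the voter and IC links, not for the linear threshold link discussed at the end), the gradient of the per-node log-likelihood has the usual GLM form. A hedged sketch, not taken from the original deck:

% Gradient of the per-node log-likelihood, assuming f differentiable;
% by the chain rule applied to the likelihood on the slide above:
\begin{equation*}
\nabla_{\theta_i} {\cal L}(\theta_i \mid x^1, \dots, x^n)
  = \frac{1}{|{\cal T}_i|} \sum_{t \in {\cal T}_i}
    \left( \frac{x_i^{t+1}}{f(\theta_i \cdot x^t)}
         - \frac{1 - x_i^{t+1}}{1 - f(\theta_i \cdot x^t)} \right)
    f'(\theta_i \cdot x^t)\, x^t.
\end{equation*}
% With this gradient, the l1-penalized objective can be handled by
% standard proximal (soft-thresholding) methods.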
-\begin{frame}
-\frametitle{Main Result}
-\begin{theorem}
-Assume the restricted eigenvalue condition on the Hessian and certain regularity properties of $f$. Then $\exists C>0$, depending only on the properties of ${\cal G}$, such that with high probability:
-$$\| \theta^*_i - \hat \theta_i \|_2 \leq C\sqrt{\frac{s\log m}{n}}$$
-\end{theorem}
-
-\begin{corollary}
-By thresholding $\hat \theta_i$: if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$.
-\end{corollary}
-
-\end{frame}
-
-\begin{frame}
-\frametitle{Approximate Sparsity}
-\begin{itemize}
-\item $\theta^*_{\lceil s \rceil}$: best $s$-sparse approximation of $\theta^*$
-\item $\|\theta^* - \theta^*_{\lceil s \rceil} \|_1$: `tail' of $\theta^*$
-\end{itemize}
-\begin{theorem}
-Assume the restricted eigenvalue condition on the Hessian and certain regularity properties of $f$. Then $\exists C_1, C_2>0$, depending only on the properties of ${\cal G}$, such that with high probability:
-\begin{equation}
-\nonumber
-\|\hat \theta_i - \theta^*_i\|_2 \leq C_1 \sqrt{\frac{s\log m}{n}} + C_2 \sqrt[4]{\frac{s\log m}{n}}\|\theta^* - \theta^*_{\lceil s \rceil} \|_1
-\end{equation}
-\end{theorem}
-\end{frame}
-
-\begin{frame}
-\frametitle{Lower Bound}
-\begin{itemize}
-\item Under a correlation decay assumption for the IC model, ${\Omega}(s \log (m/s))$ cascades are necessary for graph reconstruction (Netrapalli and Sanghavi, SIGMETRICS'12)
-\item Adapting (Price \& Woodruff, STOC'12): in the approximately sparse case, any algorithm for any generalized linear cascade model such that
-$$\|\hat \theta - \theta^*\|_2 \leq C \|\theta^* - \theta^*_{\lceil s \rceil}\|_2$$
-requires ${\Omega}(s \log (m/s)/\log C)$ measurements
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{(RE) Assumptions}
-\begin{block}{Assumption on the Hessian}
-\begin{itemize}
-\item The Hessian must satisfy a `restricted eigenvalue' property, i.e.\ its smallest eigenvalue over sparse vectors is bounded away from $0$.
-\end{itemize}
-\end{block}
-
-\begin{block}{From Hessian to Gram Matrix}
-\begin{itemize}
-\item $\mathbb{E}[X X^T]$: `expected' Gram matrix of the observations
-\item $\mathbb{E}[X X^T]_{i,i}$: probability that node $i$ is infected
-\item $\mathbb{E}[X X^T]_{i,j}$: probability that nodes $i$ and $j$ are infected simultaneously
-\end{itemize}
-\end{block}
-\end{frame}
-
-\begin{frame}
-\frametitle{Future Work}
-
-\begin{block}{Linear Threshold Model}
-\begin{itemize}
-\item The linear threshold model is a generalized linear cascade with a non-differentiable inverse link function: $$\mathbb{P}(j \in X^{t+1} \mid X^t) = \mathbf{1}\{\theta_j \cdot X^t \geq t_j\}$$
-\end{itemize}
-\end{block}
-
-\begin{block}{Noisy Influence Maximization}
-\end{block}
-
-\begin{block}{Confidence Intervals}
-\end{block}
-
-\begin{block}{Active Learning}
-\end{block}
-\end{frame}
-
-\end{document}
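For reference, the inverse link functions that place each model discussed above inside the generalized linear cascade template; a summary assembled from the slides, not a verbatim part of the deleted file (the linear threshold entry uses the indicator form from the Future Work frame):

% Summary of inverse links, assembled from the slides above:
\begin{align*}
\text{Voter model:} \quad & f(z) = z, & \theta_{j,i} &= p_{j,i}, \\
\text{IC model:} \quad & f(z) = 1 - e^{z}, & \theta_{i,j} &= \log(1 - p_{i,j}), \\
\text{Linear threshold:} \quad & f(z) = \mathbf{1}\{z \geq t_j\} & & \text{(non-differentiable)}.
\end{align*}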
