1 files changed, 0 insertions, 397 deletions
diff --git a/notes/presentation/beamer_2.tex b/notes/presentation/beamer_2.tex
deleted file mode 100644
index ecc66ed..0000000
--- a/notes/presentation/beamer_2.tex
+++ /dev/null
@@ -1,397 +0,0 @@
-\documentclass[10pt]{beamer}
-
-\usepackage{amssymb, amsmath, graphicx, amsfonts, color, amsthm, wasysym}
-
-\newtheorem{proposition}{Proposition}
-
-\title{Learning from Diffusion processes}
-\subtitle{What cascades really teach us about networks}
-\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel}
-
-\begin{document}
-
-\begin{frame}
-\titlepage
-\end{frame}
-
-\begin{frame}
-\frametitle{Introduction}
-
-%notes: Learn what? the network, the parameters of the diffusion process.
-
-\begin{table}
-\centering
-\begin{tabular}{c | c}
-Network & Diffusion process \\[1ex]
-\hline
-\\
-Airports & Infectious diseases (SARS) \\
- & Delays (Eyjafjallajökull) \\[3ex]
-Social Network & Infectious diseases (flu) \\
- & Behaviors (Ice Bucket Challenge) \\[3ex]
-Internet/WWW & Information diffusion (Memes, Pirated content \dots)
-\end{tabular}
-\end{table}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Introduction}
-
-What do we know? What do we want to know?
-
-\begin{itemize}
-\item We know the {\bf airport network} structure. We observe delays. Can we learn how delays propagate?
-\item  We (sometimes) know the {\bf social network}. We observe behaviors. Can we learn who influences whom?
-\item Rarely know {\bf blog network}. We observe discussions. Can we learn who learns from whom?
-\end{itemize}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-
-\begin{figure}
-\includegraphics[scale=.3]{figures/weighted_graph.png}
-\caption{Weighted, directed graph}
-\end{figure}
-
-\begin{itemize}
-\item At $t=0$, nodes are in three possible states: susceptible, {\color{blue} infected}, {\color{red} dead}
-\pause
-\item At time step $t$, each {\color{blue} infected} node $i$ has a ``one-shot'' probability $p_{i,j}$ of infecting each of his susceptible neighbors $j$ at $t+1$.
-\pause
-\item A node stays {\color{blue}  infected} for one round, then it {\color{red} dies}
-\pause
-\item At $t=0$, each node is {\color{blue} infected} with probability $p_{\text{init}}$
-\pause
-\item Process continues until random time $T$ when no more nodes can become infected. 
-\pause
-\item $X_t$: set of {\color{blue}  infected} nodes at time $t$
-\pause
-\item A {\bf cascade} is an instance of the ICC model: $(X_t)_{t=0, t=T}$
-\end{itemize}
-
-%Notes: Revisit the celebrated independent cascade model -> Influence maximisation is tractable, requires knowledge of weights
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-
-\begin{figure}
-\includegraphics[scale=.5]{figures/weighted_graph.png}
-\caption{Weighted, directed graph}
-\end{figure}
-
-\begin{block}{Example}
-\begin{itemize}
-\item At $t=0$, the {\color{orange} orange} node is infected, and the two other nodes are susceptible. $X_0 = $({\color{orange} orange})
-\item At $t=1$, the {\color{orange}} node infects the {\color{blue} blue} node and fails to infect the {\color{green} green} node. The {\color{orange} orange} node dies. $X_1 = $({\color{blue} blue})
-\item At $t=2$, {\color{blue} blue} dies. $X_2 = \emptyset$
-\end{itemize}
-\end{block}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-
-\begin{figure}
-\includegraphics[scale=.5]{figures/weighted_graph.png}
-\caption{Weighted, directed graph}
-\end{figure}
-
-\begin{itemize}
-\item If the {\color{orange} orange} node and the {\color{green} green} node are infected at $t=0$, what is the probability that the {\color{blue} blue} node is infected at $t=1$?
-$$1 - \mathbb{P}(\text{not infected}) = 1 - (1 - .45)(1-.04)$$
-\end{itemize}
-
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-\begin{figure}
-\includegraphics[scale=.5]{figures/weighted_graph.png}
-\caption{Weighted, directed graph}
-\end{figure}
-
-\begin{itemize}
-\item In general, for each susceptible node $j$:
-$$\mathbb{P}(j \text{ becomes infected at t+1}|X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$
-\end{itemize}
-
-\end{frame}
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-For each susceptible node $j$, the event that it becomes {\color{blue} infected} conditioned on previous time step is a Bernoulli:
-$$(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$$
-\begin{itemize}
-\item $\theta_{i,j} := \log(1 - p_{i,j})$
-\item $\theta_j := (0, 0, 0, \theta_{4,j}, 0 \dots, \theta_{k,j}, \dots)$
-\item $f : x \mapsto 1 - e^x$
-\begin{align*}
-\mathbb{P}(j\in X_{t+1}|X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\
-& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\
-& = 1 - \exp \left[ X_{t} \cdot \theta_{j}\right]
-\end{align*}
-\end{itemize}
-\end{frame}
-
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-For each susceptible node $j$, the event that it becomes {\color{blue} infected} conditioned on previous time step is a Bernoulli:
-$$(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$$
-
-\begin{block}{Decomposability}
-\begin{itemize}
-\item Conditioned on $X_t$, the state of the nodes at the next time step are mutually independent
-\item We can learn the parents of each node independently
-\end{itemize}
-\end{block}
-
-\begin{block}{Sparsity}
-\begin{itemize}
-\item $\theta_{i,j} = 0 \Leftrightarrow \log(1 - p_{i,j}) = 0 \Leftrightarrow p_{i,j} = 0$
-\item If graph is ``sparse'', then $p_{j}$ is sparse, then $\theta_j$ is sparse.
-\end{itemize}
-\end{block}
-\end{frame}
-
-
-
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Learning from Diffusion Processes}
-\begin{block}{Problem Statement}
-\begin{itemize}
-\item We are given a graph ${\cal G}$, and a diffusion process $f$ parameterized by $\left((\theta_j)_j, p_{\text{init}}\right)$. 
-\item Suppose we {\bf only} observe $(X_t)$ from the diffusion process.
-\item Under what conditions can we learn $\theta_{i,j}$ for all $(i,j)$?
-\end{itemize}
-\end{block}
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Sparse Recovery}
-\begin{figure}
-\includegraphics[scale=.6]{../images/sparse_recovery_illustration_copy.pdf}
-\caption{$\mathbb{P}(j \in X_{t+1}| X_t) = f(X_t\cdot \theta)$}
-\end{figure}
-\end{frame}
-
-
-
-%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Learning from Diffusion Processes}
-
-% \begin{figure}
-% \includegraphics[scale=.4]{../images/sparse_recovery_illustration.pdf}
-% \caption{Generalized Cascade Model for node $i$}
-% \end{figure}
-
-\begin{block}{Likelihood Function}
-\begin{align*}
-{\cal L}(\theta_1, \dots, \theta_m| X_1, \dots  X_n) = \sum_{i=1}^m \sum_{t} & X_{t+1}^i \log f(\theta_i \cdot X_t) + \\
-& (1 - X_{t+1}^i) \log(1 - f(\theta_i \cdot X_t))
-\end{align*}
-\end{block}
-
-\begin{block}{MLE}
-For each node $i$, $$\theta_i \in \arg \max \frac{1}{n_i}{\cal {L}}_i(\theta_i | X_1, X_2, \dots, X_{n_i}) - \lambda \|\theta_i\|_1$$
-\end{block}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Conditions}
-\begin{block}{On $f$}
-\begin{itemize}
-\item $\log f$ and $\log (1-f)$ have to be concave
-\item $\log f$ and $\log (1-f)$ have to have bounded gradient
-\end{itemize}
-\end{block}
-
-\begin{block}{On $(X_t)$}
-\begin{itemize}
-\item Want ${\cal H}$, the hessian of ${\cal L}$ with respect to $\theta$, to be well-conditioned.
-\item $ n < dim(\theta) \implies {\cal H}$ is degenerate.
-\item {\bf Restricted Eigenvalue condition} = ``almost invertible'' on sparse vectors. 
-\end{itemize}
-\end{block}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Restricted Eigenvalue Condition}
-
-\begin{definition}
-For a set $S$,
-$${\cal C} := \{ \Delta : \|\Delta\|_2 = 1, \|\Delta_{\bar S}\|_1 \leq 3 \| \Delta_S\|_1 \}$$
-${\cal H}$ verifies the $(S, \gamma)$-RE condition if:
-$$\forall \Delta \in {\cal C}, \Delta {\cal H} \Delta \geq \gamma$$
-\end{definition}
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Main Result}
-Adapting a result from \cite{Negahban:2009}, we have the following theorem:
-
-\begin{theorem}
-For node $i$, assume
-\begin{itemize}
-\item the Hessian verifies the $(S,\gamma)$-RE condition where $S$ is the set of parents of node $i$ (support of $\theta_i$)
-\item $f$ and $1-f$ are log-concave
-\item $|(\log f)'| < \frac{1}{\alpha}$ and $|(\log 1- f)'| < \frac{1}{\alpha}$ 
-\end{itemize} then with high probability:
-$$\| \theta^*_i - \hat \theta_i \|_2 \leq \frac{6}{\gamma}\sqrt{\frac{s\log m}{\alpha n}}$$
-\end{theorem}
-
-\begin{corollary}
-By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$
-\end{corollary}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Main result}
-
-\begin{block}{Correlation}
-\begin{itemize}
-\item Positive result despite correlated measurements \smiley
-\item Independent measurements $\implies$ taking one measurement per cascade.
-\end{itemize}
-\end{block}
-
-\begin{block}{Statement w.r.t observations and not the model}
-\begin{itemize}
-\item The Hessian must verify the $(S,\gamma)$-RE condition \frownie
-\item Can we make a conditional statement on $\theta$ and not $X_t$?
-\end{itemize}
-\end{block}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Restricted Eigenvalue Condition}
-
-\begin{block}{From Hessian to Gram Matrix}
-\begin{itemize}
-\item If $\log f$ and $\log 1 -f$ are strictly log-concave with constant $c$, then if $(S, \gamma)$-RE holds for the gram matrix $\frac{1}{n}X X^T$ , then $(S, c \gamma)$-RE holds for ${\cal H}$
-\end{itemize}
-\end{block}
-
-\begin{block}{From Gram Matrix to Expected Gram Matrix}
-\begin{itemize}
-\item If $n > C' s^2 \log m$ and $(S, \gamma)$-RE holds for $\mathbb{E}(\frac{1}{n}XX^T)$, then $(S, \gamma/2)$-RE holds for $\frac{1}{n}XX^T$ w.h.p
-\item $\mathbb{E}(\frac{1}{n}XX^T)$ only depends on $p_{\text{init}}$ and $(\theta_j)_j$
-\end{itemize}
-\end{block}
-
-\begin{block}{Expected Gram Matrix}
-\begin{itemize}
-\item Diagonal : average number of times node is infected
-\item Outer-diagonal : average number of times pair of nodes is infected {\emph together}
-\end{itemize}
-\end{block}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%
-\begin{frame}
-\frametitle{Approximate Sparsity}
-\begin{itemize}
-\item $\theta^*_{\lceil s \rceil}$ best s-sparse approximation to $\theta^*$
-\item $\|\theta^* - \theta^*_{\lceil s \rceil} \|_1$: `tail' of $\theta^*$
-\end{itemize}
-\begin{theorem}
-Under similar conditions on $f$ and a relaxed RE condition, there $\exists C_1, C_2>0$ such that with high probability:
-\begin{equation}
-\|\hat \theta_i - \theta^*_i\|_2 \leq C_1 \sqrt{\frac{s\log m}{n}} + C_2 \sqrt[4]{\frac{s\log m}{n}}\|\theta^* - \theta^*_{\lceil s \rceil} \|_1
-\end{equation}
-\end{theorem}
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Lower Bound}
-\begin{itemize}
-\item Under correlation decay assumption for the IC model, ${\Omega}(s \log N/s)$ cascades necessary for graph reconstruction (Netrapalli et Sanghavi SIGMETRICS'12)
-\item Adapting (Price \& Woodruff STOC'12), in the approximately sparse case, any algorithm for any generalized linear cascade model such that:
-$$\|\hat \theta - \theta^*\|_2 \leq C \|\theta^* - \theta^*_{\lfloor s \rfloor}\|_2$$
-requires ${\cal O}(s \log (n/s)/\log C)$ measurement.
-\end{itemize}
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Voter Model}
-\begin{itemize}
-\pause
-\item {\color{red} Red} and {\color{blue} Blue} nodes. At every step, each node $i$ chooses one of its neighbors $j$ with probability $p_{j,i}$ and adopts that color at $t+1$
-\pause
-\item If {\color{blue} Blue} is `contagious' state:
-\pause
-\begin{equation}
-\nonumber
-\mathbb{P}(i \in X^{t+1}|X^t) = \sum_{j \in {\cal N}(i)\cap X^t} p_{ji} = X^t \cdot \theta_i
-\end{equation}
-\end{itemize}
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Future Work}
-\begin{itemize}
-\item Lower bound restricted eigenvalues of expected gram matrix
-\item Confidence Intervals
-\item Show that $n > C' s \log m$ measurements are necessary w.r.t. expected hessian.
-\item Linear Threshold model $\rightarrow$ 1-bit compressed sensing formulation
-\item Better lower bounds
-\item Active Learning
-\end{itemize}
-\end{frame}
-
-
-%%%%%%%%%%%%%%%%%
-
-\bibliography{../../paper/sparse}
-\bibliographystyle{apalike}
-
-\end{document}
-\ No newline at end of file