| author | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2014-12-04 17:17:04 -0500 |
|---|---|---|
| committer | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2014-12-04 17:17:04 -0500 |
| commit | 9c682e64d18c9cddfa2575adc68c57693862b0f5 (patch) | |
| tree | 1c4995eb353d519d10e12f637d014cd97d3f01ae /notes/cracking_cascades_classposter.tex | |
| parent | 9b8d4dc469f0e45d377684ab1d7bffcb0c289b18 (diff) | |
| download | cascades-9c682e64d18c9cddfa2575adc68c57693862b0f5.tar.gz | |
yaron poster
Diffstat (limited to 'notes/cracking_cascades_classposter.tex')
| -rw-r--r-- | notes/cracking_cascades_classposter.tex | 424 |
1 file changed, 424 insertions, 0 deletions
diff --git a/notes/cracking_cascades_classposter.tex b/notes/cracking_cascades_classposter.tex
new file mode 100644
index 0000000..2b29109
--- /dev/null
+++ b/notes/cracking_cascades_classposter.tex
@@ -0,0 +1,424 @@
+\documentclass[final]{beamer}
+\usepackage[utf8]{inputenc}
+\usepackage[scale=1.41]{beamerposter} % Use the beamerposter package for laying out the poster
+
+\usetheme{confposter} % Use the confposter theme supplied with this template
+
+\usepackage{color, bbm}
+\setbeamercolor{block title}{fg=dblue,bg=white} % Colors of the block titles
+\setbeamercolor{block body}{fg=black,bg=white} % Colors of the body of blocks
+\setbeamercolor{block alerted title}{fg=white,bg=dblue!70} % Colors of the highlighted block titles
+\setbeamercolor{block alerted body}{fg=black,bg=dblue!10} % Colors of the body of highlighted blocks
+% Many more colors are available for use in beamerthemeconfposter.sty
+
+%-----------------------------------------------------------
+% Define the column widths and overall poster size
+% To set effective sepwid, onecolwid and twocolwid values, first choose how many columns you want and how much separation you want between columns
+% In this template, the separation width chosen is 0.024 of the paper width and a 4-column layout
+% onecolwid should therefore be (1-(# of columns+1)*sepwid)/# of columns e.g. (1-(4+1)*0.024)/4 = 0.22
+% Set twocolwid to be (2*onecolwid)+sepwid = 0.464
+% Set threecolwid to be (3*onecolwid)+2*sepwid = 0.708
+
+\newlength{\sepwid}
+\newlength{\onecolwid}
+\newlength{\twocolwid}
+\newlength{\threecolwid}
+\setlength{\paperwidth}{48in} % A0 width: 46.8in
+\setlength{\paperheight}{40in} % A0 height: 33.1in
+\setlength{\sepwid}{0.024\paperwidth} % Separation width (white space) between columns
+\setlength{\onecolwid}{0.22\paperwidth} % Width of one column
+\setlength{\twocolwid}{0.464\paperwidth} % Width of two columns
+\setlength{\threecolwid}{0.708\paperwidth} % Width of three columns
+\setlength{\topmargin}{-1in} % Reduce the top margin size
+%-----------------------------------------------------------
+
+\usepackage{graphicx} % Required for including images
+
+\usepackage{booktabs} % Top and bottom rules for tables
+
+%----------------------------------------------------------------------------------------
+%	TITLE SECTION
+%----------------------------------------------------------------------------------------
+
+\title{Sparse Recovery for Graph Reconstruction} % Poster title
+
+\author{Eric Balkanski, Jean Pouget-Abadie} % Author(s)
+
+\institute{Harvard University} % Institution(s)
+%----------------------------------------------------------------------------------------
+\begin{document}
+\addtobeamertemplate{block end}{}{\vspace*{2ex}} % White space under blocks
+\addtobeamertemplate{block alerted end}{}{\vspace*{2ex}} % White space under highlighted (alert) blocks
+
+\setlength{\belowcaptionskip}{2ex} % White space under figures
+\setlength\belowdisplayshortskip{2ex} % White space under equations
+
+\begin{frame}[t] % The whole poster is enclosed in one beamer frame
+
+\begin{columns}[t] % The poster consists of four equal-width content columns - the [t] option aligns each column's content to the top
+
+\begin{column}{\sepwid}\end{column} % Empty spacer column
+
+\begin{column}{\onecolwid} % The first column
+
+%----------------------------------------------------------------------------------------
+%	INTRODUCTION
+%----------------------------------------------------------------------------------------
+
+%\vspace{- 15.2 cm}
+%\begin{center}
+%{\includegraphics[height=7em]{logo.png}} % First university/lab logo on the left
+%\end{center}
+
+%\vspace{4.6 cm}
+
+\begin{block}{Introduction}
+
+{\bf Graph Reconstruction}:
+
+\begin{itemize}
+\item \{${\cal G}, \vec p$\}: directed graph, edge probabilities
+\item $F$: cascade generating model
+\item ${\cal M} := F\{{\cal G}, \vec p\}$: observed cascades
+\end{itemize}
+
+{\bf Objective}:
+\begin{itemize}
+\item Find an algorithm that computes $F^{-1}({\cal M}) = \{{\cal G}, \vec p\}$ w.h.p., i.e., recovers the graph from the cascades.
+\end{itemize}
+
+{\bf Approach}:
+\begin{itemize}
+\item Frame graph reconstruction as a {\it Sparse Recovery} problem for two cascade generating models.
+\end{itemize}
+
+%Given a set of observed cascades, the \textbf{graph reconstruction problem} consists of finding the underlying graph on which these cascades spread. We assume that these cascades come from the classical \textbf{Independent Cascade Model} where at each time step, newly infected nodes infect each of their neighbors with some probability.
+
+%In previous work, this problem has been formulated in different ways, including as a convex optimization and as a maximum likelihood problem. However, there is no known algorithm for graph reconstruction with theoretical guarantees and a reasonable required sample size.
+
+%We formulate a novel approach to this problem in which we use \textbf{Sparse Recovery} to find the edges in the unknown underlying network. Sparse Recovery is the problem of finding the sparsest vector $x$ such that $\mathbf{M x = b}$. In our case, for each node $i$, we wish to recover the vector $x = p_i$ where $p_{i_j}$ is the probability that node $j$ infects node $i$ if $j$ is active. To recover this vector, we are given $M$, where row $M_{t,k}$ indicates which nodes are infected at time $t$ in observed cascade $k$, and $b$, where $b_{t+1,k}$ indicates if node $i$ is infected at time $t+1$ in cascade $k$. Since most nodes have a small number of neighbors in large networks, we can assume that these vectors are sparse. Sparse Recovery is a well studied problem which can be solved efficiently and with small error if $M$ satisfies certain properties. In this project, we empirically study to what extent $M$ satisfies the Restricted Isometry Property.
+
+\end{block}
+
+%---------------------------------------------------------------------------------
+%---------------------------------------------------------------------------------
+
+\begin{block}{Voter Model}
+{\bf Description}
+
+\begin{itemize}
+\item Two types of nodes, {\color{red} red} and {\color{blue} blue}.
+\item $\mathbb{P}$(node is {\color{blue} blue} at $t=0$) $= p_{\text{init}}$
+\item $\mathbb{P}$(node is {\color{blue} blue} at $t+1$) $= \frac{\text{Number of {\color{blue}blue} neighbors}}{\text{Total number of neighbors}}$
+\item Observe until horizon $T$
+\end{itemize}
+
+{\bf Sparse Recovery Formulation}
+\begin{itemize}
+\item Definitions:
+\end{itemize}
+\begin{align}
+(\vec m^t_k)_j &:= \left[\text{node j is {\color{blue} blue} at t in cascade k}\right] \nonumber \\
+(\vec x_{i})_j &:= \frac{1}{\deg(i)} \cdot \left[\text{j parent of i in }{\cal G}\right] \nonumber \\
+b^{t+1}_{k,i} &:= \left[\text{node i is {\color{blue} blue} at t+1 in cascade k}\right] \nonumber
+\end{align}
+
+\begin{itemize}
+\item Observation:
+\end{itemize}
+\begin{align}
+\langle \vec m^t_k, \vec x_i \rangle &= \mathbb{P}(\text{node i is {\color{blue} blue} at t+1 in cascade k}) \nonumber \\
+&=: w^{t+1}_{k,i} \nonumber
+\end{align}
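+
+% Derivation sketch (added for intuition; a minimal expansion of the
+% definitions above, assuming node i draws its t+1 color according to the
+% fraction of blue parents, as in the Description):
+\begin{align}
+\mathbb{P}(\text{node i is {\color{blue} blue} at t+1 in cascade k}) &= \sum_j \frac{1}{\deg(i)} \left[\text{j parent of i in }{\cal G}\right] (\vec m^t_k)_j \nonumber \\
+&= \langle \vec m^t_k, \vec x_i \rangle \nonumber
+\end{align}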
+
+\begin{itemize}
+\item We observe $M := \text{vstack}(\vec m^t_k)$
+\item We observe $\vec b_i := \text{vstack}(b^{t+1}_{k,i})$ where $$b^{t+1}_{k,i} \sim {\cal B}(w^{t+1}_{k,i}), \quad \text{i.e.} \quad b^{t+1}_{k,i} = w^{t+1}_{k,i} - \epsilon$$
+\item For each node i, solve for $\vec x_i$:
+\begin{equation}
+\boxed{M \vec x_i = \vec b_i + \epsilon} \nonumber
+\end{equation}
+\end{itemize}
+\end{block}
+
+%---------------------------------------------------------------------------------
+%---------------------------------------------------------------------------------
+
+\begin{block}{Independent Cascade Model}
+
+{\bf Description}
+\begin{itemize}
+\item Three possible states: susceptible, infected, inactive
+\item Infected node $j$ infects each susceptible neighbor $i$ with probability $p_{j,i}$, independently
+\item $\mathbb{P}$(inactive at $t+1 \mid$ infected at $t$) $= 1$
+\item $\mathbb{P}$(infected at $t=0$) $= p_{\text{init}}$
+\end{itemize}
+
+{\bf Sparse Recovery Formulation}
+\begin{itemize}
+
+\item Definitions:
+\begin{align}
+(\vec \theta_i)_j &:= \log ( 1 - p_{j,i}) \nonumber \\
+(\vec m^t_k)_j &:= \left[\text{node j is infected at t in cascade k}\right] \nonumber
+\end{align}
+
+\item Observation (derivation sketched below):
+\begin{equation}
+\langle \vec m^t_k, \vec \theta_i \rangle = \log \mathbb{P}(\text{node i {\it not} infect. at t+1 in casc. k}) \nonumber
+\end{equation}
+\item With the same notation, we solve for each node i:
+\begin{equation}
+\boxed{e^{M \vec \theta_i} = (1 - \vec b_i) + \epsilon} \nonumber
+\end{equation}
+where $e^{M \vec \theta_i}$ is taken element-wise
+\end{itemize}
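+
+% Derivation sketch (added for intuition): the observation follows from
+% the independence of infection attempts; note $(\vec \theta_i)_j = 0$
+% whenever $p_{j,i} = 0$, so non-parents do not contribute:
+\begin{align}
+\mathbb{P}(\text{node i {\it not} infect. at t+1 in casc. k}) &= \prod_j (1 - p_{j,i})^{(\vec m^t_k)_j} \nonumber \\
+\log \mathbb{P}(\text{node i {\it not} infect. at t+1 in casc. k}) &= \sum_j (\vec m^t_k)_j \log(1 - p_{j,i}) = \langle \vec m^t_k, \vec \theta_i \rangle \nonumber
+\end{align}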
+\end{block}
+
+%---------------------------------------------------------------------------------
+%---------------------------------------------------------------------------------
+
+\end{column} % End of the first column
+
+\begin{column}{\sepwid}\end{column} % Empty spacer column
+
+\begin{column}{\onecolwid} % The second column
+
+%----------------------------------------------------------------------------------------
+%	EXAMPLE
+%----------------------------------------------------------------------------------------
+\begin{block}{Example}
+
+\begin{figure}
+\centering
+\includegraphics[width=0.6\textwidth]{cascades.png}
+\caption{Two cascades with two time steps each. Red nodes are the infected and active nodes.}
+\end{figure}
+
+Figure 1 illustrates two cascades. In this case, if we wish to recover $\vec x_A$, then our problem is given by
+\[ \left( \begin{array}{cccc}
+1 & 0 & 0 & 0 \\
+0 & 1 & 0 & 0
+\end{array} \right) \vec x_A = \left( \begin{array}{c}
+0 \\
+1 \end{array} \right)\]
+
+\end{block}
+
+%----------------------------------------------------------------------------------------
+
+\begin{block}{Motivation}
+\begin{itemize}
+\item Vectors $\vec x_i$ are $\deg(i)$-sparse.
+\item Matrix $M$ is well-conditioned (see experimental section)
+\item Voter model: quasi-textbook noisy sparse recovery
+\item Independent cascade: if $p_{j,i} < 1 - \eta$, the exponential can be approximated by an affine function (see the sketch below)
+\end{itemize}
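+
+% First-order sketch (added) behind the last bullet: if $p_{j,i} < 1 - \eta$,
+% then $|(\vec \theta_i)_j| \leq \log(1/\eta)$ is bounded, and when
+% $\langle \vec m^t_k, \vec \theta_i \rangle$ is small,
+\begin{equation}
+e^{M \vec \theta_i} \approx \mathbbm{1} + M \vec \theta_i \quad \Rightarrow \quad M \vec \theta_i \approx -\vec b_i + \epsilon \nonumber
+\end{equation}
+% which has the same linear form as the voter-model system.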
+\end{block}
+
+\begin{block}{Algorithms}
+
+{\bf Voter Model}
+
+\begin{itemize}
+\item Solve for each node i:
+\begin{equation}
+\min_{\vec x_i} \|\vec x_i\|_1 + \lambda \|M \vec x_i - \vec b_i \|_2 \nonumber
+\end{equation}
+\end{itemize}
+
+{\bf Independent Cascade Model}
+
+\begin{itemize}
+\item Solve for each node i:
+\begin{equation}
+\min_{\vec \theta_i} \|\vec \theta_i\|_1 + \lambda \|e^{M \vec \theta_i} - (1 - \vec b_i) \|_2 \nonumber
+\end{equation}
+where $e^{M \vec \theta_i}$ is taken element-wise
+\end{itemize}
+
+\end{block}
+
+%----------------------------------------------------------------------------------------
+%	RIP PROPERTY (commented out; see the third column)
+%----------------------------------------------------------------------------------------
+
+% \begin{block}{RIP property}
+
+% %The Restricted Isometry Property (RIP) characterizes a quasi-orthonormality of the measurement matrix M on sparse vectors.
+
+% For all k, we define $\delta_k$ as the best possible constant such that for all unit-normed ($l$2) and k-sparse vectors $x$
+
+% \begin{equation}
+% 1-\delta_k \leq \|Mx\|^2_2 \leq 1 + \delta_k
+% \end{equation}
+
+% In general, the smaller $\delta_k$ is, the better we can recover $k$-sparse vectors $x$!
+
+% \end{block}
+
+%----------------------------------------------------------------------------------------
+
+\end{column} % End of the second column
+
+\begin{column}{\sepwid}\end{column} % Empty spacer column
+
+\begin{column}{\onecolwid} % The third column
+
+%----------------------------------------------------------------------------------------
+
+\begin{block}{Theoretical Guarantees}
+
+{\bf Assumptions}
+\begin{itemize}
+\item Suppose that each row of $M$ is drawn from a distribution $F$ such that
+\begin{itemize}
+  \item $\mathbb{E}_{a \in F}(a a^T) = I_n$
+  \item $\|a\|_{\infty} \leq \mu$
+\end{itemize}
+\item Suppose that, w.h.p., $\| M^T \epsilon \|_{\infty} \leq 2.5 \sqrt{\log n}$
+\end{itemize}
+
+{\bf Theorem \cite{candes}}
+
+If node $i$ has degree $\Delta$ and the number of rows $m$ of $M$ satisfies $m \geq C \mu \Delta \log n$, then, w.h.p.,
+
+$$\| \hat x - x^* \|_2 \leq C (1 + \log^{3/2}(n))\sqrt{\frac{\Delta \log n}{m}}$$
+
+{\bf Discussion}
+
+\begin{itemize}
+\item If we consider $M/\sqrt{p_{\text{init}}}$, then $\mathbb{E}_{a \in F}(a a^T) \approx I_n$ (see the sketch below)
+\item $\mathbb{E}(\epsilon) = 0$, hence $\| M^T \epsilon \|_{\infty} \leq 2.5 \sqrt{\log n}$ should hold w.h.p.
+\end{itemize}
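+
+% Second-moment sketch (added) behind the first bullet, assuming for
+% intuition that the entries of a row $a$ are i.i.d. Bernoulli($p_{\text{init}}$),
+% as at $t = 0$ ($J_n$ denotes the all-ones matrix):
+\begin{equation}
+\mathbb{E}(a a^T) = p_{\text{init}} I_n + p_{\text{init}}^2 (J_n - I_n) \nonumber
+\end{equation}
+% Rescaling rows by $1/\sqrt{p_{\text{init}}}$ gives
+% $\mathbb{E}(a a^T) = I_n + p_{\text{init}} (J_n - I_n) \approx I_n$
+% for small $p_{\text{init}}$.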
+\end{block}
+
+\begin{block}{Restricted Isometry Property (RIP)}
+{\bf Definition}
+\begin{itemize}
+\item The Restricted Isometry Property (RIP) characterizes a quasi-orthonormality of the measurement matrix $M$ on sparse vectors.
+
+\item For all $k$, we define $\delta_k$ as the smallest constant such that, for all unit-norm ($\ell_2$) $k$-sparse vectors $x$:
+
+\begin{equation}
+1-\delta_k \leq \|Mx\|^2_2 \leq 1 + \delta_k \nonumber
+\end{equation}
+
+\item In general, the smaller $\delta_k$ is, the better we can recover $k$-sparse vectors $x$!
+\end{itemize}
+
+{\bf Relevance}
+\begin{itemize}
+\item Commonly studied in stable sparse recovery
+\item $\mathbb{E}_{a \in F}(a a^T) \approx I_n$ together with the RIP (with $\delta = \frac{1}{4}$) should yield a conclusion similar to the theorem above
+\end{itemize}
+
+\end{block}
+
+%----------------------------------------------------------------------------------------
+%	RESULTS
+%----------------------------------------------------------------------------------------
+
+\begin{block}{Description of Experiments}
+
+\end{block}
+
+%----------------------------------------------------------------------------------------
+
+\end{column} % End of the third column
+
+\begin{column}{\sepwid}\end{column} % Empty spacer column
+
+\begin{column}{\onecolwid} % The fourth column
+
+%----------------------------------------------------------------------------------------
+%	OVERALL COMPARISON
+%----------------------------------------------------------------------------------------
+
+%\vspace{- 14.2 cm}
+%\begin{center}
+%{\includegraphics[height=7em]{cmu_logo.png}} % First university/lab logo on the left
+%\end{center}
+
+%\vspace{4 cm}
+
+\begin{alertblock}{Experimental Results}
+
+\end{alertblock}
+
+%----------------------------------------------------------------------------------------
+
+%----------------------------------------------------------------------------------------
+%	CONCLUSION
+%----------------------------------------------------------------------------------------
+
+\begin{block}{Conclusion}
+
+\end{block}
+
+%----------------------------------------------------------------------------------------
+%	REFERENCES
+%----------------------------------------------------------------------------------------
+
+\begin{block}{References}
+
+\begin{thebibliography}{42}
+
+\bibitem{candes}
+Candès, E., and Plan, Y.
+\newblock {\it A Probabilistic and RIPless Theory of Compressed Sensing}.
+\newblock IEEE Transactions on Information Theory, 57(11):7235--7254,
+\newblock 2011.
+\end{thebibliography}
+
+\end{block}
+
+%----------------------------------------------------------------------------------------
+
+\end{column} % End of the fourth column
+
+\end{columns} % End of all the columns in the poster
+
+\end{frame} % End of the enclosing frame
+
+\end{document}
