From 843f75943d25f4e180493142b6da0968621b9a78 Mon Sep 17 00:00:00 2001 From: Thibaut Horel Date: Mon, 9 Mar 2015 13:50:20 -0400 Subject: Big reorganisation of the repo --- notes/beamerthemeconfposter.sty | 184 ------ notes/cracking_cascades_classposter.tex | 461 --------------- notes/defs.tex | 17 - notes/formalisation.pdf | Bin 312600 -> 0 bytes notes/formalisation.tex | 317 ---------- notes/images/greedy_sparse_comparison.png | Bin 35393 -> 0 bytes notes/images/icc.png | Bin 130144 -> 0 bytes notes/images/sparse_recovery_illustration.pdf | Bin 20664 -> 0 bytes notes/images/sparse_recovery_illustration.svg | 655 --------------------- notes/images/sparse_recovery_illustration_copy.pdf | Bin 148049 -> 0 bytes notes/images/voter.png | Bin 111444 -> 0 bytes notes/maximum_likelihood_approach.tex | 230 -------- notes/presentation/beamer.tex | 381 ------------ notes/presentation/beamer_2.tex | 397 ------------- notes/presentation/extended_abstract.txt | 10 - .../figures/Screen Shot 2015-03-08 at 13.08.01.png | Bin 126844 -> 0 bytes notes/presentation/figures/weighted_graph.png | Bin 24602 -> 0 bytes notes/presentation/sparse.bib | 503 ---------------- notes/reportYaron.tex | 354 ----------- notes/sparse.bib | 105 ---- old_work/defs.tex | 17 + old_work/formalisation.pdf | Bin 0 -> 312600 bytes old_work/formalisation.tex | 317 ++++++++++ old_work/maximum_likelihood_approach.tex | 230 ++++++++ old_work/reportYaron.tex | 354 +++++++++++ old_work/sparse.bib | 105 ++++ paper/example_paper.bib | 75 --- paper/example_paper.tex | 543 ----------------- poster/beamerthemeconfposter.sty | 184 ++++++ poster/cracking_cascades_classposter.tex | 461 +++++++++++++++ poster/images/greedy_sparse_comparison.png | Bin 0 -> 35393 bytes poster/images/icc.png | Bin 0 -> 130144 bytes poster/images/sparse_recovery_illustration.svg | 655 +++++++++++++++++++++ poster/images/voter.png | Bin 0 -> 111444 bytes presentation/econcs/beamer.tex | 381 ++++++++++++ .../figures/Screen Shot 2015-03-08 at 13.08.01.png | Bin 0 -> 126844 bytes presentation/econcs/figures/weighted_graph.png | Bin 0 -> 24602 bytes presentation/econcs/sparse.bib | 503 ++++++++++++++++ presentation/extended_abstract.txt | 10 + presentation/images/greedy_sparse_comparison.png | Bin 0 -> 35393 bytes presentation/images/icc.png | Bin 0 -> 130144 bytes .../images/sparse_recovery_illustration.svg | 655 +++++++++++++++++++++ presentation/images/voter.png | Bin 0 -> 111444 bytes presentation/stats/beamer_2.tex | 397 +++++++++++++ .../figures/Screen Shot 2015-03-08 at 13.08.01.png | Bin 0 -> 126844 bytes presentation/stats/figures/weighted_graph.png | Bin 0 -> 24602 bytes presentation/stats/sparse.bib | 503 ++++++++++++++++ 47 files changed, 4772 insertions(+), 4232 deletions(-) delete mode 100644 notes/beamerthemeconfposter.sty delete mode 100644 notes/cracking_cascades_classposter.tex delete mode 100644 notes/defs.tex delete mode 100644 notes/formalisation.pdf delete mode 100644 notes/formalisation.tex delete mode 100644 notes/images/greedy_sparse_comparison.png delete mode 100644 notes/images/icc.png delete mode 100644 notes/images/sparse_recovery_illustration.pdf delete mode 100644 notes/images/sparse_recovery_illustration.svg delete mode 100644 notes/images/sparse_recovery_illustration_copy.pdf delete mode 100644 notes/images/voter.png delete mode 100644 notes/maximum_likelihood_approach.tex delete mode 100644 notes/presentation/beamer.tex delete mode 100644 notes/presentation/beamer_2.tex delete mode 100644 notes/presentation/extended_abstract.txt delete mode 
100644 notes/presentation/figures/Screen Shot 2015-03-08 at 13.08.01.png delete mode 100644 notes/presentation/figures/weighted_graph.png delete mode 100644 notes/presentation/sparse.bib delete mode 100644 notes/reportYaron.tex delete mode 100644 notes/sparse.bib create mode 100644 old_work/defs.tex create mode 100644 old_work/formalisation.pdf create mode 100644 old_work/formalisation.tex create mode 100644 old_work/maximum_likelihood_approach.tex create mode 100644 old_work/reportYaron.tex create mode 100644 old_work/sparse.bib delete mode 100644 paper/example_paper.bib delete mode 100644 paper/example_paper.tex create mode 100644 poster/beamerthemeconfposter.sty create mode 100644 poster/cracking_cascades_classposter.tex create mode 100644 poster/images/greedy_sparse_comparison.png create mode 100644 poster/images/icc.png create mode 100644 poster/images/sparse_recovery_illustration.svg create mode 100644 poster/images/voter.png create mode 100644 presentation/econcs/beamer.tex create mode 100644 presentation/econcs/figures/Screen Shot 2015-03-08 at 13.08.01.png create mode 100644 presentation/econcs/figures/weighted_graph.png create mode 100644 presentation/econcs/sparse.bib create mode 100644 presentation/extended_abstract.txt create mode 100644 presentation/images/greedy_sparse_comparison.png create mode 100644 presentation/images/icc.png create mode 100644 presentation/images/sparse_recovery_illustration.svg create mode 100644 presentation/images/voter.png create mode 100644 presentation/stats/beamer_2.tex create mode 100644 presentation/stats/figures/Screen Shot 2015-03-08 at 13.08.01.png create mode 100644 presentation/stats/figures/weighted_graph.png create mode 100644 presentation/stats/sparse.bib diff --git a/notes/beamerthemeconfposter.sty b/notes/beamerthemeconfposter.sty deleted file mode 100644 index bcba6a7..0000000 --- a/notes/beamerthemeconfposter.sty +++ /dev/null @@ -1,184 +0,0 @@ -%============================================================================== -% Beamer style for the poster template posted at -% www.nathanieljohnston.com/index.php/2009/08/latex-poster-template -% -% Created by the Computational Physics and Biophysics Group at Jacobs University -% https://teamwork.jacobs-university.de:8443/confluence/display/CoPandBiG/LaTeX+Poster -% Modified by Nathaniel Johnston (nathaniel@nathanieljohnston.com) in August 2009 -% ============================================================================= - -\ProvidesPackage{beamerthemeconfposter} -\RequirePackage{tikz} % for drawing the nice rounded boxes -\usetikzlibrary{arrows,backgrounds} -\RequirePackage[T1]{fontenc} -\RequirePackage{lmodern} -\RequirePackage{textcomp} -\RequirePackage{amsmath,amssymb} -\usefonttheme{professionalfonts} -\newcommand{\makeruleinbox}{{\usebeamercolor[bg]{block alerted title}\centering\hspace*{-0.7cm}\rule{\inboxrule}{0.5cm}}} -\usepackage{ragged2e} -\usepackage{wrapfig} -%----------------------------------------------------------- -% Define a whole bunch of custom colours and fonts -%----------------------------------------------------------- - -\definecolor{lgreen} {RGB}{180,210,100} -\definecolor{dblue} {RGB}{20,66,129} -\definecolor{ddblue} {RGB}{11,36,69} -\definecolor{lred} {RGB}{220,0,0} -\definecolor{nred} {RGB}{224,0,0} -\definecolor{norange}{RGB}{244,127,36} -\definecolor{nyellow}{RGB}{255,221,0} -\definecolor{ngreen} {RGB}{98,158,31} -\definecolor{dgreen} {RGB}{78,138,21} -\definecolor{nblue} {RGB}{28,130,185} -\definecolor{jblue} {RGB}{20,50,100} -\definecolor{jalpha} 
{RGB}{0,0,0} - -% set the basic colors -\setbeamercolor{palette primary} {fg=black,bg=white} -\setbeamercolor{palette secondary} {fg=black,bg=white} -\setbeamercolor{palette tertiary} {bg=jblue,fg=white} -\setbeamercolor{palette quaternary}{fg=black,bg=white} -\setbeamercolor{structure}{fg=jblue} -\setbeamercolor{titlelike} {bg=jblue,fg=white} -\setbeamercolor{frametitle} {bg=jblue!10,fg=jblue} -\setbeamercolor{cboxb}{fg=black,bg=jblue} -\setbeamercolor{cboxr}{fg=black,bg=red} - -% set colors for itemize/enumerate -\setbeamercolor{item}{fg=ngreen} -\setbeamercolor{item projected}{fg=white,bg=ngreen} - -% set colors for blocks -%\setbeamercolor{block title}{fg=ngreen,bg=white} -%\setbeamercolor{block body}{fg=black,bg=white} - -%set colors for alerted blocks (blocks with frame) -\setbeamercolor{block alerted title}{fg=white,bg=jblue} -\setbeamercolor{block alerted body}{fg=black,bg=jblue!10} - -% set the fonts -\setbeamerfont{section in head/foot}{series=\bfseries} -\setbeamerfont{block title}{series=\bfseries} -\setbeamerfont{block alerted title}{series=\bfseries} -\setbeamerfont{frametitle}{series=\bfseries} -\setbeamerfont{frametitle}{size=\Large} - -% set some beamer theme options -\setbeamertemplate{title page}[default][colsep=-4bp,rounded=true] -\setbeamertemplate{sections/subsections in toc}[square] -\setbeamertemplate{items}[circle] -\setbeamertemplate{blocks}[width=0.0] -\beamertemplatenavigationsymbolsempty - -% set bibliography style -\setbeamertemplate{bibliography item}[text] -\setbeamercolor{bibliography item}{fg=black,bg=white} -\setbeamercolor{bibliography entry author}{fg=black,bg=white} -\setbeamercolor{bibliography item}{fg=black,bg=white} - -% define some length variables that are used by the template -\newlength{\inboxwd} -\newlength{\iinboxwd} -\newlength{\inboxrule} -\makeatletter -\makeatother - -%============================================================================== -% build the poster title -%============================================================================== -\setbeamertemplate{headline}{ - \leavevmode - \begin{columns} - \begin{column}{.2\linewidth} - \vskip1cm - \centering - %\includegraphics[height=4in]{UIUC-logo} - \end{column} - \vspace{1cm} - \begin{column}{.6\linewidth} - \vskip1cm - \centering - \usebeamercolor{title in headline}{\color{jblue}\Huge{\textbf{\inserttitle}}\\[0.5ex]} - \usebeamercolor{author in headline}{\color{fg}\Large{\insertauthor}\\[1ex]} - \usebeamercolor{institute in headline}{\color{fg}\large{\insertinstitute}\\[1ex]} - \vskip1cm - \end{column} - \vspace{1cm} - \begin{column}{.2\linewidth} - \vskip1cm - \centering - %\includegraphics[height=4in]{graph500-logo} - \vskip1cm - \end{column} - \vspace{1cm} - \end{columns} - \vspace{0.5in} - \hspace{0.5in}\begin{beamercolorbox}[wd=47in,colsep=0.15cm]{cboxb}\end{beamercolorbox} - \vspace{0.1in} -} - -% Block definition -\setbeamertemplate{block begin} -{ - \par\vskip\medskipamount - \begin{beamercolorbox}[colsep*=0ex,dp={2ex},center]{block title} - \vskip-0.25cm - \usebeamerfont{block title}\large\insertblocktitle - \begin{flushleft} - \vskip-1cm - \begin{tikzpicture}[remember picture,overlay] - \shade [inner color=gray,outer color=white] - (0,0) rectangle (\textwidth,0.3cm); - \end{tikzpicture} - \end{flushleft} - \end{beamercolorbox} - {\parskip0pt\par} - \ifbeamercolorempty[bg]{block title} - {} - {\ifbeamercolorempty[bg]{block body}{}{\nointerlineskip\vskip-0.5pt}} - \usebeamerfont{block body} - \vskip-0.5cm - \begin{beamercolorbox}[colsep*=0ex,vmode]{block body} - 
\justifying -} - -\setbeamertemplate{block end} -{ - \end{beamercolorbox} - \vskip\smallskipamount -} - -% Alert block definition (with frame) -\setbeamertemplate{block alerted begin} -{ - \par\vskip\medskipamount - \begin{beamercolorbox}[sep=0ex,rounded=true,center,dp={2ex}]{block alerted title} - \vskip0.01cm - \usebeamerfont{block title}\large\insertblocktitle - \end{beamercolorbox} - {\parskip0pt\par} - \usebeamerfont{block body} - \vskip-0.8cm - \begin{beamercolorbox}[sep=0.5cm, rounded=true,center]{block alerted title} - \setlength{\inboxwd}{\linewidth} - \addtolength{\inboxwd}{-1cm} - \begin{beamercolorbox}[rounded=true,wd={\inboxwd},center]{block alerted body} - \setlength{\iinboxwd}{\inboxwd} - \setlength{\inboxrule}{\inboxwd} - \addtolength{\iinboxwd}{-0.5cm} - \addtolength{\inboxrule}{0.5cm} - \begin{center} - \begin{minipage}{\iinboxwd} - \justifying -} - -\setbeamertemplate{block alerted end} -{ - \end{minipage} - \end{center} - \end{beamercolorbox} - \end{beamercolorbox} - \vskip\smallskipamount -} \ No newline at end of file diff --git a/notes/cracking_cascades_classposter.tex b/notes/cracking_cascades_classposter.tex deleted file mode 100644 index 3f8204e..0000000 --- a/notes/cracking_cascades_classposter.tex +++ /dev/null @@ -1,461 +0,0 @@ -\documentclass[final]{beamer} -\usepackage[utf8]{inputenc} -\usepackage[scale=1.6]{beamerposter} % Use the beamerposter package for laying out the poster - -\usetheme{confposter} % Use the confposter theme supplied with this template - -\usepackage{color, bbm} -\setbeamercolor{block title}{fg=dblue,bg=white} % Colors of the block titles -\setbeamercolor{block body}{fg=black,bg=white} % Colors of the body of blocks -\setbeamercolor{block alerted title}{fg=white,bg=dblue!70} % Colors of the highlighted block titles -\setbeamercolor{block alerted body}{fg=black,bg=dblue!10} % Colors of the body of highlighted blocks -% Many more colors are available for use in beamerthemeconfposter.sty - -%----------------------------------------------------------- -% Define the column widths and overall poster size -% To set effective sepwid, onecolwid and twocolwid values, first choose how many columns you want and how much separation you want between columns -% In this template, the separation width chosen is 0.024 of the paper width and a 4-column layout -% onecolwid should therefore be (1-(# of columns+1)*sepwid)/# of columns e.g. 
(1-(4+1)*0.024)/4 = 0.22
-% Set twocolwid to be (2*onecolwid)+sepwid = 0.464
-% Set threecolwid to be (3*onecolwid)+2*sepwid = 0.708
-
-\newlength{\sepwid}
-\newlength{\onecolwid}
-\newlength{\twocolwid}
-\newlength{\threecolwid}
-\setlength{\paperwidth}{48in} % A0 width: 46.8in
-\setlength{\paperheight}{40in} % A0 height: 33.1in
-\setlength{\sepwid}{0.024\paperwidth} % Separation width (white space) between columns
-\setlength{\onecolwid}{0.22\paperwidth} % Width of one column
-\setlength{\twocolwid}{0.464\paperwidth} % Width of two columns
-\setlength{\threecolwid}{0.708\paperwidth} % Width of three columns
-\setlength{\topmargin}{-1in} % Reduce the top margin size
-%-----------------------------------------------------------
-
-\usepackage{graphicx} % Required for including images
-
-\usepackage{booktabs} % Top and bottom rules for tables
-
-
-
-%----------------------------------------------------------------------------------------
-% TITLE SECTION
-%----------------------------------------------------------------------------------------
-
-\title{Sparse Recovery for Graph Reconstruction} % Poster title
-
-\author{Eric Balkanski, Jean Pouget-Abadie} % Author(s)
-
-\institute{Harvard University} % Institution(s)
-%----------------------------------------------------------------------------------------
-\begin{document}
-\addtobeamertemplate{block end}{}{\vspace*{2ex}} % White space under blocks
-\addtobeamertemplate{block alerted end}{}{\vspace*{2ex}} % White space under highlighted (alert) blocks
-
-\setlength{\belowcaptionskip}{2ex} % White space under figures
-\setlength\belowdisplayshortskip{2ex} % White space under equations
-
-\begin{frame}[t] % The whole poster is enclosed in one beamer frame
-
-\begin{columns}[t] % The whole poster consists of three major columns, the second of which is split into two columns twice - the [t] option aligns each column's content to the top
-
-\begin{column}{\sepwid}\end{column} % Empty spacer column
-
-\begin{column}{\onecolwid} % The first column
-
-%----------------------------------------------------------------------------------------
-% INTRODUCTION
-%----------------------------------------------------------------------------------------
-
-
-%\vspace{- 15.2 cm}
-%\begin{center}
-%{\includegraphics[height=7em]{logo.png}} % First university/lab logo on the left
-%\end{center}
-
-%\vspace{4.6 cm}
-
-\begin{block}{Problem Statement}
-\begin{center}
-\bf{How can we reconstruct a graph on which observed cascades spread?}
-\end{center}
-\end{block}
-
-
-%{\bf Graph Reconstruction}:
-
-%\begin{itemize}
-%\item \{${\cal G}, \vec p$\}: directed graph, edge probabilities
-%\item $F$: cascade generating model
-%\item ${\cal M} := F\{{\cal G}, \vec p\}$: cascade
-%\end{itemize}
-
-%{\bf Objective}:
-%\begin{itemize}
-%\item Find algorithm which computes $F^{-1}({\cal M}) = \{{\cal G}, \vec p\}$ w.h.p., i.e. recovers graph from cascades.
-%\end{itemize}
-
-%{\bf Approach}
-%\begin{itemize}
-%\item Frame graph reconstruction as a {\it Sparse Recovery} problem for two cascade generating models.
-%\end{itemize}
-
-%Given a set of observed cascades, the \textbf{graph reconstruction problem} consists of finding the underlying graph on which these cascades spread. We assume that these cascades come from the classical \textbf{Independent Cascade Model} where at each time step, newly infected nodes infect each of their neighbors with some probability.
-
-%In previous work, this problem has been formulated in different ways, including as a convex optimization and as a maximum likelihood problem. However, there is no known algorithm for graph reconstruction with theoretical guarantees and a reasonable required sample size.
-
-%We formulate a novel approach to this problem in which we use \textbf{Sparse Recovery} to find the edges in the unknown underlying network. Sparse Recovery is the problem of finding the sparsest vector $x$ such that $\mathbf{M x =b}$. In our case, for each node $i$, we wish to recover the vector $x = p_i$ where $p_{ij}$ is the probability that node $j$ infects node $i$ if $j$ is active. To recover this vector, we are given $M$, where row $M_{t,k}$ indicates which nodes are infected at time $t$ in observed cascade $k$, and $b$, where $b_{t+1,k}$ indicates if node $i$ is infected at time $t+1$ in cascade $k$. Since most nodes have a small number of neighbors in large networks, we can assume that these vectors are sparse. Sparse Recovery is a well studied problem which can be solved efficiently and with small error if $M$ satisfies certain properties. In this project, we empirically study to what extent $M$ satisfies the Restricted Isometry Property.
-
-
-%---------------------------------------------------------------------------------
-%---------------------------------------------------------------------------------
-
-\begin{block}{Voter Model}
-
-\begin{figure}
-\centering
-\includegraphics[width=0.6\textwidth]{images/voter.png}
-\end{figure}
-
-
-
-\vspace{0.5 cm}
-{\bf Description}
-
-\vspace{0.5 cm}
-
-\begin{itemize}
-\item $\mathbb{P}$({\color{blue} blue} at $t=0) = p_{\text{init}}$
-\item $\mathbb{P}$({\color{blue} blue} at $t+1) = \frac{\text{Number of {\color{blue}blue} neighbors}}{\text{Total number of neighbors}}$
-\end{itemize}
-
-\vspace{0.5 cm}
-
-{\bf Sparse Recovery Formulation}
-
-\vspace{0.5 cm}
-
-
-To recover the neighbors of $v_1$, observe which nodes are {\color{red} red} (1) or {\color{blue} blue} (0) at time step $t$:
-\begin{align*}
-&v_2 \hspace{0.2 cm} v_3 \hspace{0.2 cm} v_4 \hspace{0.2 cm} v_5 &\\
-\vspace{1 cm}
-M = & \left( \begin{array}{cccc}
-0 & 0 & 1 & 1 \\
-1 & 1 & 0 & 0 \\
-\end{array} \right) & \begin{array}{l} \hspace{ - 5cm}
-\text{time step 0} \\
- \hspace{ - 5cm} \text{time step 1} \\
-\end{array}
-\end{align*}
-
-and which color $v_1$ is at time step $t+1$ due to $M$:
-
-\begin{align*}
-b_1 = & \left( \begin{array}{c}
-1 \\
-1 \\
-\end{array} \right) & \begin{array}{l} \hspace{ - 5cm}
-\text{time step 1} \\
- \hspace{ - 5cm} \text{time step 2} \\
- \end{array}
-\end{align*}
-
-Then,
-
-\begin{equation}
-\boxed{M \vec x_1 = \vec b_1 + \epsilon \nonumber}
-\end{equation}
-
-where $(\vec x_{1})_j := \frac{1}{\deg(1)} \cdot \left[\text{$j$ parent of $1$ in }{\cal G}\right] $
-
-
-\end{block}
-
-
-
-
-
-%---------------------------------------------------------------------------------
-%---------------------------------------------------------------------------------
-
-
-%---------------------------------------------------------------------------------
-%---------------------------------------------------------------------------------
-
-
-
-
-
-
-%---------------------------------------------------------------------------------
-%---------------------------------------------------------------------------------
-
-
-
-\end{column} % End of the first column
-
-\begin{column}{\sepwid}\end{column} % Empty spacer column
-
-\begin{column}{\onecolwid} % The first column
-
-%----------------------------------------------------------------------------------------
-% CONSTRAINT SATISFACTION - BACKTRACKING
-%----------------------------------------------------------------------------------------
-\begin{block}{Independent Cascades Model}
-\begin{figure}
-\centering
-\includegraphics[width=0.6\textwidth]{images/icc.png}
-\end{figure}
-
-\vspace{0.5 cm}
-
-{\bf Description}
-
-\vspace{0.5 cm}
-
-\begin{itemize}
-\item Three possible states: {\color{blue} susceptible}, {\color{red} infected}, {\color{yellow} inactive }
-\item $\mathbb{P}$(infected at $t=0$) $= p_{\text{init}}$
-\item An infected node $j$ infects each of its susceptible neighbors $i$ with probability $p_{j,i}$, independently
-\end{itemize}
-
-\vspace{0.5 cm}
-
-{\bf Sparse Recovery Formulation}
-
-\vspace{0.5 cm}
-
-To recover the neighbors of $v_5$, observe which nodes are {\color{red} red} (1), {\color{blue} blue} (0), or {\color{yellow} yellow} (0) at time step $t$:
-\begin{align*}
-&v_1 \hspace{0.2 cm} v_2 \hspace{0.2 cm} v_3 \hspace{0.2 cm} v_4 \\
-\vspace{1 cm}
-M = & \left( \begin{array}{cccc}
-1 & 0 & 0 & 0 \\
-0 & 1 & 1 & 0 \\
-\end{array} \right) \begin{array}{l} \hspace{ 1cm}
-\text{time step 0} \\
- \hspace{ 1cm} \text{time step 1} \\
- \end{array}
-\end{align*}
-
-and if $M$ caused $v_5$ to be infected at time step $t+1$:
-
-\begin{align*}
-b_5 = & \left( \begin{array}{c}
-0 \\
-1 \\
-\end{array} \right) \begin{array}{l} \hspace{ 1cm}
-\text{time step 1} \\
- \hspace{ 1cm} \text{time step 2} \\
- \end{array}
-\end{align*}
-
-
-Then,
-
-\begin{equation}
-\boxed{e^{M \vec \theta_5} = (1 - \vec b_5) + \epsilon} \nonumber
-\end{equation}
-
-where $(\vec \theta_5)_j := \log ( 1 - p_{j,5}) $
-
-\vspace{1 cm}
-
-
-This problem is a {\bf Noisy Sparse Recovery} problem, which has been studied extensively. Here, the vectors $\vec x_i$ are $\deg(i)$-sparse.
-
-
-\end{block}
-
-%----------------------------------------------------------------------------------------
-
-
-
-
-
-
-
-
-
-%----------------------------------------------------------------------------------------
-% MIP
-%----------------------------------------------------------------------------------------
-
-% \begin{block}{RIP property}
-
-% %The Restricted Isometry Property (RIP) characterizes a quasi-orthonormality of the measurement matrix M on sparse vectors.
-
-% For all $k$, we define $\delta_k$ as the best possible constant such that for all unit-normed ($\ell_2$) and $k$-sparse vectors $x$
-
-% \begin{equation}
-% 1-\delta_k \leq \|Mx\|^2_2 \leq 1 + \delta_k
-% \end{equation}
-
-% In general, the smaller $\delta_k$ is, the better we can recover $k$-sparse vectors $x$!
-% \end{block}
-
-%----------------------------------------------------------------------------------------
-
-\end{column}
-
-\begin{column}{\sepwid}\end{column} % Empty spacer column
-
-\begin{column}{\onecolwid} % The first column within column 2 (column 2.1)
-
-
-%----------------------------------------------------------------------------------------
-
-
-\begin{block}{Algorithms}
-
-{\bf Voter Model}
-
-\begin{itemize}
-\item Solve for each node $i$:
-\begin{equation}
-\min_{\vec x_i} \|\vec x_i\|_1 + \lambda \|M \vec x_i - \vec b_i \|_2 \nonumber
-\end{equation}
-\end{itemize}
-
-{\bf Independent Cascade Model}
-
-\begin{itemize}
-\item Solve for each node $i$:
-\begin{equation}
-\min_{\vec \theta_i} \|\vec \theta_i\|_1 + \lambda \|e^{M \vec \theta_i} - (1 - \vec b_i) \|_2 \nonumber
-\end{equation}
-\end{itemize}
-
-\end{block}
-
-\begin{block}{Restricted Isometry Property (RIP)}
-{\bf Definition}
-\begin{itemize}
-\item Characterizes a quasi-orthonormality of $M$ on sparse vectors.
-
-\item The RIP constant $\delta_k$ is the best possible constant such that for all unit-normed ($\ell_2$) and $k$-sparse vectors $x$:
-
-\begin{equation}
-1-\delta_k \leq \|Mx\|^2_2 \leq 1 + \delta_k \nonumber
-\end{equation}
-
-\item The smaller $\delta_k$ is, the better we can recover $k$-sparse vectors $x$.
-\end{itemize}
-
-
-
-
-\end{block}
-
-\begin{block}{Theoretical Guarantees}
-
-With small RIP constants $(\delta \leq 0.25)$ for $M$ and some assumption on the noise $\epsilon$:
-
-{\bf Theorem \cite{candes}}
-
-If node $i$ has degree $\Delta$ and $n_{\text{rows}}(M) \geq C_1 \mu \Delta \log n$, then, w.h.p.,
-
-$$\| \hat x - x^* \|_2 \leq C (1 + \log^{3/2}(n))\sqrt{\frac{\Delta \log n}{n_{\text{rows}}(M) }}$$
-
-
-\end{block}
-
-
-
-
-%----------------------------------------------------------------------------------------
-% RESULTS
-%----------------------------------------------------------------------------------------
-
-\begin{block}{RIP Experiments}
-
-\begin{center}
-\begin{table}
-\begin{tabular}{c | c | c | c | c }
-& $c$ = 100, &$c$ = 1000,& $c$ = 100, &$c$ = 1000,\\
-& $i$ = 0.1& $i$ = 0.1& $i$ = 0.05& $i$ = 0.05\\
- \hline
- $\delta_4$ & 0.54 & 0.37 &0.43&0.23 \\
- \end{tabular}
- \caption{RIP constant for a small graph. Here, $c$ is the number of cascades and $i$ is $p_{\text{init}}$.}
-\end{table}
-\end{center}
-
-
-\end{block}
-
-%----------------------------------------------------------------------------------------
-
-
-\end{column} % End of the second column
-
-\begin{column}{\sepwid}\end{column} % Empty spacer column
-
-\begin{column}{\onecolwid} % The third column
-
-%----------------------------------------------------------------------------------------
-% OVERALL COMPARISON
-%----------------------------------------------------------------------------------------
-
-%\vspace{- 14.2 cm}
-%\begin{center}
-%{\includegraphics[height=7em]{cmu_logo.png}} % First university/lab logo on the left
-%\end{center}
-
-%\vspace{4 cm}
-
-\begin{alertblock}{Experimental Results}
-
-
-
-
-\end{alertblock}
-
-%----------------------------------------------------------------------------------------
-
-
-%----------------------------------------------------------------------------------------
-% CONCLUSION
-%----------------------------------------------------------------------------------------
-
-\begin{block}{Conclusion}
-
-\begin{center}
-
-
-{\bf Graph reconstruction can naturally be expressed as Sparse Recovery.
Understanding properties of $M$, for example RIP, leads to theoretical guarantees on the reconstruction.}
-
-\end{center}
-
-\end{block}
-
-%----------------------------------------------------------------------------------------
-% REFERENCES
-%----------------------------------------------------------------------------------------
-
-\begin{block}{References}
-
-\begin{thebibliography}{42}
-
-\bibitem{candes}
-Candès, E., and Plan, Y.
-\newblock {\it A Probabilistic and RIPless Theory of Compressed Sensing}.
-\newblock IEEE Transactions on Information Theory, 57(11):7235--7254,
-\newblock 2011.
-\end{thebibliography}
-
-\end{block}
-
-%----------------------------------------------------------------------------------------
-
-\end{column} % End of the third column
-
-\end{columns} % End of all the columns in the poster
-
-\end{frame} % End of the enclosing frame
-
-
-
-\end{document}
diff --git a/notes/defs.tex b/notes/defs.tex
deleted file mode 100644
index ca5dd55..0000000
--- a/notes/defs.tex
+++ /dev/null
@@ -1,17 +0,0 @@
-\newtheorem{theorem}{Theorem}[section]
-\newtheorem{lemma}[theorem]{Lemma}
-\newtheorem{proposition}[theorem]{Proposition}
-\newtheorem{corollary}[theorem]{Corollary}
-\theoremstyle{definition}
-\newtheorem*{remark}{Remark}
-
-\DeclareMathOperator{\E}{\mathbb{E}}
-\let\P\relax
-\DeclareMathOperator{\P}{\mathbb{P}}
-\newcommand{\ex}[1]{\E\left[#1\right]}
-\newcommand{\prob}[1]{\P\left[#1\right]}
-\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
-\newcommand{\R}{\mathbb{R}}
-\newcommand{\N}{\mathbb{N}}
-\newcommand{\eps}{\varepsilon}
-\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
diff --git a/notes/formalisation.pdf b/notes/formalisation.pdf
deleted file mode 100644
index 760abe0..0000000
Binary files a/notes/formalisation.pdf and /dev/null differ
diff --git a/notes/formalisation.tex b/notes/formalisation.tex
deleted file mode 100644
index c2ba8c0..0000000
--- a/notes/formalisation.tex
+++ /dev/null
@@ -1,317 +0,0 @@
-\documentclass[10pt]{article}
-\usepackage[utf8x]{inputenc}
-\usepackage{fullpage}
-\usepackage{amsmath, amssymb, bbm, amsthm}
-\usepackage[pagebackref=true,breaklinks=true,colorlinks=true,backref=false]{hyperref}
-\usepackage[capitalize, noabbrev]{cleveref}
-\usepackage{microtype}
-\input{defs}
-
-\date{}
-\title{Cracking Cascades: Applying Sparse Recovery to Graph Reconstruction}
-\author{Jean Pouget-Abadie, Thibaut Horel}
-
-\begin{document}
-\maketitle
-
-\section{Preliminaries: What is a measurement?}
-
-There is an unknown graph $G=(V,E)$ that we wish to reconstruct by observing
-information cascades. An information cascade is a random discrete-time process
-where we get to observe the set of vertices infected at each time step.
-
-The key contribution is to formulate this problem as a sparse recovery problem
-akin to those found in the compressed sensing literature. The parallel between
-the two problems is as follows:
-\begin{itemize}
- \item the unknown \emph{signal} $s$ is a vector in $\{0,1\}^{V\times V}$
- with $s_{u,v} = 1$ iff $(u,v)\in E$.
- \item the \emph{sensing matrix} is given by the cascades. The \emph{sensing
- matrix} maps the signal to the observation space. Since the cascades
- are random time processes, they define a sequence of random
- observations.
- \item the \emph{observations} are the set of vertices infected at each time
- step in each cascade.
-\end{itemize}
-
-Let us define $n\eqdef |V|$. If we assume that all the vertices in $G$ have
-a degree bounded by $d\in\N_+$, then the signal $s$ is $nd$-sparse.
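(For a hypothetical sense of scale: with $n = 10^4$ vertices and degree bound $d = 10$, the signal has at most $10^5$ nonzero entries out of $n^2 = 10^8$ coordinates.)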
Assuming that the sensing matrix has good random properties, we can hope to recover $s$ with $O(nd\log n)$ random observations.
-
-Two main goals:
-\begin{itemize}
- \item understand to which extent the probabilistic cascade models translate
- into incoherence properties of the sensing matrix and allow for an
- accurate reconstruction of the unknown graph with a small number of
- observations.
- \item compare the empirical performance of this compressed sensing approach
- to prior graph reconstruction approaches.
-\end{itemize}
-
-\begin{remark}
-\begin{itemize}
- \item It seems that we will need a \emph{complete} probabilistic model for
- the cascades. By complete I mean that we need to specify a joint
- distribution on the source of the cascade (\emph{e.g.} the source is
- chosen uniformly at random) and on the cascade diffusion process. Such
- a model is probably not very realistic, but is necessary to obtain
- theoretical guarantees. The unrealism of the model is not a problem
- \emph{per se}; the eventual validity of this approach will be confirmed
- experimentally anyway.
- \item It seems likely that we will want the rows of the sensing matrix to
- be independent realizations of the same distribution. However,
- \textbf{this will never be the case} if we have one row for each time
- step of each cascade: the rows within the same cascade are obviously
- highly correlated. We might have to \emph{aggregate} them (for example
- through concatenation?) in a sensible way to end up with one row per
- cascade.
- \item Something which makes this problem very different from the usual
- compressed sensing problems is that the measurements depend on the
- signal we wish to recover: cascades propagate along the edges contained
- in the signal. More precisely, not only are the rows of the sensing matrix
- correlated (if we do not aggregate them by cascade), they are also
- correlated through the signal. Is this a problem? I don't know,
- this looks scary though.
- \item A slightly more general model which we will need in the independent
- cascade case considers a signal $s\in[0,1]^{V\times V}$. We can always
- go from this model to the binary case by first recovering the
- non-binary $s$ and then applying a thresholding procedure. But if we
- only care about recovering the support of $s$, can we do better than
- this two-step process? (TO DO: read stuff about support recovery).
-\end{itemize}
-\end{remark}
-
-Previous stuff:
-\begin{itemize}
-\item Introduction
-\item Simple graphs (cycle, star, path, tree?)
-\item Isotropic condition
-\item RIP condition
-\end{itemize}
-
-One last property we are going to look for in measurements is the {\bf Restricted Isometry Property} (RIP), which was introduced in \cite{???} and is ubiquitous in the compressed sensing literature. Suppose we are given a measurement matrix $M$. For every $T \in \mathbb{N}$, we define $\delta_T$ to be the smallest quantity such that, for any submatrix $M_T$ of $M$ obtained by extracting $T$ columns of $M$, and for any vector $c$ such that $\| c\|_2 = 1$:
-
-$$1 - \delta_T \leq \| M_T c \|_2^2 \leq 1 + \delta_T$$
-
-For small $\delta_T$, the above equation defines a `loose' orthonormality property for the columns of $M$.
-
-\section{Warm up: the voter model}
-
-
-In the voter model, there are two types of nodes, {\it red} and {\it blue}. We fix a horizon $T$. At $t=0$, we decide on a set of {\it blue} nodes, called the seeding set.
At each time step $t$, each node $u$ chooses one of its neighbors uniformly, with probability $1/\deg(u)$, and adopts the color of that neighbor. A cascade in the voter model is the record of which nodes are {\it blue} at each time step $0 \leq t \leq T$.
-
-For simplicity, we suppose that every node has a probability $p_{\text{init}}$ of being a part of the seeding set of a cascade. The following analysis can easily be adapted to a non-uniform probability vector. We also suppose that the graph includes self-loops, meaning each node has the option to keep its color for the next round. This is a common assumption which does not change any of the following results.
-
-\paragraph{Notation}
-Denote by $(M^t_k)_{1 \leq k \leq m; 1 \leq t \leq T}$ the set of blue nodes in cascade $k$ at time step $t$. In other words, $M^t_k$ is a vector in $\mathbb{R}^n$ such that:
-
-\[ (M^t_k)_j = \left\{
- \begin{array}{l l}
- 1 & \quad \text{if $j$ is $blue$ in cascade $k$ at time step $t$}\\
- 0 & \quad \text{otherwise}
- \end{array} \right.\]
-
-Consider a node $i$, and let $\theta_i$ be the vector representing the neighbors of $i$ in the graph ${\cal G}$. In other words, $\theta_i$ is a vector in $\mathbb{R}^n$ such that:
-
-\[ \theta_{ij} = \left\{
- \begin{array}{l l}
- 1/\deg(i) & \quad \text{if $j$ is a parent of $i$ in ${\cal G}$}\\
- 0 & \quad \text{otherwise}
- \end{array} \right.\]
-
-Let $b^t_k$ be the probability that in cascade $k$ node $i$ stays or becomes $blue$ at time step $t$:
-
-\[ b^t_k = \left\{
- \begin{array}{l l}
- (\text{Number of $blue$ neighbors of $i$ in } (k, t-1))/\deg(i) & \quad \text{if $t\neq 0$}\\
- p_{\text{init}} & \quad \text{otherwise}
- \end{array} \right.\]
-
-Our objective is to find the neighbors of $i$ in ${\cal G}$ from $(M^t_k)$, i.e., recover the vector $\theta_i$. Notice that for each $(k,t)$, we have:
-
-$$\forall t< T-1, k, \ M^t_k \cdot \theta_i = b^{t+1}_k$$
-
-We can collect these equalities in matrix form $M$, such that the rows of $M$ are the observed $blue$ nodes $M^t_k$ and the entries of $\vec b$ are the corresponding probabilities:
-
-$$M \cdot \theta_i = \vec b$$
-
-Note that if $M$ had full column rank, then we could recover $\theta_i$ from $\vec b$. This is however an unreasonable assumption, even after having observed many cascades. We must explore which assumptions on $M$ allow us to recover $\theta_i$ from a small number of cascades. Further note that we do not immediately observe $\vec b$. Instead, we observe a Bernoulli vector, whose $j^{\text{th}}$ entry is equal to $1$ with probability $b_j$ and $0$ otherwise. We will denote this vector $\vec w$, and denote by $\vec \epsilon$ the noise vector such that $\vec w = \vec b + \vec \epsilon$. We can therefore reformulate the problem with our observables:
-
-$$M \cdot \theta_i + \vec \epsilon = \vec w$$
-
-We hope to show that we can exploit the sparsity of vector $\theta_i$ to apply common sparse recovery techniques.
-
-\section{Independent Cascade Model}
-
-\subsection{The Model}
-
-Recall the independent cascade model, where nodes have 3 states: susceptible,
-active, and inactive. All nodes start either as susceptible or active. At each
-time step $t$, for every (active, susceptible)-pair $(j,i)$, $j$ attempts to
-infect $i$ with probability $p_{j,i}$ and then becomes inactive. If $j$ succeeds, $i$
-will become active at time step $t+1$. The cascade continues until no active
-nodes remain.
-
-Consider a node $i$.
If $p_{j,i}$ is the probability that node $j$ infects $i$
-at the next step, let $\theta_{j,i} := \log (1 - p_{j,i})$. Note that
-$\theta_{j,i} = 0 \Leftrightarrow p_{j,i} = 0$. Also note that:
-
-$$\sum_{j\in S} \theta_{j,i} = \log \prod_{j \in S} (1 - p_{j,i}) = \log \mathbb{P}(\text{$i$ not infected by any } j \in S)$$
-
-Our objective is to recover the vector $\theta_{*,i}$ for every node $i$.
-
-\subsection{Formulating the sparse recovery problem}
-
-The idea behind what follows is the assumption that $p_{*,i}$ and therefore $\theta_{*,i}$ are sparse vectors: a node has few neighbors (parents in the directed graph) compared to the size of the graph.
-
-Let $\mathbbm{1}_S$ be the indicator vector of the nodes that are active at
-a certain time step $t$ of a certain cascade. Under the previous assumption, we
-have:
-$$\inprod{\mathbbm{1}_S, \theta_{*,i}} = \log \mathbb{P}( \text{$i$ not infected at } t+1| S \text{ active at } t)$$
-
-Suppose we observe a series of cascades $(A_k^t)$, where $A_k^t$ are the nodes
-active in cascade $k$ at time $t$ (these are exactly the nodes which became
-infected at time $t-1$). Recall that once a node has been infected (becomes
-active), it cannot be infected in the future (is inactive). Let us therefore
-look, in every cascade $k$, at all the sets $A_k^t$ with $t < t_i^k$, where
-$t_i^k$ is the time at which $i$ was infected in cascade $k$ ($t_i^k = + \infty$ if $i$ was not infected in that cascade).
-If we concatenate these rows $\mathbbm{1}_{A_k^t}$ into a matrix $M$ (for
-measurement), we have:
-
-$$M \theta_{*,i} = b$$
-where $b$ is a column vector corresponding to $\log \mathbb{P}(\text{$i$ was not infected at } t+1| S \text{ active at } t)$. Note that even with direct access to $b$, if $M$ does not have full column rank, solving for $\theta_{*,i}$ is not trivial! However, under sparsity assumptions on $\theta_{*,i}$, we could apply results from the sparse recovery/sparse coding/compressed sensing literature.
-
-In reality, we do not have access to $b$. We have access to the realization vector $\vec w$ of $\{i$ was infected at $t+1 | S$ active at $t\}$:
-
-$$\vec w\sim B(1 - e^{M \theta_{*,i}})$$
-
-where the operation $f: \vec v \rightarrow 1 - e^{\vec v}$ is done element-wise and $B(\vec p)$ is a vector whose entries are Bernoulli variables with parameters the entries of $\vec p$.
-
-\subsection{Maximum likelihood problem}
-
-The maximum likelihood problem takes the following form:
-\begin{displaymath}
- \begin{split}
- \max_\theta &\sum_{k,t}\left(b_{k,t}\log\left(1-\exp\inprod{A_k^t, \theta}\right)
- + (1-b_{k,t})\log\exp\inprod{A_k^t,\theta}\right)\\
- \text{s.t. }& \|\theta\|_1 \leq k
- \end{split}
-\end{displaymath}
-which we can rewrite as:
-\begin{displaymath}
- \begin{split}
- \max_\theta &\sum_{k,t}b_{k,t}\log\left(\exp\left(-\inprod{A_k^t, \theta}\right)-1\right)
- + \sum_{k,t}\inprod{A_k^t,\theta}\\
- \text{s.t. }& \|\theta\|_1 \leq k
- \end{split}
-\end{displaymath}
-In this form it is easy to see that this is a convex program: the objective
-function is the sum of a linear function and a concave function.
-
-\paragraph{Questions} to check: is the program also convex in the $p_{j,i}$'s?
-What is the theory of sparse recovery for maximum likelihood problems? (TO DO:
-read about Bregman divergence, I think it could be related).
-
-\subsection{Results under strong assumptions}
-
-What can we hope for, e.g., if $M$ has full column rank or if the same sets $S$ occur frequently?
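-As a minimal sketch of the simplest such assumption: if $M$ had full column rank, the noiseless system $M\theta_{*,i} = b$ would be solved exactly by ordinary least squares,
-\begin{displaymath}
-\hat\theta_{*,i} = (M^\top M)^{-1} M^\top b,
-\end{displaymath}
-with no sparsity assumption needed; the interesting regime is precisely the one where $M$ has far fewer rows than columns and this inverse does not exist.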
-
-\subsection{Result under RIP condition}
-
-For ease of notation, let $\theta := \theta_{*,i}$. The following resolution of the above problem is heavily borrowed from \cite{2005math......3066C}. We can see $w$ as a bounded perturbation of $f(b)$:
-$$w = f(M\theta) + \epsilon, \text{ where } \|\epsilon\|_{\infty} < 1$$
-
-Define the following program, which we can solve with all available information $M$ and $w$:
-\begin{equation}
-(N_1) \quad \min_\theta \|\theta\|_1 \quad \text{subject to} \quad \|e^{M\theta} - (1 - w) \|_2 \leq \|\epsilon \|_\infty \nonumber
-\end{equation}
-
-While we can hope for the columns of $M$ to be `loosely' orthogonal, we must normalize them if we hope for $\delta_T$ to be small. Let $M'$ correspond to $M$ with normalized columns; we therefore define $\epsilon'$ such that:
-$$w = f(M'\theta) + \epsilon'$$
-We see that $\epsilon'= w - (w - \epsilon)^{\vec \lambda}$ (element-wise operation), where $\lambda_i = 1/\sqrt{\sum \text{col}(M)_i}$ such that $\|\epsilon'\|_{\infty} < 1$. We can define the following program:
-
-\begin{equation}
-(N_2) \quad \min_\theta \|\theta\|_1 \quad \text{subject to} \quad \|e^{M'\theta} - (1 - w) \|_2 \leq \|\epsilon' \|_\infty \nonumber
-\end{equation}
-
-With the corresponding $\delta_T'$ defined for $M'$, we are going to prove the following:
-
-\begin{theorem}
-
-Suppose that all nodes in our graph have {\bf degree bounded by $S$}, that no edge into $i$ has probability $1$, i.e., $\forall j,\ p_{j,i} < 1 - \eta$ for some $0<\eta<1$, and that $\delta'_{3S} + 3 \delta'_{4S} < 2$. If $\theta_0$ is the true vector for node $i$, the solution $\hat \theta$ to $(N_2)$ obeys:
-$$\|\hat \theta - \theta_0 \|_2 \leq \frac{C_S}{\eta} \|\epsilon' \|_{\infty} $$ where $C_S$ is well behaved ($\approx 10$ for reasonable values of $\delta'_{4S}$)
-\end{theorem}
-
-
-\begin{proof}
-The proof borrows heavily from \cite{2005math......3066C}. Define $h := \theta - \theta_0$. We list the results as they are given in \cite{2005math......3066C} or as they can be adapted in our case.
-
-\begin{itemize}
-\item $\|e^{M'\theta} - e^{M' \theta_0} \|_2 \leq \| e^{M'\theta} - ( 1 - w) \|_2 + \| e^{M'\theta_0} - (1 - w) \|_2 \leq 2 \|\epsilon' \|_{\infty} $
-\item $\|h\|_2^2 \leq (1 + |T_0|/N) \|h_{T_{01}}\|_2^2$, where N is an arbitrary positive integer, and $h_{T_{01}}$ is the vector $h$, where all entries are masked to 0, except those corresponding to the non-zero entries of $\theta_0$ and the $N$ largest entries of $h_{T_0^c}$.
-\item $\|e^{M'\theta} - e^{M'\theta_0} \|_2 \geq \eta \|M'h\|_2 \geq \eta\, C_{T_0, N} \cdot \|h_{T_{01}} \|_2$, where $C_{T_0,N} := \sqrt{1 - \delta'_{N + T_0}} - \sqrt{T_0 / N} \sqrt{1 + \delta'_N}$
-\end{itemize}
-The first bullet point is obtained by the triangle inequality. The second bullet point is identical to the proof in \cite{2005math......3066C}. The first inequality in the third bullet point is obtained because, by assumption, all entries of $M'\theta$ and $M'\theta_0$ are greater than $\ln \eta$ (remember that the $e^{\vec v}$ operation is done element-wise).
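-Spelling this step out: by the mean value theorem, for any $u, v \geq \ln \eta$, $|e^u - e^v| \geq e^{\ln \eta} |u - v| = \eta |u - v|$; applying this coordinate-wise to $M'\theta$ and $M'\theta_0$ and summing the squares gives $\|e^{M'\theta} - e^{M'\theta_0}\|_2 \geq \eta \|M'h\|_2$.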
The rest follows as in \cite{2005math......3066C}, and we obtain that:
-$$\|h\|_2 \leq 2 \frac{\sqrt{1 + T_0/N}}{ \eta \cdot C_{T_0, N}} \|\epsilon'\|_{\infty} = \frac{C_S}{\eta} \|\epsilon'\|_{\infty}$$
-For $\delta'_{4S} \leq \frac{1}{5}$, $C_S \approx 8.82$; for $\delta'_{4S} \leq \frac{1}{4}$, $C_S \approx 10.47$.
-\end{proof}
-
-\begin{remark} A few notes on the result:
-\begin{itemize}
-\item With this naive reduction of $w$ to an $\epsilon$-perturbation of $f(b)$, the best $\|\epsilon\|_{\infty}$ we can hope for is $1 - \eta$. Seeing $w$ as more than an arbitrary perturbation will most likely yield a much better upper-bound.
-\item The upper-bound is done in $\log$-space for the true probabilities:
-$$\frac{C_S}{\eta} > \sum \left[ \log \left(\frac{1 - p_j}{1 - p_j'} \right) \right]^2 > \frac{\eta \log(1/\eta)}{1 - \eta} \| \frac{1 - p_j}{1 - p'_j} - 1 \| $$
-
-This bullet point needs to be finished.
-\item To what extent can we approximate our measurement matrix by a matrix with independent Bernoulli entries? What $\delta_T$'s can we hope for? Which constant $C_S$ can we hope for? How does it change with the rows of $M$?
-\end{itemize}
-
-\end{remark}
-
-
-
-\subsection{Results under isotropic condition}
-
-
-
-% Given $M$ and $w$, we can solve the following maximum log-likelihood problem:
-% \begin{equation}
-% \max_{\|\theta_{*,i}\|_0 \leq k}
-% \label{eq:max_loglik_obj}
-% \end{equation}
-
-% Since optimizing under the $\|\theta\|_0 \leq k$ is hard, we solve instead the Lagrangian relaxation:
-
-% \begin{equation}
-% \max_{\theta_{*,i}} + \lambda \|\theta_{*,i}\|_0
-% \label{eq:lagra_max_loglik_obj}
-% \end{equation}
-
-
-% Eq~\ref{eq:lagra_max_loglik_obj} is concave in $\theta_{*,i}$ and can therefore be solved efficiently. We would like to exploit the sparsity of $\theta_{*,i}$ to show that we recover the true sparse $\theta_{*,i}$ with few measurements (rows of M).
-
-
-\bibliography{sparse}
-\bibliographystyle{plain}
-
-\end{document}
-
-
-
-
-% Let us now consider the matrix M, whose rows are the vectors $\mathbbm{1}_S$. If we could estimate the vector $X := \mathbb{P}(i \text{ not infected at } t | S \text{ active at } t)$, we could solve for $\theta$:
-% $$M \theta_{*i} = X, \text{ where }\theta_{*,i} \text{ is a sparse vector}$$
-
-% There are many powerful tools from sparse coding theory which would allow us to recover the sparsest $\theta$ under certain assumptions on $M$. One such assumption is incoherence of the columns of $M$. In other words, if we can show that for all columns $M_i, M_j$ of $M$, we have $|\langle M_i, M_j \rangle| \leq \mu \|M_i\| \|M_j\|$, i.e., $M$ is $\mu$-incoherent, then for any node $i$ with fewer than $k = \frac{1}{2 \mu}$ parents, we can recover $\theta_{*i}$ exactly. To get a better intuition of what this means, if the entries of $M$ were independent Gaussian variables, we would need approximately $k \ln(\frac{n}{k})$ measurements to recover $\theta_{*i}$ exactly.
-
-% This would seem to suggest that the quality of observed cascades is much more important than quantity--a notion which has not truly been explored in graph reconstruction as of yet. Another stronger assumption on the columns of M is separability (see Moitra's work in Topic Modeling) leading to even stronger results.
-
-% In reality of course, we do not know $X$. Instead, we have access to $\hat X$ which estimates $X$ in the following way. For any set S in any cascade that was active before i was, we obtain a binary signal, $\hat X_S:=0$ if $i$ was infected immediately afterwards and $\hat X_S:=1$ otherwise.
As the number of times set $S$ was observed active grows, we have $$\frac{1}{m} \sum \hat X_S \rightarrow \mathbb{P}(i \text{ not infected at }t+1 | S \text{ active at } t)$$.
-
-% Certain sets might have been infected many times in our set of cascades, certain sets might have been infected only once. One possible way to rewrite the program above is: $$\hat X_j = H(w_j - e^{(M \theta_{*i})_j}), \text{ where } w\sim {\cal U}[0,1] \text{ and H is the step function } \mathbbm{1}_{\cdot \geq 0}$$
-
-% This problem seems to be closely related to the growing field of 1-bit compressed sensing, with at least one difference: when scientists consider 1bitCS with noise, they are usually in a position to decide which measurement to do next to recover $\theta_{*i}$, whereas in this framework, we are limited to an observer status\footnote{Research in the other direction could be very interesting: see discussion about exploration-exploitation with Thibaut}.
-
-% I need to do more reading on the subject, namely to understand what guarantees people have found for sample complexity.
-
diff --git a/notes/images/greedy_sparse_comparison.png b/notes/images/greedy_sparse_comparison.png
deleted file mode 100644
index 3fab5b0..0000000
Binary files a/notes/images/greedy_sparse_comparison.png and /dev/null differ
diff --git a/notes/images/icc.png b/notes/images/icc.png
deleted file mode 100644
index 2148eed..0000000
Binary files a/notes/images/icc.png and /dev/null differ
diff --git a/notes/images/sparse_recovery_illustration.pdf b/notes/images/sparse_recovery_illustration.pdf
deleted file mode 100644
index 984bb19..0000000
Binary files a/notes/images/sparse_recovery_illustration.pdf and /dev/null differ
diff --git a/notes/images/sparse_recovery_illustration.svg b/notes/images/sparse_recovery_illustration.svg
deleted file mode 100644
index bef82c2..0000000
--- a/notes/images/sparse_recovery_illustration.svg
+++ /dev/null
@@ -1,655 +0,0 @@
-[655 deleted lines of SVG/XML markup; the tags were lost in extraction and only the text node ``image/svg+xml'' survives]
diff --git a/notes/images/sparse_recovery_illustration_copy.pdf b/notes/images/sparse_recovery_illustration_copy.pdf
deleted file mode 100644
index a43f1f0..0000000
Binary files a/notes/images/sparse_recovery_illustration_copy.pdf and /dev/null differ
diff --git a/notes/images/voter.png b/notes/images/voter.png
deleted file mode 100644
index c1325cb..0000000
Binary files a/notes/images/voter.png and /dev/null differ
diff --git a/notes/maximum_likelihood_approach.tex b/notes/maximum_likelihood_approach.tex
deleted file mode 100644
index 4a22158..0000000
--- a/notes/maximum_likelihood_approach.tex
+++ /dev/null
@@ -1,230 +0,0 @@
-\documentclass[11pt]{article}
-\usepackage{fullpage, amsmath, amssymb, amsthm}
-\usepackage{color}
-\newtheorem{theorem}{Theorem}
-\newtheorem{lemma}{Lemma}
-\newtheorem{remark}{Remark}
-\newtheorem{proposition}{Proposition}
-
-\title{Maximum Likelihood Approach}
-\author{Jean Pouget-Abadie}
-
-\begin{document}
-
-\maketitle
-
-We consider the node $\alpha$. We index the measurements by $i \in [1, n]$. Let $b^i$ be the indicator variable for node $\alpha$ being active at the round following measurement $i$ and let $x^i$ be the vector of active nodes for measurement $i$.
Recall that:
-
-\begin{equation}
-\label{eq:probability_of_infection}
-1 - \exp(\langle x^i, \theta \rangle) = \mathbb{P}(\text{node } \alpha \text{ is active at the following round})
-\end{equation}
-
-The likelihood problem can be formulated as such:
-
-\begin{equation}
-\label{eq:main_formulation}
-\min_{\theta \in \mathbb{R}^p} \quad \lambda_n \| \theta \|_1 + \sum^n_{i=1} - b^i \log \left(e^{-\langle x^i, \theta \rangle} - 1 \right) - \langle x^i, \theta \rangle
-\end{equation}
-
-We define $f(\theta):= \sum^n_{i=1} - b^i \log \left(\exp(-\langle x^i, \theta \rangle) - 1 \right) - \langle x^i, \theta \rangle$ such that Eq.~\ref{eq:main_formulation} can be rewritten as:
-
-\begin{equation}
-\label{eq:small_formulation}
-\min_{\theta \in \mathbb{R}^p} \quad f(\theta) + \lambda_n \| \theta \|_1
-\end{equation}
-
-We cite the following theorem from \cite{Negahban:2009} (roughly, because the statements of the theorem are either slightly wrong or unclear):
-
-\begin{proposition}
-\label{thm:cited_theorem}
-Let ${\cal C}:=\{\Delta \in \mathbb{R}^p : \exists S \subset [1, p] \ s.t. \ \|\Delta_{S^c}\|_1 \leq 3 \| \Delta_S \|_1 \}$. Suppose that $\theta^*$ is $s$-sparse, and the following two conditions are met:
-\begin{equation}
-\lambda_n \geq 2 \|\nabla f(\theta^*) \|_\infty
-\label{eq:lambda_condition}
-\end{equation}
-\begin{equation}
-\forall \Delta \in {\cal C}, \ \Delta^T \cdot \nabla^2 f(\theta^*) \cdot \Delta \geq \gamma_n \| \Delta \|_2^2
-\label{eq:RSC_condition}
-\end{equation}
-then the solution $\hat\theta$ of Eq.~\ref{eq:small_formulation} satisfies:
-\begin{equation}
-\| \hat\theta - \theta^* \|_2 \leq \frac{\sqrt{s} \lambda_n}{\gamma_n}
-\end{equation}
-\end{proposition}
-
-It remains to show that the two conditions of Proposition~\ref{thm:cited_theorem} are met.
-
-\section*{Condition~\ref{eq:lambda_condition}}
-Condition~\ref{eq:lambda_condition} requires us to find an upper bound on $ 2 \|\nabla f(\theta^*) \|_\infty$. If we only consider the first measurement of every cascade, this can be done easily. Let $N$ be the number of cascades (to distinguish from the total number of measurements $n$). Begin by noting that:
-
-\begin{equation}
-\nabla_k f(\theta) = \sum^n_{i=1} \frac{b^i x^i_k}{1 - e^{\langle x^i, \theta \rangle}} - \sum^n_{i=1} x^i_k = \sum_{i=1}^n x^i_k \left( \frac{b^i}{\mathbb{P}(\text{node } \alpha \text{ infected})} - 1\right)
-\end{equation}
-
-\begin{lemma}
-\label{lem:subgaussian_variable}
-$\nabla f(\theta^*)$ is a $N/p_{\min}$-subgaussian variable, where $p_{\min}$ is the smallest non-zero edge probability into node $\alpha$.
-\end{lemma}
-
-\begin{proof}
-\begin{align}
-\mathbb{E} \left( \nabla_k f(\theta) \right) & = \sum_{i=1}^N \mathbb{E} \left[ x^i_k \left( \frac{b^i}{\mathbb{P}(\text{node } \alpha \text{ infected})} - 1\right) \right] \nonumber \\
-& = \sum_{i=1}^N \mathbb{E}_S \left[ \mathbb{E}\left[x^i_k \left( \frac{b^i}{\mathbb{P}(\text{node } \alpha \text{ infected})} - 1\right) \middle| S \right] \right] \quad \text{where $S$ is the seed set} \nonumber \\
-& = \sum_{i=1}^N \mathbb{E}\left[x^i_k \left( \frac{ \mathbb{E}_S \left[ b^i \middle| S \right]}{\mathbb{P}(\text{node } \alpha \text{ infected})} - 1\right) \right] \nonumber \\
-& = 0
-\end{align}
-Therefore, $\nabla f(\theta^*)$ is the sum of zero-mean variables, bounded in absolute value by $1/p_{\min}$. It follows that $\nabla f(\theta^*)$ is $N/p_{\min}$-subgaussian.
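-(One quantitative route to this last step, left implicit here: by Hoeffding's lemma, a zero-mean variable $Z$ with $|Z| \leq c$ satisfies $\mathbb{E}[e^{tZ}] \leq e^{t^2 c^2/2}$; multiplying $N$ such bounds over the independent measurements, with $c = 1/p_{\min}$, gives the sub-Gaussian behavior of the sum.)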
-\end{proof}
-
-By union bound and characterization of sub-gaussian variables:
-
-\begin{equation}
-\mathbb{P}(\| \nabla f(\theta) \|_{\infty} > \lambda) \leq 2 \exp \left( -\frac{\lambda^2 p_{\min}}{2n} + \log p \right)
-\end{equation}
-
-In conclusion, for $\delta>0$, $\lambda := 2 \sqrt{\frac{n^{\delta + 1} \log p}{p_{\min}}}$ meets Condition~\ref{eq:lambda_condition} with probability $1 - \exp(-n^\delta \log p )$.
-
-
-\section*{Condition~\ref{eq:RSC_condition}}
-
-Note that:
-\begin{equation}
-\nabla^2_{kj} f(\theta) = \sum_{i=1}^n \frac{b^i x_k^i x_j^i e^{\langle x^i, \theta \rangle}}{\left(1 - e^{\langle x^i, \theta \rangle} \right)^2} = \sum_{i=1}^n b^i x_k^i x_j^i \frac{\mathbb{P}(\text{node } \alpha \text{ not infected})}{\mathbb{P}(\text{node } \alpha \text{ infected})^2}
-\end{equation}
-
-
-We are going to exhibit an explicit constant $\gamma$ such that: $\forall \Delta \in {\cal C}, \Delta^T \cdot \nabla^2 f(\theta^*) \cdot \Delta \geq \gamma n \|\Delta\|_2^2$.
-
-\paragraph{Notation} Let $p_i := \mathbb{P}(\text{node } \alpha \text{ infected})$. Let $Z^i_k := b^i x^i_k \frac{1-p_i}{p_i^2}$ and let $Z^i_{k,j} := b^i x^i_k x^i_j \frac{1-p_i}{p_i^2}$. We also define $Z_k := \sum_i Z^i_k$ and $Z_{k,j} := \sum_i Z^i_{k,j}$.
-
-\begin{align}
-\Delta^T \cdot \nabla^2 f(\theta^*) \cdot \Delta & = \sum_k \Delta_k^2 \left[ \sum_i b^i x_k^i \frac{1 - p_i}{p_i^2} \right] + 2 \sum_{k< j} \Delta_k \Delta_j \left[ \sum_i b^i x^i_k x^i_j \frac{1 - p_i}{p_i^2}\right] \nonumber \\
-& = \sum_k \Delta_k^2 Z_k + 2 \sum_{k< j} \Delta_k \Delta_j Z_{k,j} \nonumber
-\end{align}
-
-
-
-\begin{lemma}
-\label{lem:first_term}
-Suppose that $\forall k$, $\mathbb{E}_{S(k)} \left[ \frac{1}{p_i} \right] \geq 1 + \alpha$, then with probability greater than $1 - 2 p e^{-2 \alpha^2 (1-c)^2p_{\min}^2 p_{\text{init}}^2 N}$, $$\forall k, \ Z_k > c \alpha p_{\text{init}} N$$
-\end{lemma}
-
-\begin{proof}
-Let $S(k)$ denote the active set conditioned on the fact that node $k$ is active AND that at least one parent of $\alpha$ is active. We denote by $p_{S(k)}$ the probability that the active set verifies the previous two conditions.
-\begin{align}
-\nonumber
-\mathbb{E}[Z^i_k] & = p_{S(k)} \mathbb{E}_{S(k)} \left[ \mathbb{E}[b^i | S(k)] \frac{1 - p_i}{p_i^2} \right] \\ \nonumber
-& = p_{S(k)} \left( \mathbb{E}_{S(k)} \left[ \frac{1}{p_i} \right] - 1 \right) \\ \nonumber
-& \geq \alpha p_{S(k)} \quad \text{by assumption}
-\end{align}
-
-Note that $|Z^i_k| < \frac{1}{p_{\min}^2} =: b$ {\it a.s.} By Hoeffding's inequality, for $0 < c < 1$:
-\begin{align}
-\nonumber
-\mathbb{P}\left( Z_k \leq c \alpha p_{S(k)} N \right) & \leq e^{- \frac{2(1-c)^2}{Nb^2} \left( \mathbb{E}[Z_k] \right)^2} \leq e^{-2 \alpha^2 (1-c)^2 p_{\min}^2 p_{S(k)}^2 N}
-\end{align}
-We conclude by union bound.
-\end{proof}
-
-\begin{lemma}
-Let $S(k,j)$ denote the active set conditioned on nodes $k$ and $j$ being active, and $p_{S(k,j)}$ the probability of that event. Suppose that $\forall k,j$, $\mathbb{E}_{S(k,j)} \left[ \frac{1}{p_i} \right] \leq 1 + \beta$; then with high probability, $\forall k,j$, $Z_{k,j} \leq c \beta p_{S(k,j)} N$.
-\end{lemma}
-
-\begin{proof}
-By Hoeffding's inequality again, for $c > 1$:
-\begin{align}
-\nonumber
-\mathbb{P}\left( Z_{k,j} > c \beta p_{S(k,j)} N \right) & \leq e^{- \frac{2(1-c)^2}{Nb^2} \left( \mathbb{E}_{S(k,j)} \left[ \frac{1}{p_i} \right] - 1 \right)^2} \\ \nonumber
-& \leq e^{-2 \alpha^2 (1-c)^2p_{\min}^2 p_{S(k,j)}^2 N}
-\end{align}
-We conclude by union bound.
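-(Explicitly, the union bound here ranges over the at most $\binom{p}{2} \leq p^2$ pairs $(k,j)$, which multiplies the single-pair failure probability by $p^2$, mirroring the factor $2p$ in Lemma~\ref{lem:first_term}.)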
-\end{proof}
-
-\begin{proposition}
-Suppose that $\forall k,j$, $1 + \alpha \leq \mathbb{E}_{S(k, j)} \left[ \frac{1}{p_i} \right] \leq 1 + \beta$; then with probability greater than $1 - XXX$, Condition~\ref{eq:RSC_condition} is met with $\gamma_n = \gamma n$ where $\gamma := \alpha p_{S(k)} - 16 \sqrt{s} \beta p_{S(k,j)}$
-\end{proposition}
-
-\begin{proof}
-(Sketch) By the triangle inequality followed by MacLaurin's inequality, the cross term $\bigl| \frac{2}{\binom{n}{2}} \sum_{i<j} \Delta_i \Delta_j Z_{i,j} \bigr|$ can be controlled using the upper bound on the $Z_{k,j}$ from the previous lemma, together with $\mathbb{E}\left[\frac{1}{p_i}\right] > \frac{1}{p_{\text{init}}}$; combined with the lower bound on the $Z_k$, this yields the stated $\gamma$.
-\end{proof}
-% \end{align}
-
-% We can conclude using the following Hoeffding inequality for independent random variables bounded by $[0, b_i]$ by noticing that our variables are bounded above by $\frac{1 - p_{\min}}{p_{\min}^2}$
-
-% \paragraph{Hoeffding's inequality}
-% \begin{equation}
-% \label{eq:hoeffding_inequality}
-% \mathbb{P} \left(\sum Z_i \geq \mathbb{E}[\sum Z_i] - t \right) \leq \exp\left(- \frac{2 N t^2}{b^2} \right)
-% \end{equation}
-
-% It follows that for $c<1$ with probability $1 - \exp \left( - n^3 c^2 s^2 p_{init}^4 p_{\min}^6 \frac{\eta^2}{(1 - \eta)^4} \right)$, we have that $$\sum_k \Delta_k^2 \left[ \sum_i b^i x_k^i \frac{1 - p_i}{p_i^2} \right] \geq \gamma N =: (1 -c) s p_{init}^2 p_{\min} \frac{\eta}{(1 - \eta)^2} N$$
-
-% \begin{remark}
-% Would it be possible to extend this result using Azuma's inequality on Martingales to not just the first measurement of every cascade?
-% \end{remark}
-
-% \subsection*{Second term}
-% We are now going to find an upper-bound on the term $\sum_i b^i x^i_k x^i_j \frac{1 - p_i}{p_i^2}$.
-
-
-\section*{Conclusion}
-
-Suppose we show that Condition~\ref{eq:RSC_condition} is met with $\gamma_n = \gamma N$; then we have the following theorems:
-
-\begin{theorem}
-\label{thm:l2_bound}
-Suppose that $\theta^* \in \mathbb{R}^p$ is $s$-sparse and that we choose $\lambda_n = 2 \sqrt{\frac{n^{\delta + 1} \log p}{p_{\min}}}$ for $\delta >0$, then with probability $1 - \exp(-n^\delta \log p )$, we have
-\begin{equation}
-\|\hat \theta - \theta^* \|_2 \leq \frac{2}{\gamma} \sqrt{\frac{s \log p}{p_{\min} N^{1 - \delta}}}
-\end{equation}
-\end{theorem}
-
-Note that we can choose $\delta = 0$ in high dimensions since the probability of success will then be $1 - \frac{1}{p} \approx 1$. We can also obtain support recovery by the following reasoning.
-
-\begin{theorem}
-\label{thm:support_recovery}
-Suppose that $N$ is chosen such that $\frac{2}{\gamma}\sqrt{\frac{s \log p}{p_{\min} N^{1 -\delta}}} < \eta$ and suppose we keep, as our estimate of the support of $\theta^*$, only the coordinates with $\hat \theta_i > \eta$. Then no wrong parent will be included, and all `strong' parents will be included, where `strong' signifies: $\theta^*_i > 2 \eta$.
-\end{theorem}
-
-It follows that we have found an algorithm that recovers the graph from ${\cal O}(s \log p)$ measurements, with better constants and fewer assumptions than previous work.
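-For concreteness, here is a possible implementation sketch of the estimator analyzed above, using the off-the-shelf convex modeling package \texttt{cvxpy}. This is an illustration only: the helper name \texttt{estimate\_parents}, the choice of \texttt{cvxpy}, and the final thresholding rule (following Theorem~\ref{thm:support_recovery}) are assumptions of the sketch, not part of the analysis. With $u_i = \langle x^i, \theta\rangle$, the objective of Eq.~\ref{eq:main_formulation} is equivalent to maximizing $\sum_i b^i \log(1 - e^{u_i}) + (1 - b^i) u_i - \lambda_n \|\theta\|_1$, which is concave:
-
-\begin{verbatim}
-import cvxpy as cp
-import numpy as np
-
-def estimate_parents(M, b, lam, eta):
-    # M: (n, p) 0/1 matrix of active sets (one row per measurement)
-    # b: (n,) 0/1 infection outcomes for node alpha
-    # lam: regularization weight lambda_n; eta: support threshold
-    n, p = M.shape
-    theta = cp.Variable(p, nonpos=True)  # theta_j = log(1 - p_{j,alpha}) <= 0
-    u = M @ theta                        # u_i = <x^i, theta>
-    loglik = cp.sum(cp.multiply(b, cp.log(1 - cp.exp(u)))
-                    + cp.multiply(1 - b, u))
-    cp.Problem(cp.Maximize(loglik - lam * cp.norm1(theta))).solve()
-    # keep only `strong' coordinates, as in the support recovery theorem
-    return np.where(np.abs(theta.value) > eta)[0]
-\end{verbatim}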
- -\bibliography{sparse} -\bibliographystyle{plain} - -\end{document} \ No newline at end of file diff --git a/notes/presentation/beamer.tex b/notes/presentation/beamer.tex deleted file mode 100644 index 7d4b5c1..0000000 --- a/notes/presentation/beamer.tex +++ /dev/null @@ -1,381 +0,0 @@ -\documentclass[10pt]{beamer} - -\usepackage{amssymb, amsmath, graphicx, amsfonts, color} - -\title{Estimating a Graph's Parameters from Cascades} -\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel} -\date{} - -\begin{document} - -\begin{frame} -\titlepage -\end{frame} - -\begin{frame} -\frametitle{Example} -\begin{figure} -\includegraphics[scale=.25]{../images/drawing.pdf} -\caption{A network} -\end{figure} -\end{frame} - -\begin{frame} -\frametitle{Example} -\begin{figure} -\includegraphics[scale=.25]{../images/noedges_step1.pdf} -\caption{Cascade 1: $t=0$} -\end{figure} -\end{frame} - -\begin{frame} -\frametitle{Example} -\begin{figure} -\includegraphics[scale=.25]{../images/noedges_step2.pdf} -\caption{Cascade 1: $t=1$} -\end{figure} -\end{frame} - -\begin{frame} -\frametitle{Example} -\begin{figure} -\includegraphics[scale=.25]{../images/noedges_step3.pdf} -\caption{Cascade 1: $t=2$} -\end{figure} -\end{frame} - -\begin{frame} -\frametitle{Example} -\begin{figure} -\includegraphics[scale=.25]{../images/noedges_step1_cascade2} -\caption{Cascade 2: $t=0$} -\end{figure} -\end{frame} - -\begin{frame} -\frametitle{Example} -\begin{figure} -\includegraphics[scale=.25]{../images/noedges_step2_cascade2} -\caption{Cascade 2: $t=1$} -\end{figure} -\end{frame} - -\begin{frame} -\frametitle{Example} -\begin{figure} -\includegraphics[scale=.25]{../images/noedges_step3_cascade2} -\caption{Cascade 2: $t=2$} -\end{figure} -\end{frame} - - -\begin{frame} -\frametitle{Context} - -Notation: -\begin{itemize} -\item $({\cal G}, \theta)$ : graph, parameters -\item Cascade: diffusion process of a `behavior' on $({\cal G}, \theta)$ -\item $(X_t)_c$ : set of `active' nodes at time t for cascade $c$ -\end{itemize} - - -\begin{table} -\begin{tabular}{c c} -Graph $\implies$ Cascades & Cascades $\implies$ Graph \\ \hline -$({\cal G}, \theta)$ is known & $(X_t)_c$ is observed \\ -Predict $(X_t) | X_0$ & Recover $({\cal G}, \theta)$ \\ -\end{tabular} -\end{table} - -Summary: -\begin{itemize} -\item Many algorithms \emph{require} knowledge of $({\cal G}, \theta)$ -\item {\bf Graph Inference} is the problem of \emph{learning} $({\cal G}, \theta)$ -\end{itemize} -\end{frame} - -\begin{frame} -\begin{block}{Decomposability} -Learning the graph $\Leftrightarrow$ Learning the parents of a single node -\end{block} -\end{frame} - - -\begin{frame} -\frametitle{Problem Statement} -\begin{itemize} -\pause -\item Can we learn ${\cal G}$ from $(X_t)_c$? -\pause -\item How many cascades? How many steps in each cascade? -\pause -\item Can we learn $\theta$ from $(X_t)_c$? -\pause -\item How does the error decrease with $n_{\text{cascades}}$? -\pause -\item Are there graphs which are easy to learn? Harder to learn? -\pause -\item What kind of diffusion processes can we consider? -\pause -\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$? 
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Notation}
-\begin{itemize}
-\item $n$ : number of measurements
-\item $N$ : number of cascades
-\item $m$ : number of nodes
-\item $s$ : degree of the node considered
-\end{itemize}
-\end{frame}
-
-
-\begin{frame}
-\frametitle{Related Work}
-
-\begin{itemize}
-\pause
-\item Can we learn ${\cal G}$ from $(X_t)_c$?
-\pause
-\\{\color{blue} Yes}
-\pause
-\item How many cascades? How many steps in each cascade?
-\pause
-\\ {\color{blue} $\mathrm{poly}(s) \log m$ cascades}
-\pause
-\item Can we learn $\theta$ from $(X_t)_c$?
-\pause
-\\ {\color{blue} (?)}
-\pause
-\item How does the error decrease with $n_{\text{cascades}}$?
-\pause
-\\ {\color{blue} (?)}
-\pause
-\item Are there graphs which are easy to learn? Harder to learn?
-\pause
-\\{\color{blue} Sparse graphs are easy}
-\pause
-\item What kind of diffusion processes can we consider?
-\pause
-\\{\color{blue} IC Model (discrete and continuous)}
-\pause
-\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
-\pause
-\\{\color{blue} (?)\dots$s \log (m/s)$ in a specific setting}
-\end{itemize}
-\end{frame}
-
-
-
-\begin{frame}
-\frametitle{Our Work}
-\begin{itemize}
-\pause
-\item Can we learn ${\cal G}$ from $(X_t)_c$?
-\pause
-\\{\color{blue} Yes} $\rightarrow$ {\color{red} Yes}
-\pause
-\item How many cascades? How many steps in each cascade?
-\pause
-\\ {\color{blue} $\mathrm{poly}(s) \log m$ cascades} $\rightarrow$ {\color{red} $s\log m$ measurements}
-\pause
-\item Can we learn $\theta$ from $(X_t)_c$?
-\pause
-\\ {\color{blue} (?)} $\rightarrow$ {\color{red} Yes!}
-\pause
-\item How does the error decrease with $n_{\text{cascades}}$?
-\pause
-\\ {\color{blue} (?)} $\rightarrow$ {\color{red} ${\cal O}(\sqrt{s\log m/n})$}
-\pause
-\item Are there graphs which are easy to learn? Harder to learn?
-\pause
-\\ {\color{blue} Sparse graphs are easy} $\rightarrow$ {\color{red} Approx. sparsity is also easy}
-\pause
-\item What kind of diffusion processes can we consider?
-\pause
-\\ {\color{blue} IC Model (discrete, continuous)} $\rightarrow$ {\color{red} Large class of cascade models}
-\pause
-\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
-\pause
-\\{\color{blue} $s \log (m/s)$ in a specific setting} $\rightarrow$ {\color{red} $s \log (m/s)$ for approx. sparse graphs}
-\end{itemize}
-
-\end{frame}
-
-\begin{frame}
-\frametitle{Voter Model}
-\begin{itemize}
-\pause
-\item {\color{red} Red} and {\color{blue} Blue} nodes. At every step, each node $i$ chooses one of its neighbors $j$ with probability $p_{j,i}$ and adopts that color at $t+1$
-\pause
-\item If {\color{blue} Blue} is the `contagious' state:
-\pause
-\begin{equation}
-\nonumber
-\mathbb{P}(i \in X^{t+1}|X^t) = \sum_{j \in {\cal N}(i)\cap X^t} p_{ji} = X^t \cdot \theta_i
-\end{equation}
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-\begin{itemize}
-\pause
-\item Each `infected' node $i$ has a probability $p_{i,j}$ of infecting each of its neighbors $j$.
-\pause
-\item A node stays `infected' for a single turn. Then it becomes `inactive'.
-$$\mathbb{P}(j \text{ becomes infected at } t+1 | X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-\begin{align}
-\mathbb{P}(j\in X_{t+1}|X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\
-& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\
-& = 1 - \exp \left[ X_{t} \cdot \theta_{j}\right]
-\end{align}
-
-where $\theta_{i,j} := \log (1 - p_{i,j})$ and $\theta_j := (\theta_{i,j})_i$
-\\[5ex]
-\begin{itemize}
-\item Support of $\vec \theta$ $\Leftrightarrow$ support of $\vec p$
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{Model Comparison}
-\begin{table}
-\centering
-\begin{tabular}{c | c}
-Voter Model & Indep. Casc. Model \\[1ex]
-\hline \\[.1ex]
-Markov & Markov \\[3ex]
-Indep. prob. of $\in X^{t+1} | X^t$ & Indep. prob. of $\in X^{t+1} | X^t$ \\[3ex]
-$\mathbb{P}(j\in X_{t+1}|X_{t}) = X_{t} \cdot \theta_{j}$ & $\mathbb{P}(j\in X_{t+1}|X_{t}) = 1 - \exp(X_{t} \cdot \theta_{j})$ \\[3ex]
-Always susceptible & Susceptible until infected \\
-\includegraphics[scale=.4]{../images/voter_model.pdf} & \includegraphics[scale=.3]{../images/icc_model.pdf} \\
-\end{tabular}
-\end{table}
-\end{frame}
-
-\begin{frame}
-\frametitle{Generalized Linear Cascade Models}
-\begin{definition}
-{\bf Generalized Linear Cascade Model} with inverse link function $f : \mathbb{R} \rightarrow [0,1]$:
-\begin{itemize}
-\item for each \emph{susceptible} node $j$ at time $t$, $\mathbb{P}(j \in X^{t+1}|X^t)$ is a Bernoulli variable of parameter $f(\theta_j \cdot X^t)$
-\end{itemize}
-\end{definition}
-\end{frame}
-
-\begin{frame}
-\frametitle{Sparse Recovery}
-\begin{figure}
-\includegraphics[scale=.6]{../images/sparse_recovery_illustration.pdf}
-\caption{$f(X\cdot\theta) = b$}
-\end{figure}
-\end{frame}
-
-
-\begin{frame}
-\frametitle{$\ell_1$-penalized Maximum Likelihood}
-\begin{itemize}
-\item Decomposable node by node
-\item Sum over susceptible steps
-\end{itemize}
-
-\begin{block}{Likelihood function}
-\begin{equation}
-\nonumber
-{\cal L}(\theta| x^1, \dots, x^n) = \frac{1}{|{\cal T}_i|} \sum_{t \in {\cal T}_i} x^{t+1}_i \log f(\theta_i \cdot x^t) + (1 - x^{t+1}_i) \log(1 - f(\theta_i \cdot x^t))
-\end{equation}
-\end{block}
-
-\begin{block}{Algorithm}
-\begin{equation}
-\nonumber
-\hat \theta \in \arg \max_\theta {\cal L}(\theta| x^1, \dots, x^n) - \lambda \|\theta\|_1
-\end{equation}
-\end{block}
-
-\end{frame}
-
-\begin{frame}
-\frametitle{Main Result}
-\begin{theorem}
-Assume a condition on the Hessian and certain regularity properties of $f$; then there exists $C>0$, depending only on the properties of ${\cal G}$, such that with high probability:
-$$\| \theta^*_i - \hat \theta_i \|_2 \leq C\sqrt{\frac{s\log m}{n}}$$
-\end{theorem}
-
-\begin{corollary}
-By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$
-\end{corollary}
-
-\end{frame}
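-
-% note (a worked specialization, assuming the IC link f(x) = 1 - e^x from the
-% earlier slides): each term of the likelihood then reads
-%   x^{t+1}_i \log(1 - e^{\theta_i \cdot x^t}) + (1 - x^{t+1}_i) (\theta_i \cdot x^t),
-% since \log(1 - f(u)) = u. Both summands are concave in \theta_i (the first
-% because u \mapsto \log(1 - e^u) is concave for u < 0), so the penalized MLE
-% is a convex program.
-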
-
-\begin{frame}
-\frametitle{Approximate Sparsity}
-\begin{itemize}
-\item $\theta^*_{\lceil s \rceil}$ best $s$-sparse approximation to $\theta^*$
-\item $\|\theta^* - \theta^*_{\lceil s \rceil} \|_1$: `tail' of $\theta^*$
-\end{itemize}
-\begin{theorem}
-Assume a condition on the Hessian and certain regularity properties of $f$; then there exist $C_1, C_2>0$ depending only on the properties of ${\cal G}$ such that, with high probability:
-\begin{equation}
-\|\hat \theta_i - \theta^*_i\|_2 \leq C_1 \sqrt{\frac{s\log m}{n}} + C_2 \sqrt[4]{\frac{s\log m}{n}}\|\theta^* - \theta^*_{\lceil s \rceil} \|_1
-\end{equation}
-\end{theorem}
-\end{frame}
-
-\begin{frame}
-\frametitle{Lower Bound}
-\begin{itemize}
-\item Under a correlation decay assumption for the IC model, ${\Omega}(s \log (m/s))$ cascades are necessary for graph reconstruction (Netrapalli and Sanghavi, SIGMETRICS'12)
-\item Adapting (Price \& Woodruff, STOC'12), in the approximately sparse case, any algorithm for any generalized linear cascade model such that:
-$$\|\hat \theta - \theta^*\|_2 \leq C \|\theta^* - \theta^*_{\lceil s \rceil}\|_2$$
-requires $\Omega(s \log (m/s)/\log C)$ measurements.
-\end{itemize}
-\end{frame}
-
-\begin{frame}
-\frametitle{(RE) assumptions}
-\begin{block}{Assumption on the Hessian}
-\begin{itemize}
-\item
-The Hessian has to verify a `restricted eigenvalue property', i.e.\ its smallest eigenvalue on sparse vectors is bounded away from $0$.
-\end{itemize}
-\end{block}
-
-\begin{block}{From Hessian to Gram Matrix}
-\begin{itemize}
-\item $\mathbb{E}[X X^T]$ : `expected' Gram matrix of the observations
-\item $\mathbb{E}[X X^T]_{i,i}$ : probability that node $i$ is infected
-\item $\mathbb{E}[X X^T]_{i,j}$ : probability that nodes $i$ and $j$ are infected simultaneously
-\end{itemize}
-\end{block}
-\end{frame}
-
-\begin{frame}
-\frametitle{Future Work}
-
-\begin{block}{Linear Threshold Model}
-\begin{itemize}
-\item The linear threshold model is a generalized linear cascade with a non-differentiable inverse link function. $$\mathbb{P}(j \in X^{t+1}|X^t) = \operatorname{sign}(\theta_j \cdot X^t - t_j)$$
-\end{itemize}
-\end{block}
-
-\begin{block}{Noisy Influence Maximization}
-\end{block}
-
-\begin{block}{Confidence Intervals}
-\end{block}
-
-\begin{block}{Active Learning}
-\end{block}
-\end{frame}
-
-\end{document}
diff --git a/notes/presentation/beamer_2.tex b/notes/presentation/beamer_2.tex
deleted file mode 100644
index ecc66ed..0000000
--- a/notes/presentation/beamer_2.tex
+++ /dev/null
@@ -1,397 +0,0 @@
-\documentclass[10pt]{beamer}
-
-\usepackage{amssymb, amsmath, graphicx, amsfonts, color, amsthm, wasysym}
-
-\newtheorem{proposition}{Proposition}
-
-\title{Learning from Diffusion Processes}
-\subtitle{What cascades really teach us about networks}
-\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel}
-
-\begin{document}
-
-\begin{frame}
-\titlepage
-\end{frame}
-
-\begin{frame}
-\frametitle{Introduction}
-
-%notes: Learn what? the network, the parameters of the diffusion process.
-
-\begin{table}
-\centering
-\begin{tabular}{c | c}
-Network & Diffusion process \\[1ex]
-\hline
-\\
-Airports & Infectious diseases (SARS) \\
- & Delays (Eyjafjallajökull) \\[3ex]
-Social Network & Infectious diseases (flu) \\
- & Behaviors (Ice Bucket Challenge) \\[3ex]
-Internet/WWW & Information diffusion (Memes, Pirated content \dots)
-\end{tabular}
-\end{table}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Introduction}
-
-What do we know? What do we want to know?
-
-\begin{itemize}
-\item We know the {\bf airport network} structure. We observe delays. Can we learn how delays propagate?
-\item We (sometimes) know the {\bf social network}. We observe behaviors. Can we learn who influences whom?
-\item We rarely know the {\bf blog network}. We observe discussions. Can we learn who learns from whom?
-\end{itemize}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-
-\begin{figure}
-\includegraphics[scale=.3]{figures/weighted_graph.png}
-\caption{Weighted, directed graph}
-\end{figure}
-
-\begin{itemize}
-\item At $t=0$, nodes are in three possible states: susceptible, {\color{blue} infected}, {\color{red} dead}
-\pause
-\item At time step $t$, each {\color{blue} infected} node $i$ has a ``one-shot'' probability $p_{i,j}$ of infecting each of its susceptible neighbors $j$ at $t+1$.
-\pause
-\item A node stays {\color{blue} infected} for one round, then it {\color{red} dies}
-\pause
-\item At $t=0$, each node is {\color{blue} infected} with probability $p_{\text{init}}$
-\pause
-\item The process continues until the random time $T$ when no more nodes can become infected.
-\pause
-\item $X_t$: set of {\color{blue} infected} nodes at time $t$
-\pause
-\item A {\bf cascade} is an instance of the IC model: $(X_t)_{0 \leq t \leq T}$
-\end{itemize}
-
-%Notes: Revisit the celebrated independent cascade model -> Influence maximisation is tractable, requires knowledge of weights
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-
-\begin{figure}
-\includegraphics[scale=.5]{figures/weighted_graph.png}
-\caption{Weighted, directed graph}
-\end{figure}
-
-\begin{block}{Example}
-\begin{itemize}
-\item At $t=0$, the {\color{orange} orange} node is infected, and the two other nodes are susceptible. $X_0 = $({\color{orange} orange})
-\item At $t=1$, the {\color{orange} orange} node infects the {\color{blue} blue} node and fails to infect the {\color{green} green} node. The {\color{orange} orange} node dies. $X_1 = $({\color{blue} blue})
-\item At $t=2$, {\color{blue} blue} dies. $X_2 = \emptyset$
-\end{itemize}
-\end{block}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-
-\begin{figure}
-\includegraphics[scale=.5]{figures/weighted_graph.png}
-\caption{Weighted, directed graph}
-\end{figure}
-
-\begin{itemize}
-\item If the {\color{orange} orange} node and the {\color{green} green} node are infected at $t=0$, what is the probability that the {\color{blue} blue} node is infected at $t=1$?
-$$1 - \mathbb{P}(\text{not infected}) = 1 - (1 - .45)(1 - .04) = .472$$
-\end{itemize}
-
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-\begin{figure}
-\includegraphics[scale=.5]{figures/weighted_graph.png}
-\caption{Weighted, directed graph}
-\end{figure}
-
-\begin{itemize}
-\item In general, for each susceptible node $j$:
-$$\mathbb{P}(j \text{ becomes infected at } t+1 | X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$
-\end{itemize}
-
-\end{frame}
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-For each susceptible node $j$, the event that it becomes {\color{blue} infected}, conditioned on the previous time step, is a Bernoulli variable:
-$$(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$$
-\begin{itemize}
-\item $\theta_{i,j} := \log(1 - p_{i,j})$
-\item $\theta_j := (0, 0, 0, \theta_{4,j}, 0 \dots, \theta_{k,j}, \dots)$
-\item $f : x \mapsto 1 - e^x$
-\begin{align*}
-\mathbb{P}(j\in X_{t+1}|X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\
-& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\
-& = 1 - \exp \left[ X_{t} \cdot \theta_{j}\right]
-\end{align*}
-\end{itemize}
-\end{frame}
-
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Independent Cascade Model}
-For each susceptible node $j$, the event that it becomes {\color{blue} infected}, conditioned on the previous time step, is a Bernoulli variable:
-$$(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$$
-
-\begin{block}{Decomposability}
-\begin{itemize}
-\item Conditioned on $X_t$, the states of the nodes at the next time step are mutually independent
-\item We can learn the parents of each node independently
-\end{itemize}
-\end{block}
-
-\begin{block}{Sparsity}
-\begin{itemize}
-\item $\theta_{i,j} = 0 \Leftrightarrow \log(1 - p_{i,j}) = 0 \Leftrightarrow p_{i,j} = 0$
-\item If the graph is ``sparse'', then $p_{j}$ is sparse, and hence $\theta_j$ is sparse.
-\end{itemize}
-\end{block}
-\end{frame}
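-
-% note (a short formalization of the Decomposability block above): conditioned
-% on X_t, the states at t+1 factorize over susceptible nodes,
-%   P(X_{t+1} | X_t) = \prod_j P(j \in X_{t+1} | X_t),
-% so the cascade likelihood splits into one term per node j, each depending
-% only on \theta_j, and each node's parents can be learned separately.
-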
-
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Learning from Diffusion Processes}
-\begin{block}{Problem Statement}
-\begin{itemize}
-\item We are given a graph ${\cal G}$, and a diffusion process $f$ parameterized by $\left((\theta_j)_j, p_{\text{init}}\right)$.
-\item Suppose we {\bf only} observe $(X_t)$ from the diffusion process.
-\item Under what conditions can we learn $\theta_{i,j}$ for all $(i,j)$?
-\end{itemize}
-\end{block}
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Sparse Recovery}
-\begin{figure}
-\includegraphics[scale=.6]{../images/sparse_recovery_illustration_copy.pdf}
-\caption{$\mathbb{P}(j \in X_{t+1}| X_t) = f(X_t\cdot \theta)$}
-\end{figure}
-\end{frame}
-
-
-
-%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Learning from Diffusion Processes}
-
-% \begin{figure}
-% \includegraphics[scale=.4]{../images/sparse_recovery_illustration.pdf}
-% \caption{Generalized Cascade Model for node $i$}
-% \end{figure}
-
-\begin{block}{Likelihood Function}
-\begin{align*}
-{\cal L}(\theta_1, \dots, \theta_m| X_1, \dots X_n) = \sum_{i=1}^m \sum_{t} & X_{t+1}^i \log f(\theta_i \cdot X_t) + \\
-& (1 - X_{t+1}^i) \log(1 - f(\theta_i \cdot X_t))
-\end{align*}
-\end{block}
-
-\begin{block}{MLE}
-For each node $i$, $$\hat \theta_i \in \arg \max \frac{1}{n_i}{\cal L}_i(\theta_i | X_1, X_2, \dots, X_{n_i}) - \lambda \|\theta_i\|_1$$
-\end{block}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Conditions}
-\begin{block}{On $f$}
-\begin{itemize}
-\item $\log f$ and $\log (1-f)$ have to be concave
-\item $\log f$ and $\log (1-f)$ have to have bounded gradients
-\end{itemize}
-\end{block}
-
-\begin{block}{On $(X_t)$}
-\begin{itemize}
-\item We want ${\cal H}$, the Hessian of ${\cal L}$ with respect to $\theta$, to be well-conditioned.
-\item $n < \dim(\theta) \implies {\cal H}$ is degenerate.
-\item {\bf Restricted Eigenvalue condition} = ``almost invertible'' on sparse vectors.
-\end{itemize}
-\end{block}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Restricted Eigenvalue Condition}
-
-\begin{definition}
-For a set $S$,
-$${\cal C} := \{ \Delta : \|\Delta\|_2 = 1, \|\Delta_{\bar S}\|_1 \leq 3 \| \Delta_S\|_1 \}$$
-${\cal H}$ verifies the $(S, \gamma)$-RE condition if:
-$$\forall \Delta \in {\cal C}, \Delta^T {\cal H} \Delta \geq \gamma$$
-\end{definition}
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Main Result}
-Adapting a result from \cite{Negahban:2009}, we have the following theorem:
-
-\begin{theorem}
-For node $i$, assume
-\begin{itemize}
-\item the Hessian verifies the $(S,\gamma)$-RE condition, where $S$ is the set of parents of node $i$ (the support of $\theta_i$)
-\item $f$ and $1-f$ are log-concave
-\item $|(\log f)'| < \frac{1}{\alpha}$ and $|(\log (1- f))'| < \frac{1}{\alpha}$
-\end{itemize} then with high probability:
-$$\| \theta^*_i - \hat \theta_i \|_2 \leq \frac{6}{\gamma}\sqrt{\frac{s\log m}{\alpha n}}$$
-\end{theorem}
-
-\begin{corollary}
-By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$
-\end{corollary}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Main result}
-
-\begin{block}{Correlation}
-\begin{itemize}
-\item Positive result despite correlated measurements \smiley
-\item Independent measurements $\implies$ taking one measurement per cascade.
-\end{itemize}
-\end{block}
-
-\begin{block}{Statement w.r.t.\ the observations and not the model}
-\begin{itemize}
-\item The Hessian must verify the $(S,\gamma)$-RE condition \frownie
-\item Can we make a conditional statement on $\theta$ and not $X_t$?
-\end{itemize}
-\end{block}
-
-\end{frame}
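-
-% note (a toy example of how the RE condition can fail, assuming two candidate
-% parents that are always infected together): the corresponding columns of X
-% coincide, so, restricted to these two coordinates,
-%   E[(1/n) X X^T] = p * [[1, 1], [1, 1]]
-% for some p > 0, which is rank one and vanishes on Delta = (1, -1); the two
-% parents cannot be told apart from the observations, whence a condition of
-% this kind.
-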
-
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Restricted Eigenvalue Condition}
-
-\begin{block}{From Hessian to Gram Matrix}
-\begin{itemize}
-\item If $\log f$ and $\log (1 - f)$ are strongly concave with constant $c$, and the $(S, \gamma)$-RE condition holds for the Gram matrix $\frac{1}{n}X X^T$, then the $(S, c \gamma)$-RE condition holds for ${\cal H}$
-\end{itemize}
-\end{block}
-
-\begin{block}{From Gram Matrix to Expected Gram Matrix}
-\begin{itemize}
-\item If $n > C' s^2 \log m$ and $(S, \gamma)$-RE holds for $\mathbb{E}(\frac{1}{n}XX^T)$, then $(S, \gamma/2)$-RE holds for $\frac{1}{n}XX^T$ w.h.p.
-\item $\mathbb{E}(\frac{1}{n}XX^T)$ only depends on $p_{\text{init}}$ and $(\theta_j)_j$
-\end{itemize}
-\end{block}
-
-\begin{block}{Expected Gram Matrix}
-\begin{itemize}
-\item Diagonal : average number of times a node is infected
-\item Off-diagonal : average number of times a pair of nodes is infected \emph{together}
-\end{itemize}
-\end{block}
-
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%
-\begin{frame}
-\frametitle{Approximate Sparsity}
-\begin{itemize}
-\item $\theta^*_{\lceil s \rceil}$ best $s$-sparse approximation to $\theta^*$
-\item $\|\theta^* - \theta^*_{\lceil s \rceil} \|_1$: `tail' of $\theta^*$
-\end{itemize}
-\begin{theorem}
-Under similar conditions on $f$ and a relaxed RE condition, there exist $C_1, C_2>0$ such that, with high probability:
-\begin{equation}
-\|\hat \theta_i - \theta^*_i\|_2 \leq C_1 \sqrt{\frac{s\log m}{n}} + C_2 \sqrt[4]{\frac{s\log m}{n}}\|\theta^* - \theta^*_{\lceil s \rceil} \|_1
-\end{equation}
-\end{theorem}
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Lower Bound}
-\begin{itemize}
-\item Under a correlation decay assumption for the IC model, ${\Omega}(s \log (m/s))$ cascades are necessary for graph reconstruction (Netrapalli and Sanghavi, SIGMETRICS'12)
-\item Adapting (Price \& Woodruff, STOC'12), in the approximately sparse case, any algorithm for any generalized linear cascade model such that:
-$$\|\hat \theta - \theta^*\|_2 \leq C \|\theta^* - \theta^*_{\lceil s \rceil}\|_2$$
-requires $\Omega(s \log (m/s)/\log C)$ measurements.
-\end{itemize}
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Voter Model}
-\begin{itemize}
-\pause
-\item {\color{red} Red} and {\color{blue} Blue} nodes. At every step, each node $i$ chooses one of its neighbors $j$ with probability $p_{j,i}$ and adopts that color at $t+1$
-\pause
-\item If {\color{blue} Blue} is the `contagious' state:
-\pause
-\begin{equation}
-\nonumber
-\mathbb{P}(i \in X^{t+1}|X^t) = \sum_{j \in {\cal N}(i)\cap X^t} p_{ji} = X^t \cdot \theta_i
-\end{equation}
-\end{itemize}
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}
-\frametitle{Future Work}
-\begin{itemize}
-\item Lower-bound the restricted eigenvalues of the expected Gram matrix
-\item Confidence intervals
-\item Show that $n > C' s \log m$ measurements are necessary w.r.t.\ the expected Hessian.
-\item Linear Threshold model $\rightarrow$ 1-bit compressed sensing formulation -\item Better lower bounds -\item Active Learning -\end{itemize} -\end{frame} - - -%%%%%%%%%%%%%%%%% - -\bibliography{../../paper/sparse} -\bibliographystyle{apalike} - -\end{document} \ No newline at end of file diff --git a/notes/presentation/extended_abstract.txt b/notes/presentation/extended_abstract.txt deleted file mode 100644 index 47a12b9..0000000 --- a/notes/presentation/extended_abstract.txt +++ /dev/null @@ -1,10 +0,0 @@ -Title: How can we estimate the parameters of a graph by observing its cascades? - - -A standard problem in Social Network Theory is to understand how the parameters of a graph affect the properties of its cascades, which are diffusion processes that spread from node to node along the graph's weighted edges. In other words, can we predict cascades from the graph's parameters? - -Recent work has considered the dual problem: what knowledge about the existence of an edge in the graph do we gain by observing its cascades and how can we leverage that knowledge efficiently? A natural extension to this problem is: can we learn the weights of the graph's edges from cascades? These questions are fundamental to many aspects of social network theory: knowing the parameters of the graph precedes influence maximization or conversely influence minimization. - -In this talk, we present a "sparse recovery" framework for tackling the "graph inference" problem from cascades. This framework achieves a better convergence rate under weaker assumptions than prior work. We show that we (almost) match the lower bound and that our assumptions are robust to approximately sparse graphs. Finally, the approach is validated on synthetic networks. - -Joint work with Thibaut Horel \ No newline at end of file diff --git a/notes/presentation/figures/Screen Shot 2015-03-08 at 13.08.01.png b/notes/presentation/figures/Screen Shot 2015-03-08 at 13.08.01.png deleted file mode 100644 index b053f0c..0000000 Binary files a/notes/presentation/figures/Screen Shot 2015-03-08 at 13.08.01.png and /dev/null differ diff --git a/notes/presentation/figures/weighted_graph.png b/notes/presentation/figures/weighted_graph.png deleted file mode 100644 index 7deccc3..0000000 Binary files a/notes/presentation/figures/weighted_graph.png and /dev/null differ diff --git a/notes/presentation/sparse.bib b/notes/presentation/sparse.bib deleted file mode 100644 index 5df4b59..0000000 --- a/notes/presentation/sparse.bib +++ /dev/null @@ -1,503 +0,0 @@ -@article {CandesRomberTao:2006, -author = {Candès, Emmanuel J. and Romberg, Justin K. 
and Tao, Terence},
-title = {Stable signal recovery from incomplete and inaccurate measurements},
-journal = {Communications on Pure and Applied Mathematics},
-volume = {59},
-number = {8},
-publisher = {Wiley Subscription Services, Inc., A Wiley Company},
-issn = {1097-0312},
-pages = {1207--1223},
-year = {2006},
-}
-
-
-@inproceedings{GomezRodriguez:2010,
-  author = {Gomez Rodriguez, Manuel and Leskovec, Jure and Krause, Andreas},
-  title = {Inferring Networks of Diffusion and Influence},
-  booktitle = {Proceedings of the 16th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
-  series = {KDD '10},
-  year = {2010},
-  isbn = {978-1-4503-0055-1},
-  location = {Washington, DC, USA},
-  pages = {1019--1028},
-  numpages = {10},
-  publisher = {ACM},
-  address = {New York, NY, USA},
-}
-
-
-@article{Netrapalli:2012,
-  author = {Netrapalli, Praneeth and Sanghavi, Sujay},
-  title = {Learning the Graph of Epidemic Cascades},
-  journal = {SIGMETRICS Perform. Eval. Rev.},
-  volume = {40},
-  number = {1},
-  month = {June},
-  year = {2012},
-  issn = {0163-5999},
-  pages = {211--222},
-  numpages = {12},
-  publisher = {ACM},
-  address = {New York, NY, USA},
-  keywords = {cascades, epidemics, graph structure learning},
-}
-
-@article{Negahban:2009,
-  author = {Negahban, Sahand N. and Ravikumar, Pradeep and Wainwright, Martin J. and Yu, Bin},
-  title = {A Unified Framework for High-Dimensional Analysis of M-Estimators with Decomposable Regularizers},
-  journal = {Statistical Science},
-  year = {2012},
-  month = {December},
-  volume = {27},
-  number = {4},
-  pages = {538--557},
-}
-
-@article{Zhao:2006,
-  author = {Zhao, Peng and Yu, Bin},
-  title = {On Model Selection Consistency of Lasso},
-  journal = {J. Mach. Learn. Res.},
-  issue_date = {12/1/2006},
-  volume = {7},
-  month = dec,
-  year = {2006},
-  issn = {1532-4435},
-  pages = {2541--2563},
-  numpages = {23},
-  url = {http://dl.acm.org/citation.cfm?id=1248547.1248637},
-  acmid = {1248637},
-  publisher = {JMLR.org},
-}
-
-@inproceedings{Daneshmand:2014,
-  author    = {Hadi Daneshmand and
-               Manuel Gomez{-}Rodriguez and
-               Le Song and
-               Bernhard Sch{\"{o}}lkopf},
-  title     = {Estimating Diffusion Network Structures: Recovery Conditions, Sample
-               Complexity {\&} Soft-thresholding Algorithm},
-  booktitle = {Proceedings of the 31st International Conference on Machine Learning,
-               {ICML} 2014, Beijing, China, 21-26 June 2014},
-  pages     = {793--801},
-  year      = {2014},
-  url       = {http://jmlr.org/proceedings/papers/v32/daneshmand14.html},
-  timestamp = {Fri, 07 Nov 2014 20:42:30 +0100},
-  biburl    = {http://dblp.uni-trier.de/rec/bib/conf/icml/DaneshmandGSS14},
-  bibsource = {dblp computer science bibliography, http://dblp.org}
-}
-
-@inproceedings{Kempe:03,
-  author    = {David Kempe and
-               Jon M. Kleinberg and
-               {\'{E}}va Tardos},
-  title     = {Maximizing the spread of influence through a social network},
-  booktitle = {Proceedings of the Ninth {ACM} {SIGKDD} International Conference on
-               Knowledge Discovery and Data Mining, Washington, DC, USA, August 24
-               - 27, 2003},
-  pages     = {137--146},
-  year      = {2003},
-  url       = {http://doi.acm.org/10.1145/956750.956769},
-  doi       = {10.1145/956750.956769},
-  timestamp = {Mon, 13 Feb 2006 15:34:20 +0100},
-  biburl    = {http://dblp.uni-trier.de/rec/bib/conf/kdd/KempeKT03},
-  bibsource = {dblp computer science bibliography, http://dblp.org}
-}
-
-@inproceedings{Abrahao:13,
-  author    = {Bruno D.
Abrahao and - Flavio Chierichetti and - Robert Kleinberg and - Alessandro Panconesi}, - title = {Trace complexity of network inference}, - booktitle = {The 19th {ACM} {SIGKDD} International Conference on Knowledge Discovery - and Data Mining, {KDD} 2013, Chicago, IL, USA, August 11-14, 2013}, - pages = {491--499}, - year = {2013}, - url = {http://doi.acm.org/10.1145/2487575.2487664}, - doi = {10.1145/2487575.2487664}, - timestamp = {Tue, 10 Sep 2013 10:11:57 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/AbrahaoCKP13}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - - -@article{vandegeer:2009, -author = "van de Geer, Sara A. and B{\"u}hlmann, Peter", -doi = "10.1214/09-EJS506", -fjournal = "Electronic Journal of Statistics", -journal = "Electron. J. Statist.", -pages = "1360--1392", -publisher = "The Institute of Mathematical Statistics and the Bernoulli Society", -title = "On the conditions used to prove oracle results for the Lasso", -url = "http://dx.doi.org/10.1214/09-EJS506", -volume = "3", -year = "2009" -} - -@article{vandegeer:2011, -author = "van de Geer, Sara and Bühlmann, Peter and Zhou, Shuheng", -doi = "10.1214/11-EJS624", -fjournal = "Electronic Journal of Statistics", -journal = "Electron. J. Statist.", -pages = "688--749", -publisher = "The Institute of Mathematical Statistics and the Bernoulli Society", -title = "The adaptive and the thresholded Lasso for potentially misspecified models (and a lower bound for the Lasso)", -url = "http://dx.doi.org/10.1214/11-EJS624", -volume = "5", -year = "2011" -} - -@article{Zou:2006, -author = {Zou, Hui}, -title = {The Adaptive Lasso and Its Oracle Properties}, -journal = {Journal of the American Statistical Association}, -volume = {101}, -number = {476}, -pages = {1418-1429}, -year = {2006}, -doi = {10.1198/016214506000000735}, -URL = {http://dx.doi.org/10.1198/016214506000000735}, -} - -@article{Jacques:2013, - author = {Laurent Jacques and - Jason N. Laska and - Petros T. Boufounos and - Richard G. Baraniuk}, - title = {Robust 1-Bit Compressive Sensing via Binary Stable Embeddings of Sparse - Vectors}, - journal = {{IEEE} Transactions on Information Theory}, - volume = {59}, - number = {4}, - pages = {2082--2102}, - year = {2013}, - url = {http://dx.doi.org/10.1109/TIT.2012.2234823}, - doi = {10.1109/TIT.2012.2234823}, - timestamp = {Tue, 09 Apr 2013 19:57:48 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/JacquesLBB13}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@inproceedings{Boufounos:2008, - author = {Petros Boufounos and - Richard G. 
Baraniuk}, - title = {1-Bit compressive sensing}, - booktitle = {42nd Annual Conference on Information Sciences and Systems, {CISS} - 2008, Princeton, NJ, USA, 19-21 March 2008}, - pages = {16--21}, - year = {2008}, - url = {http://dx.doi.org/10.1109/CISS.2008.4558487}, - doi = {10.1109/CISS.2008.4558487}, - timestamp = {Wed, 15 Oct 2014 17:04:27 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/ciss/BoufounosB08}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@inproceedings{Gupta:2010, - author = {Ankit Gupta and - Robert Nowak and - Benjamin Recht}, - title = {Sample complexity for 1-bit compressed sensing and sparse classification}, - booktitle = {{IEEE} International Symposium on Information Theory, {ISIT} 2010, - June 13-18, 2010, Austin, Texas, USA, Proceedings}, - pages = {1553--1557}, - year = {2010}, - url = {http://dx.doi.org/10.1109/ISIT.2010.5513510}, - doi = {10.1109/ISIT.2010.5513510}, - timestamp = {Thu, 15 Jan 2015 17:11:50 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/GuptaNR10}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@article{Plan:2014, - author = {Yaniv Plan and - Roman Vershynin}, - title = {Dimension Reduction by Random Hyperplane Tessellations}, - journal = {Discrete {\&} Computational Geometry}, - volume = {51}, - number = {2}, - pages = {438--461}, - year = {2014}, - url = {http://dx.doi.org/10.1007/s00454-013-9561-6}, - doi = {10.1007/s00454-013-9561-6}, - timestamp = {Tue, 11 Feb 2014 13:48:56 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/dcg/PlanV14}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@article{bickel:2009, -author = "Bickel, Peter J. and Ritov, Ya’acov and Tsybakov, Alexandre B.", -doi = "10.1214/08-AOS620", -fjournal = "The Annals of Statistics", -journal = "Ann. Statist.", -month = "08", -number = "4", -pages = "1705--1732", -publisher = "The Institute of Mathematical Statistics", -title = "Simultaneous analysis of Lasso and Dantzig selector", -url = "http://dx.doi.org/10.1214/08-AOS620", -volume = "37", -year = "2009" -} - -@article{raskutti:10, - author = {Garvesh Raskutti and - Martin J. Wainwright and - Bin Yu}, - title = {Restricted Eigenvalue Properties for Correlated Gaussian Designs}, - journal = {Journal of Machine Learning Research}, - volume = {11}, - pages = {2241--2259}, - year = {2010}, - url = {http://portal.acm.org/citation.cfm?id=1859929}, - timestamp = {Wed, 15 Oct 2014 17:04:32 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/RaskuttiWY10}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@article{rudelson:13, - author = {Mark Rudelson and - Shuheng Zhou}, - title = {Reconstruction From Anisotropic Random Measurements}, - journal = {{IEEE} Transactions on Information Theory}, - volume = {59}, - number = {6}, - pages = {3434--3447}, - year = {2013}, - url = {http://dx.doi.org/10.1109/TIT.2013.2243201}, - doi = {10.1109/TIT.2013.2243201}, - timestamp = {Tue, 21 May 2013 14:15:50 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/RudelsonZ13}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@article{bipw11, - author = {Khanh Do Ba and - Piotr Indyk and - Eric Price and - David P. 
Woodruff}, - title = {Lower Bounds for Sparse Recovery}, - journal = {CoRR}, - volume = {abs/1106.0365}, - year = {2011}, - url = {http://arxiv.org/abs/1106.0365}, - timestamp = {Mon, 05 Dec 2011 18:04:39 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1106-0365}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@inproceedings{pw11, - author = {Eric Price and - David P. Woodruff}, - title = {{(1} + eps)-Approximate Sparse Recovery}, - booktitle = {{IEEE} 52nd Annual Symposium on Foundations of Computer Science, {FOCS} - 2011, Palm Springs, CA, USA, October 22-25, 2011}, - pages = {295--304}, - year = {2011}, - crossref = {DBLP:conf/focs/2011}, - url = {http://dx.doi.org/10.1109/FOCS.2011.92}, - doi = {10.1109/FOCS.2011.92}, - timestamp = {Tue, 16 Dec 2014 09:57:24 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/PriceW11}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@proceedings{DBLP:conf/focs/2011, - editor = {Rafail Ostrovsky}, - title = {{IEEE} 52nd Annual Symposium on Foundations of Computer Science, {FOCS} - 2011, Palm Springs, CA, USA, October 22-25, 2011}, - publisher = {{IEEE} Computer Society}, - year = {2011}, - url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6108120}, - isbn = {978-1-4577-1843-4}, - timestamp = {Mon, 15 Dec 2014 18:48:45 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/2011}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@inproceedings{pw12, - author = {Eric Price and - David P. Woodruff}, - title = {Applications of the Shannon-Hartley theorem to data streams and sparse - recovery}, - booktitle = {Proceedings of the 2012 {IEEE} International Symposium on Information - Theory, {ISIT} 2012, Cambridge, MA, USA, July 1-6, 2012}, - pages = {2446--2450}, - year = {2012}, - crossref = {DBLP:conf/isit/2012}, - url = {http://dx.doi.org/10.1109/ISIT.2012.6283954}, - doi = {10.1109/ISIT.2012.6283954}, - timestamp = {Mon, 01 Oct 2012 17:34:07 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/PriceW12}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@proceedings{DBLP:conf/isit/2012, - title = {Proceedings of the 2012 {IEEE} International Symposium on Information - Theory, {ISIT} 2012, Cambridge, MA, USA, July 1-6, 2012}, - publisher = {{IEEE}}, - year = {2012}, - url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6268627}, - isbn = {978-1-4673-2580-6}, - timestamp = {Mon, 01 Oct 2012 17:33:45 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/2012}, - bibsource = {dblp computer science bibliography, http://dblp.org} -} - -@article{Leskovec:2010, - author = {Jure Leskovec and - Deepayan Chakrabarti and - Jon M. 
Kleinberg and
-               Christos Faloutsos and
-               Zoubin Ghahramani},
-  title     = {Kronecker Graphs: An Approach to Modeling Networks},
-  journal   = {Journal of Machine Learning Research},
-  volume    = {11},
-  pages     = {985--1042},
-  year      = {2010},
-  url       = {http://doi.acm.org/10.1145/1756006.1756039},
-  doi       = {10.1145/1756006.1756039},
-  timestamp = {Thu, 22 Apr 2010 13:26:26 +0200},
-  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/LeskovecCKFG10},
-  bibsource = {dblp computer science bibliography, http://dblp.org}
-}
-
-@article{Holme:2002,
-  author  = {Petter Holme and Beom Jun Kim},
-  title   = {Growing scale-free networks with tunable clustering},
-  journal = {Physical Review E},
-  volume  = {65},
-  issue   = {2},
-  pages   = {026107},
-  year    = {2002}
-}
-
-
-@article{watts:1998,
-  annote  = {10.1038/30918},
-  author  = {Watts, Duncan J. and Strogatz, Steven H.},
-  date    = {1998/06/04/print},
-  isbn    = {0028-0836},
-  journal = {Nature},
-  number  = {6684},
-  pages   = {440--442},
-  title   = {Collective dynamics of `small-world' networks},
-  url     = {http://dx.doi.org/10.1038/30918},
-  volume  = {393},
-  year    = {1998},
-}
-
-@article{barabasi:2001,
-  author    = {R{\'{e}}ka Albert and
-               Albert{-}L{\'{a}}szl{\'{o}} Barab{\'{a}}si},
-  title     = {Statistical mechanics of complex networks},
-  journal   = {CoRR},
-  volume    = {cond-mat/0106096},
-  year      = {2001},
-  url       = {http://arxiv.org/abs/cond-mat/0106096},
-  timestamp = {Mon, 05 Dec 2011 18:05:15 +0100},
-  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/cond-mat-0106096},
-  bibsource = {dblp computer science bibliography, http://dblp.org}
-}
-
-
-@article{gomezbalduzzi:2011,
-  author    = {Manuel Gomez{-}Rodriguez and
-               David Balduzzi and
-               Bernhard Sch{\"{o}}lkopf},
-  title     = {Uncovering the Temporal Dynamics of Diffusion Networks},
-  journal   = {CoRR},
-  volume    = {abs/1105.0697},
-  year      = {2011},
-  url       = {http://arxiv.org/abs/1105.0697},
-  timestamp = {Mon, 05 Dec 2011 18:05:23 +0100},
-  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1105-0697},
-  bibsource = {dblp computer science bibliography, http://dblp.org}
-}
-
-@article{Nowell08,
-  author = {Liben-Nowell, David and Kleinberg, Jon},
-  biburl = {http://www.bibsonomy.org/bibtex/250b9b1ca1849fa9cb8bb92d6d9031436/mkroell},
-  doi = {10.1073/pnas.0708471105},
-  eprint = {http://www.pnas.org/content/105/12/4633.full.pdf+html},
-  journal = {Proceedings of the National Academy of Sciences},
-  keywords = {SNA graph networks},
-  number = {12},
-  pages = {4633--4638},
-  timestamp = {2008-10-09T10:32:56.000+0200},
-  title = {{Tracing information flow on a global scale using Internet chain-letter data}},
-  url = {http://www.pnas.org/content/105/12/4633.abstract},
-  volume = {105},
-  year = {2008}
-}
-
-@inproceedings{Leskovec07,
-  author    = {Jure Leskovec and
-               Mary McGlohon and
-               Christos Faloutsos and
-               Natalie S. Glance and
-               Matthew Hurst},
-  title     = {Patterns of Cascading Behavior in Large Blog Graphs},
-  booktitle = {Proceedings of the Seventh {SIAM} International Conference on Data
-               Mining, April 26-28, 2007, Minneapolis, Minnesota, {USA}},
-  pages     = {551--556},
-  year      = {2007},
-  url       = {http://dx.doi.org/10.1137/1.9781611972771.60},
-  doi       = {10.1137/1.9781611972771.60},
-  timestamp = {Wed, 12 Feb 2014 17:08:15 +0100},
-  biburl    = {http://dblp.uni-trier.de/rec/bib/conf/sdm/LeskovecMFGH07},
-  bibsource = {dblp computer science bibliography, http://dblp.org}
-}
-
-
-@inproceedings{AdarA05,
-  author    = {Eytan Adar and
-               Lada A.
Adamic},
-  title     = {Tracking Information Epidemics in Blogspace},
-  booktitle = {2005 {IEEE} / {WIC} / {ACM} International Conference on Web Intelligence
-               {(WI} 2005), 19-22 September 2005, Compiegne, France},
-  pages     = {207--214},
-  year      = {2005},
-  url       = {http://dx.doi.org/10.1109/WI.2005.151},
-  doi       = {10.1109/WI.2005.151},
-  timestamp = {Tue, 12 Aug 2014 16:59:16 +0200},
-  biburl    = {http://dblp.uni-trier.de/rec/bib/conf/webi/AdarA05},
-  bibsource = {dblp computer science bibliography, http://dblp.org}
-}
-
-@inproceedings{Kleinberg:00,
-  author    = {Jon M. Kleinberg},
-  title     = {The small-world phenomenon: an algorithmic perspective},
-  booktitle = {Proceedings of the Thirty-Second Annual {ACM} Symposium on Theory
-               of Computing, May 21-23, 2000, Portland, OR, {USA}},
-  pages     = {163--170},
-  year      = {2000},
-  url       = {http://doi.acm.org/10.1145/335305.335325},
-  doi       = {10.1145/335305.335325},
-  timestamp = {Thu, 16 Feb 2012 12:06:08 +0100},
-  biburl    = {http://dblp.uni-trier.de/rec/bib/conf/stoc/Kleinberg00},
-  bibsource = {dblp computer science bibliography, http://dblp.org}
-}
-
-@article{zhang2014,
-  title = {Confidence intervals for low dimensional parameters in high dimensional linear models},
-  author = {Zhang, Cun-Hui and Zhang, Stephanie S},
-  journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
-  volume = {76},
-  number = {1},
-  pages = {217--242},
-  year = {2014},
-  publisher = {Wiley Online Library}
-}
-
-@article{javanmard2014,
-  title = {Confidence intervals and hypothesis testing for high-dimensional regression},
-  author = {Javanmard, Adel and Montanari, Andrea},
-  journal = {The Journal of Machine Learning Research},
-  volume = {15},
-  number = {1},
-  pages = {2869--2909},
-  year = {2014},
-  publisher = {JMLR.org}
-}
diff --git a/notes/reportYaron.tex b/notes/reportYaron.tex
deleted file mode 100644
index f4878c7..0000000
--- a/notes/reportYaron.tex
+++ /dev/null
@@ -1,354 +0,0 @@
-\documentclass[10pt]{article}
-\usepackage{fullpage, amsmath, amsthm, amssymb}
-\usepackage{graphicx, algorithm, algpseudocode}
-\usepackage[utf8x]{inputenc}
-\newtheorem{theorem}{Theorem}
-
-\title{Sparse Recovery for Graph Reconstruction}
-
-\date{}
-
-\begin{document}
-
-\maketitle
-
-\section{Introduction}
-
-Given a set of observed cascades, the graph reconstruction problem consists of finding the underlying graph on which these cascades spread. We explore different models of cascade creation, namely the voter model and the independent cascade model. Our objective is therefore to approximately compute the `inverse' of the cascade generation function from as few observations as possible.
-
-\section{Related Work}
-
-There have been several works tackling the graph reconstruction problem in variants of the independent cascade model. We briefly summarize their results and approaches below.
-
-\begin{itemize}
-\item Manuel Gomez-Rodriguez, Jure Leskovec, and Andreas Krause were amongst the first to tackle the problem of graph reconstruction from cascade observations \cite{gomez}. Their first method, \textsc{NetInf}, and a series of similar methods published subsequently, solve a heuristic approximation of an NP-hard maximum-likelihood formulation. Unfortunately, \textsc{NetInf} has no known theoretical guarantees. One difficulty with comparing ourselves to their heuristic is that they assume a continuous-time model, which is close to, but not adaptable to, our framework.
Namely, as we will see in Section~\ref{sec:icc}, nodes in our model cannot influence nodes beyond the time step at which they are active.
-\item Their paper was later followed by a publication from Praneeth Netrapalli and Sujay Sanghavi \cite{netrapalli}, which provides two algorithms with interesting theoretical guarantees.
-  \begin{itemize}
-  \item The first algorithm they discuss is a convex and parallelizable optimization problem. The algorithm is guaranteed to achieve perfect reconstruction of `strong' edges after ${\cal O}(\Delta^2 \log n)$ cascades with high probability, where $\Delta$ is the maximum degree in the graph. There are several things to note about this result:
-    \begin{itemize}
-    \item Researchers believe the true threshold of cascades necessary to achieve perfect reconstruction is closer to ${\cal O}(\Delta \log n)$.
-    \item The big ${\cal O}$ is highly polynomial in the different constants of the model. The constant is of the order of billions even for very generous initializations of these parameters.
-    \item The authors make a restrictive assumption, called `correlation decay', on the probabilities of each edge: $\forall i, \sum_{j \in {\cal N}(i)} p_{j,i} < 1 -\alpha$, where $0 < \alpha < 1$.
-    \end{itemize}
-  \item The second algorithm suggested in \cite{netrapalli}, to which we will compare ourselves in Section~\ref{sec:exp}, is \textsc{Greedy}. It achieves perfect reconstruction after ${\cal O}(\Delta \log n)$ observed cascades with high probability. However, this guarantee has been proven only in the case of tree graphs. The authors mention that it does fairly well in practice on other types of graphs.
-  \end{itemize}
-\item Finally, a recent paper by Bruno Abrahao, Flavio Chierichetti, Robert Kleinberg, and Alessandro Panconesi gives several algorithms and a different approach to obtaining theoretical guarantees. Their strongest result is for bounded-degree graphs, where they show that they can obtain perfect reconstruction after observing ${\cal O}(\Delta^9 \log^2 \Delta \log n)$ cascades. This is weaker than the first algorithm suggested by \cite{netrapalli}, but does not make the `correlation decay' assumption.
-\end{itemize}
-
-What we aim for with the following framework is to achieve theoretical guarantees promising close-to-perfect reconstruction after observing ${\cal O}(\Delta \log n)$ cascades, with high probability, without relying on unreasonable assumptions like `correlation decay'. We also want our method to be easily implementable in practice and to compare favorably to the above algorithms.
-
-\section{The Voter Model}
-
-\subsection{Description}
-In the voter model, there are two types of nodes, \textsc{red} and \textsc{blue}. We fix a horizon $T > 0$. At $t=0$, we decide on a set of \textsc{blue} nodes, called the seed set. Every other node is colored \textsc{red}. At each time step, each node $u$ chooses one of its neighbors uniformly, with probability $1/\deg(u)$, and adopts the color of that neighbor. A cascade in the voter model can be described as the evolution of which nodes are \textsc{blue} for each time step $0\leq t\leq T$.
-
-Several variants of the model can be formulated, namely whether or not nodes have self-loops and maintain their color with a certain probability, or whether the probability that a node is part of a seed set is uniform versus non-uniform, independent versus correlated.
For simplicity, we suppose here that nodes do not have self-loops and that the probability that a node is part of the seed set is independent and equal to $p_{\text{init}}$ for $0 < p_{\text{init}} < 1$.
-
-\subsection{Formulation}
-
-We are going to formulate the graph reconstruction problem from voter-model-type cascades as a sparse recovery problem. We introduce the following notation:
-
-\paragraph{Notation}
-Denote by $(\vec m^t_k)_{1 \leq k \leq m; 1 \leq t \leq T}$ the indicator vectors of the set of \textsc{blue} nodes in cascade $k$ at time step $t$. In other words, $\vec m^t_k$ is a vector in $\mathbb{R}^n$ such that:
-
-\[ (\vec m^t_k)_j = \left\{
-  \begin{array}{l l}
-    1 & \quad \text{if $j$ is \textsc{blue} in cascade $k$ at time step $t$}\\
-    0 & \quad \text{otherwise}
-  \end{array} \right.\]
-
-Let $\vec x_i$ be the vector representing the neighbors of node $i$ in the graph ${\cal G}$. In other words, $\vec x_i$ is a vector in $\mathbb{R}^n$ such that:
-
-\[ (\vec x_{i})_j = \left\{
-  \begin{array}{l l}
-    1/\text{deg}(i) & \quad \text{if $j$ is a parent of $i$ in ${\cal G}$}\\
-    0 & \quad \text{otherwise}
-  \end{array} \right.\]
-
-Let $w^t_{k,i}$ be the scalar representing the probability that in cascade $k$ at time step $t$ node $i$ stays or becomes \textsc{blue}.
-
-\[ w^t_{k,i} = \left\{
-  \begin{array}{l l}
-    \frac{\text{Number of \textsc{blue} neighbors of $i$ in $(k, t-1)$}}{\text{deg}(i)} & \quad \text{if $t\neq 0$}\\
-    p_{\text{init}} & \quad \text{otherwise}
-  \end{array} \right.\]
-
-Our objective is to recover the neighbors of each node $i$ in ${\cal G}$. This corresponds to recovering the vectors $(\vec x_i)_{i \in {\cal G}}$. Notice that for each cascade $k$ and every time step $t < T - 1$, we have:
-
-\begin{equation}
-\langle \vec m_k^t, \vec x_i \rangle = \mathbb{P}(\text{node $i$ is \textsc{blue} in cascade $k$ at $t+1$}) = w_{k,i}^{t+1} \nonumber
-\end{equation}
-
-We can concatenate these equalities in matrix form: let $M$ be the matrix whose rows are the row vectors $(\vec m^t_k)^T$ and, for each node $i$, let the entries of $\vec w_i$ be the corresponding probabilities:
-
-$$M \vec x_i = \vec w_i$$
-
-Note that if $M$ had full column rank, then we could recover $\vec x_i$ from $\vec w_i$. This is however an unreasonable assumption, even after having observed many cascades. We must explore which assumptions on $M$ allow us to recover $\vec x_i$ when the number of observed cascades is small.
-
-Further note that we do not immediately observe $\vec w_i$. Instead, we observe a Bernoulli vector, whose $j^{\text{th}}$ entry is equal to $1$ with probability $(\vec w_i)_j$ and $0$ otherwise, corresponding to whether at the next time step $i$ became or stayed \textsc{blue}. We will denote this vector $\vec b_i$, and denote by $\vec \epsilon_i$ the zero-mean noise vector such that:
-
-$$\vec w_i = \vec b_i + \vec \epsilon_i$$
-
-For each node $i$, we can therefore reformulate the problem with our observables:
-
-\begin{equation}
-\label{eq:sparse_voter}
-M \vec x_i = \vec b_i + \vec \epsilon_i
-\end{equation}
-
-If the vector $\vec x_i$ is sufficiently sparse, i.e.\ node $i$ has sufficiently few neighbors, then we hope to apply common sparse recovery techniques to solve for $\vec x_i$.
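-
-One standard way to exploit this sparsity, sketched here for illustration, is basis pursuit denoising: solve
-\begin{equation}
-\nonumber
-\hat x_i \in \arg\min_x \|x\|_1 \quad \text{subject to} \quad \|M x - \vec b_i\|_2 \leq \epsilon
-\end{equation}
-for a noise level $\epsilon$ calibrated to $\vec \epsilon_i$; recovery guarantees then follow from conditions on $M$ such as incoherence or restricted isometry.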
-
-\subsection{Example}
-
-
-\begin{figure}
-\centering
-\includegraphics[width=0.3\textwidth]{images/voter.png}
-\caption{A cascade in the voter model with time steps $t = 0,1,2$ over a graph with 5 vertices}
-\end{figure}
-
-
-
-
-To recover the neighbors of $v_1$, we get the following matrix $M$ for the example in Figure 1:
-
-\begin{align*}
-&\hspace{0.35cm} v_2 \hspace{0.2 cm} v_3 \hspace{0.2 cm} v_4 \hspace{0.2 cm} v_5 &\\
-\vspace{1 cm}
-M = & \left( \begin{array}{cccc}
-0 & 0 & 1 & 1 \\
-1 & 1 & 0 & 0 \\
-\end{array} \right) & \begin{array}{l} \hspace{ -4.5 cm}
-\text{time step 0} \\
- \hspace{ - 4.5cm} \text{time step 1} \\
-\end{array}
-\end{align*}
-
-and the vector $\vec b_1$:
-
-\begin{align*}
-\vec b_1 = & \left( \begin{array}{c}
-1 \\
-1 \\
-\end{array} \right) & \begin{array}{l} \hspace{ - 5cm}
-\text{time step 1} \\
- \hspace{ - 5cm} \text{time step 2} \\
- \end{array}
-\end{align*}
-
-\section{The Independent Cascade Model}
-\label{sec:icc}
-\subsection{Description}
-
-In the independent cascade model, nodes have three possible states: susceptible, infected or active\footnote{The words `infected' and `active' are used interchangeably}, and inactive. All nodes start either as susceptible or infected. The infected nodes at $t=0$ constitute the seed set. Similarly to the voter model, we will suppose that nodes are part of the seed set with independent probability $p_{\text{init}}$. At each time step $t$, for every pair of nodes $(j,i)$ where $j$ is infected and $i$ is susceptible, $j$ attempts to infect $i$ with probability $p_{j,i}$. If $j$ succeeds, $i$ will become active at time step $t+1$. Regardless of $j$'s success, node $j$ will be inactive at time $t+1$: nodes stay active for only one time step. The cascade continues until time step $T_k$ when no active nodes remain.
-
-\subsection{Formulation}
-
-We are going to formulate the graph reconstruction problem from independent-cascade-model-type cascades as a sparse recovery problem. We introduce the following notation:
-
-\paragraph{Notation}
-We adapt slightly the definition of $(\vec m^t_k)$:
-
-\[ (\vec m^t_k)_j = \left\{
-  \begin{array}{l l}
-    1 & \quad \text{if $j$ is \textsc{active} in cascade $k$ at time step $t$}\\
-    0 & \quad \text{otherwise}
-  \end{array} \right.\]
-
-Similarly to the voter model, if $\vec p_i$ is the vector representing the `parents' of node $i$ in the graph ${\cal G}$ such that $(\vec p_i)_j$ is the probability that node $j$ infects $i$, then $\vec \theta_i$ is the vector:
-
-\[ (\vec \theta_{i})_j = \left\{
-  \begin{array}{l l}
-    \log ( 1- p_{j,i}) & \quad \text{if $j$ is a parent of $i$ in ${\cal G}$}\\
-    0 & \quad \text{otherwise}
-  \end{array} \right.\]
-
-Note that if $p_{j,i} = 0$, then $\theta_{j,i} = 0$ and we can write $\vec \theta = \log (1 - \vec p)$ element-wise. For $t>0$, let $w^t_{k,i}$ be the scalar representing the probability that in cascade $k$ at time step $t$ node $i$ becomes \textsc{active}, conditioned on the state of cascade $k$ at time step $t-1$. As long as node $i$ has not yet been infected in cascade $k$, we observe, as in the voter model, that:
-
-$$\langle \vec m_k^t, \vec \theta_i \rangle = \log\left(1 - w^t_{k,i}\right)$$
-
-As in the voter model, by concatenating all row vectors $\vec m^t_{k}$ such that $t < t^k_i$, where $t^k_i$ is the infection time of node $i$ in cascade $k$ ($t^k_i = + \infty$ if $i$ was not infected in that cascade), into a matrix $M_i$, we can write:
-
-$$M_i \vec \theta_i = \log\left(1 - \vec w_i\right)$$
-
-where the logarithm is applied element-wise. Once again, we do not have access to $\vec w_i$. Instead, for each $t < t^k_i$, we observe the realization of a Bernoulli variable of parameter $w^t_{k,i}$, indicating whether node $i$ became \textsc{active} at time $t+1$.
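-
-Mirroring the voter model, this observation model can be restated in matrix form (with ${\cal B}(\vec p)$ denoting a vector of Bernoulli variables whose parameters are the entries of $\vec p$, and the exponential applied entry-wise):
-\begin{equation}
-\nonumber
-\vec b_i \sim {\cal B}\left(1 - e^{M_i \vec \theta_i}\right)
-\end{equation}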
+If we concatenate these rows $\mathbbm{1}_{A_k^t}$ into a matrix $M$ (for
+measurement), we have:
+
+$$M \theta_{*,i} = b$$
+where $b$ is a column vector corresponding to $\log \mathbb{P}(\text{$i$ was not infected at } t+1| S \text{ active at } t)$. Note that even with access to $b$ directly, if $M$ does not have full column rank, then solving for $\theta_{*,i}$ is not trivial! However, under sparsity assumptions on $\theta_{*,i}$, we can apply results from the sparse recovery/sparse coding/compressed sensing literature.
+
+In reality, we do not have access to $b$. We have access to the realization vector $\vec w$ of $\{i$ was infected at $t+1 \mid S$ active at $t\}$:
+
+$$\vec w\sim B(1 - e^{M \theta_{*,i}})$$
+
+where the operation $f: \vec v \rightarrow 1 - e^{\vec v}$ is applied element-wise and $B(\vec p)$ is a vector whose entries are Bernoulli variables with parameters the entries of $\vec p$.
+
+\subsection{Maximum likelihood problem}
+
+The maximum likelihood problem takes the following form:
+\begin{displaymath}
+  \begin{split}
+    \max_\theta &\sum_{k,t}\left(b_{k,t}\log\left(1-\exp\inprod{A_k^t, \theta}\right)
+    + (1-b_{k,t})\log\exp\inprod{A_k^t,\theta}\right)\\
+    \text{s.t. }& \|\theta\|_1 \leq k
+  \end{split}
+\end{displaymath}
+which we can rewrite as:
+\begin{displaymath}
+  \begin{split}
+    \max_\theta &\sum_{k,t}b_{k,t}\log\left(\exp\left(-\inprod{A_k^t, \theta}\right)-1\right)
+    + \sum_{k,t}\inprod{A_k^t,\theta}\\
+    \text{s.t. }& \|\theta\|_1 \leq k
+  \end{split}
+\end{displaymath}
+In this form it is easy to see that this is a convex program: the objective
+function is the sum of a linear function and a concave function.
+
+\paragraph{Questions} To check: is the program also convex in the $p_{j,i}$'s?
+What is the theory of sparse recovery for maximum likelihood problems? (TO DO:
+read about Bregman divergences; I think they could be related.)
+
+\subsection{Results under strong assumptions}
+
+What can we hope for, e.g.\ if $M$ has full column rank or if the same sets $S$ occur frequently?
+
+\subsection{Result under RIP condition}
+
+For ease of notation, let $\theta := \theta_{*,i}$. The following resolution of the above problem borrows heavily from \cite{2005math......3066C}. We can see $w$ as a bounded perturbation of $f(b)$:
+$$w = f(M\theta) + \epsilon, \text{ where } \|\epsilon\|_{\infty} < 1$$
+
+Define the following program, which we can solve with all available information $M$ and $w$:
+\begin{equation}
+(N_1) \quad \min_\theta \|\theta\|_1 \quad \text{subject to} \quad \|e^{M\theta} - (1 - w) \|_2 \leq \|\epsilon \|_\infty \nonumber
+\end{equation}
+
+While we can hope for the columns of $M$ to be `loosely' orthogonal, we must normalize them if we hope for $\delta_T$ to be small. Let $M'$ correspond to $M$ where the columns have been normalized; we therefore define $\epsilon'$ such that:
+$$w = f(M'\theta) + \epsilon'$$
+We see that $\epsilon'= w - (w - \epsilon)^{\vec \lambda}$ (an element-wise operation), where $\lambda_i = 1/\sqrt{\sum \text{col}(M)_i}$, such that $\|\epsilon'\|_{\infty} < 1$.
We can define the following program:
+
+\begin{equation}
+(N_2) \quad \min_\theta \|\theta\|_1 \quad \text{subject to} \quad \|e^{M'\theta} - (1 - w) \|_2 \leq \|\epsilon' \|_\infty \nonumber
+\end{equation}
+
+With the corresponding $\delta_T'$ defined for $M'$, we are going to prove the following:
+
+\begin{theorem}
+Suppose that all nodes in our graph have {\bf degree bounded by $S$}, that no edge between $i$ and its neighbors has probability $1$, i.e.\ $\forall j,\ p_{j,i} < 1 - \eta$ where $0<\eta<1$, and that $\delta'_{3S} + 3 \delta'_{4S} < 2$. If $\theta_0$ is the true vector for node $i$, the solution $\hat \theta$ to $(N_2)$ obeys:
+$$\|\hat \theta - \theta_0 \|_2 \leq \frac{C_S}{\eta} \|\epsilon' \|_{\infty},$$ where $C_S$ is well behaved ($\approx 10$ for reasonable values of $\delta'_{4S}$).
+\end{theorem}
+
+\begin{proof}
+The proof borrows heavily from \cite{2005math......3066C}. Define $h := \hat\theta - \theta_0$. We list the results as they are given in \cite{2005math......3066C} or as they can be adapted to our case.
+
+\begin{itemize}
+\item $\|e^{M'\hat\theta} - e^{M' \theta_0} \|_2 \leq \| e^{M'\hat\theta} - ( 1 - w) \|_2 + \| e^{M'\theta_0} - (1 - w) \|_2 \leq 2 \|\epsilon' \|_{\infty} $
+\item $\|h\|_2^2 \leq (1 + |T_0|/N) \|h_{T_{01}}\|_2^2$, where $N$ is an arbitrary positive integer and $h_{T_{01}}$ is the vector $h$ with all entries masked to $0$, except those corresponding to the non-zero entries of $\theta_0$ and the $N$ largest entries of $h_{T_0^c}$.
+\item $\|e^{M'\hat\theta} - e^{M'\theta_0} \|_2 \geq \eta \|M'h\|_2 \geq \eta\, C_{T_0, N} \cdot \|h_{T_{01}} \|_2$, where $C_{T_0,N} := \sqrt{1 - \delta'_{N + T_0}} - \sqrt{T_0 / N} \sqrt{1 + \delta'_N}$
+\end{itemize}
+The first bullet point is obtained by the triangle inequality. The second bullet point is identical to the proof in \cite{2005math......3066C}. The first inequality in the third bullet point holds because, by assumption, all entries of $M'\hat\theta$ and $M'\theta_0$ are greater than $\ln \eta$ (remember that the $e^{\vec v}$ operation is done element-wise). The rest follows as in \cite{2005math......3066C}, and we obtain that:
+$$\|h\|_2 \leq 2 \frac{\sqrt{1 + T_0/N}}{ \eta \cdot C_{T_0, N}} \|\epsilon'\|_{\infty} = \frac{C_S}{\eta} \|\epsilon'\|_{\infty}$$
+For $\delta'_{4S} \leq \frac{1}{5}$, $C_S \approx 8.82$; for $\delta'_{4S} \leq \frac{1}{4}$, $C_S \approx 10.47$.
+\end{proof}
+
+\begin{remark} A few notes on the result:
+\begin{itemize}
+\item With this naive reduction of $w$ to an $\epsilon$-perturbation of $f(b)$, the best $\|\epsilon\|_{\infty}$ we can hope for is $1 - \eta$. Treating $w$ as more than an arbitrary perturbation will most likely yield a much better upper bound.
+\item The upper bound lives in $\log$-space of the true probabilities:
+$$\frac{C_S}{\eta} > \sum \left[ \log \left(\frac{1 - p_j}{1 - p_j'} \right) \right]^2 > \frac{\eta \log(1/\eta)}{1 - \eta} \| \frac{1 - p_j}{1 - p'_j} - 1 \| $$
+
+This bullet point needs to be finished.
+\item To what extent can we approximate our measurement matrix by a matrix with independent Bernoulli entries? What $\delta_T$'s can we hope for? Which constant $C_S$ can we hope for? How does it change with the rows of $M$?
+\end{itemize}
+
+\end{remark}
+
+\subsection{Results under isotropic condition}
+
+% Given $M$ and $w$, we can solve the following maximum log-likelihood problem:
+% \begin{equation}
+% \max_{\|\theta_{*,i}\|_0 \leq k}
+% \label{eq:max_loglik_obj}
+% \end{equation}
+
+% Since optimizing under the constraint $\|\theta\|_0 \leq k$ is hard, we solve instead the Lagrangian relaxation:
+
+% \begin{equation}
+% \max_{\theta_{*,i}} + \lambda \|\theta_{*,i}\|_0
+% \label{eq:lagra_max_loglik_obj}
+% \end{equation}
+
+% Eq~\ref{eq:lagra_max_loglik_obj} is concave in $\theta_{*,i}$ and can therefore be solved efficiently. We would like to exploit the sparsity of $\theta_{*,i}$ to show that we recover the true sparse $\theta_{*,i}$ with few measurements (rows of $M$).
+
+\bibliography{sparse}{}
+\bibliographystyle{plain}
+
+\end{document}
+
+% Let us now consider the matrix $M$, whose rows are the vectors $\mathbbm{1}_S$. If we could estimate the vector $X := \mathbb{P}(i \text{ not infected at } t \,|\, S \text{ active at } t)$, we could solve for $\theta$:
+% $$M \theta_{*,i} = X, \text{ where }\theta_{*,i} \text{ is a sparse vector}$$
+
+% There are many powerful tools from sparse coding theory which would allow us to recover the sparsest $\theta$ under certain assumptions on $M$. One such assumption is incoherence of the columns of $M$. In other words, if we can show that for all columns $M_i, M_j$ of $M$ we have $|\langle M_i, M_j \rangle| \leq \mu \|M_i\| \|M_j\|$, i.e.\ $M$ is $\mu$-incoherent, then for any node $i$ with fewer than $k = \frac{1}{2 \mu}$ parents, we can recover $\theta_{*,i}$ exactly. To get a better intuition of what this means: if the entries of $M$ were independent Gaussian variables, we would need approximately $k \ln(\frac{n}{k})$ measurements to recover $\theta_{*,i}$ exactly.
+
+% This would seem to suggest that the quality of observed cascades is much more important than quantity--a notion which has not truly been explored in graph reconstruction as of yet. Another, stronger assumption on the columns of $M$ is separability (see Moitra's work in topic modeling), leading to even stronger results.
+
+% In reality of course, we do not know $X$. Instead, we have access to $\hat X$, which estimates $X$ in the following way. For any set $S$ in any cascade that was active before $i$ was, we obtain a binary signal: $\hat X_S:=0$ if $i$ was infected immediately afterwards and $\hat X_S:=1$ otherwise. As the number of times set $S$ was observed active grows, we have $$\frac{1}{m} \sum \hat X_S \rightarrow \mathbb{P}(i \text{ not infected at }t+1 \,|\, S \text{ active at } t).$$
+
+% Certain sets might have been infected many times in our set of cascades, while others might have been infected only once. One possible way to rewrite the program above is: $$\hat X_j = H(w_j - e^{(M \theta_{*,i})_j}), \text{ where } w\sim {\cal U}[0,1] \text{ and } H \text{ is the step function } \mathbbm{1}_{\cdot \geq 0}$$
+
+% This problem seems to be closely related to the growing field of 1-bit compressed sensing, with at least one difference: when scientists consider 1-bit compressed sensing with noise, they are usually in a position to decide which measurement to make next to recover $\theta_{*,i}$, whereas in this framework, we are limited to an observer status\footnote{Research in the other direction could be very interesting: see discussion about exploration-exploitation with Thibaut}.
+
+% I need to do more reading on the subject, namely to understand what guarantees people have found for sample complexity.
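+As a quick companion to the incoherence discussion in the notes above, here is a small sketch (ours, not from this repository) for measuring the coherence $\mu$ of a measurement matrix and the sparsity threshold $k = 1/(2\mu)$ mentioned in these notes:
+\begin{verbatim}
+import numpy as np
+
+def mutual_coherence(M):
+    # mu = max over column pairs i != j of |<M_i, M_j>| / (||M_i|| ||M_j||)
+    norms = np.linalg.norm(M, axis=0)
+    U = M[:, norms > 0] / norms[norms > 0]
+    G = np.abs(U.T @ U)
+    np.fill_diagonal(G, 0.0)
+    return G.max()
+
+rng = np.random.default_rng(1)
+M = (rng.random((200, 500)) < 0.1).astype(float)  # toy 0/1 measurement matrix
+mu = mutual_coherence(M)
+k_max = 1.0 / (2.0 * mu)   # recoverable number of parents, per the notes
+\end{verbatim}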
+
diff --git a/old_work/maximum_likelihood_approach.tex b/old_work/maximum_likelihood_approach.tex
new file mode 100644
index 0000000..4a22158
--- /dev/null
+++ b/old_work/maximum_likelihood_approach.tex
@@ -0,0 +1,230 @@
+\documentclass[11pt]{article}
+\usepackage{fullpage, amsmath, amssymb, amsthm}
+\usepackage{color}
+\newtheorem{theorem}{Theorem}
+\newtheorem{lemma}{Lemma}
+\newtheorem{remark}{Remark}
+\newtheorem{proposition}{Proposition}
+
+\title{Maximum Likelihood Approach}
+\author{Jean Pouget-Abadie}
+
+\begin{document}
+
+\maketitle
+
+We consider the node $\alpha$. We index the measurements by $i \in [1, n]$. Let $b^i$ be the indicator variable for node $\alpha$ being active at the round following measurement $i$, and let $x^i$ be the vector of active nodes for measurement $i$. Recall that:
+
+\begin{equation}
+\label{eq:probability_of_infection}
+1 - \exp(\langle x^i, \theta \rangle) = \mathbb{P}(\text{node } \alpha \text{ is active at the following round})
+\end{equation}
+
+The likelihood problem can be formulated as follows:
+
+\begin{equation}
+\label{eq:main_formulation}
+\min_{\theta \in \mathbb{R}^p} \quad \lambda_n \| \theta \|_1 + \sum^n_{i=1} - b^i \log \left(e^{-\langle x^i, \theta \rangle} - 1 \right) - \langle x^i, \theta \rangle
+\end{equation}
+
+We define $f(\theta):= \sum^n_{i=1} - b^i \log \left(e^{-\langle x^i, \theta \rangle} - 1 \right) - \langle x^i, \theta \rangle$ such that Eq.~\ref{eq:main_formulation} can be rewritten as:
+
+\begin{equation}
+\label{eq:small_formulation}
+\min_{\theta \in \mathbb{R}^p} \quad f(\theta) + \lambda_n \| \theta \|_1
+\end{equation}
+
+We cite the following theorem from \cite{Negahban:2009} (roughly stated, because the statements of the original theorem are either slightly off or unclear):
+
+\begin{proposition}
+\label{thm:cited_theorem}
+Let ${\cal C}:=\{\Delta \in \mathbb{R}^p : \exists S \subset [1, p],\ |S| \leq s, \ s.t. \ \|\Delta_{S^c}\|_1 \leq 3 \| \Delta_S \|_1 \}$. Suppose that $\theta^*$ is $s$-sparse, and that the following two conditions are met:
+\begin{equation}
+\lambda_n \geq 2 \|\nabla f(\theta^*) \|_\infty
+\label{eq:lambda_condition}
+\end{equation}
+\begin{equation}
+\forall \Delta \in {\cal C}, \ \Delta^T \cdot \nabla^2 f(\theta^*) \cdot \Delta \geq \gamma_n \| \Delta \|_2^2
+\label{eq:RSC_condition}
+\end{equation}
+then the solution $\hat \theta$ of Eq.~\ref{eq:small_formulation} satisfies:
+\begin{equation}
+\| \hat \theta - \theta^* \|_2 \leq \frac{\sqrt{s} \lambda_n}{\gamma_n}
+\end{equation}
+\end{proposition}
+
+It remains to show that the two conditions of Proposition~\ref{thm:cited_theorem} are met.
+
+\section*{Condition~\ref{eq:lambda_condition}}
+Condition~\ref{eq:lambda_condition} requires us to find an upper bound for $ 2 \|\nabla f(\theta^*) \|_\infty$. If we only consider the first measurement of every cascade, this can be done easily. Let $N$ be the number of cascades (to distinguish from $n$, the total number of measurements). Begin by noting that:
+
+\begin{equation}
+\nabla_k f(\theta) = \sum^n_{i=1} \frac{b^i x^i_k}{1 - e^{\langle x^i, \theta \rangle}} - \sum^n_{i=1} x^i_k = \sum_{i=1}^n x^i_k \left( \frac{b^i}{\mathbb{P}(\text{node } \alpha \text { infected})} - 1\right)
+\end{equation}
+
+\begin{lemma}
+\label{lem:subgaussian_variable}
+$\nabla f(\theta^*)$ is an $N/p_{\min}$-subgaussian variable, where $p_{\min}$ is the smallest non-zero edge probability into node $\alpha$.
+\end{lemma}
+
+\begin{proof}
+\begin{align}
+\mathbb{E} \left( \nabla_k f(\theta) \right) & = \sum_{i=1}^N \mathbb{E} \left[ x^i_k \left( \frac{b^i}{\mathbb{P}(\text{node } \alpha \text { infected})} - 1\right) \right] \nonumber \\
+& = \sum_{i=1}^N \mathbb{E}_S \left[ \mathbb{E}\left[x^i_k \left( \frac{b^i}{\mathbb{P}(\text{node } \alpha \text { infected})} - 1\right) \middle| S \right] \right] \quad \text{where $S$ is the seed set} \nonumber \\
+& = \sum_{i=1}^N \mathbb{E}\left[x^i_k \left( \frac{ \mathbb{E}_S \left[ b^i \middle| S \right]}{\mathbb{P}(\text{node } \alpha \text { infected})} - 1\right) \right] \nonumber \\
+& = 0
+\end{align}
+Therefore, $\nabla_k f(\theta^*)$ is a sum of zero-mean variables bounded in magnitude by $1/p_{\min}$. It follows that $\nabla_k f(\theta^*)$ is $N/p_{\min}$-subgaussian.
+\end{proof}
+
+By a union bound and the tail characterization of sub-gaussian variables:
+
+\begin{equation}
+\mathbb{P}(\| \nabla f(\theta) \|_{\infty} > \lambda) \leq 2 \exp \left( -\frac{\lambda^2 p_{\min}}{2n} + \log p \right)
+\end{equation}
+
+In conclusion, for $\delta>0$, $\lambda_n := 2 \sqrt{\frac{n^{\delta + 1} \log p}{p_{\min}}}$ meets Condition~\ref{eq:lambda_condition} with probability $1 - \exp(-n^\delta \log p )$.
+
+\section*{Condition~\ref{eq:RSC_condition}}
+
+Note that:
+\begin{equation}
+\nabla^2_{kj} f(\theta) = \sum_{i=1}^n \frac{b^i x_k^i x_j^i e^{\langle x^i, \theta \rangle}}{\left(1 - e^{\langle x^i, \theta \rangle} \right)^2} = \sum_{i=1}^n b^i x_k^i x_j^i \frac{\mathbb{P}(\text{node } \alpha \text { not infected})}{\mathbb{P}(\text{node } \alpha \text { infected})^2}
+\end{equation}
+
+We are going to exhibit an explicit constant $\gamma$ such that $\forall \Delta \in {\cal C}, \ \Delta^T \cdot \nabla^2 f(\theta^*) \cdot \Delta \geq \gamma n \|\Delta\|_2^2$.
+
+\paragraph{Notation} Let $p_i := \mathbb{P}(\text{node } \alpha \text { infected})$. Let $Z^i_k := b^i x^i_k \frac{1-p_i}{p_i^2}$ and let $Z^i_{k,j} := b^i x^i_k x^i_j \frac{1-p_i}{p_i^2}$. We also define $Z_k := \sum_i Z^i_k$ and $Z_{k,j} := \sum_i Z^i_{k,j}$.
+
+\begin{align}
+\Delta^T \cdot \nabla^2 f(\theta^*) \cdot \Delta & = \sum_k \Delta_k^2 \left[ \sum_i b^i x_k^i \frac{1 - p_i}{p_i^2} \right] + 2 \sum_{k< j} \Delta_k \Delta_j \left[ \sum_i b^i x^i_k x^i_j \frac{1 - p_i}{p_i^2}\right] \nonumber \\
+& = \sum_k \Delta_k^2 Z_k + 2 \sum_{k< j} \Delta_k \Delta_j Z_{k,j} \nonumber
+\end{align}
+
+\begin{lemma}
+\label{lem:first_term}
+Suppose that $\forall k$, $\mathbb{E}_{S(k)} \left[ \frac{1}{p_i} \right] \geq 1 + \alpha$; then with probability greater than $1 - 2 p e^{-2 \alpha^2 (1-c)^2p_{\min}^2 p_{\text{init}}^2 N}$, $$\forall k, \ Z_k > c \alpha p_{\text{init}} N.$$
+\end{lemma}
+
+\begin{proof}
+Let $S(k)$ denote the active set conditioned on node $k$ being active and at least one parent of $\alpha$ being active. We denote by $p_{S(k)}$ the probability that the active set satisfies these two conditions.
+\begin{align}
+\nonumber
+\mathbb{E}[Z^i_k] & = p_{S(k)} \mathbb{E}_{S(k)} \left[ \mathbb{E}[b^i | S(k)] \frac{1 - p_i}{p_i^2} \right] \\ \nonumber
+& = p_{S(k)} \left( \mathbb{E}_{S(k)} \left[ \frac{1}{p_i} \right] - 1 \right) \\ \nonumber
+& \geq \alpha p_{S(k)} \quad \text{by assumption}
+\end{align}
+
+Note that $|Z^i_k| < \frac{1}{p_{\text{min}}^2}$ {\it a.s.}.
+By Hoeffding's inequality, the same reasoning yields the claimed lower bound on $Z_k$; an identical argument controls the cross terms $Z_{k,j}$ when $\mathbb{E}_{S(k,j)}[1/p_i] \leq 1 + \beta$ (with $S(k,j)$ and $p_{S(k,j)}$ defined analogously): for $0 < c < 1$,
+\begin{align}
+\nonumber
+\mathbb{P}\left(Z_{k,j} > c \beta p_{S(k,j)} N \right) & \leq e^{- \frac{2(1-c)^2}{Nb^2} \left( \mathbb{E}_{S(k,j)} \left[ \frac{1}{p_i} \right] - 1 \right)^2} \\ \nonumber
+& \leq e^{-2 \alpha^2 (1-c)^2p_{\min}^2 p_{S(k,j)}^2 N}
+\end{align}
+We conclude by union bound.
+\end{proof}
+
+\begin{proposition}
+Suppose that $\forall k,j$, $1 + \alpha \leq \mathbb{E}_{S(k, j)} \left[ \frac{1}{p_i} \right] \leq 1 + \beta$; then with probability greater than $1 - XXX$, Condition~\ref{eq:RSC_condition} is met with $\gamma_n = \gamma n$, where $\gamma := \alpha p_{S(k)} - 16 \sqrt{s} \beta p_{S(k,j)}$.
+\end{proposition}
+
+\begin{proof}
+(Sketch) By the triangle inequality followed by Maclaurin's inequality, the cross terms satisfy, with high probability,
+\begin{align}
+\Big| 2 \sum_{k<j} \Delta_k \Delta_j Z_{k,j} \Big| \leq 16 \sqrt{s}\, \beta\, p_{S(k,j)}\, N\, \|\Delta\|_2^2 \nonumber
+\end{align}
+Combining this bound with Lemma~\ref{lem:first_term} yields the claim.
+\end{proof}
+
+% We can conclude using the following Hoeffding inequality for independent random variables bounded by $[0, b_i]$, by noticing that our variables are bounded above by $\frac{1 - p_{\min}}{p_{\min}^2}$
+
+% \paragraph{Hoeffding's inequality}
+% \begin{equation}
+% \label{eq:hoeffding_inequality}
+% \mathbb{P} \left(\sum Z_i \geq \mathbb{E}[\sum Z_i] - t \right) \leq \exp\left(- \frac{2 N t^2}{b^2} \right)
+% \end{equation}
+
+% It follows that for $c<1$ with probability $1 - \exp \left( - n^3 c^2 s^2 p_{init}^4 p_{\min}^6 \frac{\eta^2}{(1 - \eta)^4} \right)$, we have that $$\sum_k \Delta_k^2 \left[ \sum_i b^i x_k^i \frac{1 - p_i}{p_i^2} \right] \geq \gamma N =: (1 -c) s p_{init}^2 p_{\min} \frac{\eta}{(1 - \eta)^2} N$$
+
+% \begin{remark}
+% Would it be possible to extend this result using Azuma's inequality on martingales to not just the first measurement of every cascade?
+% \end{remark}
+
+% \subsection*{Second term}
+% We are now going to find an upper-bound on the term $\sum_i b^i x^i_k x^i_j \frac{1 - p_i}{p_i^2}$.
+
+\section*{Conclusion}
+
+Suppose we have shown that Condition~\ref{eq:RSC_condition} is met with $\gamma_n = \gamma N$; then we have the following theorems:
+
+\begin{theorem}
+\label{thm:l2_bound}
+Suppose that $\theta^* \in \mathbb{R}^p$ is $s$-sparse and that we choose $\lambda_n = 2 \sqrt{\frac{n^{\delta + 1} \log p}{p_{\min}}}$ for $\delta >0$; then with probability $1 - \exp(-n^\delta \log p )$, we have
+\begin{equation}
+\|\hat \theta - \theta^* \|_2 \leq \frac{2}{\gamma} \sqrt{\frac{s \log p}{p_{\min} N^{1 - \delta}}}
+\end{equation}
+\end{theorem}
+
+Note that we can choose $\delta = 0$ in high dimensions, since the probability of success is then $1 - \frac{1}{p} \approx 1$. We can also obtain support recovery with the following reasoning.
+
+\begin{theorem}
+\label{thm:support_recovery}
+Suppose that $N$ is chosen such that $\frac{2}{\gamma}\sqrt{\frac{s \log p}{p_{\min} N^{1 -\delta}}} < \eta$, and suppose we keep in the estimated support of $\theta^*$ only the coordinates with $\hat \theta_i > \eta$. Then no wrong parent will be included, and all `strong' parents will be included, where `strong' means $\theta^*_i > 2 \eta$.
+\end{theorem}
+
+It follows that we have an algorithm that recovers the graph from ${\cal O}(s \log p)$ measurements, with better constants and fewer assumptions than previous work.
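+For concreteness, here is a proximal-gradient (ISTA) sketch of Eq.~\ref{eq:main_formulation} together with the thresholding rule of Theorem~\ref{thm:support_recovery}. This is our own illustration: the fixed step size, the clipping used to stay inside the likelihood's domain, and all names are our choices, not part of the analysis above.
+\begin{verbatim}
+import numpy as np
+
+def soft_threshold(v, a):
+    return np.sign(v) * np.maximum(np.abs(v) - a, 0.0)
+
+def fit_node(X, b, lam, step=1e-3, iters=20000):
+    # ISTA on  f(theta) + lam * ||theta||_1  with
+    # f(theta) = sum_i -b^i log(exp(-<x^i,theta>) - 1) - <x^i,theta>.
+    # Assumes every row x^i has at least one active node, so that
+    # <x^i, theta> < 0 on the domain theta < 0 (theta_j = log(1 - p_j) <= 0).
+    n, p = X.shape
+    theta = np.full(p, -0.1)
+    for _ in range(iters):
+        z = X @ theta
+        grad = X.T @ (b / (1.0 - np.exp(z)) - 1.0)   # gradient derived above
+        theta = soft_threshold(theta - step * grad, step * lam)
+        theta = np.clip(theta, -20.0, -1e-8)         # keep <x^i, theta> < 0
+    return theta
+
+def estimated_support(theta_hat, eta):
+    # Support-recovery rule: keep coordinates larger than eta in magnitude
+    return np.flatnonzero(np.abs(theta_hat) > eta)
+\end{verbatim}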
+
+\bibliography{sparse}
+\bibliographystyle{plain}
+
+\end{document}
\ No newline at end of file
diff --git a/old_work/reportYaron.tex b/old_work/reportYaron.tex
new file mode 100644
index 0000000..f4878c7
--- /dev/null
+++ b/old_work/reportYaron.tex
@@ -0,0 +1,354 @@
+\documentclass[10pt]{article}
+\usepackage{fullpage, amsmath, amsthm, amssymb}
+\usepackage{graphicx, algorithm, algpseudocode}
+\usepackage[utf8x]{inputenc}
+\newtheorem{theorem}{Theorem}
+
+\title{Sparse Recovery for Graph Reconstruction}
+
+\date{}
+
+\begin{document}
+
+\maketitle
+
+\section{Introduction}
+
+Given a set of observed cascades, the graph reconstruction problem consists of finding the underlying graph on which these cascades spread. We explore different models for cascade creation, namely the voter model and the independent cascade model. Our objective is therefore to approximately compute the `inverse' of the cascade generation function from as few observations as possible.
+
+\section{Related Work}
+
+Several works have tackled the graph reconstruction problem in variants of the independent cascade model. We briefly summarize their results and approaches below.
+
+\begin{itemize}
+\item Manuel Gomez-Rodriguez, Jure Leskovec, and Andreas Krause were amongst the first to tackle the problem of graph reconstruction from cascade observations \cite{gomez}. Their first method, \textsc{NetInf}, and a series of similar methods published subsequently, solve a heuristic approximation of an NP-hard maximum-likelihood formulation. Unfortunately, \textsc{NetInf} has no known theoretical guarantees. One difficulty in comparing ourselves to their heuristic is that they assume a continuous-time model, which is close to, but not adaptable to, our framework. Namely, as we will see in Section~\ref{sec:icc}, nodes in our model cannot influence nodes beyond the time step at which they are active.
+\item Their paper was later followed by a publication from Praneeth Netrapalli and Sujay Sanghavi \cite{netrapalli}, which provides two algorithms with interesting theoretical guarantees.
+  \begin{itemize}
+  \item The first algorithm they discuss solves a convex and parallelizable optimization problem. The algorithm is guaranteed to achieve perfect reconstruction of `strong' edges after ${\cal O}(\Delta^2 \log n)$ cascades with high probability, where $\Delta$ is the maximum degree in the graph. There are several things to note about this result:
+    \begin{itemize}
+    \item Researchers believe the true threshold of cascades necessary to achieve perfect reconstruction is closer to ${\cal O}(\Delta \log n)$.
+    \item The constant hidden in the ${\cal O}$ is highly polynomial in the model's parameters; it is of the order of billions even for very generous initializations of these parameters.
+    \item The authors make a restrictive assumption, called `correlation decay', on the probabilities of each edge: $\forall i, \sum_{j \in {\cal N}(i)} p_{j,i} < 1 -\alpha$, where $0 < \alpha < 1$.
+    \end{itemize}
+  \item The second algorithm suggested in \cite{netrapalli}, to which we will compare ourselves in Section~\ref{sec:exp}, is \textsc{Greedy}. It achieves perfect reconstruction after ${\cal O}(\Delta \log n)$ observed cascades with high probability. However, this guarantee has been proven only in the case of tree graphs. The authors mention that it does fairly well in practice on other types of graphs.
+  \end{itemize}
+\item Finally, a recent paper by Bruno Abrahao, Flavio Chierichetti, Robert Kleinberg, and Alessandro Panconesi gives several algorithms and a different approach to obtaining theoretical guarantees. Their strongest result is for bounded-degree graphs, where they show that they can obtain perfect reconstruction after observing ${\cal O}(\Delta^9 \log^2 \Delta \log n)$ cascades. This is weaker than the first algorithm suggested by \cite{netrapalli}, but does not make the `correlation decay' assumption.
+\end{itemize}
+
+What we aim for with the following framework is theoretical guarantees promising close-to-perfect reconstruction after observing ${\cal O}(\Delta \log n)$ cascades with high probability, without relying on unreasonable assumptions like `correlation decay'. We also want our method to be easily implementable in practice and to compare favorably to the above algorithms.
+
+\section{The Voter Model}
+
+\subsection{Description}
+In the voter model, there are two types of nodes, \textsc{red} and \textsc{blue}. We fix a horizon $T > 0$. At $t=0$, we decide on a set of \textsc{blue} nodes, called the seed set. Every other node is colored \textsc{red}. At each time step, each node $u$ chooses one of its neighbors uniformly at random, i.e.\ with probability $1/\deg(u)$, and adopts the color of that neighbor. A cascade in the voter model can be described as the evolution of which nodes are \textsc{blue} at each time step $0\leq t\leq T$.
+
+Several variants of the model can be formulated, depending on whether nodes have self-loops and maintain their color with a certain probability, or on whether the probability that a node is part of the seed set is uniform versus non-uniform, independent versus correlated. For simplicity, we suppose here that nodes do not have self-loops and that each node is part of the seed set independently with probability $p_{\text{init}}$, for $0 < p_{\text{init}} < 1$.
+
+\subsection{Formulation}
+
+We are going to frame the graph reconstruction problem from voter-model-type cascades as a sparse recovery problem. We introduce the following notation:
+
+\paragraph{Notation}
+Denote by $(\vec m^t_k)_{1 \leq k \leq m;\, 0 \leq t \leq T}$ the indicator vectors of the blue nodes in cascade $k$ at time step $t$. In other words, $\vec m^t_k$ is a vector in $\mathbb{R}^n$ such that:
+
+\[ (\vec m^t_k)_j = \left\{
+  \begin{array}{l l}
+    1 & \quad \text{if $j$ is \textsc{blue} in cascade $k$ at time step $t$}\\
+    0 & \quad \text{otherwise}
+  \end{array} \right.\]
+
+Let $\vec x_i$ be the vector representing the neighbors of a node $i$ in the graph ${\cal G}$. In other words, $\vec x_i$ is a vector in $\mathbb{R}^n$ such that:
+
+\[ (\vec x_{i})_j = \left\{
+  \begin{array}{l l}
+    1/\deg(i) & \quad \text{if $j$ is a parent of $i$ in ${\cal G}$}\\
+    0 & \quad \text{otherwise}
+  \end{array} \right.\]
+
+Let $w^t_{k,i}$ be the scalar representing the probability that in cascade $k$ at time step $t$ node $i$ stays or becomes \textsc{blue}.
+
+\[ w^t_{k,i} = \left\{
+  \begin{array}{l l}
+    \frac{\text{Number of \textsc{blue} neighbors of $i$ in $(k, t-1)$}}{\deg(i)} & \quad \text{if $t\neq 0$}\\
+    p_{\text{init}} & \quad \text{otherwise}
+  \end{array} \right.\]
+
+Our objective is to recover the neighbors of each node $i$ in ${\cal G}$. This corresponds to recovering the vectors $(\vec x_i)_{i \in {\cal G}}$.
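+As an illustration of this notation (a sketch of ours; the adjacency-list representation and all names are our choices), the vectors $\vec m^t_k$ of one cascade can be generated as follows:
+\begin{verbatim}
+import numpy as np
+
+def simulate_voter(adj, p_init, T, rng):
+    # One voter-model cascade; adj[u] lists the neighbors of node u.
+    # Returns the blue-indicator vectors m^0, ..., m^T.
+    n = len(adj)
+    blue = (rng.random(n) < p_init).astype(float)    # seed set
+    history = [blue.copy()]
+    for _ in range(T):
+        nxt = blue.copy()
+        for u in range(n):
+            if adj[u]:               # copy the color of a uniform neighbor
+                nxt[u] = blue[rng.choice(adj[u])]
+        blue = nxt
+        history.append(blue.copy())
+    return history
+
+rng = np.random.default_rng(0)
+adj = [[1, 2], [0, 2], [0, 1, 3], [2]]               # toy undirected graph
+m_vectors = simulate_voter(adj, 0.5, 2, rng)
+\end{verbatim}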
Notice that for each cascade $k$ and every time step $t < T$, we have:
+
+\begin{equation}
+\langle \vec m_k^t, \vec x_i \rangle = \mathbb{P}(\text{node $i$ is \textsc{blue} in cascade $k$ at $t+1$}) = w_{k,i}^{t+1} \nonumber
+\end{equation}
+
+We can concatenate these equalities in matrix form, such that the rows of $M$ are the row vectors $(\vec m^t_k)^T$ and, for each node $i$, the entries of $\vec w_i$ are the corresponding probabilities:
+
+$$M \vec x_i = \vec w_i$$
+
+Note that if $M$ had full column rank, then we could recover $\vec x_i$ from $\vec w_i$. This is however an unreasonable assumption, even after having observed many cascades. We must explore which assumptions on $M$ allow us to recover $\vec x_i$ when the number of observed cascades is small.
+
+Further note that we do not directly observe $\vec w_i$. Instead, we observe a Bernoulli vector whose $j^{\text{th}}$ entry is equal to $1$ with probability $(\vec w_i)_j$ and $0$ otherwise, corresponding to whether node $i$ became or stayed \textsc{blue} at the next time step. We will denote this vector $\vec b_i$, and denote by $\vec \epsilon_i$ the zero-mean noise vector such that:
+
+$$\vec w_i = \vec b_i + \vec \epsilon_i$$
+
+For each node $i$, we can therefore reformulate the problem in terms of our observables:
+
+\begin{equation}
+\label{eq:sparse_voter}
+M \vec x_i = \vec b_i + \vec \epsilon_i
+\end{equation}
+
+If the vector $\vec x_i$ is sufficiently sparse, i.e.\ node $i$ has sufficiently few neighbors, then we hope to apply common sparse recovery techniques to solve for $\vec x_i$.
+
+\subsection{Example}
+
+\begin{figure}
+\centering
+\includegraphics[width=0.3\textwidth]{images/voter.png}
+\caption{A cascade in the voter model with time steps $t = 0,1,2$ over a graph with 5 vertices}
+\end{figure}
+
+To recover the neighbors of $v_1$, we get the following matrix $M$ for the example in Figure 1:
+
+\begin{align*}
+&\hspace{0.35cm} v_2 \hspace{0.2 cm} v_3 \hspace{0.2 cm} v_4 \hspace{0.2 cm} v_5 &\\
+M = & \left( \begin{array}{cccc}
+0 & 0 & 1 & 1 \\
+1 & 1 & 0 & 0 \\
+\end{array} \right) & \begin{array}{l} \hspace{-4.5cm}
+\text{time step 0} \\
+\hspace{-4.5cm} \text{time step 1} \\
+\end{array}
+\end{align*}
+
+and the vector $b_1$:
+
+\begin{align*}
+b_1 = & \left( \begin{array}{c}
+1 \\
+1 \\
+\end{array} \right) & \begin{array}{l} \hspace{-5cm}
+\text{time step 1} \\
+\hspace{-5cm} \text{time step 2} \\
+\end{array}
+\end{align*}
+
+\section{The Independent Cascade Model}
+\label{sec:icc}
+\subsection{Description}
+
+In the independent cascade model, nodes have three possible states: susceptible, infected or active\footnote{The words `infected' and `active' are used interchangeably}, and inactive. All nodes start either as susceptible or infected. The infected nodes at $t=0$ constitute the seed set. Similarly to the voter model, we will suppose that nodes are part of the seed set with independent probability $p_{\text{init}}$. At each time step $t$, for every pair of nodes $(j,i)$ where $j$ is infected and $i$ is susceptible, $j$ attempts to infect $i$ with probability $p_{j,i}$. If $j$ succeeds, $i$ will become active at time step $t+1$. Regardless of $j$'s success, node $j$ will be inactive at time $t+1$: nodes stay active for only one time step. The cascade continues until time step $T_k$, when no active nodes remain.
+
+\subsection{Formulation}
+
+We are going to frame the graph reconstruction problem from independent-cascade-model-type cascades as a sparse recovery problem.
We introduce the following notation:
+
+\paragraph{Notation}
+We slightly adapt the definition of $(\vec m^t_k)$:
+
+\[ (\vec m^t_k)_j = \left\{
+  \begin{array}{l l}
+    1 & \quad \text{if $j$ is \textsc{active} in cascade $k$ at time step $t$}\\
+    0 & \quad \text{otherwise}
+  \end{array} \right.\]
+
+Similarly to the voter model, if $\vec p_i$ is the vector representing the `parents' of node $i$ in the graph ${\cal G}$ such that $(\vec p_i)_j$ is the probability that node $j$ infects $i$, then $\vec \theta_i$ is the vector:
+
+\[ (\vec \theta_{i})_j = \left\{
+  \begin{array}{l l}
+    \log ( 1- p_{j,i}) & \quad \text{if $j$ is a parent of $i$ in ${\cal G}$}\\
+    0 & \quad \text{otherwise}
+  \end{array} \right.\]
+
+Note that if $p_{j,i} = 0$, then $\theta_{j,i} = 0$ and we can write $\vec \theta_i = \log (1 - \vec p_i)$ element-wise. For $t>0$, let $w^t_{k,i}$ be the logarithm of the probability that node $i$ is not infected in cascade $k$ at time step $t$, conditioned on the state of cascade $k$ at time step $t-1$. As long as node $i$ has not yet been infected in cascade $k$, then, as in the voter model, we can write:
+
+$$\langle \vec m_k^{t-1}, \vec \theta_i \rangle = w^t_{k,i}$$
+
+As in the voter model, by concatenating all row vectors $\vec m^t_k$ such that $t < t^k_i$, where $t^k_i$ is the infection time of node $i$ in cascade $k$, into the matrix $M_i$, we can write:
+
+$$M_i \vec \theta_i = \vec w_i$$
+
+Once again, we do not have access to $\vec w_i$. Instead, for $t < t^k_i$, we observe whether node $i$ became infected at time step $t+1$.
-      \IF{$x_i > x_{i+1}$}
-        \STATE Swap $x_i$ and $x_{i+1}$
-        \STATE $noChange = false$
-      \ENDIF
-    \ENDFOR
-  \UNTIL{$noChange$ is $true$}
-\end{algorithmic}
-\end{algorithm}
-
-\subsection{Tables}
-
-You may also want to include tables that summarize material. Like figures, these should be centered, legible, and numbered consecutively. However, place the title {\it above\/} the table with at least 0.1~inches of space before the title and the same after it, as in Table~\ref{sample-table}. The table title should be set in 9~point type and centered unless it runs two or more lines, in which case it should be flush left.
-
-% Note use of \abovespace and \belowspace to get reasonable spacing
-% above and below tabular lines.
-
-\begin{table}[t]
-\caption{Classification accuracies for naive Bayes and flexible Bayes on various data sets.}
-\label{sample-table}
-\vskip 0.15in
-\begin{center}
-\begin{small}
-\begin{sc}
-\begin{tabular}{lcccr}
-\hline
-\abovespace\belowspace
-Data set & Naive & Flexible & Better? \\
-\hline
-\abovespace
-Breast & 95.9$\pm$ 0.2& 96.7$\pm$ 0.2& $\surd$ \\
-Cleveland & 83.3$\pm$ 0.6& 80.0$\pm$ 0.6& $\times$\\
-Glass2 & 61.9$\pm$ 1.4& 83.8$\pm$ 0.7& $\surd$ \\
-Credit & 74.8$\pm$ 0.5& 78.3$\pm$ 0.6& \\
-Horse & 73.3$\pm$ 0.9& 69.7$\pm$ 1.0& $\times$\\
-Meta & 67.1$\pm$ 0.6& 76.5$\pm$ 0.5& $\surd$ \\
-Pima & 75.1$\pm$ 0.6& 73.9$\pm$ 0.5& \\
-\belowspace
-Vehicle & 44.9$\pm$ 0.6& 61.5$\pm$ 0.4& $\surd$ \\
-\hline
-\end{tabular}
-\end{sc}
-\end{small}
-\end{center}
-\vskip -0.1in
-\end{table}
-
-Tables contain textual material that can be typeset, as contrasted with figures, which contain graphical material that must be drawn. Specify the contents of each row and column in the table's topmost row. Again, you may float tables to a column's top or bottom, and set wide tables across both columns, but place two-column tables at the top or bottom of the page.
-
-\subsection{Citations and References}
-
-Please use APA reference format regardless of your formatter or word processor.
If you rely on the \LaTeX\/ bibliographic facility, use {\tt natbib.sty} and {\tt icml2015.bst} included in the style-file package to obtain this format.
-
-Citations within the text should include the authors' last names and year. If the authors' names are included in the sentence, place only the year in parentheses, for example when referencing Arthur Samuel's pioneering work \yrcite{Samuel59}. Otherwise place the entire reference in parentheses with the authors and year separated by a comma \cite{Samuel59}. List multiple references separated by semicolons \cite{kearns89,Samuel59,mitchell80}. Use the `et~al.' construct only for citations with three or more authors or after listing all authors to a publication in an earlier reference \cite{MachineLearningI}.
-
-Authors should cite their own work in the third person in the initial version of their paper submitted for blind review. Please refer to Section~\ref{author info} for detailed instructions on how to cite your own papers.
-
-Use an unnumbered first-level section heading for the references, and use a hanging indent style, with the first line of the reference flush against the left margin and subsequent lines indented by 10 points. The references at the end of this document give examples for journal articles \cite{Samuel59}, conference publications \cite{langley00}, book chapters \cite{Newell81}, books \cite{DudaHart2nd}, edited volumes \cite{MachineLearningI}, technical reports \cite{mitchell80}, and dissertations \cite{kearns89}.
-
-Alphabetize references by the surnames of the first authors, with single author entries preceding multiple author entries. Order references for the same authors by year of publication, with the earliest first. Make sure that each reference includes all relevant information (e.g., page numbers).
-
-\subsection{Software and Data}
-
-We strongly encourage the publication of software and data with the camera-ready version of the paper whenever appropriate. This can be done by including a URL in the camera-ready copy. However, do not include URLs that reveal your institution or identity in your submission for review. Instead, provide an anonymous URL or upload the material as ``Supplementary Material'' into the CMT reviewing system. Note that reviewers are not required to look at this material when writing their review.
-
-% Acknowledgements should only appear in the accepted version.
-\section*{Acknowledgments}
-
-\textbf{Do not} include acknowledgements in the initial version of the paper submitted for blind review.
-
-If a paper is accepted, the final camera-ready version can (and probably should) include acknowledgements. In this case, please place such acknowledgements in an unnumbered section at the end of the paper. Typically, this will include thanks to reviewers who gave useful comments, to colleagues who contributed to the ideas, and to funding agencies and corporate sponsors that provided financial support.
-
-% In the unusual situation where you want a paper to appear in the references without citing it in the main text, use \nocite
-\nocite{langley00}
-
-\bibliography{example_paper}
-\bibliographystyle{icml2015}
-
-\end{document}
-
-% This document was modified from the file originally made available by
-% Pat Langley and Andrea Danyluk for ICML-2K.
This version was -% created by Lise Getoor and Tobias Scheffer, it was slightly modified -% from the 2010 version by Thorsten Joachims & Johannes Fuernkranz, -% slightly modified from the 2009 version by Kiri Wagstaff and -% Sam Roweis's 2008 version, which is slightly modified from -% Prasad Tadepalli's 2007 version which is a lightly -% changed version of the previous year's version by Andrew Moore, -% which was in turn edited from those of Kristian Kersting and -% Codrina Lauth. Alex Smola contributed to the algorithmic style files. diff --git a/poster/beamerthemeconfposter.sty b/poster/beamerthemeconfposter.sty new file mode 100644 index 0000000..bcba6a7 --- /dev/null +++ b/poster/beamerthemeconfposter.sty @@ -0,0 +1,184 @@ +%============================================================================== +% Beamer style for the poster template posted at +% www.nathanieljohnston.com/index.php/2009/08/latex-poster-template +% +% Created by the Computational Physics and Biophysics Group at Jacobs University +% https://teamwork.jacobs-university.de:8443/confluence/display/CoPandBiG/LaTeX+Poster +% Modified by Nathaniel Johnston (nathaniel@nathanieljohnston.com) in August 2009 +% ============================================================================= + +\ProvidesPackage{beamerthemeconfposter} +\RequirePackage{tikz} % for drawing the nice rounded boxes +\usetikzlibrary{arrows,backgrounds} +\RequirePackage[T1]{fontenc} +\RequirePackage{lmodern} +\RequirePackage{textcomp} +\RequirePackage{amsmath,amssymb} +\usefonttheme{professionalfonts} +\newcommand{\makeruleinbox}{{\usebeamercolor[bg]{block alerted title}\centering\hspace*{-0.7cm}\rule{\inboxrule}{0.5cm}}} +\usepackage{ragged2e} +\usepackage{wrapfig} +%----------------------------------------------------------- +% Define a whole bunch of custom colours and fonts +%----------------------------------------------------------- + +\definecolor{lgreen} {RGB}{180,210,100} +\definecolor{dblue} {RGB}{20,66,129} +\definecolor{ddblue} {RGB}{11,36,69} +\definecolor{lred} {RGB}{220,0,0} +\definecolor{nred} {RGB}{224,0,0} +\definecolor{norange}{RGB}{244,127,36} +\definecolor{nyellow}{RGB}{255,221,0} +\definecolor{ngreen} {RGB}{98,158,31} +\definecolor{dgreen} {RGB}{78,138,21} +\definecolor{nblue} {RGB}{28,130,185} +\definecolor{jblue} {RGB}{20,50,100} +\definecolor{jalpha} {RGB}{0,0,0} + +% set the basic colors +\setbeamercolor{palette primary} {fg=black,bg=white} +\setbeamercolor{palette secondary} {fg=black,bg=white} +\setbeamercolor{palette tertiary} {bg=jblue,fg=white} +\setbeamercolor{palette quaternary}{fg=black,bg=white} +\setbeamercolor{structure}{fg=jblue} +\setbeamercolor{titlelike} {bg=jblue,fg=white} +\setbeamercolor{frametitle} {bg=jblue!10,fg=jblue} +\setbeamercolor{cboxb}{fg=black,bg=jblue} +\setbeamercolor{cboxr}{fg=black,bg=red} + +% set colors for itemize/enumerate +\setbeamercolor{item}{fg=ngreen} +\setbeamercolor{item projected}{fg=white,bg=ngreen} + +% set colors for blocks +%\setbeamercolor{block title}{fg=ngreen,bg=white} +%\setbeamercolor{block body}{fg=black,bg=white} + +%set colors for alerted blocks (blocks with frame) +\setbeamercolor{block alerted title}{fg=white,bg=jblue} +\setbeamercolor{block alerted body}{fg=black,bg=jblue!10} + +% set the fonts +\setbeamerfont{section in head/foot}{series=\bfseries} +\setbeamerfont{block title}{series=\bfseries} +\setbeamerfont{block alerted title}{series=\bfseries} +\setbeamerfont{frametitle}{series=\bfseries} +\setbeamerfont{frametitle}{size=\Large} + +% set some beamer theme 
options +\setbeamertemplate{title page}[default][colsep=-4bp,rounded=true] +\setbeamertemplate{sections/subsections in toc}[square] +\setbeamertemplate{items}[circle] +\setbeamertemplate{blocks}[width=0.0] +\beamertemplatenavigationsymbolsempty + +% set bibliography style +\setbeamertemplate{bibliography item}[text] +\setbeamercolor{bibliography item}{fg=black,bg=white} +\setbeamercolor{bibliography entry author}{fg=black,bg=white} +\setbeamercolor{bibliography item}{fg=black,bg=white} + +% define some length variables that are used by the template +\newlength{\inboxwd} +\newlength{\iinboxwd} +\newlength{\inboxrule} +\makeatletter +\makeatother + +%============================================================================== +% build the poster title +%============================================================================== +\setbeamertemplate{headline}{ + \leavevmode + \begin{columns} + \begin{column}{.2\linewidth} + \vskip1cm + \centering + %\includegraphics[height=4in]{UIUC-logo} + \end{column} + \vspace{1cm} + \begin{column}{.6\linewidth} + \vskip1cm + \centering + \usebeamercolor{title in headline}{\color{jblue}\Huge{\textbf{\inserttitle}}\\[0.5ex]} + \usebeamercolor{author in headline}{\color{fg}\Large{\insertauthor}\\[1ex]} + \usebeamercolor{institute in headline}{\color{fg}\large{\insertinstitute}\\[1ex]} + \vskip1cm + \end{column} + \vspace{1cm} + \begin{column}{.2\linewidth} + \vskip1cm + \centering + %\includegraphics[height=4in]{graph500-logo} + \vskip1cm + \end{column} + \vspace{1cm} + \end{columns} + \vspace{0.5in} + \hspace{0.5in}\begin{beamercolorbox}[wd=47in,colsep=0.15cm]{cboxb}\end{beamercolorbox} + \vspace{0.1in} +} + +% Block definition +\setbeamertemplate{block begin} +{ + \par\vskip\medskipamount + \begin{beamercolorbox}[colsep*=0ex,dp={2ex},center]{block title} + \vskip-0.25cm + \usebeamerfont{block title}\large\insertblocktitle + \begin{flushleft} + \vskip-1cm + \begin{tikzpicture}[remember picture,overlay] + \shade [inner color=gray,outer color=white] + (0,0) rectangle (\textwidth,0.3cm); + \end{tikzpicture} + \end{flushleft} + \end{beamercolorbox} + {\parskip0pt\par} + \ifbeamercolorempty[bg]{block title} + {} + {\ifbeamercolorempty[bg]{block body}{}{\nointerlineskip\vskip-0.5pt}} + \usebeamerfont{block body} + \vskip-0.5cm + \begin{beamercolorbox}[colsep*=0ex,vmode]{block body} + \justifying +} + +\setbeamertemplate{block end} +{ + \end{beamercolorbox} + \vskip\smallskipamount +} + +% Alert block definition (with frame) +\setbeamertemplate{block alerted begin} +{ + \par\vskip\medskipamount + \begin{beamercolorbox}[sep=0ex,rounded=true,center,dp={2ex}]{block alerted title} + \vskip0.01cm + \usebeamerfont{block title}\large\insertblocktitle + \end{beamercolorbox} + {\parskip0pt\par} + \usebeamerfont{block body} + \vskip-0.8cm + \begin{beamercolorbox}[sep=0.5cm, rounded=true,center]{block alerted title} + \setlength{\inboxwd}{\linewidth} + \addtolength{\inboxwd}{-1cm} + \begin{beamercolorbox}[rounded=true,wd={\inboxwd},center]{block alerted body} + \setlength{\iinboxwd}{\inboxwd} + \setlength{\inboxrule}{\inboxwd} + \addtolength{\iinboxwd}{-0.5cm} + \addtolength{\inboxrule}{0.5cm} + \begin{center} + \begin{minipage}{\iinboxwd} + \justifying +} + +\setbeamertemplate{block alerted end} +{ + \end{minipage} + \end{center} + \end{beamercolorbox} + \end{beamercolorbox} + \vskip\smallskipamount +} \ No newline at end of file diff --git a/poster/cracking_cascades_classposter.tex b/poster/cracking_cascades_classposter.tex new file mode 100644 index 0000000..3f8204e 
--- /dev/null +++ b/poster/cracking_cascades_classposter.tex @@ -0,0 +1,461 @@ +\documentclass[final]{beamer} +\usepackage[utf8]{inputenc} +\usepackage[scale=1.6]{beamerposter} % Use the beamerposter package for laying out the poster + +\usetheme{confposter} % Use the confposter theme supplied with this template + +\usepackage{color, bbm} +\setbeamercolor{block title}{fg=dblue,bg=white} % Colors of the block titles +\setbeamercolor{block body}{fg=black,bg=white} % Colors of the body of blocks +\setbeamercolor{block alerted title}{fg=white,bg=dblue!70} % Colors of the highlighted block titles +\setbeamercolor{block alerted body}{fg=black,bg=dblue!10} % Colors of the body of highlighted blocks +% Many more colors are available for use in beamerthemeconfposter.sty + +%----------------------------------------------------------- +% Define the column widths and overall poster size +% To set effective sepwid, onecolwid and twocolwid values, first choose how many columns you want and how much separation you want between columns +% In this template, the separation width chosen is 0.024 of the paper width and a 4-column layout +% onecolwid should therefore be (1-(# of columns+1)*sepwid)/# of columns e.g. (1-(4+1)*0.024)/4 = 0.22 +% Set twocolwid to be (2*onecolwid)+sepwid = 0.464 +% Set threecolwid to be (3*onecolwid)+2*sepwid = 0.708 + +\newlength{\sepwid} +\newlength{\onecolwid} +\newlength{\twocolwid} +\newlength{\threecolwid} +\setlength{\paperwidth}{48in} % A0 width: 46.8in +\setlength{\paperheight}{40in} % A0 height: 33.1in +\setlength{\sepwid}{0.024\paperwidth} % Separation width (white space) between columns +\setlength{\onecolwid}{0.22\paperwidth} % Width of one column +\setlength{\twocolwid}{0.464\paperwidth} % Width of two columns +\setlength{\threecolwid}{0.708\paperwidth} % Width of three columns +\setlength{\topmargin}{-1in} % Reduce the top margin size +%----------------------------------------------------------- + +\usepackage{graphicx} % Required for including images + +\usepackage{booktabs} % Top and bottom rules for tables + + + +%---------------------------------------------------------------------------------------- +% TITLE SECTION +%---------------------------------------------------------------------------------------- + +\title{Sparse Recovery for Graph Reconstruction } % Poster title + +\author{Eric Balkanski, Jean Pouget-Abadie} % Author(s) + +\institute{Harvard University} % Institution(s) +%---------------------------------------------------------------------------------------- +\begin{document} +\addtobeamertemplate{block end}{}{\vspace*{2ex}} % White space under blocks +\addtobeamertemplate{block alerted end}{}{\vspace*{2ex}} % White space under highlighted (alert) blocks + +\setlength{\belowcaptionskip}{2ex} % White space under figures +\setlength\belowdisplayshortskip{2ex} % White space under equations + +\begin{frame}[t] % The whole poster is enclosed in one beamer frame + +\begin{columns}[t] % The whole poster consists of three major columns, the second of which is split into two columns twice - the [t] option aligns each column's content to the top + +\begin{column}{\sepwid}\end{column} % Empty spacer column + +\begin{column}{\onecolwid} % The first column + +%---------------------------------------------------------------------------------------- +% INTODUCTION +%---------------------------------------------------------------------------------------- + + +%\vspace{- 15.2 cm} +%\begin{center} +%{\includegraphics[height=7em]{logo.png}} % First university/lab logo 
on the left +%\end{center} + +%\vspace{4.6 cm} + +\begin{block}{Problem Statement} +\begin{center} +\bf{How can we reconstruct a graph on which observed cascades spread?} +\end{center} +\end{block} + + +%{\bf Graph Reconstruction}: + +%\begin{itemize} +%\item \{${\cal G}, \vec p$\}: directed graph, edge probabilities +%\item $F$: cascade generating model +%\item ${\cal M} := F\{{\cal G}, \vec p\}$: cascade +%\end{itemize} + +%{\bf Objective}: +%\begin{itemize} +%\item Find algorithm which computes $F^{-1}({\cal M}) = \{{\cal G}, \vec p\}$ w.h.p., i.e. recovers graph from cascades. +%\end{itemize} + +%{\bf Approach} +%\begin{itemize} +%\item Frame graph reconstruction as a {\it Sparse Recovery} problem for two cascade generating models. +%\end{itemize} + +%Given a set of observed cascades, the \textbf{graph reconstruction problem} consists of finding the underlying graph on which these cascades spread. We assume that these cascades come from the classical \textbf{Independent Cascade Model} where at each time step, newly infected nodes infect each of their neighbor with some probability. + +%In previous work, this problem has been formulated in different ways, including a convex optimization and a maximum likelihood problem. However, there is no known algorithm for graph reconstruction with theoretical guarantees and with a reasonable required sample size. + +%We formulate a novel approach to this problem in which we use \textbf{Sparse Recovery} to find the edges in the unknown underlying network. Sparse Recovery is the problem of finding the sparsest vector $x$ such that $\mathbf{M x =b}$. In our case, for each node $i$, we wish to recover the vector $x = p_i$ where $p_{i_j}$ is the probability that node $j$ infects node $i$ if $j$ is active. To recover this vector, we are given $M$, where row $M_{t,k}$ indicates which nodes are infected at time $t$ in observed cascade $k$, and $b$, where $b_{t+1,k}$ indicates if node $i$ is infected at time $t+1$ in cascade $k$. Since most nodes have a small number of neighbors in large networks, we can assume that these vectors are sparse. Sparse Recovery is a well studied problem which can be solved efficiently and with small error if $M$ satisfies certain properties. In this project, we empirically study to what extent $M$ satisfies the Restricted Isometry Property. 
+
+
+%---------------------------------------------------------------------------------
+%---------------------------------------------------------------------------------
+
+\begin{block}{Voter Model}
+
+\begin{figure}
+\centering
+\includegraphics[width=0.6\textwidth]{images/voter.png}
+\end{figure}
+
+\vspace{0.5 cm}
+{\bf Description}
+
+\vspace{0.5 cm}
+
+\begin{itemize}
+\item $\mathbb{P}$({\color{blue} blue} at $t=0) = p_{\text{init}}$
+\item $\mathbb{P}$({\color{blue} blue} at $t+1) = \frac{\text{Number of {\color{blue}blue} neighbors}}{\text{Total number of neighbors}}$
+\end{itemize}
+
+\vspace{0.5 cm}
+
+{\bf Sparse Recovery Formulation}
+
+\vspace{0.5 cm}
+
+To recover the neighbors of $v_1$, observe which nodes are {\color{red} red} (1) or {\color{blue} blue} (0) at time step $t$:
+\begin{align*}
+&v_2 \hspace{0.2 cm} v_3 \hspace{0.2 cm} v_4 \hspace{0.2 cm} v_5 &\\
+\vspace{1 cm}
+M = & \left( \begin{array}{cccc}
+0 & 0 & 1 & 1 \\
+1 & 1 & 0 & 0 \\
+\end{array} \right) & \begin{array}{l} \hspace{ - 5cm}
+\text{time step 0} \\
+ \hspace{ - 5cm} \text{time step 1} \\
+\end{array}
+\end{align*}
+
+and which color $v_1$ is at time step $t+1$ due to $M$:
+
+\begin{align*}
+b_1 = & \left( \begin{array}{c}
+1 \\
+1 \\
+\end{array} \right) & \begin{array}{l} \hspace{ - 5cm}
+\text{time step 1} \\
+ \hspace{ - 5cm} \text{time step 2} \\
+ \end{array}
+\end{align*}
+
+Then,
+
+\begin{equation}
+\boxed{M \vec x_1 = \vec b_1 + \epsilon \nonumber}
+\end{equation}
+
+where $(\vec x_{1})_j := \frac{1}{\deg(1)} \cdot \left[\text{$j$ parent of 1 in }{\cal G}\right] $
+
+\end{block}
+
+%---------------------------------------------------------------------------------
+%---------------------------------------------------------------------------------
+
+%---------------------------------------------------------------------------------
+%---------------------------------------------------------------------------------
+
+\end{column} % End of the first column
+
+\begin{column}{\sepwid}\end{column} % Empty spacer column
+
+\begin{column}{\onecolwid} % The first column
+
+%----------------------------------------------------------------------------------------
+% INDEPENDENT CASCADE MODEL
+%----------------------------------------------------------------------------------------
+\begin{block}{Independent Cascade Model}
+\begin{figure}
+\centering
+\includegraphics[width=0.6\textwidth]{images/icc.png}
+\end{figure}
+
+\vspace{0.5 cm}
+
+{\bf Description}
+
+\vspace{0.5 cm}
+
+\begin{itemize}
+\item Three possible states: {\color{blue} susceptible}, {\color{red} infected}, {\color{yellow} inactive }
+\item $\mathbb{P}$(infected at t=0)$=p_{\text{init}}$
+\item Infected node $j$ infects its susceptible neighbors $i$ with probability $p_{j,i}$ independently
+\end{itemize}
+
+\vspace{0.5 cm}
+
+{\bf Sparse Recovery Formulation}
+
+\vspace{0.5 cm}
+
+To recover the neighbors of $v_5$, observe which nodes are {\color{red} red} (1), {\color{blue} blue} (0), or {\color{yellow} yellow} (0) at time step $t$:
+\begin{align*}
+&v_1 \hspace{0.2 cm} v_2 \hspace{0.2 cm} v_3 \hspace{0.2 cm} v_4 \\
+\vspace{1 cm}
+M = & \left( \begin{array}{cccc}
+1 & 0 & 0 & 0 \\
+0 & 1 & 1 & 0 \\
+\end{array} \right) \begin{array}{l} \hspace{ 1cm}
+\text{time step 0} \\
+ \hspace{ 1cm} \text{time step 1} \\
+ \end{array} +\end{align*} + +and if $M$ caused $v_5$ to be infected at time step $t+1$: + +\begin{align*} +b_5 = & \left( \begin{array}{c} +0 \\ +1 \\ +\end{array} \right) \begin{array}{l} \hspace{ 1cm} +\text{time step 1} \\ + \hspace{ 1cm} \text{time step 2} \\ + \end{array} +\end{align*} + + +Then, + +\begin{equation} +\boxed{e^{M \vec \theta_5} = (1 - \vec b_5) + \epsilon} \nonumber +\end{equation} + +where $(\vec \theta_5)_j := \log ( 1 - p_{j,5}) $ + +\vspace{1 cm} + + +This problem is a {\bf Noisy Sparse Recovery} problem, which has been studied extensively. Here, the vectors $\vec x_i$ are deg(i)-sparse. + + +\end{block} + +%---------------------------------------------------------------------------------------- + + + + + + + + + +%---------------------------------------------------------------------------------------- +% MIP +%---------------------------------------------------------------------------------------- + +% \begin{block}{RIP property} + +% %The Restricted Isometry Property (RIP) characterizes a quasi-orthonormality of the measurement matrix M on sparse vectors. + +% For all k, we define $\delta_k$ as the best possible constant such that for all unit-normed ($l$2) and k-sparse vectors $x$ + +% \begin{equation} +% 1-\delta_k \leq \|Mx\|^2_2 \leq 1 + \delta_k +% \end{equation} + +% In general, the smaller $\delta_k$ is, the better we can recover $k$-sparse vectors $x$! + +% \end{block} + +%---------------------------------------------------------------------------------------- + +\end{column} + +\begin{column}{\sepwid}\end{column} % Empty spacer column + +\begin{column}{\onecolwid} % The first column within column 2 (column 2.1) + + +%---------------------------------------------------------------------------------------- + + +\begin{block}{Algorithms} + +{\bf Voter Model} + +\begin{itemize} +\item Solve for each node i: +\begin{equation} +\min_{\vec x_i} \|\vec x_i\|_1 + \lambda \|M \vec x_i - \vec b_i \|_2 \nonumber +\end{equation} +\end{itemize} + +{\bf Independent Cascade Model} + +\begin{itemize} +\item Solve for each node i: +\begin{equation} +\min_{\vec \theta_i} \|\vec \theta_i\|_1 + \lambda \|e^{M \vec \theta_i} - (1 - \vec b_i) \|_2 \nonumber +\end{equation} +\end{itemize} + +\end{block} + +\begin{block}{Restricted Isometry Property (RIP)} +{\bf Definition} +\begin{itemize} +\item Characterizes a quasi-orthonormality of M on sparse vectors. + +\item The RIP constant $\delta_k$ is the best possible constant such that for all unit-normed ($l$2) and k-sparse vectors $x$: + +\begin{equation} +1-\delta_k \leq \|Mx\|^2_2 \leq 1 + \delta_k \nonumber +\end{equation} + +\item The smaller $\delta_k$ is, the better we can recover $k$-sparse vectors $x$. 
+\end{itemize} + + + + +\end{block} + +\begin{block}{Theoretical Guarantees} + +With small RIP constants $(\delta \leq 0.25)$ for $M$ and some assumption on the noise $\epsilon$: + +{\bf Theorem \cite{candes}} + +If node $i$ has degree $\Delta$ and $n_{\text{rows}}(M) \geq C_1 \mu \Delta \log n$, then, w.h.p., + +$$\| \hat x - x^* \|_2 \leq C (1 + \log^{3/2}(n))\sqrt{\frac{\Delta \log n}{n_{\text{rows}}(M) }}$$ + + +\end{block} + + + + +%---------------------------------------------------------------------------------------- +% RESULTS +%---------------------------------------------------------------------------------------- + +\begin{block}{RIP Experiments} + +\begin{center} +\begin{table} +\begin{tabular}{c | c | c | c | c } +& $c$ = 100, &$c$ = 1000,& $c$ = 100, &$c$ = 1000,\\ +& $i$ = 0.1& $i$ = 0.1& $i$ = 0.05& $i$ = 0.05\\ + \hline + $\delta_4$ & 0.54 & 0.37 &0.43&0.23 \\ + \end{tabular} + \caption{RIP constant for a small graph. Here, $c$ is the number of cascades and $i$ is $p_{\text{init}}$.} +\end{table} +\end{center} + + +\end{block} + +%---------------------------------------------------------------------------------------- + + +\end{column} % End of the second column + +\begin{column}{\sepwid}\end{column} % Empty spacer column + +\begin{column}{\onecolwid} % The third column + +%---------------------------------------------------------------------------------------- +% IOVERALL COMPARISON +%---------------------------------------------------------------------------------------- + +%\vspace{- 14.2 cm} +%\begin{center} +%{\includegraphics[height=7em]{cmu_logo.png}} % First university/lab logo on the left +%\end{center} + +%\vspace{4 cm} + +\begin{alertblock}{Experimental Results} + + + + +\end{alertblock} + +%---------------------------------------------------------------------------------------- + + +%---------------------------------------------------------------------------------------- +% CONCLUSION +%---------------------------------------------------------------------------------------- + +\begin{block}{Conclusion} + +\begin{center} + + +{\bf Graph reconstruction can naturally be expressed as Sparse Recovery. Understanding properties of $M$, for example RIP, leads to theoretical guarantees on the reconstruction.} + +\end{center} + +\end{block} + +%---------------------------------------------------------------------------------------- +% REFERENCES +%---------------------------------------------------------------------------------------- + +\begin{block}{References} + +\begin{thebibliography}{42} + +\bibitem{candes} +Candès, E., and Plan, Y. +\newblock {\it A Probabilistic and RIPless Theory of Compressed Sensing} +\newblock Information Theory, IEEE Transactions on, 57(11): 7235--7254, +\newblock 2011. 
+\end{thebibliography} + +\end{block} + +%---------------------------------------------------------------------------------------- + +\end{column} % End of the third column + +\end{columns} % End of all the columns in the poster + +\end{frame} % End of the enclosing frame + + + +\end{document} diff --git a/poster/images/greedy_sparse_comparison.png b/poster/images/greedy_sparse_comparison.png new file mode 100644 index 0000000..3fab5b0 Binary files /dev/null and b/poster/images/greedy_sparse_comparison.png differ diff --git a/poster/images/icc.png b/poster/images/icc.png new file mode 100644 index 0000000..2148eed Binary files /dev/null and b/poster/images/icc.png differ diff --git a/poster/images/sparse_recovery_illustration.svg b/poster/images/sparse_recovery_illustration.svg new file mode 100644 index 0000000..bef82c2 --- /dev/null +++ b/poster/images/sparse_recovery_illustration.svg @@ -0,0 +1,655 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + diff --git a/poster/images/voter.png b/poster/images/voter.png new file mode 100644 index 0000000..c1325cb Binary files /dev/null and b/poster/images/voter.png differ diff --git a/presentation/econcs/beamer.tex b/presentation/econcs/beamer.tex new file mode 100644 index 0000000..7d4b5c1 --- /dev/null +++ b/presentation/econcs/beamer.tex @@ -0,0 +1,381 @@ +\documentclass[10pt]{beamer} + +\usepackage{amssymb, amsmath, graphicx, amsfonts, color} + +\title{Estimating a Graph's Parameters from Cascades} +\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel} +\date{} + +\begin{document} + +\begin{frame} +\titlepage +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/drawing.pdf} +\caption{A network} +\end{figure} +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/noedges_step1.pdf} +\caption{Cascade 1: $t=0$} +\end{figure} +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/noedges_step2.pdf} +\caption{Cascade 1: $t=1$} +\end{figure} +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/noedges_step3.pdf} +\caption{Cascade 1: $t=2$} +\end{figure} +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/noedges_step1_cascade2} +\caption{Cascade 2: $t=0$} +\end{figure} +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/noedges_step2_cascade2} +\caption{Cascade 2: $t=1$} +\end{figure} +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/noedges_step3_cascade2} +\caption{Cascade 2: $t=2$} +\end{figure} +\end{frame} + + +\begin{frame} +\frametitle{Context} + +Notation: +\begin{itemize} +\item $({\cal G}, \theta)$ : graph, parameters +\item Cascade: diffusion process of a `behavior' on $({\cal G}, \theta)$ +\item $(X_t)_c$ : set of `active' nodes at time t for cascade $c$ +\end{itemize} + + +\begin{table} +\begin{tabular}{c c} +Graph $\implies$ Cascades & Cascades $\implies$ Graph \\ \hline +$({\cal G}, \theta)$ is known & $(X_t)_c$ is observed \\ +Predict $(X_t) | X_0$ & Recover $({\cal G}, \theta)$ \\ +\end{tabular} +\end{table} + +Summary: +\begin{itemize} +\item Many algorithms \emph{require} knowledge of $({\cal G}, \theta)$ +\item {\bf Graph Inference} is the problem of \emph{learning} $({\cal G}, 
+
+\begin{frame}
+\begin{block}{Decomposability}
+Learning the graph $\Leftrightarrow$ Learning the parents of a single node
+\end{block}
+\end{frame}
+
+
+\begin{frame}
+\frametitle{Problem Statement}
+\begin{itemize}
+\pause
+\item Can we learn ${\cal G}$ from $(X_t)_c$?
+\pause
+\item How many cascades? How many steps in each cascade?
+\pause
+\item Can we learn $\theta$ from $(X_t)_c$?
+\pause
+\item How does the error decrease with $n_{\text{cascades}}$?
+\pause
+\item Are there graphs which are easy to learn? Harder to learn?
+\pause
+\item What kind of diffusion processes can we consider?
+\pause
+\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\frametitle{Notation}
+\begin{itemize}
+\item $n$: number of measurements
+\item $N$: number of cascades
+\item $m$: number of nodes
+\item $s$: degree of the node considered
+\end{itemize}
+\end{frame}
+
+
+\begin{frame}
+\frametitle{Related Work}
+
+\begin{itemize}
+\pause
+\item Can we learn ${\cal G}$ from $(X_t)_c$?
+\pause
+\\{\color{blue} Yes}
+\pause
+\item How many cascades? How many steps in each cascade?
+\pause
+\\ {\color{blue} $\mathrm{poly}(s) \log m$ cascades}
+\pause
+\item Can we learn $\theta$ from $(X_t)_c$?
+\pause
+\\ {\color{blue} (?)}
+\pause
+\item How does the error decrease with $n_{\text{cascades}}$?
+\pause
+\\ {\color{blue} (?)}
+\pause
+\item Are there graphs which are easy to learn? Harder to learn?
+\pause
+\\{\color{blue} Sparse Graphs are easy}
+\pause
+\item What kind of diffusion processes can we consider?
+\pause
+\\{\color{blue} IC Model (discrete and continuous)}
+\pause
+\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
+\pause
+\\{\color{blue} (?)\dots $s \log (m/s)$ in a specific setting}
+\end{itemize}
+\end{frame}
+
+
+
+\begin{frame}
+\frametitle{Our Work}
+\begin{itemize}
+\pause
+\item Can we learn ${\cal G}$ from $(X_t)_c$?
+\pause
+\\{\color{blue} Yes} $\rightarrow$ {\color{red} Yes}
+\pause
+\item How many cascades? How many steps in each cascade?
+\pause
+\\ {\color{blue} $\mathrm{poly}(s) \log m$ cascades} $\rightarrow$ {\color{red} $s\log m$ measurements}
+\pause
+\item Can we learn $\theta$ from $(X_t)_c$?
+\pause
+\\ {\color{blue} (?)} $\rightarrow$ {\color{red} Yes!}
+\pause
+\item How does the error decrease with $n_{\text{cascades}}$?
+\pause
+\\ {\color{blue} (?)} $\rightarrow$ {\color{red} ${\cal O}(\sqrt{s\log m/n})$}
+\pause
+\item Are there graphs which are easy to learn? Harder to learn?
+\pause
+\\ {\color{blue} Sparse Graphs are easy} $\rightarrow$ {\color{red} Approx.\ sparsity is also easy}
+\pause
+\item What kind of diffusion processes can we consider?
+\pause
+\\ {\color{blue} IC Model (discrete, continuous)} $\rightarrow$ {\color{red} Large class of Cascade Models}
+\pause
+\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
+\pause
+\\{\color{blue} $s \log (m/s)$ in a specific setting} $\rightarrow$ {\color{red} $s \log (m/s)$ for approx.\ sparse graphs}
+\end{itemize}
+
+\end{frame}
+
+\begin{frame}
+\frametitle{Voter Model}
+\begin{itemize}
+\pause
+\item {\color{red} Red} and {\color{blue} Blue} nodes. At every step, each node $i$ chooses one of its neighbors $j$ with probability $p_{j,i}$ and adopts that color at $t+1$
+\pause
+\item If {\color{blue} Blue} is the `contagious' state:
+\pause
+\begin{equation}
+\nonumber
+\mathbb{P}(i \in X^{t+1}|X^t) = \sum_{j \in {\cal N}(i)\cap X^t} p_{j,i} = X^t \cdot \theta_i
+\end{equation}
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+\begin{itemize}
+\pause
+\item Each `infected' node $i$ has a probability $p_{i,j}$ of infecting each of its neighbors $j$.
+\pause
+\item A node stays `infected' for a single turn. Then it becomes `inactive'.
+$$\mathbb{P}(j \text{ becomes infected at } t+1 \,|\, X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+\begin{align}
+\mathbb{P}(j\in X_{t+1}|X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\
+& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\
+& = 1 - \exp \left[ X_{t} \cdot \theta_{j}\right]
+\end{align}
+
+where $\theta_{i,j} := \log (1 - p_{i,j})$ and $\theta_j := (\theta_{i,j})_i$
+\\[5ex]
+\begin{itemize}
+\item Support of $\vec \theta$ $\Leftrightarrow$ support of $\vec p$
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\frametitle{Model Comparison}
+\begin{table}
+\centering
+\begin{tabular}{c | c}
+Voter Model & Indep. Casc. Model \\[1ex]
+\hline \\[.1ex]
+Markov & Markov \\[3ex]
+Indep. prob. of $\in X^{t+1} | X^t$ & Indep. prob. of $\in X^{t+1} | X^t$ \\[3ex]
+$\mathbb{P}(j\in X_{t+1}|X_{t}) = X_{t} \cdot \theta_{j}$ & $\mathbb{P}(j\in X_{t+1}|X_{t}) = 1 - \exp(X_{t} \cdot \theta_{j})$ \\[3ex]
+Always Susceptible & Susceptible until infected \\
+\includegraphics[scale=.4]{../images/voter_model.pdf} & \includegraphics[scale=.3]{../images/icc_model.pdf} \\
+\end{tabular}
+\end{table}
+\end{frame}
+
+\begin{frame}
+\frametitle{Generalized Linear Cascade Models}
+\begin{definition}
+{\bf Generalized Linear Cascade Model} with inverse link function $f : \mathbb{R} \rightarrow [0,1]$:
+\begin{itemize}
+\item for each \emph{susceptible} node $j$ at time $t$, $\mathbb{P}(j \in X^{t+1}|X^t)$ is a Bernoulli random variable with parameter $f(\theta_j \cdot X^t)$
+\end{itemize}
+\end{definition}
+\end{frame}
+
+\begin{frame}
+\frametitle{Sparse Recovery}
+\begin{figure}
+\includegraphics[scale=.6]{../images/sparse_recovery_illustration.pdf}
+\caption{$f(X\cdot\theta) = b$}
+\end{figure}
+\end{frame}
+
+
+\begin{frame}
+\frametitle{$\ell_1$-penalized Maximum Likelihood}
+\begin{itemize}
+\item Decomposable node by node
+\item Sum over susceptible steps
+\end{itemize}
+
+\begin{block}{Likelihood function}
+\begin{equation}
+\nonumber
+{\cal L}(\theta_i| x^1, \dots, x^n) = \frac{1}{|{\cal T}_i|} \sum_{t \in {\cal T}_i} x^{t+1}_i \log f(\theta_i \cdot x^t) + (1 - x^{t+1}_i) \log(1 - f(\theta_i \cdot x^t))
+\end{equation}
+\end{block}
+
+\begin{block}{Algorithm}
+\begin{equation}
+\nonumber
+\hat\theta_i \in \arg \max_{\theta} {\cal L}(\theta| x^1, \dots, x^n) - \lambda \|\theta\|_1
+\end{equation}
+\end{block}
+
+\end{frame}
+
+\begin{frame}
+\frametitle{Main Result}
+\begin{theorem}
+Assume a condition on the Hessian and certain regularity properties of $f$; then $\exists C>0$, depending only on the properties of ${\cal G}$, such that with high probability:
+$$\| \theta^*_i - \hat \theta_i \|_2 \leq C\sqrt{\frac{s\log m}{n}}$$
+\end{theorem}
+
+\begin{corollary}
+By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$
+\end{corollary}
+
+\end{frame}
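+% A minimal sketch of the Algorithm block and the corollary above: proximal
+% gradient ascent on the penalized likelihood of one node, with the IC inverse
+% link f(z) = 1 - exp(z), followed by thresholding to read off the parents
+% (hypothetical names and step sizes, not the implementation evaluated here):
+%
+%   import numpy as np
+%
+%   def soft_threshold(v, t):
+%       return np.sign(v) * np.maximum(np.abs(v) - t, 0.0)
+%
+%   def fit_node(X, y, lam, lr=0.1, iters=2000):
+%       # X: (n, m) states x^t at steps where node i is susceptible
+%       # y: (n,) indicators of whether node i became infected at t+1
+%       n, m = X.shape
+%       theta = np.zeros(m)
+%       for _ in range(iters):
+%           z = X @ theta                        # theta <= 0, so z <= 0
+%           p = np.clip(1.0 - np.exp(z), 1e-10, 1 - 1e-10)
+%           g = (1 - y) - y * np.exp(z) / p      # d/dz of the log-likelihood
+%           theta = soft_threshold(theta + lr * (X.T @ g) / n, lr * lam)
+%           theta = np.minimum(theta, 0.0)       # keep IC weights nonpositive
+%       return theta
+%
+%   # support recovery as in the corollary (threshold tau is a tuning choice):
+%   # parents = np.flatnonzero(np.abs(fit_node(X, y, lam=0.1)) > tau)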
+
+\begin{frame}
+\frametitle{Approximate Sparsity}
+\begin{itemize}
+\item $\theta^*_{\lceil s \rceil}$: best $s$-sparse approximation to $\theta^*$
+\item $\|\theta^* - \theta^*_{\lceil s \rceil} \|_1$: `tail' of $\theta^*$
+\end{itemize}
+\begin{theorem}
+Assume a condition on the Hessian and certain regularity properties of $f$; then $\exists C_1, C_2>0$, depending only on the properties of ${\cal G}$, such that with high probability:
+\begin{equation}
+\|\hat \theta_i - \theta^*_i\|_2 \leq C_1 \sqrt{\frac{s\log m}{n}} + C_2 \sqrt[4]{\frac{s\log m}{n}}\|\theta^* - \theta^*_{\lceil s \rceil} \|_1
+\end{equation}
+\end{theorem}
+\end{frame}
+
+\begin{frame}
+\frametitle{Lower Bound}
+\begin{itemize}
+\item Under a correlation decay assumption for the IC model, ${\Omega}(s \log (N/s))$ cascades are necessary for graph reconstruction (Netrapalli and Sanghavi, SIGMETRICS'12)
+\item Adapting (Price \& Woodruff STOC'12), in the approximately sparse case, any algorithm for any generalized linear cascade model such that:
+$$\|\hat \theta - \theta^*\|_2 \leq C \|\theta^* - \theta^*_{\lceil s \rceil}\|_2$$
+requires ${\Omega}(s \log (n/s)/\log C)$ measurements.
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\frametitle{(RE) assumptions}
+\begin{block}{Assumption on the Hessian}
+\begin{itemize}
+\item
+The Hessian has to satisfy a `restricted eigenvalue property', i.e.\ its smallest eigenvalue on sparse vectors is bounded away from $0$.
+\end{itemize}
+\end{block}
+
+\begin{block}{From Hessian to Gram Matrix}
+\begin{itemize}
+\item $\mathbb{E}[X X^T]$ : `expected' Gram matrix of the observations
+\item $\mathbb{E}[X X^T]_{i,i}$ : probability that node $i$ is infected
+\item $\mathbb{E}[X X^T]_{i,j}$ : probability that nodes $i$ and $j$ are infected simultaneously
+\end{itemize}
+\end{block}
+\end{frame}
+
+\begin{frame}
+\frametitle{Future Work}
+
+\begin{block}{Linear Threshold Model}
+\begin{itemize}
+\item The linear threshold model is a generalized linear cascade with a non-differentiable inverse link function: $$\mathbb{P}(j \in X^{t+1}|X^t) = \mathbf{1}\{\theta_j \cdot X^t \geq t_j\}$$
+\end{itemize}
+\end{block}
+
+\begin{block}{Noisy Influence Maximization}
+\end{block}
+
+\begin{block}{Confidence Intervals}
+\end{block}
+
+\begin{block}{Active Learning}
+\end{block}
+\end{frame}
+
+\end{document}
diff --git a/presentation/econcs/figures/Screen Shot 2015-03-08 at 13.08.01.png b/presentation/econcs/figures/Screen Shot 2015-03-08 at 13.08.01.png
new file mode 100644
index 0000000..b053f0c
Binary files /dev/null and b/presentation/econcs/figures/Screen Shot 2015-03-08 at 13.08.01.png differ
diff --git a/presentation/econcs/figures/weighted_graph.png b/presentation/econcs/figures/weighted_graph.png
new file mode 100644
index 0000000..7deccc3
Binary files /dev/null and b/presentation/econcs/figures/weighted_graph.png differ
diff --git a/presentation/econcs/sparse.bib b/presentation/econcs/sparse.bib
new file mode 100644
index 0000000..5df4b59
--- /dev/null
+++ b/presentation/econcs/sparse.bib
@@ -0,0 +1,503 @@
+@article{CandesRomberTao:2006,
+author = {Candès, Emmanuel J. and Romberg, Justin K. and Tao, Terence},
+title = {Stable signal recovery from incomplete and inaccurate measurements},
+journal = {Communications on Pure and Applied Mathematics},
+volume = {59},
+number = {8},
+publisher = {Wiley Subscription Services, Inc., A Wiley Company},
+issn = {1097-0312},
+pages = {1207--1223},
+year = {2006},
+}
+
+
+@inproceedings{GomezRodriguez:2010,
+ author = {Gomez Rodriguez, Manuel and Leskovec, Jure and Krause, Andreas},
+ title = {Inferring Networks of Diffusion and Influence},
+ booktitle = {Proceedings of the 16th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
+ series = {KDD '10},
+ year = {2010},
+ isbn = {978-1-4503-0055-1},
+ location = {Washington, DC, USA},
+ pages = {1019--1028},
+ numpages = {10},
+ publisher = {ACM},
+ address = {New York, NY, USA},
+}
+
+
+@article{Netrapalli:2012,
+ author = {Netrapalli, Praneeth and Sanghavi, Sujay},
+ title = {Learning the Graph of Epidemic Cascades},
+ journal = {SIGMETRICS Perform. Eval. Rev.},
+ volume = {40},
+ number = {1},
+ month = {June},
+ year = {2012},
+ issn = {0163-5999},
+ pages = {211--222},
+ numpages = {12},
+ publisher = {ACM},
+ address = {New York, NY, USA},
+ keywords = {cascades, epidemics, graph structure learning},
+}
+
+@article{Negahban:2009,
+ author = {Negahban, Sahand N. and Ravikumar, Pradeep and Wainwright, Martin J. and Yu, Bin},
+ title = {A Unified Framework for High-Dimensional Analysis of M-Estimators with Decomposable Regularizers},
+ journal = {Statistical Science},
+ year = {2012},
+ month = {December},
+ volume = {27},
+ number = {4},
+ pages = {538--557},
+}
+
+@article{Zhao:2006,
+ author = {Zhao, Peng and Yu, Bin},
+ title = {On Model Selection Consistency of Lasso},
+ journal = {J. Mach. Learn. Res.},
+ issue_date = {12/1/2006},
+ volume = {7},
+ month = dec,
+ year = {2006},
+ issn = {1532-4435},
+ pages = {2541--2563},
+ numpages = {23},
+ url = {http://dl.acm.org/citation.cfm?id=1248547.1248637},
+ acmid = {1248637},
+ publisher = {JMLR.org},
+}
+
+@inproceedings{Daneshmand:2014,
+ author = {Hadi Daneshmand and
+ Manuel Gomez{-}Rodriguez and
+ Le Song and
+ Bernhard Sch{\"{o}}lkopf},
+ title = {Estimating Diffusion Network Structures: Recovery Conditions, Sample
+ Complexity {\&} Soft-thresholding Algorithm},
+ booktitle = {Proceedings of the 31st International Conference on Machine Learning,
+ {ICML} 2014, Beijing, China, 21-26 June 2014},
+ pages = {793--801},
+ year = {2014},
+ url = {http://jmlr.org/proceedings/papers/v32/daneshmand14.html},
+ timestamp = {Fri, 07 Nov 2014 20:42:30 +0100},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/icml/DaneshmandGSS14},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@inproceedings{Kempe:03,
+ author = {David Kempe and
+ Jon M. Kleinberg and
+ {\'{E}}va Tardos},
+ title = {Maximizing the spread of influence through a social network},
+ booktitle = {Proceedings of the Ninth {ACM} {SIGKDD} International Conference on
+ Knowledge Discovery and Data Mining, Washington, DC, USA, August 24
+ - 27, 2003},
+ pages = {137--146},
+ year = {2003},
+ url = {http://doi.acm.org/10.1145/956750.956769},
+ doi = {10.1145/956750.956769},
+ timestamp = {Mon, 13 Feb 2006 15:34:20 +0100},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/KempeKT03},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@inproceedings{Abrahao:13,
+ author = {Bruno D.
Abrahao and + Flavio Chierichetti and + Robert Kleinberg and + Alessandro Panconesi}, + title = {Trace complexity of network inference}, + booktitle = {The 19th {ACM} {SIGKDD} International Conference on Knowledge Discovery + and Data Mining, {KDD} 2013, Chicago, IL, USA, August 11-14, 2013}, + pages = {491--499}, + year = {2013}, + url = {http://doi.acm.org/10.1145/2487575.2487664}, + doi = {10.1145/2487575.2487664}, + timestamp = {Tue, 10 Sep 2013 10:11:57 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/AbrahaoCKP13}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + + +@article{vandegeer:2009, +author = "van de Geer, Sara A. and B{\"u}hlmann, Peter", +doi = "10.1214/09-EJS506", +fjournal = "Electronic Journal of Statistics", +journal = "Electron. J. Statist.", +pages = "1360--1392", +publisher = "The Institute of Mathematical Statistics and the Bernoulli Society", +title = "On the conditions used to prove oracle results for the Lasso", +url = "http://dx.doi.org/10.1214/09-EJS506", +volume = "3", +year = "2009" +} + +@article{vandegeer:2011, +author = "van de Geer, Sara and Bühlmann, Peter and Zhou, Shuheng", +doi = "10.1214/11-EJS624", +fjournal = "Electronic Journal of Statistics", +journal = "Electron. J. Statist.", +pages = "688--749", +publisher = "The Institute of Mathematical Statistics and the Bernoulli Society", +title = "The adaptive and the thresholded Lasso for potentially misspecified models (and a lower bound for the Lasso)", +url = "http://dx.doi.org/10.1214/11-EJS624", +volume = "5", +year = "2011" +} + +@article{Zou:2006, +author = {Zou, Hui}, +title = {The Adaptive Lasso and Its Oracle Properties}, +journal = {Journal of the American Statistical Association}, +volume = {101}, +number = {476}, +pages = {1418-1429}, +year = {2006}, +doi = {10.1198/016214506000000735}, +URL = {http://dx.doi.org/10.1198/016214506000000735}, +} + +@article{Jacques:2013, + author = {Laurent Jacques and + Jason N. Laska and + Petros T. Boufounos and + Richard G. Baraniuk}, + title = {Robust 1-Bit Compressive Sensing via Binary Stable Embeddings of Sparse + Vectors}, + journal = {{IEEE} Transactions on Information Theory}, + volume = {59}, + number = {4}, + pages = {2082--2102}, + year = {2013}, + url = {http://dx.doi.org/10.1109/TIT.2012.2234823}, + doi = {10.1109/TIT.2012.2234823}, + timestamp = {Tue, 09 Apr 2013 19:57:48 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/JacquesLBB13}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Boufounos:2008, + author = {Petros Boufounos and + Richard G. 
Baraniuk}, + title = {1-Bit compressive sensing}, + booktitle = {42nd Annual Conference on Information Sciences and Systems, {CISS} + 2008, Princeton, NJ, USA, 19-21 March 2008}, + pages = {16--21}, + year = {2008}, + url = {http://dx.doi.org/10.1109/CISS.2008.4558487}, + doi = {10.1109/CISS.2008.4558487}, + timestamp = {Wed, 15 Oct 2014 17:04:27 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/ciss/BoufounosB08}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Gupta:2010, + author = {Ankit Gupta and + Robert Nowak and + Benjamin Recht}, + title = {Sample complexity for 1-bit compressed sensing and sparse classification}, + booktitle = {{IEEE} International Symposium on Information Theory, {ISIT} 2010, + June 13-18, 2010, Austin, Texas, USA, Proceedings}, + pages = {1553--1557}, + year = {2010}, + url = {http://dx.doi.org/10.1109/ISIT.2010.5513510}, + doi = {10.1109/ISIT.2010.5513510}, + timestamp = {Thu, 15 Jan 2015 17:11:50 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/GuptaNR10}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{Plan:2014, + author = {Yaniv Plan and + Roman Vershynin}, + title = {Dimension Reduction by Random Hyperplane Tessellations}, + journal = {Discrete {\&} Computational Geometry}, + volume = {51}, + number = {2}, + pages = {438--461}, + year = {2014}, + url = {http://dx.doi.org/10.1007/s00454-013-9561-6}, + doi = {10.1007/s00454-013-9561-6}, + timestamp = {Tue, 11 Feb 2014 13:48:56 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/dcg/PlanV14}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{bickel:2009, +author = "Bickel, Peter J. and Ritov, Ya’acov and Tsybakov, Alexandre B.", +doi = "10.1214/08-AOS620", +fjournal = "The Annals of Statistics", +journal = "Ann. Statist.", +month = "08", +number = "4", +pages = "1705--1732", +publisher = "The Institute of Mathematical Statistics", +title = "Simultaneous analysis of Lasso and Dantzig selector", +url = "http://dx.doi.org/10.1214/08-AOS620", +volume = "37", +year = "2009" +} + +@article{raskutti:10, + author = {Garvesh Raskutti and + Martin J. Wainwright and + Bin Yu}, + title = {Restricted Eigenvalue Properties for Correlated Gaussian Designs}, + journal = {Journal of Machine Learning Research}, + volume = {11}, + pages = {2241--2259}, + year = {2010}, + url = {http://portal.acm.org/citation.cfm?id=1859929}, + timestamp = {Wed, 15 Oct 2014 17:04:32 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/RaskuttiWY10}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{rudelson:13, + author = {Mark Rudelson and + Shuheng Zhou}, + title = {Reconstruction From Anisotropic Random Measurements}, + journal = {{IEEE} Transactions on Information Theory}, + volume = {59}, + number = {6}, + pages = {3434--3447}, + year = {2013}, + url = {http://dx.doi.org/10.1109/TIT.2013.2243201}, + doi = {10.1109/TIT.2013.2243201}, + timestamp = {Tue, 21 May 2013 14:15:50 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/RudelsonZ13}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{bipw11, + author = {Khanh Do Ba and + Piotr Indyk and + Eric Price and + David P. 
Woodruff}, + title = {Lower Bounds for Sparse Recovery}, + journal = {CoRR}, + volume = {abs/1106.0365}, + year = {2011}, + url = {http://arxiv.org/abs/1106.0365}, + timestamp = {Mon, 05 Dec 2011 18:04:39 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1106-0365}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{pw11, + author = {Eric Price and + David P. Woodruff}, + title = {{(1} + eps)-Approximate Sparse Recovery}, + booktitle = {{IEEE} 52nd Annual Symposium on Foundations of Computer Science, {FOCS} + 2011, Palm Springs, CA, USA, October 22-25, 2011}, + pages = {295--304}, + year = {2011}, + crossref = {DBLP:conf/focs/2011}, + url = {http://dx.doi.org/10.1109/FOCS.2011.92}, + doi = {10.1109/FOCS.2011.92}, + timestamp = {Tue, 16 Dec 2014 09:57:24 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/PriceW11}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@proceedings{DBLP:conf/focs/2011, + editor = {Rafail Ostrovsky}, + title = {{IEEE} 52nd Annual Symposium on Foundations of Computer Science, {FOCS} + 2011, Palm Springs, CA, USA, October 22-25, 2011}, + publisher = {{IEEE} Computer Society}, + year = {2011}, + url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6108120}, + isbn = {978-1-4577-1843-4}, + timestamp = {Mon, 15 Dec 2014 18:48:45 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/2011}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{pw12, + author = {Eric Price and + David P. Woodruff}, + title = {Applications of the Shannon-Hartley theorem to data streams and sparse + recovery}, + booktitle = {Proceedings of the 2012 {IEEE} International Symposium on Information + Theory, {ISIT} 2012, Cambridge, MA, USA, July 1-6, 2012}, + pages = {2446--2450}, + year = {2012}, + crossref = {DBLP:conf/isit/2012}, + url = {http://dx.doi.org/10.1109/ISIT.2012.6283954}, + doi = {10.1109/ISIT.2012.6283954}, + timestamp = {Mon, 01 Oct 2012 17:34:07 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/PriceW12}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@proceedings{DBLP:conf/isit/2012, + title = {Proceedings of the 2012 {IEEE} International Symposium on Information + Theory, {ISIT} 2012, Cambridge, MA, USA, July 1-6, 2012}, + publisher = {{IEEE}}, + year = {2012}, + url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6268627}, + isbn = {978-1-4673-2580-6}, + timestamp = {Mon, 01 Oct 2012 17:33:45 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/2012}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{Leskovec:2010, + author = {Jure Leskovec and + Deepayan Chakrabarti and + Jon M. 
Kleinberg and
+ Christos Faloutsos and
+ Zoubin Ghahramani},
+ title = {Kronecker Graphs: An Approach to Modeling Networks},
+ journal = {Journal of Machine Learning Research},
+ volume = {11},
+ pages = {985--1042},
+ year = {2010},
+ url = {http://doi.acm.org/10.1145/1756006.1756039},
+ doi = {10.1145/1756006.1756039},
+ timestamp = {Thu, 22 Apr 2010 13:26:26 +0200},
+ biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/LeskovecCKFG10},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{Holme:2002,
+ author = {Petter Holme and Beom Jun Kim},
+ title = {Growing scale-free networks with tunable clustering},
+ journal = {Physical Review E},
+ volume = {65},
+ issue = {2},
+ pages = {026107},
+ year = {2002}
+}
+
+
+@article{watts:1998,
+ Annote = {10.1038/30918},
+ Author = {Watts, Duncan J. and Strogatz, Steven H.},
+ Date = {1998/06/04/print},
+ Isbn = {0028-0836},
+ Journal = {Nature},
+ Number = {6684},
+ Pages = {440--442},
+ Read = {0},
+ Title = {Collective dynamics of `small-world' networks},
+ Url = {http://dx.doi.org/10.1038/30918},
+ Volume = {393},
+ Year = {1998},
+}
+
+@article{barabasi:2001,
+ author = {R{\'{e}}ka Albert and
+ Albert{-}L{\'{a}}szl{\'{o}} Barab{\'{a}}si},
+ title = {Statistical mechanics of complex networks},
+ journal = {CoRR},
+ volume = {cond-mat/0106096},
+ year = {2001},
+ url = {http://arxiv.org/abs/cond-mat/0106096},
+ timestamp = {Mon, 05 Dec 2011 18:05:15 +0100},
+ biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/cond-mat-0106096},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+
+@article{gomezbalduzzi:2011,
+ author = {Manuel Gomez{-}Rodriguez and
+ David Balduzzi and
+ Bernhard Sch{\"{o}}lkopf},
+ title = {Uncovering the Temporal Dynamics of Diffusion Networks},
+ journal = {CoRR},
+ volume = {abs/1105.0697},
+ year = {2011},
+ url = {http://arxiv.org/abs/1105.0697},
+ timestamp = {Mon, 05 Dec 2011 18:05:23 +0100},
+ biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1105-0697},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{Nowell08,
+ author = {Liben-Nowell, David and Kleinberg, Jon},
+ biburl = {http://www.bibsonomy.org/bibtex/250b9b1ca1849fa9cb8bb92d6d9031436/mkroell},
+ doi = {10.1073/pnas.0708471105},
+ eprint = {http://www.pnas.org/content/105/12/4633.full.pdf+html},
+ journal = {Proceedings of the National Academy of Sciences},
+ keywords = {SNA graph networks},
+ number = 12,
+ pages = {4633-4638},
+ timestamp = {2008-10-09T10:32:56.000+0200},
+ title = {{Tracing information flow on a global scale using Internet chain-letter data}},
+ url = {http://www.pnas.org/content/105/12/4633.abstract},
+ volume = 105,
+ year = 2008
+}
+
+@inproceedings{Leskovec07,
+ author = {Jure Leskovec and
+ Mary McGlohon and
+ Christos Faloutsos and
+ Natalie S. Glance and
+ Matthew Hurst},
+ title = {Patterns of Cascading Behavior in Large Blog Graphs},
+ booktitle = {Proceedings of the Seventh {SIAM} International Conference on Data
+ Mining, April 26-28, 2007, Minneapolis, Minnesota, {USA}},
+ pages = {551--556},
+ year = {2007},
+ url = {http://dx.doi.org/10.1137/1.9781611972771.60},
+ doi = {10.1137/1.9781611972771.60},
+ timestamp = {Wed, 12 Feb 2014 17:08:15 +0100},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/sdm/LeskovecMFGH07},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+
+@inproceedings{AdarA05,
+ author = {Eytan Adar and
+ Lada A.
Adamic},
+ title = {Tracking Information Epidemics in Blogspace},
+ booktitle = {2005 {IEEE} / {WIC} / {ACM} International Conference on Web Intelligence
+ {(WI} 2005), 19-22 September 2005, Compiegne, France},
+ pages = {207--214},
+ year = {2005},
+ url = {http://dx.doi.org/10.1109/WI.2005.151},
+ doi = {10.1109/WI.2005.151},
+ timestamp = {Tue, 12 Aug 2014 16:59:16 +0200},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/webi/AdarA05},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@inproceedings{Kleinberg:00,
+ author = {Jon M. Kleinberg},
+ title = {The small-world phenomenon: an algorithm perspective},
+ booktitle = {Proceedings of the Thirty-Second Annual {ACM} Symposium on Theory
+ of Computing, May 21-23, 2000, Portland, OR, {USA}},
+ pages = {163--170},
+ year = {2000},
+ url = {http://doi.acm.org/10.1145/335305.335325},
+ doi = {10.1145/335305.335325},
+ timestamp = {Thu, 16 Feb 2012 12:06:08 +0100},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/stoc/Kleinberg00},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{zhang2014,
+ title={Confidence intervals for low dimensional parameters in high dimensional linear models},
+ author={Zhang, Cun-Hui and Zhang, Stephanie S},
+ journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
+ volume={76},
+ number={1},
+ pages={217--242},
+ year={2014},
+ publisher={Wiley Online Library}
+}
+
+@article{javanmard2014,
+ title={Confidence intervals and hypothesis testing for high-dimensional regression},
+ author={Javanmard, Adel and Montanari, Andrea},
+ journal={The Journal of Machine Learning Research},
+ volume={15},
+ number={1},
+ pages={2869--2909},
+ year={2014},
+ publisher={JMLR.org}
+}
diff --git a/presentation/extended_abstract.txt b/presentation/extended_abstract.txt
new file mode 100644
index 0000000..47a12b9
--- /dev/null
+++ b/presentation/extended_abstract.txt
@@ -0,0 +1,10 @@
+Title: How can we estimate the parameters of a graph by observing its cascades?
+
+
+A standard problem in Social Network Theory is to understand how the parameters of a graph affect the properties of its cascades, which are diffusion processes that spread from node to node along the graph's weighted edges. In other words, can we predict cascades from the graph's parameters?
+
+Recent work has considered the dual problem: what knowledge about the existence of an edge in the graph do we gain by observing its cascades, and how can we leverage that knowledge efficiently? A natural extension of this problem is: can we learn the weights of the graph's edges from cascades? These questions are fundamental to many aspects of social network theory: knowing the parameters of the graph is a prerequisite to influence maximization, or conversely influence minimization.
+
+In this talk, we present a "sparse recovery" framework for tackling the "graph inference" problem from cascades. This framework achieves a better convergence rate under weaker assumptions than prior work. We show that we (almost) match the corresponding lower bound and that our guarantees extend to approximately sparse graphs. Finally, the approach is validated on synthetic networks.
+
+Joint work with Thibaut Horel
\ No newline at end of file
diff --git a/presentation/images/greedy_sparse_comparison.png b/presentation/images/greedy_sparse_comparison.png
new file mode 100644
index 0000000..3fab5b0
Binary files /dev/null and b/presentation/images/greedy_sparse_comparison.png differ
diff --git a/presentation/images/icc.png b/presentation/images/icc.png
new file mode 100644
index 0000000..2148eed
Binary files /dev/null and b/presentation/images/icc.png differ
diff --git a/presentation/images/sparse_recovery_illustration.svg b/presentation/images/sparse_recovery_illustration.svg
new file mode 100644
index 0000000..bef82c2
--- /dev/null
+++ b/presentation/images/sparse_recovery_illustration.svg
@@ -0,0 +1,655 @@
+[655-line SVG body omitted: the markup was stripped in extraction; only the metadata tag ``image/svg+xml'' survived]
diff --git a/presentation/images/voter.png b/presentation/images/voter.png
new file mode 100644
index 0000000..c1325cb
Binary files /dev/null and b/presentation/images/voter.png differ
diff --git a/presentation/stats/beamer_2.tex b/presentation/stats/beamer_2.tex
new file mode 100644
index 0000000..ecc66ed
--- /dev/null
+++ b/presentation/stats/beamer_2.tex
@@ -0,0 +1,397 @@
+\documentclass[10pt]{beamer}
+
+\usepackage{amssymb, amsmath, graphicx, amsfonts, color, amsthm, wasysym}
+
+\newtheorem{proposition}{Proposition}
+
+\title{Learning from Diffusion Processes}
+\subtitle{What cascades really teach us about networks}
+\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel}
+
+\begin{document}
+
+\begin{frame}
+\titlepage
+\end{frame}
+
+\begin{frame}
+\frametitle{Introduction}
+
+%notes: Learn what? the network, the parameters of the diffusion process.
+
+\begin{table}
+\centering
+\begin{tabular}{c | c}
+Network & Diffusion process \\[1ex]
+\hline
+\\
+Airports & Infectious diseases (SARS) \\
+ & Delays (Eyjafjallajökull) \\[3ex]
+Social Network & Infectious diseases (flu) \\
+ & Behaviors (Ice Bucket Challenge) \\[3ex]
+Internet/WWW & Information diffusion (Memes, Pirated content \dots)
+\end{tabular}
+\end{table}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Introduction}
+
+What do we know? What do we want to know?
+
+\begin{itemize}
+\item We know the {\bf airport network} structure. We observe delays. Can we learn how delays propagate?
+\item We (sometimes) know the {\bf social network}. We observe behaviors. Can we learn who influences whom?
+\item We rarely know the {\bf blog network}. We observe discussions. Can we learn who learns from whom?
+\end{itemize}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+
+\begin{figure}
+\includegraphics[scale=.3]{figures/weighted_graph.png}
+\caption{Weighted, directed graph}
+\end{figure}
+
+\begin{itemize}
+\item At $t=0$, nodes are in three possible states: susceptible, {\color{blue} infected}, {\color{red} dead}
+\pause
+\item At time step $t$, each {\color{blue} infected} node $i$ has a ``one-shot'' probability $p_{i,j}$ of infecting each of its susceptible neighbors $j$ at $t+1$.
+\pause
+\item A node stays {\color{blue} infected} for one round, then it {\color{red} dies}
+\pause
+\item At $t=0$, each node is {\color{blue} infected} with probability $p_{\text{init}}$
+\pause
+\item The process continues until a random time $T$ at which no more nodes can become infected.
+\pause
+\item $X_t$: set of {\color{blue} infected} nodes at time $t$
+\pause
+\item A {\bf cascade} is an instance of the IC model: $(X_t)_{t=0}^{T}$
+\end{itemize}
+
+%Notes: Revisit the celebrated independent cascade model -> Influence maximisation is tractable, requires knowledge of weights
+
+\end{frame}
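+% A quick numeric check of the one-shot infection rule above, and of the
+% reparameterization theta_{i,j} = log(1 - p_{i,j}) used on the next slides
+% (the values 0.45 and 0.04 are the edge weights from the figure's example):
+%
+%   import numpy as np
+%
+%   def step_probability(p_active):
+%       # p_active: weights p_{i,j} of j's currently infected in-neighbors
+%       # P(j infected at t+1 | X_t) = 1 - prod_i (1 - p_{i,j})
+%       return 1.0 - np.prod(1.0 - np.asarray(p_active))
+%
+%   print(step_probability([0.45, 0.04]))     # 0.472
+%
+%   theta = np.log(1.0 - np.array([0.45, 0.04]))
+%   print(1.0 - np.exp(theta.sum()))          # identical: 1 - exp(X_t . theta_j)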
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+
+\begin{figure}
+\includegraphics[scale=.5]{figures/weighted_graph.png}
+\caption{Weighted, directed graph}
+\end{figure}
+
+\begin{block}{Example}
+\begin{itemize}
+\item At $t=0$, the {\color{orange} orange} node is infected, and the two other nodes are susceptible. $X_0 = $({\color{orange} orange})
+\item At $t=1$, the {\color{orange} orange} node infects the {\color{blue} blue} node and fails to infect the {\color{green} green} node. The {\color{orange} orange} node dies. $X_1 = $({\color{blue} blue})
+\item At $t=2$, {\color{blue} blue} dies. $X_2 = \emptyset$
+\end{itemize}
+\end{block}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+
+\begin{figure}
+\includegraphics[scale=.5]{figures/weighted_graph.png}
+\caption{Weighted, directed graph}
+\end{figure}
+
+\begin{itemize}
+\item If the {\color{orange} orange} node and the {\color{green} green} node are infected at $t=0$, what is the probability that the {\color{blue} blue} node is infected at $t=1$?
+$$1 - \mathbb{P}(\text{not infected}) = 1 - (1 - .45)(1-.04)$$
+\end{itemize}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+\begin{figure}
+\includegraphics[scale=.5]{figures/weighted_graph.png}
+\caption{Weighted, directed graph}
+\end{figure}
+
+\begin{itemize}
+\item In general, for each susceptible node $j$:
+$$\mathbb{P}(j \text{ becomes infected at } t+1 \,|\, X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$
+\end{itemize}
+
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+For each susceptible node $j$, the event that it becomes {\color{blue} infected}, conditioned on the previous time step, is a Bernoulli random variable:
+$$(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$$
+\begin{itemize}
+\item $\theta_{i,j} := \log(1 - p_{i,j})$
+\item $\theta_j := (0, 0, 0, \theta_{4,j}, 0, \dots, \theta_{k,j}, \dots)$
+\item $f : x \mapsto 1 - e^x$
+\begin{align*}
+\mathbb{P}(j\in X_{t+1}|X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\
+& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\
+& = 1 - \exp \left[ X_{t} \cdot \theta_{j}\right]
+\end{align*}
+\end{itemize}
+\end{frame}
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+For each susceptible node $j$, the event that it becomes {\color{blue} infected}, conditioned on the previous time step, is a Bernoulli random variable:
+$$(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$$
+
+\begin{block}{Decomposability}
+\begin{itemize}
+\item Conditioned on $X_t$, the states of the nodes at the next time step are mutually independent
+\item We can learn the parents of each node independently
+\end{itemize}
+\end{block}
+
+\begin{block}{Sparsity}
+\begin{itemize}
+\item $\theta_{i,j} = 0 \Leftrightarrow \log(1 - p_{i,j}) = 0 \Leftrightarrow p_{i,j} = 0$
+\item If the graph is ``sparse'', then $p_{j}$ is sparse, and hence $\theta_j$ is sparse.
+\end{itemize}
+\end{block}
+\end{frame}
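+% Decomposability in practice: from observed cascades, assemble for node i the
+% pairs (X_t, x_i^{t+1}) over the steps where i was still susceptible; each node
+% is then learned from its own pairs alone. A minimal sketch (hypothetical
+% names; `cascades` is a list of [X_0, X_1, ...] with each X_t a set of nodes):
+%
+%   import numpy as np
+%
+%   def node_dataset(cascades, i, m):
+%       rows, labels = [], []
+%       for casc in cascades:
+%           seen = set()              # i is susceptible until first infected
+%           for X_t, X_next in zip(casc, casc[1:]):
+%               seen |= X_t
+%               if i in seen:
+%                   break             # i is infected or dead: stop recording
+%               x = np.zeros(m)
+%               x[list(X_t)] = 1.0
+%               rows.append(x)
+%               labels.append(float(i in X_next))
+%       return np.array(rows), np.array(labels)
+%
+% The resulting (X, y) pairs feed directly into the penalized MLE of the next
+% slides.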
+
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Learning from Diffusion Processes}
+\begin{block}{Problem Statement}
+\begin{itemize}
+\item We are given a graph ${\cal G}$, and a diffusion process $f$ parameterized by $\left((\theta_j)_j, p_{\text{init}}\right)$.
+\item Suppose we {\bf only} observe $(X_t)$ from the diffusion process.
+\item Under what conditions can we learn $\theta_{i,j}$ for all $(i,j)$?
+\end{itemize}
+\end{block}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Sparse Recovery}
+\begin{figure}
+\includegraphics[scale=.6]{../images/sparse_recovery_illustration_copy.pdf}
+\caption{$\mathbb{P}(j \in X_{t+1}| X_t) = f(X_t\cdot \theta_j)$}
+\end{figure}
+\end{frame}
+
+
+
+%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Learning from Diffusion Processes}
+
+% \begin{figure}
+% \includegraphics[scale=.4]{../images/sparse_recovery_illustration.pdf}
+% \caption{Generalized Cascade Model for node $i$}
+% \end{figure}
+
+\begin{block}{Likelihood Function}
+\begin{align*}
+{\cal L}(\theta_1, \dots, \theta_m| X_1, \dots, X_n) = \sum_{i=1}^m \sum_{t} & X_{t+1}^i \log f(\theta_i \cdot X_t) + \\
+& (1 - X_{t+1}^i) \log(1 - f(\theta_i \cdot X_t))
+\end{align*}
+\end{block}
+
+\begin{block}{MLE}
+For each node $i$, $$\hat\theta_i \in \arg \max_{\theta_i} \frac{1}{n_i}{\cal L}_i(\theta_i | X_1, X_2, \dots, X_{n_i}) - \lambda \|\theta_i\|_1$$
+\end{block}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Conditions}
+\begin{block}{On $f$}
+\begin{itemize}
+\item $\log f$ and $\log (1-f)$ have to be concave
+\item $\log f$ and $\log (1-f)$ have to have bounded gradients
+\end{itemize}
+\end{block}
+
+\begin{block}{On $(X_t)$}
+\begin{itemize}
+\item We want ${\cal H}$, the Hessian of ${\cal L}$ with respect to $\theta$, to be well-conditioned.
+\item $n < \dim(\theta) \implies {\cal H}$ is degenerate.
+\item {\bf Restricted Eigenvalue condition} = ``almost invertible'' on sparse vectors.
+\end{itemize}
+\end{block}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Restricted Eigenvalue Condition}
+
+\begin{definition}
+For a set $S$,
+$${\cal C} := \{ \Delta : \|\Delta\|_2 = 1, \|\Delta_{\bar S}\|_1 \leq 3 \| \Delta_S\|_1 \}$$
+${\cal H}$ satisfies the $(S, \gamma)$-RE condition if:
+$$\forall \Delta \in {\cal C}, \quad \Delta^T {\cal H} \Delta \geq \gamma$$
+\end{definition}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Main Result}
+Adapting a result from \cite{Negahban:2009}, we have the following theorem:
+
+\begin{theorem}
+For node $i$, assume
+\begin{itemize}
+\item the Hessian satisfies the $(S,\gamma)$-RE condition, where $S$ is the set of parents of node $i$ (the support of $\theta_i$)
+\item $f$ and $1-f$ are log-concave
+\item $|(\log f)'| < \frac{1}{\alpha}$ and $|(\log (1-f))'| < \frac{1}{\alpha}$
+\end{itemize} then with high probability:
+$$\| \theta^*_i - \hat \theta_i \|_2 \leq \frac{6}{\gamma}\sqrt{\frac{s\log m}{\alpha n}}$$
+\end{theorem}
+
+\begin{corollary}
+By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$
+\end{corollary}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Main result}
+
+\begin{block}{Correlation}
+\begin{itemize}
+\item Positive result despite correlated measurements \smiley
+\item Independent measurements $\implies$ taking one measurement per cascade.
+\end{itemize}
+\end{block}
+
+\begin{block}{Statement w.r.t.\ the observations and not the model}
+\begin{itemize}
+\item The Hessian must satisfy the $(S,\gamma)$-RE condition \frownie
+\item Can we make a conditional statement on $\theta$ and not $X_t$?
+\end{itemize}
+\end{block}
+
+\end{frame}
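+% The RE condition above restricts the quadratic form to the cone ${\cal C}$, so
+% it is not a plain eigenvalue computation (and checking it exactly is hard). A
+% crude Monte Carlo probe, which only yields an upper bound on the RE constant
+% (hypothetical names; a sanity check, not a certificate):
+%
+%   import numpy as np
+%
+%   def re_probe(H, S, trials=100000, seed=0):
+%       # samples directions, keeps those in C = {||d||_2 = 1,
+%       # ||d_Sbar||_1 <= 3 ||d_S||_1}, and tracks the smallest d^T H d
+%       rng = np.random.default_rng(seed)
+%       m = H.shape[0]
+%       Sbar = np.setdiff1d(np.arange(m), np.asarray(S))
+%       best = np.inf
+%       for _ in range(trials):
+%           d = rng.standard_normal(m)
+%           d[Sbar] *= 0.1                    # bias samples toward the cone
+%           if np.abs(d[Sbar]).sum() > 3 * np.abs(d[S]).sum():
+%               continue
+%           d /= np.linalg.norm(d)
+%           best = min(best, d @ H @ d)
+%       return best
+%
+% A value close to 0 means the (S, gamma)-RE condition fails for any useful
+% gamma; a large value is merely consistent with it holding.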
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Restricted Eigenvalue Condition}
+
+\begin{block}{From Hessian to Gram Matrix}
+\begin{itemize}
+\item If $\log f$ and $\log (1-f)$ are strongly concave with constant $c$, then if the $(S, \gamma)$-RE condition holds for the Gram matrix $\frac{1}{n}X X^T$, the $(S, c \gamma)$-RE condition holds for ${\cal H}$
+\end{itemize}
+\end{block}
+
+\begin{block}{From Gram Matrix to Expected Gram Matrix}
+\begin{itemize}
+\item If $n > C' s^2 \log m$ and the $(S, \gamma)$-RE condition holds for $\mathbb{E}(\frac{1}{n}XX^T)$, then the $(S, \gamma/2)$-RE condition holds for $\frac{1}{n}XX^T$ w.h.p.
+\item $\mathbb{E}(\frac{1}{n}XX^T)$ only depends on $p_{\text{init}}$ and $(\theta_j)_j$
+\end{itemize}
+\end{block}
+
+\begin{block}{Expected Gram Matrix}
+\begin{itemize}
+\item Diagonal: average number of times a node is infected
+\item Off-diagonal: average number of times a pair of nodes is infected \emph{together}
+\end{itemize}
+\end{block}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%
+\begin{frame}
+\frametitle{Approximate Sparsity}
+\begin{itemize}
+\item $\theta^*_{\lceil s \rceil}$: best $s$-sparse approximation to $\theta^*$
+\item $\|\theta^* - \theta^*_{\lceil s \rceil} \|_1$: `tail' of $\theta^*$
+\end{itemize}
+\begin{theorem}
+Under similar conditions on $f$ and a relaxed RE condition, there exist $C_1, C_2>0$ such that with high probability:
+\begin{equation}
+\|\hat \theta_i - \theta^*_i\|_2 \leq C_1 \sqrt{\frac{s\log m}{n}} + C_2 \sqrt[4]{\frac{s\log m}{n}}\|\theta^* - \theta^*_{\lceil s \rceil} \|_1
+\end{equation}
+\end{theorem}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Lower Bound}
+\begin{itemize}
+\item Under a correlation decay assumption for the IC model, ${\Omega}(s \log (N/s))$ cascades are necessary for graph reconstruction (Netrapalli and Sanghavi, SIGMETRICS'12)
+\item Adapting (Price \& Woodruff STOC'12), in the approximately sparse case, any algorithm for any generalized linear cascade model such that:
+$$\|\hat \theta - \theta^*\|_2 \leq C \|\theta^* - \theta^*_{\lceil s \rceil}\|_2$$
+requires ${\Omega}(s \log (n/s)/\log C)$ measurements.
+\end{itemize}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Voter Model}
+\begin{itemize}
+\pause
+\item {\color{red} Red} and {\color{blue} Blue} nodes. At every step, each node $i$ chooses one of its neighbors $j$ with probability $p_{j,i}$ and adopts that color at $t+1$
+\pause
+\item If {\color{blue} Blue} is the `contagious' state:
+\pause
+\begin{equation}
+\nonumber
+\mathbb{P}(i \in X^{t+1}|X^t) = \sum_{j \in {\cal N}(i)\cap X^t} p_{j,i} = X^t \cdot \theta_i
+\end{equation}
+\end{itemize}
+\end{frame}
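+% The expected Gram matrix discussed above can be estimated by stacking the
+% observed states X_t as indicator rows and averaging outer products. A minimal
+% sketch (hypothetical names; `cascades` as in the earlier simulation sketch):
+%
+%   import numpy as np
+%
+%   def empirical_gram(cascades, m):
+%       rows = []
+%       for casc in cascades:
+%           for X_t in casc:
+%               x = np.zeros(m)
+%               x[list(X_t)] = 1.0
+%               rows.append(x)
+%       X = np.array(rows)
+%       return X.T @ X / len(rows)    # estimates E[(1/n) X X^T]
+%
+% G[i, i] estimates how often node i is infected, and G[i, j] how often nodes i
+% and j are infected simultaneously, which is what the RE condition constrains.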
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Future Work}
+\begin{itemize}
+\item Lower bound the restricted eigenvalues of the expected Gram matrix
+\item Confidence Intervals
+\item Show that $n > C' s \log m$ measurements are necessary w.r.t.\ the expected Hessian.
+\item Linear Threshold model $\rightarrow$ 1-bit compressed sensing formulation
+\item Better lower bounds
+\item Active Learning
+\end{itemize}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%
+
+\bibliography{../../paper/sparse}
+\bibliographystyle{apalike}
+
+\end{document}
\ No newline at end of file
diff --git a/presentation/stats/figures/Screen Shot 2015-03-08 at 13.08.01.png b/presentation/stats/figures/Screen Shot 2015-03-08 at 13.08.01.png
new file mode 100644
index 0000000..b053f0c
Binary files /dev/null and b/presentation/stats/figures/Screen Shot 2015-03-08 at 13.08.01.png differ
diff --git a/presentation/stats/figures/weighted_graph.png b/presentation/stats/figures/weighted_graph.png
new file mode 100644
index 0000000..7deccc3
Binary files /dev/null and b/presentation/stats/figures/weighted_graph.png differ
diff --git a/presentation/stats/sparse.bib b/presentation/stats/sparse.bib
new file mode 100644
index 0000000..5df4b59
--- /dev/null
+++ b/presentation/stats/sparse.bib
@@ -0,0 +1,503 @@
+@article{CandesRomberTao:2006,
+author = {Candès, Emmanuel J. and Romberg, Justin K. and Tao, Terence},
+title = {Stable signal recovery from incomplete and inaccurate measurements},
+journal = {Communications on Pure and Applied Mathematics},
+volume = {59},
+number = {8},
+publisher = {Wiley Subscription Services, Inc., A Wiley Company},
+issn = {1097-0312},
+pages = {1207--1223},
+year = {2006},
+}
+
+
+@inproceedings{GomezRodriguez:2010,
+ author = {Gomez Rodriguez, Manuel and Leskovec, Jure and Krause, Andreas},
+ title = {Inferring Networks of Diffusion and Influence},
+ booktitle = {Proceedings of the 16th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
+ series = {KDD '10},
+ year = {2010},
+ isbn = {978-1-4503-0055-1},
+ location = {Washington, DC, USA},
+ pages = {1019--1028},
+ numpages = {10},
+ publisher = {ACM},
+ address = {New York, NY, USA},
+}
+
+
+@article{Netrapalli:2012,
+ author = {Netrapalli, Praneeth and Sanghavi, Sujay},
+ title = {Learning the Graph of Epidemic Cascades},
+ journal = {SIGMETRICS Perform. Eval. Rev.},
+ volume = {40},
+ number = {1},
+ month = {June},
+ year = {2012},
+ issn = {0163-5999},
+ pages = {211--222},
+ numpages = {12},
+ publisher = {ACM},
+ address = {New York, NY, USA},
+ keywords = {cascades, epidemics, graph structure learning},
+}
+
+@article{Negahban:2009,
+ author = {Negahban, Sahand N. and Ravikumar, Pradeep and Wainwright, Martin J. and Yu, Bin},
+ title = {A Unified Framework for High-Dimensional Analysis of M-Estimators with Decomposable Regularizers},
+ journal = {Statistical Science},
+ year = {2012},
+ month = {December},
+ volume = {27},
+ number = {4},
+ pages = {538--557},
+}
+
+@article{Zhao:2006,
+ author = {Zhao, Peng and Yu, Bin},
+ title = {On Model Selection Consistency of Lasso},
+ journal = {J. Mach. Learn.
Res.}, + issue_date = {12/1/2006}, + volume = {7}, + month = dec, + year = {2006}, + issn = {1532-4435}, + pages = {2541--2563}, + numpages = {23}, + url = {http://dl.acm.org/citation.cfm?id=1248547.1248637}, + acmid = {1248637}, + publisher = {JMLR.org}, +} + +@inproceedings{Daneshmand:2014, + author = {Hadi Daneshmand and + Manuel Gomez{-}Rodriguez and + Le Song and + Bernhard Sch{\"{o}}lkopf}, + title = {Estimating Diffusion Network Structures: Recovery Conditions, Sample + Complexity {\&} Soft-thresholding Algorithm}, + booktitle = {Proceedings of the 31th International Conference on Machine Learning, + {ICML} 2014, Beijing, China, 21-26 June 2014}, + pages = {793--801}, + year = {2014}, + url = {http://jmlr.org/proceedings/papers/v32/daneshmand14.html}, + timestamp = {Fri, 07 Nov 2014 20:42:30 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/icml/DaneshmandGSS14}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Kempe:03, + author = {David Kempe and + Jon M. Kleinberg and + {\'{E}}va Tardos}, + title = {Maximizing the spread of influence through a social network}, + booktitle = {Proceedings of the Ninth {ACM} {SIGKDD} International Conference on + Knowledge Discovery and Data Mining, Washington, DC, USA, August 24 + - 27, 2003}, + pages = {137--146}, + year = {2003}, + url = {http://doi.acm.org/10.1145/956750.956769}, + doi = {10.1145/956750.956769}, + timestamp = {Mon, 13 Feb 2006 15:34:20 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/KempeKT03}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Abrahao:13, + author = {Bruno D. Abrahao and + Flavio Chierichetti and + Robert Kleinberg and + Alessandro Panconesi}, + title = {Trace complexity of network inference}, + booktitle = {The 19th {ACM} {SIGKDD} International Conference on Knowledge Discovery + and Data Mining, {KDD} 2013, Chicago, IL, USA, August 11-14, 2013}, + pages = {491--499}, + year = {2013}, + url = {http://doi.acm.org/10.1145/2487575.2487664}, + doi = {10.1145/2487575.2487664}, + timestamp = {Tue, 10 Sep 2013 10:11:57 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/AbrahaoCKP13}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + + +@article{vandegeer:2009, +author = "van de Geer, Sara A. and B{\"u}hlmann, Peter", +doi = "10.1214/09-EJS506", +fjournal = "Electronic Journal of Statistics", +journal = "Electron. J. Statist.", +pages = "1360--1392", +publisher = "The Institute of Mathematical Statistics and the Bernoulli Society", +title = "On the conditions used to prove oracle results for the Lasso", +url = "http://dx.doi.org/10.1214/09-EJS506", +volume = "3", +year = "2009" +} + +@article{vandegeer:2011, +author = "van de Geer, Sara and Bühlmann, Peter and Zhou, Shuheng", +doi = "10.1214/11-EJS624", +fjournal = "Electronic Journal of Statistics", +journal = "Electron. J. 
Statist.", +pages = "688--749", +publisher = "The Institute of Mathematical Statistics and the Bernoulli Society", +title = "The adaptive and the thresholded Lasso for potentially misspecified models (and a lower bound for the Lasso)", +url = "http://dx.doi.org/10.1214/11-EJS624", +volume = "5", +year = "2011" +} + +@article{Zou:2006, +author = {Zou, Hui}, +title = {The Adaptive Lasso and Its Oracle Properties}, +journal = {Journal of the American Statistical Association}, +volume = {101}, +number = {476}, +pages = {1418-1429}, +year = {2006}, +doi = {10.1198/016214506000000735}, +URL = {http://dx.doi.org/10.1198/016214506000000735}, +} + +@article{Jacques:2013, + author = {Laurent Jacques and + Jason N. Laska and + Petros T. Boufounos and + Richard G. Baraniuk}, + title = {Robust 1-Bit Compressive Sensing via Binary Stable Embeddings of Sparse + Vectors}, + journal = {{IEEE} Transactions on Information Theory}, + volume = {59}, + number = {4}, + pages = {2082--2102}, + year = {2013}, + url = {http://dx.doi.org/10.1109/TIT.2012.2234823}, + doi = {10.1109/TIT.2012.2234823}, + timestamp = {Tue, 09 Apr 2013 19:57:48 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/JacquesLBB13}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Boufounos:2008, + author = {Petros Boufounos and + Richard G. Baraniuk}, + title = {1-Bit compressive sensing}, + booktitle = {42nd Annual Conference on Information Sciences and Systems, {CISS} + 2008, Princeton, NJ, USA, 19-21 March 2008}, + pages = {16--21}, + year = {2008}, + url = {http://dx.doi.org/10.1109/CISS.2008.4558487}, + doi = {10.1109/CISS.2008.4558487}, + timestamp = {Wed, 15 Oct 2014 17:04:27 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/ciss/BoufounosB08}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Gupta:2010, + author = {Ankit Gupta and + Robert Nowak and + Benjamin Recht}, + title = {Sample complexity for 1-bit compressed sensing and sparse classification}, + booktitle = {{IEEE} International Symposium on Information Theory, {ISIT} 2010, + June 13-18, 2010, Austin, Texas, USA, Proceedings}, + pages = {1553--1557}, + year = {2010}, + url = {http://dx.doi.org/10.1109/ISIT.2010.5513510}, + doi = {10.1109/ISIT.2010.5513510}, + timestamp = {Thu, 15 Jan 2015 17:11:50 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/GuptaNR10}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{Plan:2014, + author = {Yaniv Plan and + Roman Vershynin}, + title = {Dimension Reduction by Random Hyperplane Tessellations}, + journal = {Discrete {\&} Computational Geometry}, + volume = {51}, + number = {2}, + pages = {438--461}, + year = {2014}, + url = {http://dx.doi.org/10.1007/s00454-013-9561-6}, + doi = {10.1007/s00454-013-9561-6}, + timestamp = {Tue, 11 Feb 2014 13:48:56 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/dcg/PlanV14}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{bickel:2009, +author = "Bickel, Peter J. and Ritov, Ya’acov and Tsybakov, Alexandre B.", +doi = "10.1214/08-AOS620", +fjournal = "The Annals of Statistics", +journal = "Ann. 
Statist.", +month = "08", +number = "4", +pages = "1705--1732", +publisher = "The Institute of Mathematical Statistics", +title = "Simultaneous analysis of Lasso and Dantzig selector", +url = "http://dx.doi.org/10.1214/08-AOS620", +volume = "37", +year = "2009" +} + +@article{raskutti:10, + author = {Garvesh Raskutti and + Martin J. Wainwright and + Bin Yu}, + title = {Restricted Eigenvalue Properties for Correlated Gaussian Designs}, + journal = {Journal of Machine Learning Research}, + volume = {11}, + pages = {2241--2259}, + year = {2010}, + url = {http://portal.acm.org/citation.cfm?id=1859929}, + timestamp = {Wed, 15 Oct 2014 17:04:32 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/RaskuttiWY10}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{rudelson:13, + author = {Mark Rudelson and + Shuheng Zhou}, + title = {Reconstruction From Anisotropic Random Measurements}, + journal = {{IEEE} Transactions on Information Theory}, + volume = {59}, + number = {6}, + pages = {3434--3447}, + year = {2013}, + url = {http://dx.doi.org/10.1109/TIT.2013.2243201}, + doi = {10.1109/TIT.2013.2243201}, + timestamp = {Tue, 21 May 2013 14:15:50 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/RudelsonZ13}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{bipw11, + author = {Khanh Do Ba and + Piotr Indyk and + Eric Price and + David P. Woodruff}, + title = {Lower Bounds for Sparse Recovery}, + journal = {CoRR}, + volume = {abs/1106.0365}, + year = {2011}, + url = {http://arxiv.org/abs/1106.0365}, + timestamp = {Mon, 05 Dec 2011 18:04:39 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1106-0365}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{pw11, + author = {Eric Price and + David P. Woodruff}, + title = {{(1} + eps)-Approximate Sparse Recovery}, + booktitle = {{IEEE} 52nd Annual Symposium on Foundations of Computer Science, {FOCS} + 2011, Palm Springs, CA, USA, October 22-25, 2011}, + pages = {295--304}, + year = {2011}, + crossref = {DBLP:conf/focs/2011}, + url = {http://dx.doi.org/10.1109/FOCS.2011.92}, + doi = {10.1109/FOCS.2011.92}, + timestamp = {Tue, 16 Dec 2014 09:57:24 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/PriceW11}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@proceedings{DBLP:conf/focs/2011, + editor = {Rafail Ostrovsky}, + title = {{IEEE} 52nd Annual Symposium on Foundations of Computer Science, {FOCS} + 2011, Palm Springs, CA, USA, October 22-25, 2011}, + publisher = {{IEEE} Computer Society}, + year = {2011}, + url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6108120}, + isbn = {978-1-4577-1843-4}, + timestamp = {Mon, 15 Dec 2014 18:48:45 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/2011}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{pw12, + author = {Eric Price and + David P. 
Woodruff},
+ title = {Applications of the Shannon-Hartley theorem to data streams and sparse
+ recovery},
+ booktitle = {Proceedings of the 2012 {IEEE} International Symposium on Information
+ Theory, {ISIT} 2012, Cambridge, MA, USA, July 1-6, 2012},
+ pages = {2446--2450},
+ year = {2012},
+ crossref = {DBLP:conf/isit/2012},
+ url = {http://dx.doi.org/10.1109/ISIT.2012.6283954},
+ doi = {10.1109/ISIT.2012.6283954},
+ timestamp = {Mon, 01 Oct 2012 17:34:07 +0200},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/PriceW12},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@proceedings{DBLP:conf/isit/2012,
+ title = {Proceedings of the 2012 {IEEE} International Symposium on Information
+ Theory, {ISIT} 2012, Cambridge, MA, USA, July 1-6, 2012},
+ publisher = {{IEEE}},
+ year = {2012},
+ url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6268627},
+ isbn = {978-1-4673-2580-6},
+ timestamp = {Mon, 01 Oct 2012 17:33:45 +0200},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/2012},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{Leskovec:2010,
+ author = {Jure Leskovec and
+ Deepayan Chakrabarti and
+ Jon M. Kleinberg and
+ Christos Faloutsos and
+ Zoubin Ghahramani},
+ title = {Kronecker Graphs: An Approach to Modeling Networks},
+ journal = {Journal of Machine Learning Research},
+ volume = {11},
+ pages = {985--1042},
+ year = {2010},
+ url = {http://doi.acm.org/10.1145/1756006.1756039},
+ doi = {10.1145/1756006.1756039},
+ timestamp = {Thu, 22 Apr 2010 13:26:26 +0200},
+ biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/LeskovecCKFG10},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{Holme:2002,
+ author = {Petter Holme and Beom Jun Kim},
+ title = {Growing scale-free networks with tunable clustering},
+ journal = {Physical Review E},
+ volume = {65},
+ issue = {2},
+ pages = {026107},
+ year = {2002}
+}
+
+
+@article{watts:1998,
+ Annote = {10.1038/30918},
+ Author = {Watts, Duncan J.
and Strogatz, Steven H.}, + Date = {1998/06/04/print}, + Isbn = {0028-0836}, + Journal = {Nature}, + Number = {6684}, + Pages = {440--442}, + Read = {0}, + Title = {Collective dynamics of `small-world' networks}, + Url = {http://dx.doi.org/10.1038/30918}, + Volume = {393}, + Year = {1998}, +} + +@article{barabasi:2001, + author = {R{\'{e}}ka Albert and + Albert{-}L{\'{a}}szl{\'{o}} Barab{\'{a}}si}, + title = {Statistical mechanics of complex networks}, + journal = {CoRR}, + volume = {cond-mat/0106096}, + year = {2001}, + url = {http://arxiv.org/abs/cond-mat/0106096}, + timestamp = {Mon, 05 Dec 2011 18:05:15 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/cond-mat-0106096}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + + +@article{gomezbalduzzi:2011, + author = {Manuel Gomez{-}Rodriguez and + David Balduzzi and + Bernhard Sch{\"{o}}lkopf}, + title = {Uncovering the Temporal Dynamics of Diffusion Networks}, + journal = {CoRR}, + volume = {abs/1105.0697}, + year = {2011}, + url = {http://arxiv.org/abs/1105.0697}, + timestamp = {Mon, 05 Dec 2011 18:05:23 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1105-0697}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{Nowell08, + author = {Liben-Nowell, David and Kleinberg, Jon}, + biburl = {http://www.bibsonomy.org/bibtex/250b9b1ca1849fa9cb8bb92d6d9031436/mkroell}, + doi = {10.1073/pnas.0708471105}, + eprint = {http://www.pnas.org/content/105/12/4633.full.pdf+html}, + journal = {Proceedings of the National Academy of Sciences}, + keywords = {SNA graph networks}, + number = 12, + pages = {4633-4638}, + timestamp = {2008-10-09T10:32:56.000+0200}, + title = {{Tracing information flow on a global scale using Internet chain-letter data}}, + url = {http://www.pnas.org/content/105/12/4633.abstract}, + volume = 105, + year = 2008 +} + +@inproceedings{Leskovec07, + author = {Jure Leskovec and + Mary McGlohon and + Christos Faloutsos and + Natalie S. Glance and + Matthew Hurst}, + title = {Patterns of Cascading Behavior in Large Blog Graphs}, + booktitle = {Proceedings of the Seventh {SIAM} International Conference on Data + Mining, April 26-28, 2007, Minneapolis, Minnesota, {USA}}, + pages = {551--556}, + year = {2007}, + url = {http://dx.doi.org/10.1137/1.9781611972771.60}, + doi = {10.1137/1.9781611972771.60}, + timestamp = {Wed, 12 Feb 2014 17:08:15 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/sdm/LeskovecMFGH07}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + + +@inproceedings{AdarA05, + author = {Eytan Adar and + Lada A. Adamic}, + title = {Tracking Information Epidemics in Blogspace}, + booktitle = {2005 {IEEE} / {WIC} / {ACM} International Conference on Web Intelligence + {(WI} 2005), 19-22 September 2005, Compiegne, France}, + pages = {207--214}, + year = {2005}, + url = {http://dx.doi.org/10.1109/WI.2005.151}, + doi = {10.1109/WI.2005.151}, + timestamp = {Tue, 12 Aug 2014 16:59:16 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/webi/AdarA05}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Kleinberg:00, + author = {Jon M. 
Kleinberg},
+ title = {The small-world phenomenon: an algorithm perspective},
+ booktitle = {Proceedings of the Thirty-Second Annual {ACM} Symposium on Theory
+ of Computing, May 21-23, 2000, Portland, OR, {USA}},
+ pages = {163--170},
+ year = {2000},
+ url = {http://doi.acm.org/10.1145/335305.335325},
+ doi = {10.1145/335305.335325},
+ timestamp = {Thu, 16 Feb 2012 12:06:08 +0100},
+ biburl = {http://dblp.uni-trier.de/rec/bib/conf/stoc/Kleinberg00},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{zhang2014,
+ title={Confidence intervals for low dimensional parameters in high dimensional linear models},
+ author={Zhang, Cun-Hui and Zhang, Stephanie S},
+ journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
+ volume={76},
+ number={1},
+ pages={217--242},
+ year={2014},
+ publisher={Wiley Online Library}
+}
+
+@article{javanmard2014,
+ title={Confidence intervals and hypothesis testing for high-dimensional regression},
+ author={Javanmard, Adel and Montanari, Andrea},
+ journal={The Journal of Machine Learning Research},
+ volume={15},
+ number={1},
+ pages={2869--2909},
+ year={2014},
+ publisher={JMLR.org}
+}
-- 
cgit v1.2.3-70-g09d2