From 0b9031ba7f97e2b51fa47c7db8af7873a4884e39 Mon Sep 17 00:00:00 2001 From: jeanpouget-abadie Date: Sun, 8 Mar 2015 15:37:08 -0400 Subject: added new presentation --- notes/presentation/beamer.tex | 381 ++++++++++++++++ notes/presentation/beamer_2.tex | 240 ++++++++++ notes/presentation/extended_abstract.txt | 10 + .../figures/Screen Shot 2015-03-08 at 13.08.01.png | Bin 0 -> 126844 bytes notes/presentation/figures/weighted_graph.png | Bin 0 -> 24602 bytes notes/presentation/sparse.bib | 503 +++++++++++++++++++++ 6 files changed, 1134 insertions(+) create mode 100644 notes/presentation/beamer.tex create mode 100644 notes/presentation/beamer_2.tex create mode 100644 notes/presentation/extended_abstract.txt create mode 100644 notes/presentation/figures/Screen Shot 2015-03-08 at 13.08.01.png create mode 100644 notes/presentation/figures/weighted_graph.png create mode 100644 notes/presentation/sparse.bib (limited to 'notes/presentation') diff --git a/notes/presentation/beamer.tex b/notes/presentation/beamer.tex new file mode 100644 index 0000000..7d4b5c1 --- /dev/null +++ b/notes/presentation/beamer.tex @@ -0,0 +1,381 @@ +\documentclass[10pt]{beamer} + +\usepackage{amssymb, amsmath, graphicx, amsfonts, color} + +\title{Estimating a Graph's Parameters from Cascades} +\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel} +\date{} + +\begin{document} + +\begin{frame} +\titlepage +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/drawing.pdf} +\caption{A network} +\end{figure} +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/noedges_step1.pdf} +\caption{Cascade 1: $t=0$} +\end{figure} +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/noedges_step2.pdf} +\caption{Cascade 1: $t=1$} +\end{figure} +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/noedges_step3.pdf} +\caption{Cascade 1: $t=2$} +\end{figure} +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/noedges_step1_cascade2} +\caption{Cascade 2: $t=0$} +\end{figure} +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/noedges_step2_cascade2} +\caption{Cascade 2: $t=1$} +\end{figure} +\end{frame} + +\begin{frame} +\frametitle{Example} +\begin{figure} +\includegraphics[scale=.25]{../images/noedges_step3_cascade2} +\caption{Cascade 2: $t=2$} +\end{figure} +\end{frame} + + +\begin{frame} +\frametitle{Context} + +Notation: +\begin{itemize} +\item $({\cal G}, \theta)$ : graph, parameters +\item Cascade: diffusion process of a `behavior' on $({\cal G}, \theta)$ +\item $(X_t)_c$ : set of `active' nodes at time t for cascade $c$ +\end{itemize} + + +\begin{table} +\begin{tabular}{c c} +Graph $\implies$ Cascades & Cascades $\implies$ Graph \\ \hline +$({\cal G}, \theta)$ is known & $(X_t)_c$ is observed \\ +Predict $(X_t) | X_0$ & Recover $({\cal G}, \theta)$ \\ +\end{tabular} +\end{table} + +Summary: +\begin{itemize} +\item Many algorithms \emph{require} knowledge of $({\cal G}, \theta)$ +\item {\bf Graph Inference} is the problem of \emph{learning} $({\cal G}, \theta)$ +\end{itemize} +\end{frame} + +\begin{frame} +\begin{block}{Decomposability} +Learning the graph $\Leftrightarrow$ Learning the parents of a single node +\end{block} +\end{frame} + + +\begin{frame} +\frametitle{Problem Statement} +\begin{itemize} +\pause +\item Can we learn ${\cal G}$ from $(X_t)_c$? +\pause +\item How many cascades? How many steps in each cascade? +\pause +\item Can we learn $\theta$ from $(X_t)_c$? +\pause +\item How does the error decrease with $n_{\text{cascades}}$? +\pause +\item Are there graphs which are easy to learn? Harder to learn? +\pause +\item What kind of diffusion processes can we consider? +\pause +\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$? +\end{itemize} +\end{frame} + +\begin{frame} +\frametitle{Notation} +\begin{itemize} +\item n : number of measurements +\item N : number of cascades +\item m : number of nodes +\item s : degree of node considered +\end{itemize} +\end{frame} + + +\begin{frame} +\frametitle{Related Work} + +\begin{itemize} +\pause +\item Can we learn ${\cal G}$ from $(X_t)_c$? +\pause +\\{\color{blue} Yes} +\pause +\item How many cascades? How many steps in each cascade? +\pause +\\ {\color{blue} poly(s)$ \log m$ cascades} +\pause +\item Can we learn $\theta$ from $(X_t)_c$? +\pause +\\ {\color{blue} (?)} +\pause +\item How does the error decrease with $n_{\text{cascades}}$? +\pause +\\ {\color{blue} (?)} +\pause +\item Are there graphs which are easy to learn? Harder to learn? +\pause +\\{\color{blue} Sparse Graphs are easy} +\pause +\item What kind of diffusion processes can we consider? +\pause +\\{\color{blue} IC Model (discrete and continuous)} +\pause +\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$? +\pause +\\{\color{blue} (?)\dots$s \log m/s$ in specific setting} +\end{itemize} +\end{frame} + + + +\begin{frame} +\frametitle{Our Work} +\begin{itemize} +\pause +\item Can we learn ${\cal G}$ from $(X_t)_c$? +\pause +\\{\color{blue} Yes} $\rightarrow$ {\color{red} Yes} +\pause +\item How many cascades? How many steps in each cascade? +\pause +\\ {\color{blue} poly(s)$ \log m$ cascades} $\rightarrow$ {\color{red} $s\log m$ measurements} +\pause +\item Can we learn $\theta$ from $(X_t)_c$? +\pause +\\ {\color{blue} (?)} $\rightarrow$ {\color{red} Yes!} +\pause +\item How does the error decrease with $n_{\text{cascades}}$? +\pause +\\ {\color{blue} (?)} $\rightarrow$ {\color{red} ${\cal O}(\sqrt{s\log m/n})$} +\pause +\item Are there graphs which are easy to learn? Harder to learn? +\pause +\\ {\color{blue} Sparse Graphs are easy} $\rightarrow$ {\color{red} Approx. sparsity is also easy} +\pause +\item What kind of diffusion processes can we consider? +\pause +\\ {\color{blue} IC Model (discrete, continuous)} $\rightarrow$ {\color{red} Large class of Cascade Models} +\pause +\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$? +\pause +\\{\color{blue} $s \log m/s$ in specific setting} $\rightarrow$ {\color{red} $s \log m/s$ for approx. sparse graphs} +\end{itemize} + +\end{frame} + +\begin{frame} +\frametitle{Voter Model} +\begin{itemize} +\pause +\item {\color{red} Red} and {\color{blue} Blue} nodes. At every step, each node $i$ chooses one of its neighbors $j$ with probability $p_{j,i}$ and adopts that color at $t+1$ +\pause +\item If {\color{blue} Blue} is `contagious' state: +\pause +\begin{equation} +\nonumber +\mathbb{P}(i \in X^{t+1}|X^t) = \sum_{j \in {\cal N}(i)\cap X^t} p_{ji} = X^t \cdot \theta_i +\end{equation} +\end{itemize} +\end{frame} + +\begin{frame} +\frametitle{Independent Cascade Model} +\begin{itemize} +\pause +\item Each `infected' node $i$ has a probability $p_{i,j}$ of infecting each of his neighbors $j$. +\pause +\item A node stays `infected' for a single turn. Then it becomes `inactive'. +$$\mathbb{P}(j \text{ becomes infected at t+1}|X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$ +\end{itemize} +\end{frame} + +\begin{frame} +\frametitle{Independent Cascade Model} +\begin{align} +\mathbb{P}(j\in X_{t+1}|X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\ +& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\ +& = 1 - \exp \left[ X_{t} \cdot \theta_{i,j}\right] +\end{align} + +where $\theta_{i,j} := \log (1 - p_{i,j})$ and $\theta_i := (\theta_{i,j})_j$ +\\[5ex] +\begin{itemize} +\item Support of $\vec \theta$ $\Leftrightarrow$ support of $\vec p$ +\end{itemize} +\end{frame} + +\begin{frame} +\frametitle{Model Comparison} +\begin{table} +\centering +\begin{tabular}{c | c} +Voter Model & Indep. Casc. Model \\[1ex] +\hline \\[.1ex] +Markov & Markov \\[3ex] +Indep. prob. of $\in X^{t+1} | X^t$ & Indep. prob. of $\in X^{t+1} | X^t$ \\[3ex] +$\mathbb{P}(j\in X_{t+1}|X_{t}) = X_{t} \cdot \theta_{i}$ & $\mathbb{P}(j\in X_{t+1}|X_{t}) = 1 - \exp(X_{t} \cdot \theta_{i})$ \\[3ex] +Always Susceptible & Susceptible until infected \\ +\includegraphics[scale=.4]{../images/voter_model.pdf} & \includegraphics[scale=.3]{../images/icc_model.pdf} \\ +\end{tabular} +\end{table} +\end{frame} + +\begin{frame} +\frametitle{Generalized Linear Cascade Models} +\begin{definition} +{\bf Generalized Linear Cascade Model} with inverse link function $f : \mathbb{R} \rightarrow [0,1]$: +\begin{itemize} +\item for each \emph{susceptible} node $j$ in state $s$ at $t$, $\mathbb{P}(j \in X^{t+1}|X^t)$ is a Bernoulli of parameter $f(\theta_j \cdot X^t)$ +\end{itemize} +\end{definition} +\end{frame} + +\begin{frame} +\frametitle{Sparse Recovery} +\begin{figure} +\includegraphics[scale=.6]{../images/sparse_recovery_illustration.pdf} +\caption{$f(X\cdot\theta) = b$} +\end{figure} +\end{frame} + + +\begin{frame} +\frametitle{$\ell1$ penalized Maximum Likelihood} +\begin{itemize} +\item Decomposable node by node +\item Sum over susceptible steps +\end{itemize} + +\begin{block}{Likelihood function} +\begin{equation} +\nonumber +{\cal L}(\theta| x^1, \dots x^n) = \frac{1}{{\cal T}_i} \sum_{t \in {\cal T}_i} x^{t+1}_i \log f(\theta_i \cdot x^t) + (1 - x^{t+1}_i) \log(1 - f(\theta_i \cdot x^t)) +\end{equation} +\end{block} + +\begin{block}{Algorithm} +\begin{equation} +\nonumber +\theta \in \arg \max_\theta {\cal L}(\theta| x^1, \dots x^n) - \lambda \|\theta\|_1 +\end{equation} +\end{block} + +\end{frame} + +\begin{frame} +\frametitle{Main Result} +\begin{theorem} +Assume condition on the Hessian and certain regularity properties on $f$, then $\exists C>0$ depending only on the properties of the ${\cal G}$, with high probability: +$$\| \theta^*_i - \hat \theta_i \|_2 \leq C\sqrt{\frac{s\log m}{n}}$$ +\end{theorem} + +\begin{corollary} +By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$ +\end{corollary} + +\end{frame} + +\begin{frame} +\frametitle{Approximate Sparsity} +\begin{itemize} +\item $\theta^*_{\lceil s \rceil}$ best s-sparse approximation to $\theta^*$ +\item $\|\theta^* - \theta^*_{\lceil s \rceil} \|_1$: `tail' of $\theta^*$ +\end{itemize} +\begin{theorem} +Assume condition on Hessian and certain regularity properties on $f$, then $\exists C_1, C_2>0$ depending only on the properties of ${\cal G}$, with high probability: +\begin{equation} +\|\hat \theta_i - \theta^*_i\|_2 \leq C_1 \sqrt{\frac{s\log m}{n}} + C_2 \sqrt[4]{\frac{s\log m}{n}}\|\theta^* - \theta^*_{\lceil s \rceil} \|_1 +\end{equation} +\end{theorem} +\end{frame} + +\begin{frame} +\frametitle{Lower Bound} +\begin{itemize} +\item Under correlation decay assumption for the IC model, ${\Omega}(s \log N/s)$ cascades necessary for graph reconstruction (Netrapalli et Sanghavi SIGMETRICS'12) +\item Adapting (Price \& Woodruff STOC'12), in the approximately sparse case, any algorithm for any generalized linear cascade model such that: +$$\|\hat \theta - \theta^*\|_2 \leq C \|\theta^* - \theta^*_{\lfloor s \rfloor}\|_2$$ +requires ${\cal O}(s \log (n/s)/\log C)$ measurement. +\end{itemize} +\end{frame} + +\begin{frame} +\frametitle{(RE) assumptions} +\begin{block}{Assumption on Hessian} +\begin{itemize} +\item +Hessian has to verify a `restricted eigenvalue property' i.e smallest eigenvalue on sparse vectors is away from $0$. +\end{itemize} +\end{block} + +\begin{block}{From Hessian to Gram Matrix} +\begin{itemize} +\item $\mathbb{E}[X X^T]$ : `expected' Gram matrix of observations +\item $\mathbb{E}[X X^T]_{i,i}$ : $\mathbb{P}$ that node $i$ is infected +\item $\mathbb{E}[X X^T]_{i,j}$ : $\mathbb{P} $that node $i$ and node $j$ are infected simultaneously +\end{itemize} +\end{block} +\end{frame} + +\begin{frame} +\frametitle{Future Work} + +\begin{block}{Linear Threshold Model} +\begin{itemize} +\item Linear threshold model is a generalized linear cascade, with non-differential inverse link function. $$\mathbb{P}(j \in X^{t+1}|X^t) = sign(\theta_j \cdot X^t - t_j)$$ +\end{itemize} +\end{block} + +\begin{block}{Noisy Influence Maximization} +\end{block} + +\begin{block}{Confidence Intervals} +\end{block} + +\begin{block}{Active Learning} +\end{block} +\end{frame} + +\end{document} diff --git a/notes/presentation/beamer_2.tex b/notes/presentation/beamer_2.tex new file mode 100644 index 0000000..fda5b8d --- /dev/null +++ b/notes/presentation/beamer_2.tex @@ -0,0 +1,240 @@ +\documentclass[10pt]{beamer} + +\usepackage{amssymb, amsmath, graphicx, amsfonts, color, amsthm} + +\newtheorem{proposition}{Proposition} + +\title{Learning from Diffusion processes} +\subtitle{What cascades really teach us about networks} +\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel} + +\begin{document} + +\begin{frame} +\titlepage +\end{frame} + +\begin{frame} +\frametitle{Introduction} + +%notes: Learn what? the network, the parameters of the diffusion process. + +\begin{table} +\centering +\begin{tabular}{c | c} +Network & Diffusion process \\[1ex] +\hline +\\ +Airports & Infectious diseases (SARS) \\ + & Delays (Eyjafjallajökull) \\[3ex] +Social Network & Infectious diseases (flu) \\ + & Behaviors (Ice Bucket Challenge) \\[3ex] +Internet/WWW & Information diffusion (Memes, Pirated content \dots) +\end{tabular} +\end{table} + +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} +\frametitle{Introduction} + +What do we know? What do we want to know? + +\begin{itemize} +\item We know the {\bf airport network} structure. We observe delays. Can we learn how delays propagate? +\item We (sometimes) know the {\bf social network}. We observe behaviors. Can we learn who influences whom? +\item Rarely know {\bf blog network}. We observe discussions. Can we learn who learns from whom? +\end{itemize} + +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} +\frametitle{Independent Cascade Model} + +\begin{figure} +\includegraphics[scale=.5]{figures/weighted_graph.png} +\caption{Weighted, directed graph} +\end{figure} + +\begin{itemize} +\item Three states: susceptible, {\color{blue} infected}, {\color{red} dead} +\item Each {\color{blue} infected} node $i$ has a probability $p_{i,j}$ of infecting each of his neighbors $j$. +\item A node stays {\color{blue} infected} for one round, then it {\color{red} dies} +\item At $t=0$, each node is {\color{blue} infected} with probability $p_{\text{init}}$ +\end{itemize} + +%Notes: Revisit the celebrated independent cascade model -> Influence maximisation is tractable, requires knowledge of weights + +\end{frame} + + +%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} +\frametitle{Independent Cascade Model} + +\begin{figure} +\includegraphics[scale=.5]{figures/weighted_graph.png} +\caption{Weighted, directed graph} +\end{figure} + +\begin{itemize} +\item If $3$ and $4$ are {\color{blue} infected} at $t$, what is the probability that node $0$ is infected at $t+1$? +$$1 - \mathbb{P}(\text{not infected}) = 1 - (1 - .45)(1-.04)$$ +\item In general, $X_t$ {\color{blue} infected} nodes at t: +$$\mathbb{P}(j \text{ becomes infected at t+1}|X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$ +\end{itemize} + + +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} +\frametitle{Independent Cascade Model} +\begin{proposition} +The ICC, conditioned on previous time step, can be cast as a {\bf generalized linear model} +$$\mathbb{P}(j \in X_{t+1} | X_t) = f(X_t \cdot \theta_j)$$ +\end{proposition} + +\begin{proof} +\begin{align} +\mathbb{P}(j\in X_{t+1}|X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\ +& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\ +& = 1 - \exp \left[ X_{t} \cdot \theta_{j}\right] +\end{align} +\end{proof} + +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} +\frametitle{Independent Cascade Model} +\begin{block}{Decomposability} +\begin{itemize} +\item Conditioned on $X_t$, the state of each node is sampled independently +\item We can focus on learning vector $\theta_{j}$ for each node +\end{itemize} +\end{block} + +\begin{block}{Sparsity} +\begin{itemize} +\item $\theta_{i,j} = 0 \Leftrightarrow p_{i,j} = 0$ +\item If graph is ``sparse'', then $\theta_j$ is sparse. +\end{itemize} +\end{block} +\end{frame} + + + +%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} +\frametitle{Sparse Recovery} +\begin{figure} +\includegraphics[scale=.6]{../images/sparse_recovery_illustration.pdf} +\caption{$f(X_t\cdot \theta) = \mathbb{P}(j \in X_{t+1}| X_t)$} +\end{figure} +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} +\frametitle{Learning from Diffusion Processes} +\begin{block}{Problem Statement} +\begin{itemize} +\item We are given a graph ${\cal G}$, and a diffusion process parameterized by $\left((\theta_{i,j})_{i,j}, f, p_{\text{init}}\right)$. +\item Suppose we {\bf only} observe $(X_t)$ from the diffusion process. +\item Under what conditions can we learn $\theta_{i,j}$ for all $(i,j)$? How many $(X_t)$ are necessary? +\end{itemize} +\end{block} +\end{frame} + +%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} +\frametitle{Learning from Diffusion Processes} + +\begin{figure} +\includegraphics[scale=.4]{../images/sparse_recovery_illustration.pdf} +\caption{Generalized Linear Model for node $i$} +\end{figure} + +\begin{block}{Likelihood Function} +$${\cal L}(\theta| X_1, \dots X_N) = \frac{1}{{\cal T}_i} \sum_{t \in {\cal T}_i} x^{t+1}_i \log f(\theta_i \cdot x^t) + (1 - x^{t+1}_i) \log(1 - f(\theta_i \cdot x^t))$$ +\end{block} + +\end{frame} + +%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} +\frametitle{Conditions} +\begin{block}{On $f$} +\begin{itemize} +\item $\log f$ and $\log (1-f)$ have to be concave +\item $\log f$ and $\log (1-f)$ have bounded gradient +\end{itemize} +\end{block} + +\begin{block}{On $(X_t)$} +\begin{itemize} +\item Want ${\cal H}$ be the hessian of ${\cal L}$ with respect to $\theta$ to be ``inversible'' +\item $ n < dim(\theta) \implies {\cal H}$ is degenerate. +\item {\bf Restricted Eigenvalue condition} = ``almost invertible'' on sparse vectors. +\end{itemize} +\end{block} + +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} +\frametitle{Restricted Eigenvalue Condition} + +\begin{definition} +Let $S$ be the set of parents of node $i$. +$${\cal C} := \{ \Delta : \|\Delta\|_2 = 1, \|\Delta_{\bar S}\| \leq 3 \| \Delta_S\|_1 \}$$ +${\cal H}$ verifies the $(S, \gamma)$-RE condition if: +$$\forall X \in {\cal C}, \Delta {\cal H} \Delta \geq \gamma$$ +\end{definition} +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} +\frametitle{Main Result} +Adapting a result from \cite{Negahban:2009}, we have the following theorem: + +\begin{theorem} +Assume +\begin{itemize} +\item the Hessian verifies the $(S,\gamma)$-RE condition +\item $|(\log f)'| < \frac{1}{\alpha}$ and $|(\log 1- f)'| < \frac{1}{\alpha}$ +\end{itemize} then with high probability: +$$\| \theta^*_i - \hat \theta_i \|_2 \leq \frac{6}{\gamma}\sqrt{\frac{s\log m}{\alpha n}}$$ +\end{theorem} + +\begin{corollary} +By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$ +\end{corollary} + +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} +Comments: Correlated Measurements +TODO: still need to mention somewhere that we are doing penalized log likelihood +condition on $X_t$ is not great, we would like to have condition on the parameters $\theta, p_{\text{init}}$ $->$ Slides about expected hessian +TODO: slide about matrice de gram! +\end{frame} + +\bibliography{../../paper/sparse} +\bibliographystyle{apalike} + +\end{document} \ No newline at end of file diff --git a/notes/presentation/extended_abstract.txt b/notes/presentation/extended_abstract.txt new file mode 100644 index 0000000..47a12b9 --- /dev/null +++ b/notes/presentation/extended_abstract.txt @@ -0,0 +1,10 @@ +Title: How can we estimate the parameters of a graph by observing its cascades? + + +A standard problem in Social Network Theory is to understand how the parameters of a graph affect the properties of its cascades, which are diffusion processes that spread from node to node along the graph's weighted edges. In other words, can we predict cascades from the graph's parameters? + +Recent work has considered the dual problem: what knowledge about the existence of an edge in the graph do we gain by observing its cascades and how can we leverage that knowledge efficiently? A natural extension to this problem is: can we learn the weights of the graph's edges from cascades? These questions are fundamental to many aspects of social network theory: knowing the parameters of the graph precedes influence maximization or conversely influence minimization. + +In this talk, we present a "sparse recovery" framework for tackling the "graph inference" problem from cascades. This framework achieves a better convergence rate under weaker assumptions than prior work. We show that we (almost) match the lower bound and that our assumptions are robust to approximately sparse graphs. Finally, the approach is validated on synthetic networks. + +Joint work with Thibaut Horel \ No newline at end of file diff --git a/notes/presentation/figures/Screen Shot 2015-03-08 at 13.08.01.png b/notes/presentation/figures/Screen Shot 2015-03-08 at 13.08.01.png new file mode 100644 index 0000000..b053f0c Binary files /dev/null and b/notes/presentation/figures/Screen Shot 2015-03-08 at 13.08.01.png differ diff --git a/notes/presentation/figures/weighted_graph.png b/notes/presentation/figures/weighted_graph.png new file mode 100644 index 0000000..7deccc3 Binary files /dev/null and b/notes/presentation/figures/weighted_graph.png differ diff --git a/notes/presentation/sparse.bib b/notes/presentation/sparse.bib new file mode 100644 index 0000000..5df4b59 --- /dev/null +++ b/notes/presentation/sparse.bib @@ -0,0 +1,503 @@ +@article {CandesRomberTao:2006, +author = {Candès, Emmanuel J. and Romberg, Justin K. and Tao, Terence}, +title = {Stable signal recovery from incomplete and inaccurate measurements}, +journal = {Communications on Pure and Applied Mathematics}, +volume = {59}, +number = {8}, +publisher = {Wiley Subscription Services, Inc., A Wiley Company}, +issn = {1097-0312}, +pages = {1207--1223}, +year = {2006}, +} + + +@inproceedings{GomezRodriguez:2010, + author = {Gomez Rodriguez, Manuel and Leskovec, Jure and Krause, Andreas}, + title = {Inferring Networks of Diffusion and Influence}, + booktitle = {Proceedings of the 16th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, + series = {KDD '10}, + year = {2010}, + isbn = {978-1-4503-0055-1}, + location = {Washington, DC, USA}, + pages = {1019--1028}, + numpages = {10}, + publisher = {ACM}, + address = {New York, NY, USA}, +} + + +@article{Netrapalli:2012, + author = {Netrapalli, Praneeth and Sanghavi, Sujay}, + title = {Learning the Graph of Epidemic Cascades}, + journal = {SIGMETRICS Perform. Eval. Rev.}, + volume = {40}, + number = {1}, + month = {June}, + year = {2012}, + issn = {0163-5999}, + pages = {211--222}, + numpages = {12}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {cascades, epidemics, graph structure learning}, +} + +@article{Negahban:2009, + author = {Negahban, Sahand N. and Ravikumar, Pradeep and Wrainwright, Martin J. and Yu, Bin}, + title = {A Unified Framework for High-Dimensional Analysis of M-Estimators with Decomposable Regularizers}, + Journal = {Statistical Science}, + year = {2012}, + month = {December}, + volume = {27}, + number = {4}, + pages = {538--557}, +} + +@article{Zhao:2006, + author = {Zhao, Peng and Yu, Bin}, + title = {On Model Selection Consistency of Lasso}, + journal = {J. Mach. Learn. Res.}, + issue_date = {12/1/2006}, + volume = {7}, + month = dec, + year = {2006}, + issn = {1532-4435}, + pages = {2541--2563}, + numpages = {23}, + url = {http://dl.acm.org/citation.cfm?id=1248547.1248637}, + acmid = {1248637}, + publisher = {JMLR.org}, +} + +@inproceedings{Daneshmand:2014, + author = {Hadi Daneshmand and + Manuel Gomez{-}Rodriguez and + Le Song and + Bernhard Sch{\"{o}}lkopf}, + title = {Estimating Diffusion Network Structures: Recovery Conditions, Sample + Complexity {\&} Soft-thresholding Algorithm}, + booktitle = {Proceedings of the 31th International Conference on Machine Learning, + {ICML} 2014, Beijing, China, 21-26 June 2014}, + pages = {793--801}, + year = {2014}, + url = {http://jmlr.org/proceedings/papers/v32/daneshmand14.html}, + timestamp = {Fri, 07 Nov 2014 20:42:30 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/icml/DaneshmandGSS14}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Kempe:03, + author = {David Kempe and + Jon M. Kleinberg and + {\'{E}}va Tardos}, + title = {Maximizing the spread of influence through a social network}, + booktitle = {Proceedings of the Ninth {ACM} {SIGKDD} International Conference on + Knowledge Discovery and Data Mining, Washington, DC, USA, August 24 + - 27, 2003}, + pages = {137--146}, + year = {2003}, + url = {http://doi.acm.org/10.1145/956750.956769}, + doi = {10.1145/956750.956769}, + timestamp = {Mon, 13 Feb 2006 15:34:20 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/KempeKT03}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Abrahao:13, + author = {Bruno D. Abrahao and + Flavio Chierichetti and + Robert Kleinberg and + Alessandro Panconesi}, + title = {Trace complexity of network inference}, + booktitle = {The 19th {ACM} {SIGKDD} International Conference on Knowledge Discovery + and Data Mining, {KDD} 2013, Chicago, IL, USA, August 11-14, 2013}, + pages = {491--499}, + year = {2013}, + url = {http://doi.acm.org/10.1145/2487575.2487664}, + doi = {10.1145/2487575.2487664}, + timestamp = {Tue, 10 Sep 2013 10:11:57 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/AbrahaoCKP13}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + + +@article{vandegeer:2009, +author = "van de Geer, Sara A. and B{\"u}hlmann, Peter", +doi = "10.1214/09-EJS506", +fjournal = "Electronic Journal of Statistics", +journal = "Electron. J. Statist.", +pages = "1360--1392", +publisher = "The Institute of Mathematical Statistics and the Bernoulli Society", +title = "On the conditions used to prove oracle results for the Lasso", +url = "http://dx.doi.org/10.1214/09-EJS506", +volume = "3", +year = "2009" +} + +@article{vandegeer:2011, +author = "van de Geer, Sara and Bühlmann, Peter and Zhou, Shuheng", +doi = "10.1214/11-EJS624", +fjournal = "Electronic Journal of Statistics", +journal = "Electron. J. Statist.", +pages = "688--749", +publisher = "The Institute of Mathematical Statistics and the Bernoulli Society", +title = "The adaptive and the thresholded Lasso for potentially misspecified models (and a lower bound for the Lasso)", +url = "http://dx.doi.org/10.1214/11-EJS624", +volume = "5", +year = "2011" +} + +@article{Zou:2006, +author = {Zou, Hui}, +title = {The Adaptive Lasso and Its Oracle Properties}, +journal = {Journal of the American Statistical Association}, +volume = {101}, +number = {476}, +pages = {1418-1429}, +year = {2006}, +doi = {10.1198/016214506000000735}, +URL = {http://dx.doi.org/10.1198/016214506000000735}, +} + +@article{Jacques:2013, + author = {Laurent Jacques and + Jason N. Laska and + Petros T. Boufounos and + Richard G. Baraniuk}, + title = {Robust 1-Bit Compressive Sensing via Binary Stable Embeddings of Sparse + Vectors}, + journal = {{IEEE} Transactions on Information Theory}, + volume = {59}, + number = {4}, + pages = {2082--2102}, + year = {2013}, + url = {http://dx.doi.org/10.1109/TIT.2012.2234823}, + doi = {10.1109/TIT.2012.2234823}, + timestamp = {Tue, 09 Apr 2013 19:57:48 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/JacquesLBB13}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Boufounos:2008, + author = {Petros Boufounos and + Richard G. Baraniuk}, + title = {1-Bit compressive sensing}, + booktitle = {42nd Annual Conference on Information Sciences and Systems, {CISS} + 2008, Princeton, NJ, USA, 19-21 March 2008}, + pages = {16--21}, + year = {2008}, + url = {http://dx.doi.org/10.1109/CISS.2008.4558487}, + doi = {10.1109/CISS.2008.4558487}, + timestamp = {Wed, 15 Oct 2014 17:04:27 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/ciss/BoufounosB08}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Gupta:2010, + author = {Ankit Gupta and + Robert Nowak and + Benjamin Recht}, + title = {Sample complexity for 1-bit compressed sensing and sparse classification}, + booktitle = {{IEEE} International Symposium on Information Theory, {ISIT} 2010, + June 13-18, 2010, Austin, Texas, USA, Proceedings}, + pages = {1553--1557}, + year = {2010}, + url = {http://dx.doi.org/10.1109/ISIT.2010.5513510}, + doi = {10.1109/ISIT.2010.5513510}, + timestamp = {Thu, 15 Jan 2015 17:11:50 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/GuptaNR10}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{Plan:2014, + author = {Yaniv Plan and + Roman Vershynin}, + title = {Dimension Reduction by Random Hyperplane Tessellations}, + journal = {Discrete {\&} Computational Geometry}, + volume = {51}, + number = {2}, + pages = {438--461}, + year = {2014}, + url = {http://dx.doi.org/10.1007/s00454-013-9561-6}, + doi = {10.1007/s00454-013-9561-6}, + timestamp = {Tue, 11 Feb 2014 13:48:56 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/dcg/PlanV14}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{bickel:2009, +author = "Bickel, Peter J. and Ritov, Ya’acov and Tsybakov, Alexandre B.", +doi = "10.1214/08-AOS620", +fjournal = "The Annals of Statistics", +journal = "Ann. Statist.", +month = "08", +number = "4", +pages = "1705--1732", +publisher = "The Institute of Mathematical Statistics", +title = "Simultaneous analysis of Lasso and Dantzig selector", +url = "http://dx.doi.org/10.1214/08-AOS620", +volume = "37", +year = "2009" +} + +@article{raskutti:10, + author = {Garvesh Raskutti and + Martin J. Wainwright and + Bin Yu}, + title = {Restricted Eigenvalue Properties for Correlated Gaussian Designs}, + journal = {Journal of Machine Learning Research}, + volume = {11}, + pages = {2241--2259}, + year = {2010}, + url = {http://portal.acm.org/citation.cfm?id=1859929}, + timestamp = {Wed, 15 Oct 2014 17:04:32 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/RaskuttiWY10}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{rudelson:13, + author = {Mark Rudelson and + Shuheng Zhou}, + title = {Reconstruction From Anisotropic Random Measurements}, + journal = {{IEEE} Transactions on Information Theory}, + volume = {59}, + number = {6}, + pages = {3434--3447}, + year = {2013}, + url = {http://dx.doi.org/10.1109/TIT.2013.2243201}, + doi = {10.1109/TIT.2013.2243201}, + timestamp = {Tue, 21 May 2013 14:15:50 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/RudelsonZ13}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{bipw11, + author = {Khanh Do Ba and + Piotr Indyk and + Eric Price and + David P. Woodruff}, + title = {Lower Bounds for Sparse Recovery}, + journal = {CoRR}, + volume = {abs/1106.0365}, + year = {2011}, + url = {http://arxiv.org/abs/1106.0365}, + timestamp = {Mon, 05 Dec 2011 18:04:39 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1106-0365}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{pw11, + author = {Eric Price and + David P. Woodruff}, + title = {{(1} + eps)-Approximate Sparse Recovery}, + booktitle = {{IEEE} 52nd Annual Symposium on Foundations of Computer Science, {FOCS} + 2011, Palm Springs, CA, USA, October 22-25, 2011}, + pages = {295--304}, + year = {2011}, + crossref = {DBLP:conf/focs/2011}, + url = {http://dx.doi.org/10.1109/FOCS.2011.92}, + doi = {10.1109/FOCS.2011.92}, + timestamp = {Tue, 16 Dec 2014 09:57:24 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/PriceW11}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@proceedings{DBLP:conf/focs/2011, + editor = {Rafail Ostrovsky}, + title = {{IEEE} 52nd Annual Symposium on Foundations of Computer Science, {FOCS} + 2011, Palm Springs, CA, USA, October 22-25, 2011}, + publisher = {{IEEE} Computer Society}, + year = {2011}, + url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6108120}, + isbn = {978-1-4577-1843-4}, + timestamp = {Mon, 15 Dec 2014 18:48:45 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/2011}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{pw12, + author = {Eric Price and + David P. Woodruff}, + title = {Applications of the Shannon-Hartley theorem to data streams and sparse + recovery}, + booktitle = {Proceedings of the 2012 {IEEE} International Symposium on Information + Theory, {ISIT} 2012, Cambridge, MA, USA, July 1-6, 2012}, + pages = {2446--2450}, + year = {2012}, + crossref = {DBLP:conf/isit/2012}, + url = {http://dx.doi.org/10.1109/ISIT.2012.6283954}, + doi = {10.1109/ISIT.2012.6283954}, + timestamp = {Mon, 01 Oct 2012 17:34:07 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/PriceW12}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@proceedings{DBLP:conf/isit/2012, + title = {Proceedings of the 2012 {IEEE} International Symposium on Information + Theory, {ISIT} 2012, Cambridge, MA, USA, July 1-6, 2012}, + publisher = {{IEEE}}, + year = {2012}, + url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6268627}, + isbn = {978-1-4673-2580-6}, + timestamp = {Mon, 01 Oct 2012 17:33:45 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/2012}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{Leskovec:2010, + author = {Jure Leskovec and + Deepayan Chakrabarti and + Jon M. Kleinberg and + Christos Faloutsos and + Zoubin Ghahramani}, + title = {Kronecker Graphs: An Approach to Modeling Networks}, + journal = {Journal of Machine Learning Research}, + volume = {11}, + pages = {985--1042}, + year = {2010}, + url = {http://doi.acm.org/10.1145/1756006.1756039}, + doi = {10.1145/1756006.1756039}, + timestamp = {Thu, 22 Apr 2010 13:26:26 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/LeskovecCKFG10}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{Holme:2002, + author= {Petter Holme and Beom Jun Kim}, + title = {Growing scale-free networks with tunable clustering}, + journal = {Physical review E}, + volume = {65}, + issue = {2}, + pages = {026--107}, + year = {2002} +} + + +@article{watts:1998, + Annote = {10.1038/30918}, + Author = {Watts, Duncan J. and Strogatz, Steven H.}, + Date = {1998/06/04/print}, + Isbn = {0028-0836}, + Journal = {Nature}, + Number = {6684}, + Pages = {440--442}, + Read = {0}, + Title = {Collective dynamics of `small-world' networks}, + Url = {http://dx.doi.org/10.1038/30918}, + Volume = {393}, + Year = {1998}, +} + +@article{barabasi:2001, + author = {R{\'{e}}ka Albert and + Albert{-}L{\'{a}}szl{\'{o}} Barab{\'{a}}si}, + title = {Statistical mechanics of complex networks}, + journal = {CoRR}, + volume = {cond-mat/0106096}, + year = {2001}, + url = {http://arxiv.org/abs/cond-mat/0106096}, + timestamp = {Mon, 05 Dec 2011 18:05:15 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/cond-mat-0106096}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + + +@article{gomezbalduzzi:2011, + author = {Manuel Gomez{-}Rodriguez and + David Balduzzi and + Bernhard Sch{\"{o}}lkopf}, + title = {Uncovering the Temporal Dynamics of Diffusion Networks}, + journal = {CoRR}, + volume = {abs/1105.0697}, + year = {2011}, + url = {http://arxiv.org/abs/1105.0697}, + timestamp = {Mon, 05 Dec 2011 18:05:23 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1105-0697}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{Nowell08, + author = {Liben-Nowell, David and Kleinberg, Jon}, + biburl = {http://www.bibsonomy.org/bibtex/250b9b1ca1849fa9cb8bb92d6d9031436/mkroell}, + doi = {10.1073/pnas.0708471105}, + eprint = {http://www.pnas.org/content/105/12/4633.full.pdf+html}, + journal = {Proceedings of the National Academy of Sciences}, + keywords = {SNA graph networks}, + number = 12, + pages = {4633-4638}, + timestamp = {2008-10-09T10:32:56.000+0200}, + title = {{Tracing information flow on a global scale using Internet chain-letter data}}, + url = {http://www.pnas.org/content/105/12/4633.abstract}, + volume = 105, + year = 2008 +} + +@inproceedings{Leskovec07, + author = {Jure Leskovec and + Mary McGlohon and + Christos Faloutsos and + Natalie S. Glance and + Matthew Hurst}, + title = {Patterns of Cascading Behavior in Large Blog Graphs}, + booktitle = {Proceedings of the Seventh {SIAM} International Conference on Data + Mining, April 26-28, 2007, Minneapolis, Minnesota, {USA}}, + pages = {551--556}, + year = {2007}, + url = {http://dx.doi.org/10.1137/1.9781611972771.60}, + doi = {10.1137/1.9781611972771.60}, + timestamp = {Wed, 12 Feb 2014 17:08:15 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/sdm/LeskovecMFGH07}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + + +@inproceedings{AdarA05, + author = {Eytan Adar and + Lada A. Adamic}, + title = {Tracking Information Epidemics in Blogspace}, + booktitle = {2005 {IEEE} / {WIC} / {ACM} International Conference on Web Intelligence + {(WI} 2005), 19-22 September 2005, Compiegne, France}, + pages = {207--214}, + year = {2005}, + url = {http://dx.doi.org/10.1109/WI.2005.151}, + doi = {10.1109/WI.2005.151}, + timestamp = {Tue, 12 Aug 2014 16:59:16 +0200}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/webi/AdarA05}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@inproceedings{Kleinberg:00, + author = {Jon M. Kleinberg}, + title = {The small-world phenomenon: an algorithm perspective}, + booktitle = {Proceedings of the Thirty-Second Annual {ACM} Symposium on Theory + of Computing, May 21-23, 2000, Portland, OR, {USA}}, + pages = {163--170}, + year = {2000}, + url = {http://doi.acm.org/10.1145/335305.335325}, + doi = {10.1145/335305.335325}, + timestamp = {Thu, 16 Feb 2012 12:06:08 +0100}, + biburl = {http://dblp.uni-trier.de/rec/bib/conf/stoc/Kleinberg00}, + bibsource = {dblp computer science bibliography, http://dblp.org} +} + +@article{zhang2014, + title={Confidence intervals for low dimensional parameters in high dimensional linear models}, + author={Zhang, Cun-Hui and Zhang, Stephanie S}, + journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, + volume={76}, + number={1}, + pages={217--242}, + year={2014}, + publisher={Wiley Online Library} +} + +@article{javanmard2014, + title={Confidence intervals and hypothesis testing for high-dimensional regression}, + author={Javanmard, Adel and Montanari, Andrea}, + journal={The Journal of Machine Learning Research}, + volume={15}, + number={1}, + pages={2869--2909}, + year={2014}, + publisher={JMLR. org} +} -- cgit v1.2.3-70-g09d2