diff options
| author | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-03-08 15:37:08 -0400 |
|---|---|---|
| committer | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-03-08 15:37:08 -0400 |
| commit | 0b9031ba7f97e2b51fa47c7db8af7873a4884e39 (patch) | |
| tree | 9fc0e209210c66333083f2f80bcbf5ac8d5e9341 /notes | |
| parent | 93fff762c1a4917da51aad6bd5d2285971cb28b4 (diff) | |
| download | cascades-0b9031ba7f97e2b51fa47c7db8af7873a4884e39.tar.gz | |
added new presentation
Diffstat (limited to 'notes')
| -rw-r--r-- | notes/presentation/beamer.tex | 381 |
| -rw-r--r-- | notes/presentation/beamer_2.tex | 240 |
| -rw-r--r-- | notes/presentation/extended_abstract.txt (renamed from notes/econcs_presentation/extended_abstract.txt) | 0 |
| -rw-r--r-- | notes/presentation/figures/Screen Shot 2015-03-08 at 13.08.01.png | bin 0 -> 126844 bytes |
| -rw-r--r-- | notes/presentation/figures/weighted_graph.png | bin 0 -> 24602 bytes |
| -rw-r--r-- | notes/presentation/sparse.bib | 503 |
6 files changed, 1124 insertions, 0 deletions
diff --git a/notes/presentation/beamer.tex b/notes/presentation/beamer.tex
new file mode 100644
index 0000000..7d4b5c1
--- /dev/null
+++ b/notes/presentation/beamer.tex
@@ -0,0 +1,381 @@
+\documentclass[10pt]{beamer}
+
+\usepackage{amssymb, amsmath, graphicx, amsfonts, color}
+
+\title{Estimating a Graph's Parameters from Cascades}
+\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel}
+\date{}
+
+\begin{document}
+
+\begin{frame}
+\titlepage
+\end{frame}
+
+\begin{frame}
+\frametitle{Example}
+\begin{figure}
+\includegraphics[scale=.25]{../images/drawing.pdf}
+\caption{A network}
+\end{figure}
+\end{frame}
+
+\begin{frame}
+\frametitle{Example}
+\begin{figure}
+\includegraphics[scale=.25]{../images/noedges_step1.pdf}
+\caption{Cascade 1: $t=0$}
+\end{figure}
+\end{frame}
+
+\begin{frame}
+\frametitle{Example}
+\begin{figure}
+\includegraphics[scale=.25]{../images/noedges_step2.pdf}
+\caption{Cascade 1: $t=1$}
+\end{figure}
+\end{frame}
+
+\begin{frame}
+\frametitle{Example}
+\begin{figure}
+\includegraphics[scale=.25]{../images/noedges_step3.pdf}
+\caption{Cascade 1: $t=2$}
+\end{figure}
+\end{frame}
+
+\begin{frame}
+\frametitle{Example}
+\begin{figure}
+\includegraphics[scale=.25]{../images/noedges_step1_cascade2}
+\caption{Cascade 2: $t=0$}
+\end{figure}
+\end{frame}
+
+\begin{frame}
+\frametitle{Example}
+\begin{figure}
+\includegraphics[scale=.25]{../images/noedges_step2_cascade2}
+\caption{Cascade 2: $t=1$}
+\end{figure}
+\end{frame}
+
+\begin{frame}
+\frametitle{Example}
+\begin{figure}
+\includegraphics[scale=.25]{../images/noedges_step3_cascade2}
+\caption{Cascade 2: $t=2$}
+\end{figure}
+\end{frame}
+
+
+\begin{frame}
+\frametitle{Context}
+
+Notation:
+\begin{itemize}
+\item $({\cal G}, \theta)$: graph, parameters
+\item Cascade: diffusion process of a `behavior' on $({\cal G}, \theta)$
+\item $(X_t)_c$: set of `active' nodes at time $t$ for cascade $c$
+\end{itemize}
+
+
+\begin{table}
+\begin{tabular}{c c}
+Graph $\implies$ Cascades & Cascades $\implies$ Graph \\ \hline
+$({\cal G}, \theta)$ is known & $(X_t)_c$ is observed \\
+Predict $(X_t) | X_0$ & Recover $({\cal G}, \theta)$ \\
+\end{tabular}
+\end{table}
+
+Summary:
+\begin{itemize}
+\item Many algorithms \emph{require} knowledge of $({\cal G}, \theta)$
+\item {\bf Graph Inference} is the problem of \emph{learning} $({\cal G}, \theta)$
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\begin{block}{Decomposability}
+Learning the graph $\Leftrightarrow$ Learning the parents of each node separately
+\end{block}
+\end{frame}
+
+
+\begin{frame}
+\frametitle{Problem Statement}
+\begin{itemize}
+\pause
+\item Can we learn ${\cal G}$ from $(X_t)_c$?
+\pause
+\item How many cascades? How many steps in each cascade?
+\pause
+\item Can we learn $\theta$ from $(X_t)_c$?
+\pause
+\item How does the error decrease with $n_{\text{cascades}}$?
+\pause
+\item Are some graphs easier to learn than others?
+\pause
+\item What kind of diffusion processes can we consider?
+\pause
+\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\frametitle{Notation}
+\begin{itemize}
+\item $n$: number of measurements
+\item $N$: number of cascades
+\item $m$: number of nodes
+\item $s$: degree of the node considered
+\end{itemize}
+\end{frame}
+
+
+\begin{frame}
+\frametitle{Related Work}
+
+\begin{itemize}
+\pause
+\item Can we learn ${\cal G}$ from $(X_t)_c$?
+\pause
+\\ {\color{blue} Yes}
+\pause
+\item How many cascades? How many steps in each cascade?
+\pause
+\\ {\color{blue} $\mathrm{poly}(s) \log m$ cascades}
+\pause
+\item Can we learn $\theta$ from $(X_t)_c$?
+\pause
+\\ {\color{blue} (?)}
+\pause
+\item How does the error decrease with $n_{\text{cascades}}$?
+\pause
+\\ {\color{blue} (?)}
+\pause
+\item Are some graphs easier to learn than others?
+\pause
+\\ {\color{blue} Sparse graphs are easy}
+\pause
+\item What kind of diffusion processes can we consider?
+\pause
+\\ {\color{blue} IC Model (discrete and continuous)}
+\pause
+\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
+\pause
+\\ {\color{blue} (?)\dots$s \log (m/s)$ in a specific setting}
+\end{itemize}
+\end{frame}
+
+
+
+\begin{frame}
+\frametitle{Our Work}
+\begin{itemize}
+\pause
+\item Can we learn ${\cal G}$ from $(X_t)_c$?
+\pause
+\\ {\color{blue} Yes} $\rightarrow$ {\color{red} Yes}
+\pause
+\item How many cascades? How many steps in each cascade?
+\pause
+\\ {\color{blue} $\mathrm{poly}(s) \log m$ cascades} $\rightarrow$ {\color{red} $s\log m$ measurements}
+\pause
+\item Can we learn $\theta$ from $(X_t)_c$?
+\pause
+\\ {\color{blue} (?)} $\rightarrow$ {\color{red} Yes!}
+\pause
+\item How does the error decrease with $n_{\text{cascades}}$?
+\pause
+\\ {\color{blue} (?)} $\rightarrow$ {\color{red} ${\cal O}(\sqrt{s\log m/n})$}
+\pause
+\item Are some graphs easier to learn than others?
+\pause
+\\ {\color{blue} Sparse graphs are easy} $\rightarrow$ {\color{red} Approx. sparsity is also easy}
+\pause
+\item What kind of diffusion processes can we consider?
+\pause
+\\ {\color{blue} IC Model (discrete and continuous)} $\rightarrow$ {\color{red} Large class of cascade models}
+\pause
+\item What is the minimal number of cascades required to learn $({\cal G}, \theta)$?
+\pause
+\\ {\color{blue} $s \log(m/s)$ in a specific setting} $\rightarrow$ {\color{red} $s \log(m/s)$ for approx. sparse graphs}
+\end{itemize}
+
+\end{frame}
+
+\begin{frame}
+\frametitle{Voter Model}
+\begin{itemize}
+\pause
+\item {\color{red} Red} and {\color{blue} Blue} nodes. At every step, each node $i$ chooses one of its neighbors $j$ with probability $p_{j,i}$ and adopts $j$'s color at $t+1$
+\pause
+\item If {\color{blue} Blue} is the `contagious' state:
+\pause
+\begin{equation}
+\nonumber
+\mathbb{P}(i \in X^{t+1}|X^t) = \sum_{j \in {\cal N}(i)\cap X^t} p_{j,i} = X^t \cdot \theta_i
+\end{equation}
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+\begin{itemize}
+\pause
+\item Each `infected' node $i$ has a probability $p_{i,j}$ of infecting each of its neighbors $j$.
+\pause
+\item A node stays `infected' for a single turn. Then it becomes `inactive'.
+$$\mathbb{P}(j \text{ becomes infected at } t+1 \,|\, X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+\begin{align}
+\mathbb{P}(j\in X_{t+1}|X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\
+& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\
+& = 1 - \exp \left[ X_{t} \cdot \theta_{j} \right]
+\end{align}
+
+where $\theta_{i,j} := \log (1 - p_{i,j})$ and $\theta_j := (\theta_{i,j})_i$
+\\[5ex]
+\begin{itemize}
+\item Support of $\vec \theta$ $\Leftrightarrow$ support of $\vec p$
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\frametitle{Model Comparison}
+\begin{table}
+\centering
+\begin{tabular}{c | c}
+Voter Model & Indep. Casc. Model \\[1ex]
+\hline \\[.1ex]
+Markov & Markov \\[3ex]
+Nodes enter $X^{t+1}$ independently given $X^t$ & Nodes enter $X^{t+1}$ independently given $X^t$ \\[3ex]
+$\mathbb{P}(j\in X_{t+1}|X_{t}) = X_{t} \cdot \theta_{j}$ & $\mathbb{P}(j\in X_{t+1}|X_{t}) = 1 - \exp(X_{t} \cdot \theta_{j})$ \\[3ex]
+Always susceptible & Susceptible until infected \\
+\includegraphics[scale=.4]{../images/voter_model.pdf} & \includegraphics[scale=.3]{../images/icc_model.pdf} \\
+\end{tabular}
+\end{table}
+\end{frame}
+
+\begin{frame}
+\frametitle{Generalized Linear Cascade Models}
+\begin{definition}
+{\bf Generalized Linear Cascade Model} with inverse link function $f : \mathbb{R} \rightarrow [0,1]$:
+\begin{itemize}
+\item for each \emph{susceptible} node $j$ at time $t$, $\mathbb{P}(j \in X^{t+1}|X^t)$ is a Bernoulli of parameter $f(\theta_j \cdot X^t)$
+\end{itemize}
+\end{definition}
+\end{frame}
+
+\begin{frame}
+\frametitle{Sparse Recovery}
+\begin{figure}
+\includegraphics[scale=.6]{../images/sparse_recovery_illustration.pdf}
+\caption{$f(X\cdot\theta) = b$}
+\end{figure}
+\end{frame}
+
+
+\begin{frame}
+\frametitle{$\ell_1$-penalized Maximum Likelihood}
+\begin{itemize}
+\item Decomposable node by node
+\item Sum over susceptible steps
+\end{itemize}
+
+\begin{block}{Likelihood function}
+\begin{equation}
+\nonumber
+{\cal L}(\theta| x^1, \dots x^n) = \frac{1}{|{\cal T}_i|} \sum_{t \in {\cal T}_i} x^{t+1}_i \log f(\theta_i \cdot x^t) + (1 - x^{t+1}_i) \log(1 - f(\theta_i \cdot x^t))
+\end{equation}
+\end{block}
+
+\begin{block}{Algorithm}
+\begin{equation}
+\nonumber
+\hat\theta \in \arg \max_\theta {\cal L}(\theta| x^1, \dots x^n) - \lambda \|\theta\|_1
+\end{equation}
+\end{block}
+
+\end{frame}
+
+\begin{frame}
+\frametitle{Main Result}
+\begin{theorem}
+Assume a condition on the Hessian and certain regularity properties of $f$; then $\exists C>0$ depending only on the properties of ${\cal G}$ such that, with high probability:
+$$\| \theta^*_i - \hat \theta_i \|_2 \leq C\sqrt{\frac{s\log m}{n}}$$
+\end{theorem}
+
+\begin{corollary}
+By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$
+\end{corollary}
+
+\end{frame}
+
+\begin{frame}
+\frametitle{Approximate Sparsity}
+\begin{itemize}
+\item $\theta^*_{\lceil s \rceil}$: best $s$-sparse approximation to $\theta^*$
+\item $\|\theta^* - \theta^*_{\lceil s \rceil} \|_1$: `tail' of $\theta^*$
+\end{itemize}
+\begin{theorem}
+Assume a condition on the Hessian and certain regularity properties of $f$; then $\exists C_1, C_2>0$ depending only on the properties of ${\cal G}$ such that, with high probability:
+\begin{equation}
+\|\hat \theta_i - \theta^*_i\|_2 \leq C_1 \sqrt{\frac{s\log m}{n}} + C_2 \sqrt[4]{\frac{s\log m}{n}}\|\theta^* - \theta^*_{\lceil s \rceil} \|_1
+\end{equation}
+\end{theorem}
+\end{frame}
+
+\begin{frame}
+\frametitle{Lower Bound}
+\begin{itemize}
+\item Under a correlation decay assumption for the IC model, $\Omega(s \log (N/s))$ cascades are necessary for graph reconstruction (Netrapalli and Sanghavi, SIGMETRICS'12)
+\item Adapting (Price \& Woodruff, ISIT'12), in the approximately sparse case, any algorithm for any generalized linear cascade model such that:
+$$\|\hat \theta - \theta^*\|_2 \leq C \|\theta^* - \theta^*_{\lceil s \rceil}\|_2$$
+requires $\Omega(s \log (n/s)/\log C)$ measurements.
+\end{itemize}
+\end{frame}
+
+\begin{frame}
+\frametitle{(RE) assumptions}
+\begin{block}{Assumption on the Hessian}
+\begin{itemize}
+\item
+The Hessian must satisfy a `restricted eigenvalue' property, i.e.\ its smallest eigenvalue on sparse vectors is bounded away from $0$.
+\end{itemize}
+\end{block}
+
+\begin{block}{From Hessian to Gram Matrix}
+\begin{itemize}
+\item $\mathbb{E}[X X^T]$: `expected' Gram matrix of the observations
+\item $\mathbb{E}[X X^T]_{i,i}$: probability that node $i$ is infected
+\item $\mathbb{E}[X X^T]_{i,j}$: probability that nodes $i$ and $j$ are infected simultaneously
+\end{itemize}
+\end{block}
+\end{frame}
+
+\begin{frame}
+\frametitle{Future Work}
+
+\begin{block}{Linear Threshold Model}
+\begin{itemize}
+\item The linear threshold model is a generalized linear cascade with a non-differentiable inverse link function: $$\mathbb{P}(j \in X^{t+1}|X^t) = \mathbf{1}\{\theta_j \cdot X^t \geq t_j\}$$
+\end{itemize}
+\end{block}
+
+\begin{block}{Noisy Influence Maximization}
+\end{block}
+
+\begin{block}{Confidence Intervals}
+\end{block}
+
+\begin{block}{Active Learning}
+\end{block}
+\end{frame}
+
+\end{document}
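As a concrete companion to the model frames in beamer.tex (this is not part of the commit), the sketch below simulates the discrete-time IC process and collects the $(X_t, x^{t+1}_j)$ pairs that the slides treat as generalized-linear-model measurements. The weight matrix, helper names, and numpy usage are illustrative assumptions, not code from this repository.

```python
import numpy as np

rng = np.random.default_rng(0)

def simulate_ic(P, p_init, T):
    """One discrete-time independent cascade (a sketch, not the authors' code).
    P[i, j] = probability that an infected i infects j (assumed weights).
    Returns the indicator vectors X_0, ..., X_T of infected nodes."""
    m = P.shape[0]
    X = (rng.random(m) < p_init).astype(float)  # seeds at t = 0
    dead = np.zeros(m, dtype=bool)              # infected nodes die after one round
    history = [X]
    for _ in range(T):
        # susceptible j becomes infected w.p. 1 - prod_{i in X_t} (1 - p_{i,j})
        q = 1.0 - np.prod(1.0 - P * X[:, None], axis=0)
        new = (rng.random(m) < q) & ~dead & (X == 0)
        dead |= X.astype(bool)
        X = new.astype(float)
        history.append(X)
    return history

def measurements_for_node(histories, j):
    """Rows (X_t, x_{t+1, j}) over the steps where node j is susceptible:
    exactly the samples that enter the per-node likelihood L(theta_j)."""
    rows, ys = [], []
    for hist in histories:
        dead = np.zeros(hist[0].shape[0], dtype=bool)
        for X, X_next in zip(hist, hist[1:]):
            if not dead[j] and X[j] == 0:       # j susceptible at time t
                rows.append(X)
                ys.append(X_next[j])
            dead |= X.astype(bool)
    return np.asarray(rows), np.asarray(ys)
```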
diff --git a/notes/presentation/beamer_2.tex b/notes/presentation/beamer_2.tex
new file mode 100644
index 0000000..fda5b8d
--- /dev/null
+++ b/notes/presentation/beamer_2.tex
@@ -0,0 +1,240 @@
+\documentclass[10pt]{beamer}
+
+\usepackage{amssymb, amsmath, graphicx, amsfonts, color, amsthm}
+
+\newtheorem{proposition}{Proposition}
+
+\title{Learning from Diffusion Processes}
+\subtitle{What cascades really teach us about networks}
+\author{Jean (John) Pouget-Abadie \\ Joint Work with Thibaut (T-bo) Horel}
+
+\begin{document}
+
+\begin{frame}
+\titlepage
+\end{frame}
+
+\begin{frame}
+\frametitle{Introduction}
+
+%notes: Learn what? the network, the parameters of the diffusion process.
+
+\begin{table}
+\centering
+\begin{tabular}{c | c}
+Network & Diffusion process \\[1ex]
+\hline
+\\
+Airports & Infectious diseases (SARS) \\
+ & Delays (Eyjafjallajökull) \\[3ex]
+Social Network & Infectious diseases (flu) \\
+ & Behaviors (Ice Bucket Challenge) \\[3ex]
+Internet/WWW & Information diffusion (Memes, Pirated content \dots)
+\end{tabular}
+\end{table}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Introduction}
+
+What do we know? What do we want to know?
+
+\begin{itemize}
+\item We know the {\bf airport network} structure. We observe delays. Can we learn how delays propagate?
+\item We (sometimes) know the {\bf social network}. We observe behaviors. Can we learn who influences whom?
+\item We rarely know the {\bf blog network}. We observe discussions. Can we learn who learns from whom?
+\end{itemize}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+
+\begin{figure}
+\includegraphics[scale=.5]{figures/weighted_graph.png}
+\caption{Weighted, directed graph}
+\end{figure}
+
+\begin{itemize}
+\item Three states: susceptible, {\color{blue} infected}, {\color{red} dead}
+\item Each {\color{blue} infected} node $i$ has a probability $p_{i,j}$ of infecting each of its neighbors $j$.
+\item A node stays {\color{blue} infected} for one round, then it {\color{red} dies}
+\item At $t=0$, each node is {\color{blue} infected} with probability $p_{\text{init}}$
+\end{itemize}
+
+%Notes: Revisit the celebrated independent cascade model -> Influence maximisation is tractable, requires knowledge of weights
+
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+
+\begin{figure}
+\includegraphics[scale=.5]{figures/weighted_graph.png}
+\caption{Weighted, directed graph}
+\end{figure}
+
+\begin{itemize}
+\item If $3$ and $4$ are {\color{blue} infected} at $t$, what is the probability that node $0$ is infected at $t+1$?
+$$1 - \mathbb{P}(\text{not infected}) = 1 - (1 - .45)(1 - .04)$$
+\item In general, with $X_t$ the set of {\color{blue} infected} nodes at $t$:
+$$\mathbb{P}(j \text{ becomes infected at } t+1 \,|\, X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$
+\end{itemize}
+
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+\begin{proposition}
+The IC model, conditioned on the previous time step, can be cast as a {\bf generalized linear model}:
+$$\mathbb{P}(j \in X_{t+1} | X_t) = f(X_t \cdot \theta_j)$$
+\end{proposition}
+
+\begin{proof}
+\begin{align}
+\mathbb{P}(j\in X_{t+1}|X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\
+& = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\
+& = 1 - \exp \left[ X_{t} \cdot \theta_{j} \right]
+\end{align}
+\end{proof}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+\begin{block}{Decomposability}
+\begin{itemize}
+\item Conditioned on $X_t$, the state of each node is sampled independently
+\item We can focus on learning the vector $\theta_{j}$ of each node separately
+\end{itemize}
+\end{block}
+
+\begin{block}{Sparsity}
+\begin{itemize}
+\item $\theta_{i,j} = 0 \Leftrightarrow p_{i,j} = 0$
+\item If the graph is ``sparse'', then $\theta_j$ is sparse.
+\end{itemize}
+\end{block}
+\end{frame}
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Sparse Recovery}
+\begin{figure}
+\includegraphics[scale=.6]{../images/sparse_recovery_illustration.pdf}
+\caption{$f(X_t\cdot \theta) = \mathbb{P}(j \in X_{t+1}| X_t)$}
+\end{figure}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Learning from Diffusion Processes}
+\begin{block}{Problem Statement}
+\begin{itemize}
+\item We are given a graph ${\cal G}$ and a diffusion process parameterized by $\left((\theta_{i,j})_{i,j}, f, p_{\text{init}}\right)$.
+\item Suppose we {\bf only} observe $(X_t)$ from the diffusion process.
+\item Under what conditions can we learn $\theta_{i,j}$ for all $(i,j)$? How many observations $(X_t)$ are necessary?
+\end{itemize}
+\end{block}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Learning from Diffusion Processes}
+
+\begin{figure}
+\includegraphics[scale=.4]{../images/sparse_recovery_illustration.pdf}
+\caption{Generalized Linear Model for node $i$}
+\end{figure}
+
+\begin{block}{Likelihood Function}
+$${\cal L}(\theta| X_1, \dots X_N) = \frac{1}{|{\cal T}_i|} \sum_{t \in {\cal T}_i} x^{t+1}_i \log f(\theta_i \cdot x^t) + (1 - x^{t+1}_i) \log(1 - f(\theta_i \cdot x^t))$$
+\end{block}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Conditions}
+\begin{block}{On $f$}
+\begin{itemize}
+\item $\log f$ and $\log (1-f)$ must be concave
+\item $\log f$ and $\log (1-f)$ must have bounded gradients
+\end{itemize}
+\end{block}
+
+\begin{block}{On $(X_t)$}
+\begin{itemize}
+\item We want the Hessian ${\cal H}$ of ${\cal L}$ with respect to $\theta$ to be ``invertible''
+\item $n < \dim(\theta) \implies {\cal H}$ is degenerate.
+\item {\bf Restricted Eigenvalue condition} = ``almost invertible'' on sparse vectors.
+\end{itemize}
+\end{block}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Restricted Eigenvalue Condition}
+
+\begin{definition}
+Let $S$ be the set of parents of node $i$ and
+$${\cal C} := \{ \Delta : \|\Delta\|_2 = 1,\ \|\Delta_{\bar S}\|_1 \leq 3 \| \Delta_S\|_1 \}.$$
+${\cal H}$ verifies the $(S, \gamma)$-RE condition if:
+$$\forall \Delta \in {\cal C},\quad \Delta^T {\cal H} \Delta \geq \gamma$$
+\end{definition}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Main Result}
+Adapting a result from \cite{Negahban:2009}, we have the following theorem:
+
+\begin{theorem}
+Assume
+\begin{itemize}
+\item the Hessian verifies the $(S,\gamma)$-RE condition
+\item $|(\log f)'| < \frac{1}{\alpha}$ and $|(\log (1-f))'| < \frac{1}{\alpha}$
+\end{itemize} then with high probability:
+$$\| \theta^*_i - \hat \theta_i \|_2 \leq \frac{6}{\gamma}\sqrt{\frac{s\log m}{\alpha n}}$$
+\end{theorem}
+
+\begin{corollary}
+By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$
+\end{corollary}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+Comments: correlated measurements.
+TODO: still need to mention somewhere that we are doing penalized log-likelihood.
+The condition on $(X_t)$ is not great; we would like a condition on the parameters $\theta, p_{\text{init}}$ $\rightarrow$ slides about the expected Hessian.
+TODO: slide about the Gram matrix!
+\end{frame}
+
+\bibliography{../../paper/sparse}
+\bibliographystyle{apalike}
+
+\end{document}
\ No newline at end of file
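The estimator on the ``Main Result'' slides is the $\ell_1$-penalized maximum likelihood $\hat\theta \in \arg\max_\theta {\cal L}(\theta) - \lambda \|\theta\|_1$. Below is a hedged sketch of one way to compute it for the IC link $f(z) = 1 - e^z$, via proximal gradient ascent with soft-thresholding (plain ISTA); the optimizer, step size, and the clipping that guards the unbounded gradient near $z = 0$ are my own choices, not the authors' implementation.

```python
import numpy as np

def fit_node_l1(Xrows, y, lam, step=0.05, iters=2000):
    """Sketch of  max_theta  L(theta) - lam * ||theta||_1  for the IC link
    f(z) = 1 - exp(z), z = theta . x, theta_{i,j} = log(1 - p_{i,j}) <= 0."""
    n, m = Xrows.shape
    theta = np.zeros(m)
    for _ in range(iters):
        # clip z away from 0: the slides assume |(log f)'| bounded, which
        # fails as z -> 0-, so we enforce it numerically here
        z = np.clip(Xrows @ theta, -50.0, -1e-2)
        ez = np.exp(z)
        # d/dz of  y * log(1 - e^z) + (1 - y) * z
        g = -y * ez / (1.0 - ez) + (1.0 - y)
        theta = theta + step * (Xrows.T @ g) / n               # gradient ascent
        theta = np.sign(theta) * np.maximum(np.abs(theta) - step * lam, 0.0)
        theta = np.minimum(theta, 0.0)   # project back onto the feasible sign
    return theta
```

Thresholding the fitted $\hat\theta_j$ (equivalently, keeping the entries with $1 - e^{\hat\theta_{i,j}}$ above a small cutoff) then gives the edge estimate of the support-recovery corollary once $n \gtrsim s \log m$.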
diff --git a/notes/econcs_presentation/extended_abstract.txt b/notes/presentation/extended_abstract.txt
index 47a12b9..47a12b9 100644
--- a/notes/econcs_presentation/extended_abstract.txt
+++ b/notes/presentation/extended_abstract.txt
diff --git a/notes/presentation/figures/Screen Shot 2015-03-08 at 13.08.01.png b/notes/presentation/figures/Screen Shot 2015-03-08 at 13.08.01.png
Binary files differ
new file mode 100644
index 0000000..b053f0c
--- /dev/null
+++ b/notes/presentation/figures/Screen Shot 2015-03-08 at 13.08.01.png
diff --git a/notes/presentation/figures/weighted_graph.png b/notes/presentation/figures/weighted_graph.png
Binary files differ
new file mode 100644
index 0000000..7deccc3
--- /dev/null
+++ b/notes/presentation/figures/weighted_graph.png
diff --git a/notes/presentation/sparse.bib b/notes/presentation/sparse.bib
new file mode 100644
index 0000000..5df4b59
--- /dev/null
+++ b/notes/presentation/sparse.bib
@@ -0,0 +1,503 @@
+@article{CandesRomberTao:2006,
+  author = {Candès, Emmanuel J. and Romberg, Justin K. and Tao, Terence},
+  title = {Stable signal recovery from incomplete and inaccurate measurements},
+  journal = {Communications on Pure and Applied Mathematics},
+  volume = {59},
+  number = {8},
+  publisher = {Wiley Subscription Services, Inc., A Wiley Company},
+  issn = {1097-0312},
+  pages = {1207--1223},
+  year = {2006},
+}
+
+@inproceedings{GomezRodriguez:2010,
+  author = {Gomez Rodriguez, Manuel and Leskovec, Jure and Krause, Andreas},
+  title = {Inferring Networks of Diffusion and Influence},
+  booktitle = {Proceedings of the 16th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
+  series = {KDD '10},
+  year = {2010},
+  isbn = {978-1-4503-0055-1},
+  location = {Washington, DC, USA},
+  pages = {1019--1028},
+  numpages = {10},
+  publisher = {ACM},
+  address = {New York, NY, USA},
+}
+
+@article{Netrapalli:2012,
+  author = {Netrapalli, Praneeth and Sanghavi, Sujay},
+  title = {Learning the Graph of Epidemic Cascades},
+  journal = {SIGMETRICS Perform. Eval. Rev.},
+  volume = {40},
+  number = {1},
+  month = {June},
+  year = {2012},
+  issn = {0163-5999},
+  pages = {211--222},
+  numpages = {12},
+  publisher = {ACM},
+  address = {New York, NY, USA},
+  keywords = {cascades, epidemics, graph structure learning},
+}
+
+@article{Negahban:2009,
+  author = {Negahban, Sahand N. and Ravikumar, Pradeep and Wainwright, Martin J. and Yu, Bin},
+  title = {A Unified Framework for High-Dimensional Analysis of M-Estimators with Decomposable Regularizers},
+  journal = {Statistical Science},
+  year = {2012},
+  month = {December},
+  volume = {27},
+  number = {4},
+  pages = {538--557},
+}
+
+@article{Zhao:2006,
+  author = {Zhao, Peng and Yu, Bin},
+  title = {On Model Selection Consistency of Lasso},
+  journal = {J. Mach. Learn. Res.},
+  issue_date = {12/1/2006},
+  volume = {7},
+  month = dec,
+  year = {2006},
+  issn = {1532-4435},
+  pages = {2541--2563},
+  numpages = {23},
+  url = {http://dl.acm.org/citation.cfm?id=1248547.1248637},
+  acmid = {1248637},
+  publisher = {JMLR.org},
+}
+
+@inproceedings{Daneshmand:2014,
+  author = {Hadi Daneshmand and Manuel Gomez{-}Rodriguez and Le Song and Bernhard Sch{\"{o}}lkopf},
+  title = {Estimating Diffusion Network Structures: Recovery Conditions, Sample Complexity {\&} Soft-thresholding Algorithm},
+  booktitle = {Proceedings of the 31st International Conference on Machine Learning, {ICML} 2014, Beijing, China, 21-26 June 2014},
+  pages = {793--801},
+  year = {2014},
+  url = {http://jmlr.org/proceedings/papers/v32/daneshmand14.html},
+  timestamp = {Fri, 07 Nov 2014 20:42:30 +0100},
+  biburl = {http://dblp.uni-trier.de/rec/bib/conf/icml/DaneshmandGSS14},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@inproceedings{Kempe:03,
+  author = {David Kempe and Jon M. Kleinberg and {\'{E}}va Tardos},
+  title = {Maximizing the spread of influence through a social network},
+  booktitle = {Proceedings of the Ninth {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining, Washington, DC, USA, August 24 - 27, 2003},
+  pages = {137--146},
+  year = {2003},
+  url = {http://doi.acm.org/10.1145/956750.956769},
+  doi = {10.1145/956750.956769},
+  timestamp = {Mon, 13 Feb 2006 15:34:20 +0100},
+  biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/KempeKT03},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@inproceedings{Abrahao:13,
+  author = {Bruno D. Abrahao and Flavio Chierichetti and Robert Kleinberg and Alessandro Panconesi},
+  title = {Trace complexity of network inference},
+  booktitle = {The 19th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining, {KDD} 2013, Chicago, IL, USA, August 11-14, 2013},
+  pages = {491--499},
+  year = {2013},
+  url = {http://doi.acm.org/10.1145/2487575.2487664},
+  doi = {10.1145/2487575.2487664},
+  timestamp = {Tue, 10 Sep 2013 10:11:57 +0200},
+  biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/AbrahaoCKP13},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{vandegeer:2009,
+  author = "van de Geer, Sara A. and B{\"u}hlmann, Peter",
+  doi = "10.1214/09-EJS506",
+  fjournal = "Electronic Journal of Statistics",
+  journal = "Electron. J. Statist.",
+  pages = "1360--1392",
+  publisher = "The Institute of Mathematical Statistics and the Bernoulli Society",
+  title = "On the conditions used to prove oracle results for the Lasso",
+  url = "http://dx.doi.org/10.1214/09-EJS506",
+  volume = "3",
+  year = "2009"
+}
+
+@article{vandegeer:2011,
+  author = "van de Geer, Sara and Bühlmann, Peter and Zhou, Shuheng",
+  doi = "10.1214/11-EJS624",
+  fjournal = "Electronic Journal of Statistics",
+  journal = "Electron. J. Statist.",
+  pages = "688--749",
+  publisher = "The Institute of Mathematical Statistics and the Bernoulli Society",
+  title = "The adaptive and the thresholded Lasso for potentially misspecified models (and a lower bound for the Lasso)",
+  url = "http://dx.doi.org/10.1214/11-EJS624",
+  volume = "5",
+  year = "2011"
+}
+
+@article{Zou:2006,
+  author = {Zou, Hui},
+  title = {The Adaptive Lasso and Its Oracle Properties},
+  journal = {Journal of the American Statistical Association},
+  volume = {101},
+  number = {476},
+  pages = {1418--1429},
+  year = {2006},
+  doi = {10.1198/016214506000000735},
+  url = {http://dx.doi.org/10.1198/016214506000000735},
+}
+
+@article{Jacques:2013,
+  author = {Laurent Jacques and Jason N. Laska and Petros T. Boufounos and Richard G. Baraniuk},
+  title = {Robust 1-Bit Compressive Sensing via Binary Stable Embeddings of Sparse Vectors},
+  journal = {{IEEE} Transactions on Information Theory},
+  volume = {59},
+  number = {4},
+  pages = {2082--2102},
+  year = {2013},
+  url = {http://dx.doi.org/10.1109/TIT.2012.2234823},
+  doi = {10.1109/TIT.2012.2234823},
+  timestamp = {Tue, 09 Apr 2013 19:57:48 +0200},
+  biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/JacquesLBB13},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@inproceedings{Boufounos:2008,
+  author = {Petros Boufounos and Richard G. Baraniuk},
+  title = {1-Bit compressive sensing},
+  booktitle = {42nd Annual Conference on Information Sciences and Systems, {CISS} 2008, Princeton, NJ, USA, 19-21 March 2008},
+  pages = {16--21},
+  year = {2008},
+  url = {http://dx.doi.org/10.1109/CISS.2008.4558487},
+  doi = {10.1109/CISS.2008.4558487},
+  timestamp = {Wed, 15 Oct 2014 17:04:27 +0200},
+  biburl = {http://dblp.uni-trier.de/rec/bib/conf/ciss/BoufounosB08},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@inproceedings{Gupta:2010,
+  author = {Ankit Gupta and Robert Nowak and Benjamin Recht},
+  title = {Sample complexity for 1-bit compressed sensing and sparse classification},
+  booktitle = {{IEEE} International Symposium on Information Theory, {ISIT} 2010, June 13-18, 2010, Austin, Texas, USA, Proceedings},
+  pages = {1553--1557},
+  year = {2010},
+  url = {http://dx.doi.org/10.1109/ISIT.2010.5513510},
+  doi = {10.1109/ISIT.2010.5513510},
+  timestamp = {Thu, 15 Jan 2015 17:11:50 +0100},
+  biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/GuptaNR10},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{Plan:2014,
+  author = {Yaniv Plan and Roman Vershynin},
+  title = {Dimension Reduction by Random Hyperplane Tessellations},
+  journal = {Discrete {\&} Computational Geometry},
+  volume = {51},
+  number = {2},
+  pages = {438--461},
+  year = {2014},
+  url = {http://dx.doi.org/10.1007/s00454-013-9561-6},
+  doi = {10.1007/s00454-013-9561-6},
+  timestamp = {Tue, 11 Feb 2014 13:48:56 +0100},
+  biburl = {http://dblp.uni-trier.de/rec/bib/journals/dcg/PlanV14},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{bickel:2009,
+  author = "Bickel, Peter J. and Ritov, Ya’acov and Tsybakov, Alexandre B.",
+  doi = "10.1214/08-AOS620",
+  fjournal = "The Annals of Statistics",
+  journal = "Ann. Statist.",
+  month = "08",
+  number = "4",
+  pages = "1705--1732",
+  publisher = "The Institute of Mathematical Statistics",
+  title = "Simultaneous analysis of Lasso and Dantzig selector",
+  url = "http://dx.doi.org/10.1214/08-AOS620",
+  volume = "37",
+  year = "2009"
+}
+
+@article{raskutti:10,
+  author = {Garvesh Raskutti and Martin J. Wainwright and Bin Yu},
+  title = {Restricted Eigenvalue Properties for Correlated Gaussian Designs},
+  journal = {Journal of Machine Learning Research},
+  volume = {11},
+  pages = {2241--2259},
+  year = {2010},
+  url = {http://portal.acm.org/citation.cfm?id=1859929},
+  timestamp = {Wed, 15 Oct 2014 17:04:32 +0200},
+  biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/RaskuttiWY10},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{rudelson:13,
+  author = {Mark Rudelson and Shuheng Zhou},
+  title = {Reconstruction From Anisotropic Random Measurements},
+  journal = {{IEEE} Transactions on Information Theory},
+  volume = {59},
+  number = {6},
+  pages = {3434--3447},
+  year = {2013},
+  url = {http://dx.doi.org/10.1109/TIT.2013.2243201},
+  doi = {10.1109/TIT.2013.2243201},
+  timestamp = {Tue, 21 May 2013 14:15:50 +0200},
+  biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/RudelsonZ13},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{bipw11,
+  author = {Khanh Do Ba and Piotr Indyk and Eric Price and David P. Woodruff},
+  title = {Lower Bounds for Sparse Recovery},
+  journal = {CoRR},
+  volume = {abs/1106.0365},
+  year = {2011},
+  url = {http://arxiv.org/abs/1106.0365},
+  timestamp = {Mon, 05 Dec 2011 18:04:39 +0100},
+  biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1106-0365},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@inproceedings{pw11,
+  author = {Eric Price and David P. Woodruff},
+  title = {(1 + eps)-Approximate Sparse Recovery},
+  booktitle = {{IEEE} 52nd Annual Symposium on Foundations of Computer Science, {FOCS} 2011, Palm Springs, CA, USA, October 22-25, 2011},
+  pages = {295--304},
+  year = {2011},
+  crossref = {DBLP:conf/focs/2011},
+  url = {http://dx.doi.org/10.1109/FOCS.2011.92},
+  doi = {10.1109/FOCS.2011.92},
+  timestamp = {Tue, 16 Dec 2014 09:57:24 +0100},
+  biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/PriceW11},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@proceedings{DBLP:conf/focs/2011,
+  editor = {Rafail Ostrovsky},
+  title = {{IEEE} 52nd Annual Symposium on Foundations of Computer Science, {FOCS} 2011, Palm Springs, CA, USA, October 22-25, 2011},
+  publisher = {{IEEE} Computer Society},
+  year = {2011},
+  url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6108120},
+  isbn = {978-1-4577-1843-4},
+  timestamp = {Mon, 15 Dec 2014 18:48:45 +0100},
+  biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/2011},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@inproceedings{pw12,
+  author = {Eric Price and David P. Woodruff},
+  title = {Applications of the Shannon-Hartley theorem to data streams and sparse recovery},
+  booktitle = {Proceedings of the 2012 {IEEE} International Symposium on Information Theory, {ISIT} 2012, Cambridge, MA, USA, July 1-6, 2012},
+  pages = {2446--2450},
+  year = {2012},
+  crossref = {DBLP:conf/isit/2012},
+  url = {http://dx.doi.org/10.1109/ISIT.2012.6283954},
+  doi = {10.1109/ISIT.2012.6283954},
+  timestamp = {Mon, 01 Oct 2012 17:34:07 +0200},
+  biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/PriceW12},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@proceedings{DBLP:conf/isit/2012,
+  title = {Proceedings of the 2012 {IEEE} International Symposium on Information Theory, {ISIT} 2012, Cambridge, MA, USA, July 1-6, 2012},
+  publisher = {{IEEE}},
+  year = {2012},
+  url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6268627},
+  isbn = {978-1-4673-2580-6},
+  timestamp = {Mon, 01 Oct 2012 17:33:45 +0200},
+  biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/2012},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{Leskovec:2010,
+  author = {Jure Leskovec and Deepayan Chakrabarti and Jon M. Kleinberg and Christos Faloutsos and Zoubin Ghahramani},
+  title = {Kronecker Graphs: An Approach to Modeling Networks},
+  journal = {Journal of Machine Learning Research},
+  volume = {11},
+  pages = {985--1042},
+  year = {2010},
+  url = {http://doi.acm.org/10.1145/1756006.1756039},
+  doi = {10.1145/1756006.1756039},
+  timestamp = {Thu, 22 Apr 2010 13:26:26 +0200},
+  biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/LeskovecCKFG10},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{Holme:2002,
+  author = {Petter Holme and Beom Jun Kim},
+  title = {Growing scale-free networks with tunable clustering},
+  journal = {Physical Review E},
+  volume = {65},
+  issue = {2},
+  pages = {026107},
+  year = {2002}
+}
+
+@article{watts:1998,
+  annote = {10.1038/30918},
+  author = {Watts, Duncan J. and Strogatz, Steven H.},
+  date = {1998/06/04/print},
+  isbn = {0028-0836},
+  journal = {Nature},
+  number = {6684},
+  pages = {440--442},
+  read = {0},
+  title = {Collective dynamics of `small-world' networks},
+  url = {http://dx.doi.org/10.1038/30918},
+  volume = {393},
+  year = {1998},
+}
+
+@article{barabasi:2001,
+  author = {R{\'{e}}ka Albert and Albert{-}L{\'{a}}szl{\'{o}} Barab{\'{a}}si},
+  title = {Statistical mechanics of complex networks},
+  journal = {CoRR},
+  volume = {cond-mat/0106096},
+  year = {2001},
+  url = {http://arxiv.org/abs/cond-mat/0106096},
+  timestamp = {Mon, 05 Dec 2011 18:05:15 +0100},
+  biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/cond-mat-0106096},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{gomezbalduzzi:2011,
+  author = {Manuel Gomez{-}Rodriguez and David Balduzzi and Bernhard Sch{\"{o}}lkopf},
+  title = {Uncovering the Temporal Dynamics of Diffusion Networks},
+  journal = {CoRR},
+  volume = {abs/1105.0697},
+  year = {2011},
+  url = {http://arxiv.org/abs/1105.0697},
+  timestamp = {Mon, 05 Dec 2011 18:05:23 +0100},
+  biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1105-0697},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{Nowell08,
+  author = {Liben-Nowell, David and Kleinberg, Jon},
+  biburl = {http://www.bibsonomy.org/bibtex/250b9b1ca1849fa9cb8bb92d6d9031436/mkroell},
+  doi = {10.1073/pnas.0708471105},
+  eprint = {http://www.pnas.org/content/105/12/4633.full.pdf+html},
+  journal = {Proceedings of the National Academy of Sciences},
+  keywords = {SNA graph networks},
+  number = {12},
+  pages = {4633--4638},
+  timestamp = {2008-10-09T10:32:56.000+0200},
+  title = {{Tracing information flow on a global scale using Internet chain-letter data}},
+  url = {http://www.pnas.org/content/105/12/4633.abstract},
+  volume = {105},
+  year = {2008}
+}
+
+@inproceedings{Leskovec07,
+  author = {Jure Leskovec and Mary McGlohon and Christos Faloutsos and Natalie S. Glance and Matthew Hurst},
+  title = {Patterns of Cascading Behavior in Large Blog Graphs},
+  booktitle = {Proceedings of the Seventh {SIAM} International Conference on Data Mining, April 26-28, 2007, Minneapolis, Minnesota, {USA}},
+  pages = {551--556},
+  year = {2007},
+  url = {http://dx.doi.org/10.1137/1.9781611972771.60},
+  doi = {10.1137/1.9781611972771.60},
+  timestamp = {Wed, 12 Feb 2014 17:08:15 +0100},
+  biburl = {http://dblp.uni-trier.de/rec/bib/conf/sdm/LeskovecMFGH07},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@inproceedings{AdarA05,
+  author = {Eytan Adar and Lada A. Adamic},
+  title = {Tracking Information Epidemics in Blogspace},
+  booktitle = {2005 {IEEE} / {WIC} / {ACM} International Conference on Web Intelligence {(WI} 2005), 19-22 September 2005, Compiegne, France},
+  pages = {207--214},
+  year = {2005},
+  url = {http://dx.doi.org/10.1109/WI.2005.151},
+  doi = {10.1109/WI.2005.151},
+  timestamp = {Tue, 12 Aug 2014 16:59:16 +0200},
+  biburl = {http://dblp.uni-trier.de/rec/bib/conf/webi/AdarA05},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@inproceedings{Kleinberg:00,
+  author = {Jon M. Kleinberg},
+  title = {The small-world phenomenon: an algorithm perspective},
+  booktitle = {Proceedings of the Thirty-Second Annual {ACM} Symposium on Theory of Computing, May 21-23, 2000, Portland, OR, {USA}},
+  pages = {163--170},
+  year = {2000},
+  url = {http://doi.acm.org/10.1145/335305.335325},
+  doi = {10.1145/335305.335325},
+  timestamp = {Thu, 16 Feb 2012 12:06:08 +0100},
+  biburl = {http://dblp.uni-trier.de/rec/bib/conf/stoc/Kleinberg00},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{zhang2014,
+  title = {Confidence intervals for low dimensional parameters in high dimensional linear models},
+  author = {Zhang, Cun-Hui and Zhang, Stephanie S},
+  journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
+  volume = {76},
+  number = {1},
+  pages = {217--242},
+  year = {2014},
+  publisher = {Wiley Online Library}
+}
+
+@article{javanmard2014,
+  title = {Confidence intervals and hypothesis testing for high-dimensional regression},
+  author = {Javanmard, Adel and Montanari, Andrea},
+  journal = {The Journal of Machine Learning Research},
+  volume = {15},
+  number = {1},
+  pages = {2869--2909},
+  year = {2014},
+  publisher = {JMLR.org}
+}
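Putting the two sketches above together end to end (again illustrative only; the graph, seed probability, regularization scale, and threshold are all made-up values, not from this repository):

```python
import numpy as np

# hypothetical 3-node weighted digraph; entry [i, j] is p_{i,j}
P_true = np.array([[0.00, 0.45, 0.00],
                   [0.04, 0.00, 0.30],
                   [0.00, 0.20, 0.00]])

histories = [simulate_ic(P_true, p_init=0.3, T=4) for _ in range(2000)]
X, y = measurements_for_node(histories, j=1)

lam = np.sqrt(np.log(P_true.shape[0]) / max(len(y), 1))  # ~ sqrt(log m / n) scaling
theta_hat = fit_node_l1(X, y, lam)
p_hat = 1.0 - np.exp(theta_hat)          # back from theta = log(1 - p)
parents = np.flatnonzero(p_hat > 0.05)   # thresholded support, cf. the corollary
print(parents)                           # ideally {0, 2}, the true parents of node 1
```

With enough cascades (the corollary's $n \gtrsim s \log m$ regime), the thresholded support should match the in-neighbors of the chosen node.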
