Diffstat (limited to 'presentation/WWW15/beamer_3.tex')
-rw-r--r--  presentation/WWW15/beamer_3.tex | 603
1 file changed, 603 insertions, 0 deletions
diff --git a/presentation/WWW15/beamer_3.tex b/presentation/WWW15/beamer_3.tex
new file mode 100644
index 0000000..0243b88
--- /dev/null
+++ b/presentation/WWW15/beamer_3.tex
@@ -0,0 +1,603 @@
+\documentclass[10pt]{beamer}
+
+\usepackage{amssymb, amsmath, graphicx, amsfonts, color, amsthm, wasysym, framed}
+
+\newtheorem{proposition}{Proposition}
+
+\title{Learning from Diffusion Processes}
+\subtitle{What cascades really teach us about networks}
+\author{Jean Pouget-Abadie, Thibaut Horel}
+\institute[]{\includegraphics[scale=.35]{figures/SEASLogo_RGB.png}}
+\begin{document}
+
+\begin{frame}
+\titlepage
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Introduction}
+
+\begin{itemize}
+\item If we observe who catches the flu and when over several years, can we guess {\bf who is friends with whom?}
+\item If we observe behaviors spreading on Facebook (\emph{Ice bucket challenge, Je suis Charlie}), can we know {\bf who is most likely to influence you?}
+\item If we observe memes/ideas spreading on the internet, can we tell {\bf which blogs/communities take their information from which sources?}
+\end{itemize}
+
+\end{frame}
+
+%% Intuition says yes --> this problem can be formalized and is known as the
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Introduction}
+
+\begin{definition}[{\bf Network Inference Problem} {\small \cite{GomezRodriguez:2010}}]
+
+
+If \emph{only} the diffusion process is observed, but the network is \emph{unknown}:
+\begin{itemize}
+\item Can we learn the network?
+\item For which types of diffusion process?
+\item After how many observations?
+\end{itemize}
+\end{definition}
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Prior work}
+\begin{itemize}
+\item {\small \cite{GomezRodriguez:2010, gomezbalduzzi:2011, Abrahao:13, Daneshmand:2014}} : Continuous-time Independent Cascade-like diffusion model
+\item {\small \cite{Netrapalli:2012}}: Discrete-time Independent Cascade Model (formalized in {\small \cite{Kempe:03}}) with correlation decay
+\item {\small \cite{?}}
+\end{itemize}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Discrete-time Independent Cascade Model}
+
+\begin{figure}
+\includegraphics[scale=.3]{figures/ic_illustration.pdf}
+\caption{Weighted, directed graph}
+\end{figure}
+
+\begin{itemize}
+
+\item At $t=0$, each node is {\color{blue} infected} with probability $p_{\text{init}}$ and {\bf susceptible} with probability $1-p_{\text{init}}$
+
+\item Each {\color{blue} infected} node $i$ has a ``one-shot'' probability $p_{i,j}$ of infecting each of its susceptible neighbors $j$ at $t+1$.
+
+\item A node stays {\color{blue} infected} for one round, then it {\color{red} dies}
+
+\item The process continues until the random time $T$ at which no {\color{blue} infected} nodes remain (see the simulation sketch on the next frame)
+\end{itemize}
+
+%Notes: Revisit the celebrated independent cascade model -> Influence maximisation is tractable, requires knowledge of weights
+\end{frame}
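+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% Hypothetical illustration, not the paper's code: a minimal Python sketch of
+% the IC dynamics above. The 3-node graph, its weights, and p_init are ours.
+\begin{frame}[fragile]
+\frametitle{Discrete-time IC Model: Simulation Sketch}
+A minimal sketch of the dynamics above, assuming edge weights stored as a dict keyed by $(i,j)$; the graph and weights are illustrative.
+{\scriptsize
+\begin{verbatim}
+import random
+
+def ic_step(active, susceptible, p):
+    # Each infected i gets one shot at each susceptible neighbor j.
+    return {j for j in susceptible
+            if any(random.random() < p.get((i, j), 0.0) for i in active)}
+
+def run_cascade(nodes, p, p_init):
+    active = {v for v in nodes if random.random() < p_init}
+    susceptible = set(nodes) - active
+    history = [active]
+    while active:                    # stops when no infected nodes remain
+        active = ic_step(active, susceptible, p)
+        susceptible -= active
+        history.append(active)
+    return history
+
+p = {("orange", "blue"): 0.72, ("green", "blue"): 0.04}
+print(run_cascade(["orange", "blue", "green"], p, p_init=0.5))
+\end{verbatim}
+}
+\end{frame}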
+
+% %%%%%%%%%%%%%%%%%%%%%%%%%
+
+% \begin{frame}
+% \frametitle{Discrete-time Independent Cascade Model}
+
+% \begin{figure}
+% \includegraphics[scale=.5]{figures/weighted_graph.png}
+% \caption{Weighted, directed graph}
+% \end{figure}
+
+% \begin{block}{Example}
+% \begin{itemize}
+% \item At $t=0$, the {\color{orange} orange} node is infected, and the two other nodes are susceptible. $X_0 = $\{{\color{orange} orange}\}
+% \item At $t=1$, the {\color{orange} orange} node infects the {\color{blue} blue} node and fails to infect the {\color{green} green} node. The {\color{orange} orange} node dies. $X_1 = $\{{\color{blue} blue}\}
+% \item At $t=2$, {\color{blue} blue} dies. $X_2 = \emptyset$
+% \end{itemize}
+% \end{block}
+
+% \end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Discrete-time Independent Cascade Model}
+
+\begin{figure}
+\includegraphics[scale=.5]{figures/weighted_graph.png}
+\caption{Weighted, directed graph}
+\end{figure}
+
+\begin{itemize}
+\item If the {\color{orange} orange} node and the {\color{green} green} node are infected at $t=0$, what is the probability that the {\color{blue} blue} node is infected at $t=1$?
+$$1 - \mathbb{P}(\text{not infected}) = 1 - (1 - .72)(1 - .04) = 1 - .28 \times .96 = .7312$$
+\end{itemize}
+
+
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% \begin{frame}
+% \frametitle{Independent Cascade Model}
+% \begin{figure}
+% \includegraphics[scale=.5]{figures/weighted_graph.png}
+% \caption{Weighted, directed graph}
+% \end{figure}
+
+% \begin{itemize}
+% \item In general, for each susceptible node $j$:
+% $$\mathbb{P}(j \text{ becomes infected at t+1}|X_{t}) = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j})$$
+% \end{itemize}
+
+% \end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Independent Cascade Model}
+
+\begin{itemize}
+\item $X^t$: set of {\color{blue} infected} nodes
+
+\item Probability that node $j$ gets infected at $t+1$:
+\begin{framed}
+ \begin{align*}
+ \tag{IC}
+ \mathbb{P}\big[X^{t+1}_j = 1\,|\, X^{t}\big]
+ & = 1 - \prod_{i = 1}^m {(1 - p_{i,j})}^{X^t_i} \\
+ & = 1 - \prod_{i = 1}^m e^{\Theta_{i,j}X^t_i} \\
+ & = 1 - e^{\Theta_j \cdot X^t}
+ \end{align*}
+ \end{framed}
+% \item $f: z \mapsto 1 - e^z$
+% \item $(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$
+\item $\Theta_{i,j} \equiv \log(1-p_{i,j})$
+\end{itemize}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Voter Model}
+\begin{figure}
+\includegraphics[scale=.3]{figures/vt_illustration.pdf}
+\end{figure}
+\begin{itemize}
+\item Probability that node $j$ is infected at $t+1$:
+\begin{equation*}
+ \tag{VT}
+ \mathbb{P}\big[X^{t+1}_j = 1\,|\, X^{t}\big]
+ = \sum_{i \in X^t} p_{i,j} = p_j \cdot X^t
+\end{equation*}
+\end{itemize}
+
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Generalized Linear Cascades}
+
+{\bf Generalized Linear Cascade Model}
+\begin{itemize}
+ \item $f: \mathbb{R} \rightarrow [0,1]$: inverse link function
+ \item Probability depends on $f$-transform of {\bf scalar product}:
+ \begin{framed}
+ $$\mathbb{P}(X^{t+1}_j = 1 | X^t) = f(\Theta_j \cdot X^t)$$
+ \end{framed}
+ \item {\bf Decomposable} node by node conditionally on infected nodes
+  \item Examples (a numeric sketch follows this frame):
+ \begin{itemize}
+   \item IC model: $f : z \mapsto 1 - e^z$
+   \item VT model: $f : z \mapsto z$
+   \item Discretized continuous-time diffusion model: $f : z \mapsto 1 - e^{-z}$
+ \end{itemize}
+\end{itemize}
+
+\end{frame}
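+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% Hypothetical illustration: the three link functions above, checked
+% numerically on the earlier two-parent example (weights .72 and .04).
+\begin{frame}[fragile]
+\frametitle{Generalized Linear Cascades: Numeric Sketch}
+A sketch, assuming $z = \Theta_j \cdot X^t$; the ``CT'' label for the discretized continuous model is ours.
+{\scriptsize
+\begin{verbatim}
+import math
+
+links = {"IC": lambda z: 1 - math.exp(z),    # expects z <= 0
+         "VT": lambda z: z,                  # expects z in [0, 1]
+         "CT": lambda z: 1 - math.exp(-z)}   # expects z >= 0
+
+def infection_prob(f, theta_j, x_t):
+    # P(X^{t+1}_j = 1 | X^t) = f(Theta_j . X^t)
+    return f(sum(t * x for t, x in zip(theta_j, x_t)))
+
+# IC, both parents infected: matches 1 - (1-.72)(1-.04) = .7312
+theta_blue = [math.log(1 - 0.72), math.log(1 - 0.04)]
+print(infection_prob(links["IC"], theta_blue, [1, 1]))
+\end{verbatim}
+}
+\end{frame}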
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Setup}
+\begin{figure}
+\centering
+\includegraphics[scale=.6]{../images/drawing.pdf}
+\caption{$\mathbb{P}[X^{t+1}_j = 1 \,|\, X^t] = f(\Theta_j \cdot X^t)$}
+\end{figure}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}
+\frametitle{Sparse Recovery}
+\begin{itemize}
+\item Solving $Ax = b$ when $A$ is {\bf degenerate} (more unknowns than equations) is still possible if:
+\begin{itemize}
+\item $A$ is {\bf almost invertible}
+\item $x$ is {\bf sparse}
+\end{itemize}
+\item If $x$ minimizes a convex loss $L$, then, following {\small \cite{Negahban:2009}}, solving:
+\begin{framed}
+\begin{equation*}
+\min_x L(x) + \lambda \| x \|
+\end{equation*}
+\end{framed}
+gives finite-sample guarantees under certain assumptions.
+\end{itemize}
+\end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Result}
+{\bf Assumptions}
+\begin{itemize}
+\item $f$ and $1-f$ are $(1)$ log-concave and $(2)$ have log-gradients bounded by $\frac{1}{\alpha}$
+\item $\nabla^2{\cal L}$ verifies the $(S, \gamma)-({\bf RE})$ condition
+\end{itemize}
+\vspace{1cm}
+{\bf Algorithm}
+\begin{itemize}
+\item Solve the $\ell_1$-penalized MLE program with $\lambda = 2 \sqrt{\frac{\log m}{\alpha n}}$ (a solver sketch follows this frame):
+\begin{framed}
+\begin{equation*}
+  \hat \theta_i \in \arg \max_{\theta_i} {\cal L}_i(\theta_i \,|\, x^1,
+  \dots, x^n) - \lambda \|\theta_i\|_1
+\end{equation*}
+\end{framed}
+\end{itemize}
+\end{frame}
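+
+%%%%%%%%%%%%%%%%%%%%%%
+
+% Hypothetical illustration, not the paper's code: one way to solve the
+% penalized MLE above with an off-the-shelf solver, for the IC link only.
+\begin{frame}[fragile]
+\frametitle{Penalized MLE: Solver Sketch}
+A sketch for the IC link $f(z) = 1 - e^z$; since $\theta \leq 0$ there, $\|\theta\|_1$ is smooth on the feasible box, so a box-constrained quasi-Newton solver suffices.
+{\scriptsize
+\begin{verbatim}
+import numpy as np
+from scipy.optimize import minimize
+
+def fit_node(X_prev, y_next, lam):
+    # X_prev: (n, m) 0/1 rows X^t (steps where node j was susceptible)
+    # y_next: length-n 0/1 vector of X^{t+1}_j
+    n, m = X_prev.shape
+    def neg_penalized_ll(theta):
+        z = X_prev @ theta                  # Theta_j . X^t  (<= 0)
+        ll = y_next * np.log(1 - np.exp(z) + 1e-12) + (1 - y_next) * z
+        return -ll.sum() / n + lam * np.abs(theta).sum()
+    res = minimize(neg_penalized_ll, -0.1 * np.ones(m),
+                   method="L-BFGS-B", bounds=[(None, -1e-9)] * m)
+    return res.x        # recover p_ij = 1 - exp(theta_ij)
+\end{verbatim}
+}
+Called with $\lambda = 2\sqrt{\log m / (\alpha n)}$ as on the previous frame.
+\end{frame}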
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Theorem}
+If the previous assumptions are met, then with high probability,
+ \begin{framed}
+ \begin{equation*}
+ \|\hat \theta - \theta^*\|_2 \leq \frac{6}{\gamma} \sqrt{\frac{s \log
+ m}{\alpha n}}
+ \end{equation*}
+ \end{framed}
+ where
+ \begin{itemize}
+ \item $s$ is the degree of the node,
+ \item $m$ is the number of nodes,
+ \item $n$ is the number of observations,
+ \item $\alpha$ is the gradient bound,
+ \item $\gamma$ is the $({\bf RE})$-constant
+ \end{itemize}
+\end{frame}
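+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+% Hypothetical numeric instantiation of the bound above; the parameter
+% values are ours, chosen only to show the scaling.
+\begin{frame}
+\frametitle{Theorem: Numeric Illustration}
+With, e.g., $\gamma = 1$, $\alpha = 1$, $s = 5$, $m = 10^4$, $n = 10^5$ (natural log):
+\begin{equation*}
+\|\hat \theta - \theta^*\|_2 \leq 6 \sqrt{\frac{5 \log 10^4}{10^5}} \approx 6 \sqrt{4.6 \times 10^{-4}} \approx 0.13
+\end{equation*}
+The $\ell_2$ error shrinks like $1/\sqrt{n}$ once $n \gg s \log m$.
+\end{frame}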
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Restricted Eigenvalue Condition}
+\begin{block}{Almost sparse vectors}
+\begin{itemize}
+\item ${\cal C} := \{ X : \|X\|_2 = 1, \|X_{\bar S}\|_1 \leq 3 \| X_S\|_1 \}$
+\end{itemize}
+\end{block}
+\begin{definition}
+$A$ verifies the $(S, \gamma)$-({\bf RE}) condition \cite{bickel2009simultaneous} if:
+$$\forall X \in {\cal C}, X^T A X \geq \gamma$$
+\end{definition}
+\begin{block}{Properties}
+\begin{itemize}
+\item If $\mathbb{E}[A]$ verifies the $(S, \gamma)$-({\bf RE}) condition,
+  then $A$ verifies the $(S, \gamma/2)$-({\bf RE})
+  condition~{\small \cite{vandegeer:2009}} (a Monte-Carlo check sketch follows this frame)
+\end{itemize}
+\end{block}
+\end{frame}
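+
+%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% Hypothetical illustration: a Monte-Carlo (non-exhaustive) check of the
+% (S, gamma)-RE condition as defined above; the sampling scheme is ours.
+\begin{frame}[fragile]
+\frametitle{Restricted Eigenvalue Condition: Check Sketch}
+A sketch that samples the cone ${\cal C}$; finding no violation is evidence, not a proof, that ({\bf RE}) holds.
+{\scriptsize
+\begin{verbatim}
+import numpy as np
+
+def re_violation_search(A, S, gamma, trials=20000, seed=0):
+    # Sample unit vectors in C = {x : ||x_Sc||_1 <= 3 ||x_S||_1}
+    # and test x^T A x >= gamma.
+    rng = np.random.default_rng(seed)
+    m = A.shape[0]
+    Sc = np.setdiff1d(np.arange(m), S)
+    for _ in range(trials):
+        x = rng.standard_normal(m)
+        budget = 3 * np.abs(x[S]).sum()
+        l1_off = np.abs(x[Sc]).sum()
+        if l1_off > budget:            # rescale into the cone
+            x[Sc] *= budget / l1_off
+        x /= np.linalg.norm(x)
+        if x @ A @ x < gamma:
+            return x                   # witness of a violation
+    return None
+
+print(re_violation_search(np.eye(3), S=np.array([0]), gamma=0.5))  # None
+\end{verbatim}
+}
+\end{frame}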
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Restricted Eigenvalue Condition}
+\begin{itemize}
+\item If $f$ and $1-f$ are $c$-strictly log-concave and the {\bf Gram matrix} verifies the $\gamma$-({\bf RE}) condition, then the Hessian verifies the $c\gamma$-({\bf RE}) condition (an empirical-Gram sketch follows this frame).
+\end{itemize}
+
+\begin{align*}
+\mathbb{E}[X^T X] = \begin{pmatrix}
+a_1 & b_{1,2} & \cdots & b_{1,m} \\
+\vdots & \ddots & & \vdots \\
+b_{1,m} & \cdots & & a_m
+\end{pmatrix}
+\end{align*}
+where
+\begin{itemize}
+\item $a_i \equiv$ ``average time node $i$ is infected''
+\item $b_{i,j} \equiv$ ``average time node $i$ and node $j$ are infected \emph{together}''
+\end{itemize}
+\end{frame}
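+
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+% Hypothetical illustration: estimating the (normalized) Gram matrix above
+% from observed infection indicators; the toy data are ours.
+\begin{frame}[fragile]
+\frametitle{Empirical Gram Matrix: Sketch}
+A sketch, assuming the rows of $X$ are the indicator vectors $X^t$.
+{\scriptsize
+\begin{verbatim}
+import numpy as np
+
+def empirical_gram(X):
+    # diagonal a_i: fraction of steps node i is infected
+    # off-diagonal b_ij: fraction of steps i and j are infected together
+    return (X.T @ X) / X.shape[0]
+
+# Toy data: node 0 infected at both steps, node 1 only at the second.
+X = np.array([[1, 0], [1, 1]])
+print(empirical_gram(X))   # [[1.0, 0.5], [0.5, 0.5]]
+\end{verbatim}
+}
+\end{frame}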
+
+%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}
+\frametitle{Conclusion}
+\begin{itemize}
+\item Introduced Generalized Linear Cascades
+\item Better finite-sample guarantees
+\item Interpretable conditions
+\item Lower bound + approximately sparse case developed in the full paper~\cite{Pouget:2015}
+\end{itemize}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% \begin{frame}
+% \frametitle{Independent Cascade Model}
+% For each susceptible node $j$, the event that it becomes {\color{blue} infected} conditioned on previous time step is a Bernoulli variable:
+% $$(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$$
+% \begin{itemize}
+% \item $\theta_{i,j} := \log(1 - p_{i,j})$
+% \item $\theta_j := (0, 0, 0, \theta_{4,j}, 0 \dots, \theta_{k,j}, \dots)$
+% \item $f : x \mapsto 1 - e^x$
+% \begin{align*}
+% \mathbb{P}(j\in X_{t+1}|X_{t}) & = 1 - \prod_{i \in {\cal N}(j) \cap X_{t}} (1 - p_{i,j}) \\
+% & = 1 - \exp \left[ \sum_{i \in {\cal N}(j) \cap X_{t}} \log(1 - p_{i,j}) \right] \\
+% & = 1 - \exp \left[ X_{t} \cdot \theta_{j}\right]
+% \end{align*}
+% \end{itemize}
+% \end{frame}
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% \begin{frame}
+% \frametitle{Independent Cascade Model}
+% For each susceptible node $j$, the event that it becomes {\color{blue} infected} conditioned on previous time step is a Bernoulli variable:
+% $$(j \in X_{t+1} | X_t) \sim {\cal B} \big(f(X_t \cdot \theta_j) \big)$$
+
+% \begin{block}{Decomposability}
+% \begin{itemize}
+% \item Conditioned on $X_t$, the state of the nodes at the next time step are mutually independent
+% \item We can learn the parents of each node independently
+% \end{itemize}
+% \end{block}
+
+% \begin{block}{Sparsity}
+% \begin{itemize}
+% \item $\theta_{i,j} = 0 \Leftrightarrow \log(1 - p_{i,j}) = 0 \Leftrightarrow p_{i,j} = 0$
+% \item If graph is ``sparse'', then $p_{j}$ is sparse, then $\theta_j$ is sparse.
+% \end{itemize}
+% \end{block}
+% \end{frame}
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+% \begin{frame}
+% \frametitle{Learning from Diffusion Processes}
+% \begin{block}{Problem Statement}
+% \begin{itemize}
+% \item We are given a graph ${\cal G}$, and a diffusion process $f$ parameterized by $\left((\theta_j)_j, p_{\text{init}}\right)$.
+% \item Suppose we {\bf only} observe $(X_t)$ from the diffusion process.
+% \item Under what conditions can we learn $\theta_{i,j}$ for all $(i,j)$?
+% \end{itemize}
+% \end{block}
+% \end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% \begin{frame}
+% \frametitle{Sparse Recovery}
+% \begin{figure}
+% \centering
+% \includegraphics[scale=.6]{../images/drawing.pdf}
+% \caption{$\mathbb{P}(j \in X_{t+1}| X_t) = f(X_t\cdot \theta)$}
+% \end{figure}
+% \end{frame}
+
+
+
+%%%%%%%%%%%%%%%%%%%%%
+
+% \begin{frame}
+% \frametitle{Learning from Diffusion Processes}
+
+% % \begin{figure}
+% % \includegraphics[scale=.4]{../images/sparse_recovery_illustration.pdf}
+% % \caption{Generalized Cascade Model for node $i$}
+% % \end{figure}
+
+% \begin{block}{Likelihood Function}
+% \begin{align*}
+% {\cal L}(\theta_1, \dots, \theta_m| X_1, \dots X_n) = \sum_{i=1}^m \sum_{t} & X_{t+1}^i \log f(\theta_i \cdot X_t) + \\
+% & (1 - X_{t+1}^i) \log(1 - f(\theta_i \cdot X_t))
+% \end{align*}
+% \end{block}
+
+% \begin{block}{Penalized log-likelihood}
+% For each node $i$, $$\theta_i \in \arg \max \frac{1}{n_i}{\cal {L}}_i(\theta_i | X_1, X_2, \dots, X_{n_i}) - \lambda \|\theta_i\|_1$$
+% \end{block}
+
+% \end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%
+
+% \begin{frame}
+% \frametitle{Conditions}
+% \begin{block}{On $f$}
+% \begin{itemize}
+% \item $\log f$ and $\log (1-f)$ have to be concave
+% \item $\log f$ and $\log (1-f)$ have to have bounded gradient
+% \end{itemize}
+% \end{block}
+
+% \begin{block}{On $(X_t)$}
+% \begin{itemize}
+% \item Want ${\cal H}$, the hessian of ${\cal L}$ with respect to $\theta$, to be well-conditioned.
+% \item $ n < dim(\theta) \implies {\cal H}$ is degenerate.
+% \item {\bf Restricted Eigenvalue condition} = invertible on ``almost sparse'' vectors.
+% \end{itemize}
+% \end{block}
+
+% \end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+% %%%%%%%%%%%%%%%%%%%%%%%%
+
+% \begin{frame}
+% \frametitle{Main Result}
+% Adapting a result from \cite{Negahban:2009}, we have the following theorem:
+
+% \begin{theorem}
+% For node $i$, assume
+% \begin{itemize}
+% \item the Hessian verifies the $(S,\gamma)$-RE condition where $S$ is the set of parents of node $i$ (support of $\theta_i$)
+% \item $f$ and $1-f$ are log-concave
+% \item $|(\log f)'| < \frac{1}{\alpha}$ and $|(\log 1- f)'| < \frac{1}{\alpha}$
+% \end{itemize} then with high probability:
+% $$\| \theta^*_i - \hat \theta_i \|_2 \leq \frac{6}{\gamma}\sqrt{\frac{s\log m}{\alpha n}}$$
+% \end{theorem}
+
+% \begin{corollary}
+% By thresholding $\hat \theta_i$, if $n > C' s \log m$, we recover the support of $\theta^*$ and therefore the edges of ${\cal G}$
+% \end{corollary}
+
+% \end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%
+
+% \begin{frame}
+% \frametitle{Voter Model}
+% \begin{itemize}
+
+% \item {\color{red} Red} and {\color{blue} Blue} nodes. At every step, each node $i$ chooses one of its neighbors $j$ with probability $p_{j,i}$ and adopts that color at $t+1$
+
+% \item If {\color{blue} Blue} is `contagious' state:
+
+% \begin{equation}
+% \nonumber
+% \mathbb{P}(i \in X^{t+1}|X^t) = \sum_{j \in {\cal N}(i)\cap X^t} p_{ji} = X^t \cdot \theta_i
+% \end{equation}
+% \end{itemize}
+% \end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Future Work}
+\begin{itemize}
+\item Lower-bound the restricted eigenvalues of the expected Gram matrix
+\item Confidence intervals
+\item Show that $n > C' s \log m$ measurements are necessary w.r.t.\ the expected Hessian
+\item Linear Threshold model $\rightarrow$ 1-bit compressed sensing formulation
+\item Better lower bounds
+\item Active Learning
+\end{itemize}
+\end{frame}
+
+
+%%%%%%%%%%%%%%%%%
+{\scriptsize
+\bibliography{../../paper/sparse}
+\bibliographystyle{apalike}
+}
+%%%%%%%%%%%%%%%%%%%
+
+
+\begin{frame}
+\frametitle{Analysis}
+
+\begin{block}{Guarantees}
+\begin{itemize}
+\item Positive result despite correlated measurements \smiley
+\item Several measurements per cascade
+\item Good finite-sample guarantee
+\end{itemize}
+\end{block}
+
+\begin{block}{Assumptions}
+\begin{itemize}
+\item The Hessian must verify the $(S,\gamma)$-RE condition \frownie
+\item Can we state the condition on $\Theta$ rather than on $X^t$?
+\end{itemize}
+\end{block}
+
+\end{frame}
+
+
+
+%%%%%%%%%%%%%%%%%%%%
+
+
+
+
+
+\begin{frame}
+\frametitle{Restricted Eigenvalue Condition}
+
+\begin{block}{From Hessian to Gram Matrix}
+\begin{itemize}
+\item If $f$ and $1-f$ are strictly log-concave with constant $c$, and $(S, \gamma)$-RE holds for the Gram matrix $\frac{1}{n}X^T X$, then $(S, c\gamma)$-RE holds for ${\cal H}$
+\end{itemize}
+\end{block}
+
+\begin{block}{From Gram Matrix to Expected Gram Matrix}
+\begin{itemize}
+\item If $n > C' s^2 \log m$ and $(S, \gamma)$-RE holds for $\mathbb{E}(\frac{1}{n}X^T X)$, then $(S, \gamma/2)$-RE holds for $\frac{1}{n}X^T X$ w.h.p.
+\item $\mathbb{E}(\frac{1}{n}X^T X)$ only depends on $p_{\text{init}}$ and $(\theta_j)_j$
+\end{itemize}
+\end{block}
+
+\begin{block}{Expected Gram Matrix}
+\begin{itemize}
+\item Diagonal: average number of times a node is infected
+\item Off-diagonal: average number of times a pair of nodes is infected \emph{together}
+\end{itemize}
+\end{block}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Approximate Sparsity}
+\begin{itemize}
+\item $\theta^*_{\lceil s \rceil}$: the best $s$-sparse approximation to $\theta^*$ (computed in the sketch after this frame)
+\item $\|\theta^* - \theta^*_{\lceil s \rceil} \|_1$: the `tail' of $\theta^*$
+\end{itemize}
+\begin{theorem}
+Under similar conditions on $f$ and a relaxed RE condition, there exist $C_1, C_2 > 0$ such that with high probability:
+\begin{equation}
+\|\hat \theta_i - \theta^*_i\|_2 \leq C_1 \sqrt{\frac{s\log m}{n}} + C_2 \sqrt[4]{\frac{s\log m}{n}}\|\theta^* - \theta^*_{\lceil s \rceil} \|_1
+\end{equation}
+\end{theorem}
+\end{frame}
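+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+% Hypothetical illustration: computing the best s-sparse approximation and
+% the l1 tail appearing in the bound above; the vector is ours.
+\begin{frame}[fragile]
+\frametitle{Best $s$-sparse Approximation: Sketch}
+A sketch: keep the $s$ largest-magnitude entries and zero out the rest.
+{\scriptsize
+\begin{verbatim}
+import numpy as np
+
+def best_s_sparse(theta, s):
+    out = np.zeros_like(theta)
+    keep = np.argsort(np.abs(theta))[-s:]   # s largest |entries|
+    out[keep] = theta[keep]
+    return out
+
+theta = np.array([0.9, -0.05, 0.4, 0.01])
+approx = best_s_sparse(theta, 2)            # [0.9, 0, 0.4, 0]
+tail = np.abs(theta - approx).sum()         # l1 'tail' = 0.06
+print(approx, tail)
+\end{verbatim}
+}
+\end{frame}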
+
+%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+\frametitle{Lower Bound}
+\begin{itemize}
+\item Under a correlation decay assumption for the IC model, $\Omega(s \log (N/s))$ cascades are necessary for graph reconstruction (Netrapalli and Sanghavi, SIGMETRICS'12)
+\item Adapting (Price \& Woodruff, STOC'12): in the approximately sparse case, any algorithm for any generalized linear cascade model achieving
+$$\|\hat \theta - \theta^*\|_2 \leq C \|\theta^* - \theta^*_{\lceil s \rceil}\|_2$$
+requires $\Omega(s \log (m/s)/\log C)$ measurements.
+\end{itemize}
+\end{frame}
+
+\end{document} \ No newline at end of file