In \cite{Netrapalli:2012}, the authors establish a lower bound of $\Omega(s\log\frac{m}{s})$ on the number of cascades necessary to achieve good support recovery with constant probability under a \emph{correlation decay} assumption. In this section, we consider the stable sparse recovery setting of Section~\ref{sec:relaxing_sparsity}. Our goal is to obtain an information-theoretic lower bound on the number of measurements necessary to approximately recover the parameter $\theta^*$ of a cascade model from observed cascades. Similar lower bounds were obtained for sparse \emph{linear} inverse problems in \cite{pw11, pw12, bipw11}.

\begin{theorem}
\label{thm:lb}
Consider a cascade model of the form \eqref{eq:glm} and a recovery algorithm $\mathcal{A}$ which takes as input $n$ random cascade measurements and outputs $\hat{\theta}$ such that, with probability $\delta>\frac{1}{2}$ (over the measurements),
\begin{equation}
\label{eq:lb}
\|\hat{\theta}-\theta^*\|_2\leq C\min_{\|\theta\|_0\leq s}\|\theta-\theta^*\|_2
\end{equation}
where $\theta^*$ is the true parameter of the cascade model. Then $n = \Omega(s\log\frac{m}{s}/\log C)$.
\end{theorem}

This theorem should be contrasted with Theorem~\ref{thm:approx_sparse}: up to an additive $s\log s$ term, the number of measurements required by our algorithm is tight. The proof of Theorem~\ref{thm:lb} follows an approach similar to \cite{pw12}. We present a sketch of the proof in the Appendix and refer the reader to their paper for more details.
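To make the tightness claim above concrete, suppose (as suggested by the additive $s\log s$ gap) that the guarantee of Theorem~\ref{thm:approx_sparse} uses on the order of $s\log m$ measurements; for a constant approximation factor $C$, the upper and lower bounds then differ only through the elementary identity
\[
s\log m \;=\; s\log\frac{m}{s} \;+\; s\log s,
\]
and the extra $s\log s$ term never exceeds the $s\log\frac{m}{s}$ lower bound whenever $s\leq\sqrt{m}$.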