1 files changed, 33 insertions, 12 deletions
diff --git a/paper/sections/results.tex b/paper/sections/results.tex
index 95a0826..c9f267a 100644
--- a/paper/sections/results.tex
+++ b/paper/sections/results.tex
@@ -56,9 +56,9 @@ have $\Theta_{i,j} = \log(1-p_{i,j})$. These assumptions are reasonable: if an e
 \begin{theorem}
 \label{thm:main}
 Assume the Hessian $\nabla^2\mathcal{L}(\theta^*)$ satisfies the
-$(S,\gamma)$-{\bf(RE)} for some $\gamma > 0$ and that {\bf (LF)} holds for
-some $\alpha > 0$. For any $\delta\in(0,1)$, let $\hat{\theta}$ be the solution
-of \eqref{eq:pre-mle} with $\lambda \defeq 2\sqrt{\frac{\log m}{\alpha n^{1
+$(S,\gamma)$-{\bf(RE)} for some $\gamma > 0$ and that {\bf (LF)} holds for some
+$\alpha > 0$. For any $\delta\in(0,1)$, let $\hat{\theta}$ be the solution of
+\eqref{eq:pre-mle} with $\lambda \defeq 2\sqrt{\frac{\log m}{\alpha n^{1
 - \delta}}}$, then:
 \begin{equation}
     \label{eq:sparse}
@@ -75,9 +75,9 @@ Before moving to the proof of Theorem~\ref{thm:main}, note that interpreting it
 in the case of the Independent Cascade Model requires one more step. Indeed, to
 cast it as a generalized linear cascade model, we had to perform the
 transformation $\Theta_{i,j} = \log(1-p_{i,j})$, where $p_{i,j}$ are the
-infection probabilities. Fortunately, if we estimate $p_j$ through
-$\hat{p}_{j} = \hat{\theta}_j$, a bound on $\|\hat\theta - \theta^*\|_2$
-directly implies a bound on $\|\hat p - p^*\|$. Indeed we have:
+infection probabilities. Fortunately, if we estimate $p_j$ through $\hat{p}_{j}
+= \hat{\theta}_j$, a bound on $\|\hat\theta - \theta^*\|_2$ directly implies
+a bound on $\|\hat p - p^*\|$. Indeed we have:
 
 \begin{lemma}
     $\|\hat{\theta} - \theta' \|_2 \geq \|\hat{p} - p^*\|_2$.
@@ -229,7 +229,7 @@ Let $\theta^*_{\lfloor s \rfloor}$ be the best s-sparse approximation to the tru
 {\cal C}' \defeq & \{X \in \mathbb{R}^p : \|X_{S^c}\|_1 \leq 3 \|X_S\|_1 + 4 \|\theta^*_{\lfloor s \rfloor}\|_1 \} \\ \nonumber
 & \cap \{ \|X\|_1 \leq 1 \}
 \end{align}
-By solving \eqref{eq:pre-mle} for $\lambda \defeq 2\sqrt{\frac{\log m}{n^{1 - \delta} p_{\min}}}$ we have:
+By solving \eqref{eq:pre-mle} for $\lambda \defeq 2\sqrt{\frac{\log m}{\alpha n^{1 - \delta} p_{\min}}}$ we have:
 \begin{align}
 \|\hat p - p^* \|_2 \leq 3 \frac{\sqrt{s}\lambda_n}{\gamma_n} + 2 \sqrt{\frac{\lambda_n}{\gamma_n}} \|p^*_{\lfloor s \rfloor}\|_1
 \end{align}
@@ -256,12 +256,33 @@ irrepresentability condition considered in \cite{Daneshmand:2014}.
 
 \paragraph{Interpretation}
 
-The restricted eigenvalue condition, introduced in \cite{bickel:2009}, is one
-of the weakest sufficient condition on the design matrix for successful sparse
-recovery \cite{vandegeer:2009}. Several recent papers show that large classes
-of correlated designs obey the restricted eigenvalue property with high
-probability \cite{raskutti:10} \cite{rudelson:13}. 
+There exists a large class of sufficient conditions under which sparse recovery
+is achievable in the context of regularized estimation. A good survey on these
+different assumptions can be found in \cite{vandegeer:2009}.
 
+The restricted eigenvalue condition introduce in \cite{bickel:2009} is one of
+the weakest such assumption. It can be interpreted as a restricted form of
+non-degeneracy. Since we apply it to the Hessian of the log-likelihood function
+$\nabla^2 \mathcal{L}(\theta)$, it essentially reduces to a form of restricted
+strong convexity, that Lemma~\ref{lem:negahban} ultimately relies on.
+
+In our case we have:
+\begin{multline*}
+    \nabla^2\mathcal{L}(\theta^*)
+    = \frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}}x^t(x^t)^T
+    \bigg[x_i^{t+1}\frac{f''f-f'^2}{f^2}(\inprod{\theta^*}{x^t})\\
+    -(1-x_i^{t+1})\frac{f''(1-f) + f'^2}{(1-f)^2}(\inprod{\theta^*}{x^t})\bigg]
+\end{multline*}
+It is interesting to observe that the Hessian of $\mathcal{L}$ can be seen as
+a re-weighted Gram matrix of the observations. In other words, the restricted
+eigenvalue condition expresses that the observed set of active nodes are not
+too collinear with each other.
+
+In the specific case of ``logistic cascades'' (where $f$ is the logistic
+function), the Hessian simplifies to $\nabla^2\mathcal{L}(\theta^*)
+= \frac{1}{|\mathcal{T}|}XX^T$ where $X$ is the design matrix $[x^1 \ldots
+x^\mathca{|T|}]$. The restricted eigenvalue condition is equivalent in this
+case to the assumption made in the Lasso analysis of \cite{bickel:2009}.
 
 \paragraph{(RE) with high probability}