aboutsummaryrefslogtreecommitdiffstats
path: root/paper/sections/results.tex
diff options
context:
space:
mode:
Diffstat (limited to 'paper/sections/results.tex')
-rw-r--r--paper/sections/results.tex48
1 files changed, 16 insertions, 32 deletions
diff --git a/paper/sections/results.tex b/paper/sections/results.tex
index 2292ce3..cab27a7 100644
--- a/paper/sections/results.tex
+++ b/paper/sections/results.tex
@@ -139,16 +139,6 @@ S}^*_{\eta + \epsilon} \subseteq \hat {\cal S}_\eta \subseteq {\cal S}^*$. In
other words we recover all `strong' parents and no `false' parents.
\end{corollary}
-\begin{proof}
-By choosing $\delta = 0$, if $ n > \frac{9s\log m}{\alpha\gamma^2\epsilon^2}$,
-then $\|\hat \theta-\theta^*\|_2 < \epsilon < \eta$ with probability
-$1-\frac{1}{m}$. If $\theta_i^* = 0$ and $\hat \theta > \eta$, then $\|\hat
-\theta - \theta^*\|_2 \geq |\hat \theta_i-\theta_i^*| > \eta$, which is a
-contradiction. Therefore we get no false positives. If $\theta_i^* > \eta +
-\epsilon$, then $|\hat{\theta}_i- \theta_i^*| < \epsilon \implies \theta_j >
-\eta$ and we get all strong parents.
-\end{proof}
-
Assuming we know a lower bound $\alpha$ on $\Theta_{i,j}$,
Corollary~\ref{cor:variable_selection} can be applied to the Network Inference
problem in the following manner: pick $\epsilon = \frac{\eta}{2}$ and $\eta
@@ -199,9 +189,11 @@ solving \eqref{eq:pre-mle} for $\lambda \defeq 2\sqrt{\frac{\log m}{\alpha n^{1
\end{align*}
\end{theorem}
-As before, edge recovery is a consequence of upper-bounding $\|\theta^* - \hat
-\theta\|_2$
+As in Corollary~\ref{cor:variable_selection}, an edge recovery guarantee can be
+derived from Theorem~\ref{thm:approx_sparse} in the case of approximate
+sparsity.
+\begin{comment}
\begin{corollary}
Under the same assumptions as Theorem~\ref{thm:approx_sparse}, if the number
of measurements verifies: \begin{equation}
@@ -212,22 +204,18 @@ As before, edge recovery is a consequence of upper-bounding $\|\theta^* - \hat
then: ${\cal S}^*_{\eta + \epsilon} \subset \hat {\cal S}_\eta
\subset {\cal S}^*$ w.p. at least $1-\frac{1}{m}$.
\end{corollary}
+\end{comment}
\subsection{Restricted Eigenvalue Condition}
\label{sec:re}
-In this section, we discuss the main assumption of Theorem~\ref{thm:main} namely
-the restricted eigenvalue condition introduced by~\cite{bickel2009simultaneous}.
-
-\paragraph{Interpretation}
-
There exists a large class of sufficient conditions under which sparse recovery
is achievable in the context of regularized estimation~\cite{vandegeer:2009}.
-The restricted eigenvalue condition is one of the weakest such assumption. It
-can be interpreted as a restricted form of non-degeneracy. Since we apply it to
-the Hessian of the log-likelihood function $\nabla^2 \mathcal{L}(\theta)$, it
-essentially reduces to a form of restricted strong convexity, that
-Lemma~\ref{lem:negahban} ultimately relies on.
+The restricted eigenvalue condition, introduced in \cite{bickel:2009}, is one
+of the weakest such assumptions. It can be interpreted as a restricted form of
+non-degeneracy. Since we apply it to the Hessian of the log-likelihood function
+$\nabla^2 \mathcal{L}(\theta)$, it essentially reduces to a form of restricted
+strong convexity, which Lemma~\ref{lem:negahban} ultimately relies on.
Observe that the Hessian of $\mathcal{L}$ can be seen as a re-weighted
\emph{Gram matrix} of the observations:
@@ -238,16 +226,12 @@ Observe that the Hessian of $\mathcal{L}$ can be seen as a re-weighted
-(1-x_i^{t+1})\frac{f''(1-f) + f'^2}{(1-f)^2}(\inprod{\theta^*}{x^t})\bigg]
\end{multline*}
If $f$ and $1-f$ are $c$-strictly log-convex~\cite{bagnoli2005log} for $c>0$,
-then
-\begin{displaymath}
- \min\left(\frac{f''f-f'^2}{f^2}(x), \frac{f''(1-f) + f'^2}{(1-f)^2}(x) \right)
- \geq c
-\end{displaymath}
-and the $(S, \gamma)$-({\bf RE}) condition on the Hessian in
-Theorem~\ref{thm:main} and Theorem~\ref{thm:approx_sparse} reduces to a
-condition on the \emph{Gram matrix} of the observations $X^T X =
-\frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}}x^t(x^t)^T$ for $\gamma' \defeq
-\gamma\cdot c$.
+then $ \min\left((\log f)'', (\log (1-f))'' \right) \geq c $. This implies that
+the $(S, \gamma)$-({\bf RE}) condition in Theorem~\ref{thm:main} and
+Theorem~\ref{thm:approx_sparse} reduces to a condition on the \emph{Gram
+matrix} of the observations $X^T
+X = \frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}}x^t(x^t)^T$ for $\gamma'
+\defeq \gamma\cdot c$.
\paragraph{(RE) with high probability}