Diffstat (limited to 'paper/sections/results.tex')
 -rw-r--r--  paper/sections/results.tex | 62
 1 file changed, 31 insertions, 31 deletions
diff --git a/paper/sections/results.tex b/paper/sections/results.tex
index c83e4d6..f3c5366 100644
--- a/paper/sections/results.tex
+++ b/paper/sections/results.tex
@@ -20,7 +20,7 @@ a set $\mathcal{T}$ of observations. We will write $n\defeq |\mathcal{T}|$.
 \label{sec:main_theorem}
 In this section, we analyze the case where $\theta^*$ is exactly sparse. We
-write $S\defeq\text{supp}(\theta^*)$ and $s=|S|$. Recall, that $\theta_i$ is the vector of weights for all edges \emph{directed at} the node we are solving for. In other words, $S$ is the set of all nodes susceptible to influence our node $i$, also referred to as its parents. Our main theorem will rely on the now standard \emph{restricted eigenvalue condition}.
+write $S\defeq\text{supp}(\theta^*)$ and $s=|S|$. Recall that $\theta_i$ is the vector of weights for all edges \emph{directed at} the node we are solving for. In other words, $S$ is the set of all nodes susceptible to influence node $i$, also referred to as its parents. Our main theorem will rely on the now standard \emph{restricted eigenvalue condition}.
 
 \begin{definition}
 Let $\Sigma\in\mathcal{S}_m(\reals)$ be a real symmetric matrix and $S$ be
@@ -37,8 +37,8 @@ write $S\defeq\text{supp}(\theta^*)$ and $s=|S|$. Recall, that $\theta_i$ is the
 A discussion of the $(S,\gamma)$-{\bf(RE)} assumption in the context of
 generalized linear cascade models can be found in Section~\ref{sec:re}. In our
-setting we will require that this property holds for the Hessian of the
-log-likelihood function $\mathcal{L}$ and essentially captures that the binary
+setting we require that the (RE)-condition holds for the Hessian of the
+log-likelihood function $\mathcal{L}$: it essentially captures the fact that the binary
 vectors of the set of active nodes are not \emph{too} collinear. We will also
 need the following assumption on the inverse link function $f$ of the
 generalized linear cascade model:
@@ -53,8 +53,8 @@ $f(\inprod{\theta^*}{x})\notin\{0,1\}$.
 In the voter model, $\frac{f'}{f}(z) = \frac{1}{z}$ and $\frac{f'}{f}(1-z)
 = \frac{1}{1-z}$. Hence (LF) will hold as soon as $\alpha\leq \Theta_{i,j}\leq
-1-\alpha$ for all $(i,j)\in E$ which is always satisfied for some $\alpha$.
-Similarly, in the Independent Cascade Model, $\frac{f'}{f}(z)
+1-\alpha$ for all $(i,j)\in E$, which is always satisfied for some $\alpha$ for non-isolated nodes.
+In the Independent Cascade Model, $\frac{f'}{f}(z)
 = \frac{1}{1-e^{-z}}$ and $\frac{f'}{1-f}(z) = -1$ and (LF) is not
 restrictive.
 
 \begin{comment}
@@ -74,7 +74,7 @@ $\alpha > 0$. For any $\delta\in(0,1)$, let $\hat{\theta}$ be the solution of
 \begin{equation}
 \label{eq:sparse}
 \|\hat \theta - \theta^* \|_2
-  \leq \frac{3}{\gamma} \sqrt{\frac{s \log m}{\alpha n^{1-\delta}}}
+  \leq \frac{6}{\gamma} \sqrt{\frac{s \log m}{\alpha n^{1-\delta}}}
 \quad \text{w.p.}\;1-\frac{1}{e^{n^\delta \log m}}
 \end{equation}
@@ -91,10 +91,10 @@ cast it as a generalized linear cascade model, we had to perform the
 transformation $\Theta_{i,j} = \log(1-p_{i,j})$, where $p_{i,j}$ are the
 infection probabilities. Fortunately, if we estimate $p_j$ through $\hat{p}_{j}
 = \hat{\theta}_j$, a bound on $\|\hat\theta - \theta^*\|_2$ directly implies
-a bound on $\|\hat p - p^*\|$. Indeed we have:
+a bound on $\|\hat p - p^*\|_2$:
 \begin{lemma}
-  $\|\hat{\theta} - \theta' \|_2 \geq \|\hat{p} - p^*\|_2$.
+  $\|\hat{\theta} - \theta^* \|_2 \geq \|\hat{p} - p^*\|_2$.
 \end{lemma}
 \begin{proof}
 Using the inequality $\forall x>0, \; \log x \geq 1 - \frac{1}{x}$, we have
@@ -115,7 +115,7 @@ Let ${\cal C}(S) \defeq \{ \Delta \in \mathbb{R}^m\,|\,\|\Delta_S\|_1 \leq
 \label{eq:rc}
 \forall \Delta \in {\cal C}(S), \;
 {\cal L}(\theta^* + \Delta) - {\cal L}(\theta^*)\\
-  - \inprod{\Delta {\cal L}(\theta^*)}{\Delta}
+  - \inprod{\nabla {\cal L}(\theta^*)}{\Delta}
 \geq \kappa_{\cal L} \|\Delta\|_2^2 - \tau_{\cal L}^2(\theta^*)
 \end{multline}
 for some $\kappa_{\cal L} > 0$ and function $\tau_{\cal L}$. Finally suppose
@@ -131,7 +131,7 @@ $\hat{\theta}_\lambda$ is the solution of \eqref{eq:pre-mle}:
 To prove Theorem~\ref{thm:main}, we apply Lemma~\ref{lem:negahban} with
 $\tau_{\mathcal{L}}=0$. Since $\mathcal{L}$ is twice differentiable and convex,
 assumption \eqref{eq:rc} with $\kappa_{\mathcal{L}}=\frac{\gamma}{2}$ is
-implied by the (RE) condition \eqref{eq:re}. The upper bound
+implied by the (RE)-condition. The upper bound
 on the $\ell_{\infty}$ norm of $\nabla\mathcal{L}(\theta^*)$ is given by
 Lemma~\ref{lem:ub}.
@@ -156,7 +156,7 @@ The gradient of $\mathcal{L}$ is given by:
 \end{multline*}
 
 Let $\partial_j \mathcal{L}(\theta)$ be the $j$-th coordinate of
-$\nabla\mathcal{L}(\theta^*)$. Writing:
+$\nabla\mathcal{L}(\theta^*)$. Writing
 $\partial_j\mathcal{L}(\theta^*) = \frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}} Y_t$
 and since $\E[x_i^{t+1}|x^t]= f(\inprod{\theta^*}{x^t})$, we have that $\E[Y_{t+1}|Y_t]
@@ -172,7 +172,7 @@ and we can apply Azuma's inequality to $Z_t$:
 Applying a union bound to have the previous inequality hold for all coordinates
 of $\nabla\mathcal{L}(\theta)$ implies:
 \begin{align*}
-  \P\big[\|\nabla\mathcal{L}(\theta)\|_{\infty}\geq \lambda \big]
+  \P\big[\|\nabla\mathcal{L}(\theta^*)\|_{\infty}\geq \lambda \big]
   &\leq 2m\exp\left(\frac{-\lambda^2n\alpha}{2}\right)
 \end{align*}
 Choosing $\lambda\defeq 2\sqrt{\frac{\log m}{\alpha n^{1-\delta}}}$ concludes the
@@ -186,10 +186,10 @@ solve the Graph Inference problem.
 \begin{corollary}
 \label{cor:variable_selection}
-Under the same assumptions of Theorem~\ref{thm:main}, let $\hat {\cal S}_\eta
+Under the same assumptions as Theorem~\ref{thm:main}, let $\hat {\cal S}_\eta
 \defeq \{ j \in \{1,\ldots, m\} : \hat{\theta}_j > \eta\}$ for $\eta > 0$.
 For $\epsilon>0$ and $\epsilon < \eta$, let ${\cal S}^*_{\eta + \epsilon} \defeq \{
-i \in \{1,\ldots,m\} :\theta_i > \eta +\epsilon \}$ be the set of all true `strong'
+i \in \{1,\ldots,m\} :\theta_i^* > \eta +\epsilon \}$ be the set of all true `strong'
 parents. Suppose the number of measurements verifies: $
 n > \frac{9s\log m}{\alpha\gamma^2\epsilon^2}$. Then with probability
 $1-\frac{1}{m}$, ${\cal S}^*_{\eta + \epsilon} \subseteq
@@ -199,11 +199,11 @@ parents and no `false' parents.
 \begin{proof}
 By choosing $\delta = 0$, if $ n > \frac{9s\log
-m}{\alpha\gamma^2\epsilon^2}$, then $\|\hat \theta-\theta\|_2 < \epsilon < \eta$
-with probability $1-\frac{1}{m}$. If $\theta_i = 0$ and $\hat \theta > \eta$,
-then $\|\hat \theta - \theta\|_2 \geq |\hat \theta_i-\theta_j| > \eta$, which
-is a contradiction. Therefore we get no false positives. If $\theta_i \leq \eta
-+ \epsilon$, then $|\hat{\theta}_i- \theta_i| < \epsilon \implies \theta_j
+m}{\alpha\gamma^2\epsilon^2}$, then $\|\hat \theta-\theta^*\|_2 < \epsilon < \eta$
+with probability $1-\frac{1}{m}$. If $\theta_i^* = 0$ and $\hat \theta_i > \eta$,
+then $\|\hat \theta - \theta^*\|_2 \geq |\hat \theta_i-\theta_i^*| > \eta$, which
+is a contradiction. Therefore we get no false positives. If $\theta_i^* > \eta
++ \epsilon$, then $|\hat{\theta}_i- \theta_i^*| < \epsilon \implies \hat\theta_i
 > \eta$ and we get all strong parents.
 \end{proof}
@@ -259,10 +259,10 @@ by solving \eqref{eq:pre-mle} for $\lambda \defeq 2\sqrt{\frac{\log m}{\alpha n^
 \end{align*}
 \end{theorem}
 
-As before, edge recovery is a consequence of upper-bounding $\|\theta - \hat \theta\|_2$
+As before, edge recovery is a consequence of upper-bounding $\|\theta^* - \hat \theta\|_2$.
 
 \begin{corollary}
-  Under the same assumptions as Theorem~\ref{thm:approx_sparse} the number of
+  Under the same assumptions as Theorem~\ref{thm:approx_sparse}, if the number of
   measurements verifies:
 \begin{equation}
 n > \frac{9}{\alpha\gamma^2\epsilon^2}\left(1+ \frac{16}{\epsilon^2}\| \theta^* - \theta^*_{\lfloor
@@ -302,13 +302,13 @@ In our case we have:
 \end{multline*}
 
 Observe that the Hessian of $\mathcal{L}$ can be seen as
 a re-weighted Gram matrix of the observations. In other words, the restricted
-eigenvalue condition expresses that the observed set of active nodes are not
+eigenvalue condition states that the observed set of active nodes are not
 too collinear with each other. In the specific case of ``logistic cascades''
 (when $f$ is the logistic function), the Hessian simplifies to $\nabla^2\mathcal{L}(\theta^*)
-= \frac{1}{|\mathcal{T}|}XX^T$ where $X$ is the design matrix $[x^1 \ldots
-x^\mathcal{|T|}]$. The (RE) condition is then equivalent
+= \frac{1}{|\mathcal{T}|}XX^T$ where $X$ is the observation matrix $[x^1 \ldots
+x^\mathcal{|T|}]$. The (RE)-condition is then equivalent
 to the assumption made in the Lasso analysis of \cite{bickel:2009}.
 
 \paragraph{(RE) with high probability}
@@ -325,7 +325,7 @@ matrix $\E[\nabla^2\mathcal{L}(\theta^*)]$, then it holds for the finite sample
 Hessian matrix $\nabla^2\mathcal{L}(\theta^*)$ with high probability. Note that
 the expected Hessian matrix is exactly the Fisher Information matrix of the
 generalized linear cascade model which captures the amount of information about
-$\theta$ conveyed by the random observations. Therefore, under an assumption
+$\theta^*$ conveyed by the random observations. Therefore, under an assumption
 which only involves the probabilistic model and not the actual observations,
 Proposition~\ref{prop:fi} allows us to draw the same conclusion as in
 Theorem~\ref{thm:main}.
@@ -344,10 +344,10 @@ again non restrictive in the (IC) model and (V) model.
 \begin{proposition}
 \label{prop:fi}
-  If $\E[\nabla^2\mathcal{L}(\theta^*)]$ verifies the $(S,\gamma)$-{\bf (RE)}
-  condition and assuming {\bf (LF)} and {\bf (LF2)}, then for $\delta> 0$, if $n^{1-\delta}\geq
+  Suppose $\E[\nabla^2\mathcal{L}(\theta^*)]$ verifies the $(S,\gamma)$-{\bf (RE)}
+  condition and assume {\bf (LF)} and {\bf (LF2)}. For $\delta> 0$, if $n^{1-\delta}\geq
   \frac{M+2}{21\gamma\alpha}s^2\log m
-  $, $\nabla^2\mathcal{L}(\theta^*)$ verifies the $(S,\frac{\gamma}{2})$-(RE)
+  $, then $\nabla^2\mathcal{L}(\theta^*)$ verifies the $(S,\frac{\gamma}{2})$-(RE)
   condition, w.p $\geq 1-e^{-n^\delta\log m}$.
 \end{proposition}
@@ -390,13 +390,13 @@ again non restrictive in the (IC) model and (V) model.
 Observe that the number of measurements required in
 Proposition~\ref{prop:fi} is now quadratic in $s$. If we only keep the
 first measurement from each cascade which are independent, we can apply
 Theorem 1.8 from
-\cite{rudelson:13}, lowering the number of required cascades to $s\log(s\log^3
-m)$.
+\cite{rudelson:13}, lowering the number of required cascades to $s\log m \log^3(
+s\log m)$.
 
 \paragraph{(RE) vs Irrepresentability Condition}
 \cite{Daneshmand:2014} rely on an `incoherence' condition on the hessian of the
-likelihood function also known as the {\it (S,s)-irrepresentability} condition:
+likelihood function also known as the {\it (S,s)-irrepresentability} condition.
 \begin{comment}
 \begin{definition}
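The estimator this patch adjusts the constant for — the ℓ1-regularized MLE \eqref{eq:pre-mle} with λ of order 2√(log m / n), followed by the thresholding step of Corollary \ref{cor:variable_selection} — can be sketched end-to-end. Below is a minimal illustration on synthetic data with a logistic link (sigmoid), fit by proximal gradient descent; the instance sizes, step size, iteration count, and threshold η are assumptions of this sketch, not values from the paper.

```python
import numpy as np

rng = np.random.default_rng(0)

# Hypothetical toy instance: recover the incoming weights theta* of a single
# node from n observations of a logistic cascade, i.e.
# P[x_i^{t+1} = 1 | x^t] = sigmoid(<theta*, x^t>).
m, n, s = 50, 5000, 5
theta_star = np.zeros(m)
theta_star[:s] = 1.0                                  # the s true parents
X = rng.binomial(1, 0.5, size=(n, m)).astype(float)   # active-node vectors x^t
y = rng.binomial(1, 1.0 / (1.0 + np.exp(-X @ theta_star))).astype(float)

def soft_threshold(v, t):
    """Proximal operator of t * ||.||_1 (coordinatewise soft-thresholding)."""
    return np.sign(v) * np.maximum(np.abs(v) - t, 0.0)

# l1-regularized MLE via proximal gradient descent (ISTA); lambda follows the
# 2 * sqrt(log m / n) scaling of the theorem (alpha and delta dropped here).
lam = 2.0 * np.sqrt(np.log(m) / n)
theta = np.zeros(m)
step = 0.25
for _ in range(2000):
    grad = X.T @ (1.0 / (1.0 + np.exp(-X @ theta)) - y) / n
    theta = soft_threshold(theta - step * grad, step * lam)

# Variable selection as in the corollary: keep coordinates above a threshold
# (eta = 0.2 is an arbitrary choice for this instance).
eta = 0.2
S_hat = np.flatnonzero(theta > eta)
print("estimated parents:", S_hat)
```

On an instance of this size the true parents should dominate the spurious coordinates, since the ℓ1 penalty drives non-parent weights toward zero while the parents carry the fit.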

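The lemma corrected in this diff — ‖θ̂ − θ*‖₂ ≥ ‖p̂ − p*‖₂ under the Independent Cascade reparametrization Θ_{i,j} = log(1 − p_{i,j}) — follows from log x ≥ 1 − 1/x, equivalently from |d/dp log(1 − p)| = 1/(1 − p) ≥ 1 on [0, 1). A quick numerical spot-check on randomly drawn (hypothetical) probability vectors:

```python
import numpy as np

rng = np.random.default_rng(1)

# Spot-check of the corrected lemma: under theta = log(1 - p), the l2 error
# on theta dominates the l2 error on the infection probabilities p, because
# the map p -> log(1 - p) has slope of magnitude 1/(1 - p) >= 1 on [0, 1).
for _ in range(1000):
    p_star = rng.uniform(0.0, 0.95, size=20)   # true infection probabilities
    p_hat = rng.uniform(0.0, 0.95, size=20)    # a hypothetical estimate
    theta_star = np.log(1.0 - p_star)
    theta_hat = np.log(1.0 - p_hat)
    assert np.linalg.norm(theta_hat - theta_star) >= np.linalg.norm(p_hat - p_star)
print("ok: ||theta_hat - theta*||_2 >= ||p_hat - p*||_2 on all random instances")
```

The inequality in fact holds coordinatewise, which is why the ℓ2 bound transfers directly.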