aboutsummaryrefslogtreecommitdiffstats
path: root/paper/sections/results.tex
diff options
context:
space:
mode:
Diffstat (limited to 'paper/sections/results.tex')
-rw-r--r--paper/sections/results.tex62
1 files changed, 31 insertions, 31 deletions
diff --git a/paper/sections/results.tex b/paper/sections/results.tex
index c83e4d6..f3c5366 100644
--- a/paper/sections/results.tex
+++ b/paper/sections/results.tex
@@ -20,7 +20,7 @@ a set $\mathcal{T}$ of observations. We will write $n\defeq |\mathcal{T}|$.
\label{sec:main_theorem}
In this section, we analyze the case where $\theta^*$ is exactly sparse. We
-write $S\defeq\text{supp}(\theta^*)$ and $s=|S|$. Recall, that $\theta_i$ is the vector of weights for all edges \emph{directed at} the node we are solving for. In other words, $S$ is the set of all nodes susceptible to influence our node $i$, also referred to as its parents. Our main theorem will rely on the now standard \emph{restricted eigenvalue condition}.
+write $S\defeq\text{supp}(\theta^*)$ and $s=|S|$. Recall that $\theta_i$ is the vector of weights for all edges \emph{directed at} the node we are solving for. In other words, $S$ is the set of all nodes susceptible to influence node $i$, also referred to as its parents. Our main theorem will rely on the now standard \emph{restricted eigenvalue condition}.
\begin{definition}
Let $\Sigma\in\mathcal{S}_m(\reals)$ be a real symmetric matrix and $S$ be
@@ -37,8 +37,8 @@ write $S\defeq\text{supp}(\theta^*)$ and $s=|S|$. Recall, that $\theta_i$ is the
A discussion of the $(S,\gamma)$-{\bf(RE)} assumption in the context of
generalized linear cascade models can be found in Section~\ref{sec:re}. In our
-setting we will require that this property holds for the Hessian of the
-log-likelihood function $\mathcal{L}$ and essentially captures that the binary
+setting we require that the (RE)-condition holds for the Hessian of the
+log-likelihood function $\mathcal{L}$: it essentially captures the fact that the binary
vectors of the set of active nodes are not \emph{too} collinear.
We will also need the following assumption on the inverse link function $f$ of the generalized linear cascade model:
@@ -53,8 +53,8 @@ $f(\inprod{\theta^*}{x})\notin\{0,1\}$.
In the voter model, $\frac{f'}{f}(z) = \frac{1}{z}$ and $\frac{f'}{f}(1-z)
= \frac{1}{1-z}$. Hence (LF) will hold as soon as $\alpha\leq \Theta_{i,j}\leq
-1-\alpha$ for all $(i,j)\in E$ which is always satisfied for some $\alpha$.
-Similarly, in the Independent Cascade Model, $\frac{f'}{f}(z)
+1-\alpha$ for all $(i,j)\in E$, which is always satisfied for some $\alpha$ for non-isolated nodes.
+In the Independent Cascade Model, $\frac{f'}{f}(z)
= \frac{1}{1-e^{-z}}$ and $\frac{f'}{1-f}(z) = -1$ and (LF) is not restrictive.
\begin{comment}
@@ -74,7 +74,7 @@ $\alpha > 0$. For any $\delta\in(0,1)$, let $\hat{\theta}$ be the solution of
\begin{equation}
\label{eq:sparse}
\|\hat \theta - \theta^* \|_2
- \leq \frac{3}{\gamma} \sqrt{\frac{s \log m}{\alpha n^{1-\delta}}}
+ \leq \frac{6}{\gamma} \sqrt{\frac{s \log m}{\alpha n^{1-\delta}}}
\quad
\text{w.p.}\;1-\frac{1}{e^{n^\delta \log m}}
\end{equation}
@@ -91,10 +91,10 @@ cast it as a generalized linear cascade model, we had to perform the
transformation $\Theta_{i,j} = \log(1-p_{i,j})$, where $p_{i,j}$ are the
infection probabilities. Fortunately, if we estimate $p_j$ through $\hat{p}_{j}
= \hat{\theta}_j$, a bound on $\|\hat\theta - \theta^*\|_2$ directly implies
-a bound on $\|\hat p - p^*\|$. Indeed we have:
+a bound on $\|\hat p - p^*\|_2$:
\begin{lemma}
- $\|\hat{\theta} - \theta' \|_2 \geq \|\hat{p} - p^*\|_2$.
+ $\|\hat{\theta} - \theta^* \|_2 \geq \|\hat{p} - p^*\|_2$.
\end{lemma}
\begin{proof}
Using the inequality $\forall x>0, \; \log x \geq 1 - \frac{1}{x}$, we have
@@ -115,7 +115,7 @@ Let ${\cal C}(S) \defeq \{ \Delta \in \mathbb{R}^m\,|\,\|\Delta_S\|_1 \leq
\label{eq:rc}
\forall \Delta \in {\cal C}(S), \;
{\cal L}(\theta^* + \Delta) - {\cal L}(\theta^*)\\
- - \inprod{\Delta {\cal L}(\theta^*)}{\Delta}
+ - \inprod{\nabla {\cal L}(\theta^*)}{\Delta}
\geq \kappa_{\cal L} \|\Delta\|_2^2 - \tau_{\cal L}^2(\theta^*)
\end{multline}
for some $\kappa_{\cal L} > 0$ and function $\tau_{\cal L}$. Finally suppose
@@ -131,7 +131,7 @@ $\hat{\theta}_\lambda$ is the solution of \eqref{eq:pre-mle}:
To prove Theorem~\ref{thm:main}, we apply Lemma~\ref{lem:negahban} with
$\tau_{\mathcal{L}}=0$. Since $\mathcal{L}$ is twice differentiable and convex,
assumption \eqref{eq:rc} with $\kappa_{\mathcal{L}}=\frac{\gamma}{2}$ is
-implied by the (RE) condition \eqref{eq:re}. The upper bound
+implied by the (RE)-condition. The upper bound
on the $\ell_{\infty}$ norm of $\nabla\mathcal{L}(\theta^*)$ is given by
Lemma~\ref{lem:ub}.
@@ -156,7 +156,7 @@ The gradient of $\mathcal{L}$ is given by:
\end{multline*}
Let $\partial_j \mathcal{L}(\theta)$ be the $j$-th coordinate of
-$\nabla\mathcal{L}(\theta^*)$. Writing:
+$\nabla\mathcal{L}(\theta^*)$. Writing
$\partial_j\mathcal{L}(\theta^*)
= \frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}} Y_t$ and since
$\E[x_i^{t+1}|x^t]= f(\inprod{\theta^*}{x^t})$, we have that $\E[Y_{t+1}|Y_t]
@@ -172,7 +172,7 @@ and we can apply Azuma's inequality to $Z_t$:
Applying a union bound to have the previous inequality hold for all coordinates
of $\nabla\mathcal{L}(\theta)$ implies:
\begin{align*}
- \P\big[\|\nabla\mathcal{L}(\theta)\|_{\infty}\geq \lambda \big]
+ \P\big[\|\nabla\mathcal{L}(\theta^*)\|_{\infty}\geq \lambda \big]
&\leq 2m\exp\left(\frac{-\lambda^2n\alpha}{2}\right)
\end{align*}
Choosing $\lambda\defeq 2\sqrt{\frac{\log m}{\alpha n^{1-\delta}}}$ concludes the
@@ -186,10 +186,10 @@ solve the Graph Inference problem.
\begin{corollary}
\label{cor:variable_selection}
-Under the same assumptions of Theorem~\ref{thm:main}, let $\hat {\cal S}_\eta
+Under the same assumptions as Theorem~\ref{thm:main}, let $\hat {\cal S}_\eta
\defeq \{ j \in \{1,\ldots, m\} : \hat{\theta}_j > \eta\}$ for $\eta > 0$. For
$\epsilon>0$ and $\epsilon < \eta$, let ${\cal S}^*_{\eta + \epsilon} \defeq \{
-i \in \{1,\ldots,m\} :\theta_i > \eta +\epsilon \}$ be the set of all true `strong'
+i \in \{1,\ldots,m\} :\theta_i^* > \eta +\epsilon \}$ be the set of all true `strong'
parents. Suppose the number of measurements verifies:
$ n > \frac{9s\log m}{\alpha\gamma^2\epsilon^2}$.
Then with probability $1-\frac{1}{m}$, ${\cal S}^*_{\eta + \epsilon} \subseteq
@@ -199,11 +199,11 @@ parents and no `false' parents.
\begin{proof}
By choosing $\delta = 0$, if $ n > \frac{9s\log
-m}{\alpha\gamma^2\epsilon^2}$, then $\|\hat \theta-\theta\|_2 < \epsilon < \eta$
-with probability $1-\frac{1}{m}$. If $\theta_i = 0$ and $\hat \theta > \eta$,
-then $\|\hat \theta - \theta\|_2 \geq |\hat \theta_i-\theta_j| > \eta$, which
-is a contradiction. Therefore we get no false positives. If $\theta_i \leq \eta
-+ \epsilon$, then $|\hat{\theta}_i- \theta_i| < \epsilon \implies \theta_j
+m}{\alpha\gamma^2\epsilon^2}$, then $\|\hat \theta-\theta^*\|_2 < \epsilon < \eta$
+with probability $1-\frac{1}{m}$. If $\theta_i^* = 0$ and $\hat \theta_i > \eta$,
+then $\|\hat \theta - \theta^*\|_2 \geq |\hat \theta_i-\theta_i^*| > \eta$, which
+is a contradiction. Therefore we get no false positives. If $\theta_i^* > \eta
++ \epsilon$, then $|\hat{\theta}_i- \theta_i^*| < \epsilon \implies \hat{\theta}_i
> \eta$ and we get all strong parents.
\end{proof}
@@ -259,10 +259,10 @@ by solving \eqref{eq:pre-mle} for $\lambda \defeq 2\sqrt{\frac{\log m}{\alpha n^
\end{align*}
\end{theorem}
-As before, edge recovery is a consequence of upper-bounding $\|\theta - \hat \theta\|_2$
+As before, edge recovery is a consequence of upper-bounding $\|\theta^* - \hat \theta\|_2$
\begin{corollary}
- Under the same assumptions as Theorem~\ref{thm:approx_sparse} the number of
+ Under the same assumptions as Theorem~\ref{thm:approx_sparse}, if the number of
measurements verifies: \begin{equation}
n > \frac{9}{\alpha\gamma^2\epsilon^2}\left(1+
\frac{16}{\epsilon^2}\| \theta^* - \theta^*_{\lfloor
@@ -302,13 +302,13 @@ In our case we have:
\end{multline*}
Observe that the Hessian of $\mathcal{L}$ can be seen as
a re-weighted Gram matrix of the observations. In other words, the restricted
-eigenvalue condition expresses that the observed set of active nodes are not
+eigenvalue condition states that the observed set of active nodes are not
too collinear with each other.
In the specific case of ``logistic cascades'' (when $f$ is the logistic
function), the Hessian simplifies to $\nabla^2\mathcal{L}(\theta^*)
-= \frac{1}{|\mathcal{T}|}XX^T$ where $X$ is the design matrix $[x^1 \ldots
-x^\mathcal{|T|}]$. The (RE) condition is then equivalent
+= \frac{1}{|\mathcal{T}|}XX^T$ where $X$ is the observation matrix $[x^1 \ldots
+x^\mathcal{|T|}]$. The (RE)-condition is then equivalent
to the assumption made in the Lasso analysis of \cite{bickel:2009}.
\paragraph{(RE) with high probability}
@@ -325,7 +325,7 @@ matrix $\E[\nabla^2\mathcal{L}(\theta^*)]$, then it holds for the finite sample
Hessian matrix $\nabla^2\mathcal{L}(\theta^*)$ with high probability. Note that
the expected Hessian matrix is exactly the Fisher Information matrix of the
generalized linear cascade model which captures the amount of information about
-$\theta$ conveyed by the random observations. Therefore, under an assumption
+$\theta^*$ conveyed by the random observations. Therefore, under an assumption
which only involves the probabilistic model and not the actual observations,
Proposition~\ref{prop:fi} allows us to draw the same conclusion as in
Theorem~\ref{thm:main}.
@@ -344,10 +344,10 @@ again non restrictive in the (IC) model and (V) model.
\begin{proposition}
\label{prop:fi}
- If $\E[\nabla^2\mathcal{L}(\theta^*)]$ verifies the $(S,\gamma)$-{\bf (RE)}
- condition and assuming {\bf (LF)} and {\bf (LF2)}, then for $\delta> 0$, if $n^{1-\delta}\geq
+ Suppose $\E[\nabla^2\mathcal{L}(\theta^*)]$ verifies the $(S,\gamma)$-{\bf (RE)}
+ condition and assume {\bf (LF)} and {\bf (LF2)}. For $\delta> 0$, if $n^{1-\delta}\geq
\frac{M+2}{21\gamma\alpha}s^2\log m
- $, $\nabla^2\mathcal{L}(\theta^*)$ verifies the $(S,\frac{\gamma}{2})$-(RE)
+ $, then $\nabla^2\mathcal{L}(\theta^*)$ verifies the $(S,\frac{\gamma}{2})$-(RE)
condition, w.p $\geq 1-e^{-n^\delta\log m}$.
\end{proposition}
@@ -390,13 +390,13 @@ again non restrictive in the (IC) model and (V) model.
Observe that the number of measurements required in Proposition~\ref{prop:fi}
is now quadratic in $s$. If we only keep the first measurement from each
cascade which are independent, we can apply Theorem 1.8 from
-\cite{rudelson:13}, lowering the number of required cascades to $s\log(s\log^3
-m)$.
+\cite{rudelson:13}, lowering the number of required cascades to $s\log m \log^3(
+s\log m)$.
\paragraph{(RE) vs Irrepresentability Condition}
\cite{Daneshmand:2014} rely on an `incoherence' condition on the hessian of the
-likelihood function also known as the {\it (S,s)-irrepresentability} condition:
+likelihood function also known as the {\it (S,s)-irrepresentability} condition.
\begin{comment}
\begin{definition}