1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
\subsection{Proof for different lemmas}
\subsubsection{Bounded gradient}
\begin{proof}
The gradient of $\mathcal{L}$ is given by:
\begin{multline*}
\nabla \mathcal{L}(\theta^*) =
\frac{1}{|\mathcal{T}|}\sum_{t\in \mathcal{T}}x^t\bigg[
x_i^{t+1}\frac{f'}{f}(\inprod{\theta^*}{x^t})\\
- (1-x_i^{t+1})\frac{f'}{1-f}(\inprod{\theta^*}{x^t})\bigg]
\end{multline*}
Let $\partial_j \mathcal{L}(\theta)$ be the $j$-th coordinate of
$\nabla\mathcal{L}(\theta^*)$. Writing
$\partial_j\mathcal{L}(\theta^*)
= \frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}} Y_t$ and since
$\E[x_i^{t+1}|x^t]= f(\inprod{\theta^*}{x^t})$, we have that $\E[Y_{t+1}|Y_t]
= 0$. Hence $Z_t = \sum_{k=1}^t Y_k$ is a martingale.
Using assumption (LF), we have almost surely $|Z_{t+1}-Z_t|\leq
\frac{1}{\alpha}$ and we can apply Azuma's inequality to $Z_t$:
\begin{displaymath}
\P\big[|Z_{\mathcal{T}}|\geq \lambda\big]\leq
2\exp\left(\frac{-\lambda^2\alpha}{2n}\right)
\end{displaymath}
Applying a union bound to have the previous inequality hold for all coordinates
of $\nabla\mathcal{L}(\theta)$ implies:
\begin{align*}
\P\big[\|\nabla\mathcal{L}(\theta^*)\|_{\infty}\geq \lambda \big]
&\leq 2m\exp\left(\frac{-\lambda^2n\alpha}{2}\right)
\end{align*}
Choosing $\lambda\defeq 2\sqrt{\frac{\log m}{\alpha n^{1-\delta}}}$ concludes
the proof.
\end{proof}
\subsubsection{Approximate sparsity proof}
\subsubsection{RE with high probability}
\begin{proof}Writing $H\defeq \nabla^2\mathcal{L}(\theta^*)$, if
$ \forall\Delta\in C(S),\;
\|\E[H] - H]\|_\infty\leq \lambda $
and $\E[H]$ verifies the $(S,\gamma)$-(RE)
condition then:
\begin{equation}
\label{eq:foo}
\forall \Delta\in C(S),\;
\Delta H\Delta \geq
\Delta \E[H]\Delta(1-32s\lambda/\gamma)
\end{equation}
Indeed, $
|\Delta(H-E[H])\Delta| \leq 2\lambda \|\Delta\|_1^2\leq
2\lambda(4\sqrt{s}\|\Delta_s\|_2)^2
$.
Writing
$\partial^2_{i,j}\mathcal{L}(\theta^*)=\frac{1}{|\mathcal{T}|}\sum_{t\in
T}Y_t$ and using $(LF)$ and $(LF2)$ we have $\big|Y_t - \E[Y_t]\big|\leq
\frac{4(M+2)}{\alpha}$.
Applying Azuma's inequality as in the proof of Lemma~\ref{lem:ub}, this
implies:
\begin{displaymath}
\P\big[\|\E[H]-H\|_{\infty}\geq\lambda\big] \leq
2\exp\left(-\frac{n\alpha\lambda^2}{4(M+2)} + 2\log m\right)
\end{displaymath}
Thus, if we take $\lambda=\sqrt{\frac{12(M+2)\log m}{\alpha
n^{1-\delta}}}$, $\|E[H]-H\|_{\infty}\leq\lambda$ w.p at least
$1-e^{-n^{\delta}\log m}$. When $n^{1-\delta}\geq
\frac{M+2}{21\gamma\alpha}s^2\log m$, \eqref{eq:foo} implies
$
\forall \Delta\in C(S),\;
\Delta H\Delta \geq \frac{1}{2} \Delta \E[H]\Delta,
$ w.p. at least $1-e^{-n^{\delta}\log m}$ and the conclusion of
Proposition~\ref{prop:fi} follows.
\end{proof}
\subsection{Other continuous time processes binned to ours: proportional
hazards model}
\subsection{Irrepresentability vs. Restricted Eigenvalue Condition}
In the words and notation of Theorem 9.1 in \cite{vandegeer:2009}:
\begin{lemma}
\label{lemm:irrepresentability_proof}
Let $\phi^2_{\text{compatible}}(L,S) \defeq \min \{ \frac{s
\|f_\beta\|^2_2}{\|\beta_S\|^2_1} \ : \ \beta \in {\cal R}(L, S) \}$, where
$\|f_\beta\|^2_2 \defeq \{ \beta^T \Sigma \beta \}$ and ${\cal R}(L,S) \defeq
\{\beta : \|\beta_{S^c}\|_1 \leq L \|\beta_S\|_1 \neq 0\}$. If
$\nu_{\text{irrepresentable}(S,s)} < 1/L$, then $\phi^2_{\text{compatible}}(L,S)
\geq (1 - L \nu_{\text{irrepresentable}(S,s)})^2 \lambda_{\min}^2$.
\end{lemma}
Since ${\cal R}(3, S) = {\cal C}$, $\|\beta_S\|_1 \geq \|\beta_S\|_2$, and
$\|\beta_S\|_1 \geq \frac{1}{3} \|\beta_{S^c}\|_1$ it is easy to see that
$\|\beta_S\|_1 \geq \frac{1}{4} \|\beta\|_2$ and therefore that: $\gamma_n \geq
\frac{n}{4s}\phi^2_{\text{compatible}}(3,S)$
Consequently, if $\epsilon > \frac{2}{3}$, then
$\nu_{\text{irrepresentable}(S,s)} < 1/3$ and the conditions of
Lemma~\ref{lemm:irrepresentability_proof} hold.
\subsection{Lower bound for restricted eigenvalues (expected hessian) for
different graphs}
\subsection{Better asymptotic w.r.t expected hessian}
\subsection{Confidence intervals?}
\subsection{Active learning}
|