1 files changed, 36 insertions, 1 deletions
diff --git a/paper/sections/appendix.tex b/paper/sections/appendix.tex
index ff4c5dd..afcf186 100644
--- a/paper/sections/appendix.tex
+++ b/paper/sections/appendix.tex
@@ -1,10 +1,45 @@
 \subsection{Proof for different lemmas}
 
 \subsubsection{Bounded gradient}
+
+\begin{proof}
+The gradient of $\mathcal{L}$ is given by:
+\begin{multline*}
+    \nabla \mathcal{L}(\theta^*) =
+    \frac{1}{|\mathcal{T}|}\sum_{t\in \mathcal{T}}x^t\bigg[
+        x_i^{t+1}\frac{f'}{f}(\inprod{\theta^*}{x^t})\\
+    - (1-x_i^{t+1})\frac{f'}{1-f}(\inprod{\theta^*}{x^t})\bigg]
+\end{multline*}
+
+Let $\partial_j \mathcal{L}(\theta)$ be the $j$-th coordinate of
+$\nabla\mathcal{L}(\theta^*)$.  Writing
+$\partial_j\mathcal{L}(\theta^*)
+= \frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}} Y_t$ and since
+$\E[x_i^{t+1}|x^t]= f(\inprod{\theta^*}{x^t})$, we have that $\E[Y_{t+1}|Y_t]
+= 0$. Hence $Z_t = \sum_{k=1}^t Y_k$ is a martingale.
+
+Using assumption (LF), we have almost surely $|Z_{t+1}-Z_t|\leq
+\frac{1}{\alpha}$ and we can apply Azuma's inequality to $Z_t$:
+\begin{displaymath}
+    \P\big[|Z_{\mathcal{T}}|\geq \lambda\big]\leq
+    2\exp\left(\frac{-\lambda^2\alpha}{2n}\right)
+\end{displaymath}
+
+Applying a union bound to have the previous inequality hold for all coordinates
+of $\nabla\mathcal{L}(\theta)$ implies:
+\begin{align*}
+    \P\big[\|\nabla\mathcal{L}(\theta^*)\|_{\infty}\geq \lambda \big]
+    &\leq 2m\exp\left(\frac{-\lambda^2n\alpha}{2}\right)
+\end{align*}
+Choosing $\lambda\defeq 2\sqrt{\frac{\log m}{\alpha n^{1-\delta}}}$ concludes
+the proof.
+\end{proof}
+
 \subsubsection{Approximate sparsity proof}
 \subsubsection{RE with high probability}
 
-\subsection{Other continuous time processes binned to ours: prop. hazards model}
+\subsection{Other continuous time processes binned to ours: proportional
+hazards model}
 
 \subsection{Irrepresentability vs. Restricted Eigenvalue Condition}
 In the words and notation of Theorem 9.1 in \cite{vandegeer:2009}: