Diffstat (limited to 'paper/sections/appendix.tex')
-rw-r--r--  paper/sections/appendix.tex | 23
1 file changed, 19 insertions, 4 deletions
diff --git a/paper/sections/appendix.tex b/paper/sections/appendix.tex
index 32e2775..22b87c2 100644
--- a/paper/sections/appendix.tex
+++ b/paper/sections/appendix.tex
@@ -5,11 +5,13 @@ the paper.
 
 \subsection{Proofs of Section~\ref{sec:results}}
 
-\paragraph{Bounded gradient} The proof of Lemma~\ref{lem:ub} giving an upper
-bound on the $\ell_{\infty}$ norm of the gradient of the likelihood function is
-given below.
+\begin{proof}[Proof of Lemma~\ref{lem:transform}]
+Using the inequality $\forall x>0, \; \log x \geq 1 - \frac{1}{x}$, we have
+$|\log (\frac{1}{1 - p}) - \log (\frac{1}{1-p'})| \geq \max(1 - \frac{1-p}{1-p'},
+1 - \frac{1-p'}{1-p}) \geq \max( p-p', p'-p)$.
+\end{proof}
 
-\begin{proof}
+\begin{proof}[Proof of Lemma~\ref{lem:ub}]
 The gradient of $\mathcal{L}$ is given by:
 \begin{multline*}
   \nabla \mathcal{L}(\theta^*) =
@@ -42,6 +44,16 @@ Choosing $\lambda\defeq 2\sqrt{\frac{\log m}{\alpha n^{1-\delta}}}$ concludes
 the proof.
 \end{proof}
 
+\begin{proof}[Proof of Corollary~\ref{cor:variable_selection}]
+By choosing $\delta = 0$, if $n > \frac{9s\log m}{\alpha\gamma^2\epsilon^2}$,
+then $\|\hat \theta-\theta^*\|_2 < \epsilon < \eta$ with probability
+$1-\frac{1}{m}$. If $\theta_i^* = 0$ and $\hat \theta_i > \eta$, then $\|\hat
+\theta - \theta^*\|_2 \geq |\hat \theta_i-\theta_i^*| > \eta$, which is a
+contradiction. Therefore we get no false positives. If $\theta_i^* > \eta +
+\epsilon$, then $|\hat{\theta}_i - \theta_i^*| < \epsilon \implies \hat{\theta}_i >
+\eta$ and we get all strong parents.
+\end{proof}
+
 %\subsubsection{Approximate sparsity proof}
 \paragraph{(RE) with high probability} We now prove Proposition~\ref{prop:fi}.
 The proof mostly relies on showing that the Hessian of the likelihood function
@@ -113,6 +125,7 @@ C)$. These two bounds together imply the theorem.
 \begin{figure}
   \centering
   \includegraphics[scale=.4]{figures/n_nodes_running_time.pdf}
+  \vspace{-1em}
   \caption{Running time analysis for estimating the parents of a \emph{single
     node} on a Barabasi-Albert graph as a function of the number of nodes in
     the graph. The parameter $k$ (number of nodes each new node is attached to)
@@ -120,6 +133,7 @@ C)$. These two bounds together imply the theorem.
     the edge weights are chosen uniformly at random in $[.2,.7]$. The
     penalization parameter $\lambda$ is chosen equal to $.1$.}
   \label{fig:running_time_n_nodes}
+  \vspace{-1em}
 \end{figure}
 
 \begin{figure}
@@ -130,6 +144,7 @@ C)$. These two bounds together imply the theorem.
     observed cascades. The parameters defining the graph were set as in
     Figure~\ref{fig:running_time_n_nodes}.}
   \label{fig:running_time_n_cascades}
+  \vspace{-1em}
 \end{figure}
 
 We include here a running time analysis of our algorithm. In
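A note on the added proof of Lemma~\ref{lem:transform}: the application of $\log x \geq 1 - \frac{1}{x}$ is stated very compactly. The sketch below spells out the intermediate step; it assumes $p, p' \in [0,1)$ (not restated in this hunk) and, without loss of generality, $p \geq p'$.

% Sketch only; assumes p, p' in [0,1) and, WLOG, p >= p'.
\begin{align*}
\Bigl|\log\tfrac{1}{1-p} - \log\tfrac{1}{1-p'}\Bigr|
  &= \log\frac{1-p'}{1-p}
  && \text{($\log\tfrac{1}{1-x}$ is increasing in $x$)} \\
  &\geq 1 - \frac{1-p}{1-p'}
  && \text{(set $x = \tfrac{1-p'}{1-p}$ in $\log x \geq 1 - \tfrac{1}{x}$)} \\
  &= \frac{p-p'}{1-p'} \;\geq\; p - p' = |p - p'|
  && \text{(since $0 < 1-p' \leq 1$ and $p - p' \geq 0$)}.
\end{align*}

Exchanging $p$ and $p'$ handles the case $p' > p$, which recovers the bound $\max(p - p', p' - p)$ stated in the hunk.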

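The figure captions in the last three hunks describe the synthetic running-time setup: a Barabasi-Albert graph whose edge weights are drawn uniformly at random in $[.2,.7]$, with penalization parameter $\lambda = .1$. Purely as an illustration (this is not the paper's code), here is a minimal sketch of how such a weighted graph could be generated, assuming networkx and numpy; the attachment parameter k and the node count below are placeholders, since the caption text that fixes $k$ falls outside these hunks.

import networkx as nx
import numpy as np

def weighted_ba_graph(n_nodes, k, seed=0):
    # Barabasi-Albert graph: each new node attaches to k existing nodes.
    g = nx.barabasi_albert_graph(n_nodes, k, seed=seed)
    rng = np.random.default_rng(seed)
    for u, v in g.edges():
        # Edge weight drawn uniformly at random in [.2, .7], as in the captions.
        g[u][v]["weight"] = rng.uniform(0.2, 0.7)
    return g

# Placeholder sizes, not values taken from the paper.
g = weighted_ba_graph(n_nodes=1000, k=5)
print(g.number_of_nodes(), g.number_of_edges())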