Diffstat (limited to 'paper/sections/appendix.tex'):
 -rw-r--r--  paper/sections/appendix.tex | 74
 1 file changed, 37 insertions(+), 37 deletions(-)
diff --git a/paper/sections/appendix.tex b/paper/sections/appendix.tex
index 1d22402..57658b4 100644
--- a/paper/sections/appendix.tex
+++ b/paper/sections/appendix.tex
@@ -1,56 +1,56 @@
-It is easy to compute the gradient and Hessian matrix of $\mathcal{L}$:
-\begin{multline*}
-  \nabla \mathcal{L}(\theta) =
-  \frac{1}{|\mathcal{T}|}\sum_{t\in \mathcal{T}}x^t\bigg[
-  x_i^{t+1}\frac{f'}{f}(\inprod{\theta_i}{x^t})\\
-  - (1-x_i^{t+1})\frac{f'}{1-f}(\inprod{\theta_i}{x^t})\bigg]
-\end{multline*}
+\begin{comment}
 \begin{multline*}
   \nabla^2 \mathcal{L}(\theta) =
   \frac{1}{|\mathcal{T}|}\sum_{t\in \mathcal{T}}x^t(x^t)^T\bigg[
   x_i^{t+1}\frac{f''f - f'^2}{f^2}(\inprod{\theta_i}{x^t})\\
   - (1-x_i^{t+1})\frac{f''(1-f) + f'^2}{(1-f)^2}(\inprod{\theta_i}{x^t})\bigg]
 \end{multline*}
+\end{comment}
 
-Since $\E[x_i^{t+1}|x^t] = f(\inprod{\theta_i}{x^t})$,
-$\nabla\mathcal{L}(\theta)$ can be written
-$\frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}} Y_t$ where $\E[Y_t|Y_{t-1}]
-= 0$. It follows from the law of expected iterations that
-$\E[\nabla\mathcal{L}(\theta)] = 0$.
+\subsection{Upper-bound for $\|\nabla \mathcal{L}(\theta^*)\|_\infty$}
 
-\subsection{Upper-bound for $\|\nabla f(\theta^*)\|_\infty$}
+The gradient of $\mathcal{L}$ (as a function of $\theta$) is given by:
+\begin{multline*}
+  \nabla \mathcal{L}(\theta) =
+  \frac{1}{|\mathcal{T}|}\sum_{t\in \mathcal{T}}x^t\bigg[
+  x_i^{t+1}\frac{f'}{f}(\inprod{\theta_i}{x^t})\\
+  - (1-x_i^{t+1})\frac{f'}{1-f}(\inprod{\theta_i}{x^t})\bigg]
+\end{multline*}
 
-We show an upper-bound for $ 2 \|\nabla f(\theta^*) \|_\infty$. If we only consider the first measurement of every cascade, this can be done easily. Let $N$ be the number of cascades (to distinguish from $n$ number of total measurements). Begin by noting that for a node $\alpha$:
+Let us focus on one coordinate of the gradient, the partial derivative
+$\partial_j\mathcal{L}(\theta)$ with respect to the $j$-th coordinate. Writing
+$\partial_j\mathcal{L}(\theta) = \frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}} Y_t$,
+and since $\E[x_i^{t+1}|x^t] = f(\inprod{\theta_i}{x^t})$, we have that
+$\E[Y_{t+1}|Y_1,\dots,Y_t] = 0$. Hence $Z_t = \sum_{k=1}^t Y_k$ is a martingale.
 
-\begin{align}
-\nabla_k f(\theta) & = \sum^n_{i=1} \frac{b^i x^i_k}{1 - e^{\langle x^i, \theta \rangle}} - \sum^n_{i=1} x^i_k \nonumber \\
-& = \sum_{i=1}^n x^k_i \left( \frac{b^i}{\mathbb{P}(\text{node } \alpha \text { infected})} - 1\right) \nonumber
-\end{align}
+Let us assume that we can bound $\frac{f'}{f}(\inprod{\theta_i}{x^t})$ and
+$\frac{f'}{1-f}(\inprod{\theta_i}{x^t})$ almost surely and in absolute value by
+$\frac{1}{\sqrt{\eta}}$. It is easy to verify that this holds if for all
+$(i,j)\in E$, $\delta\leq p_{i,j}\leq 1-\delta$ in the Voter model, and if
+$p_{i,j}\leq 1-\delta$ in the Independent Cascade model.
 
-\begin{lemma}
-\label{lem:subgaussian_variable}
-$\nabla f(\theta^*)$ is a $N/p_{\min}$-subgaussian variable, where $p_{\min}$ is the smallest non-zero link to node $\alpha$.
-\end{lemma}
+Under the assumption of the previous paragraph, we have almost surely
+$|Z_{t+1}-Z_t|\leq \frac{1}{\sqrt{\eta}}$ and we can apply Azuma's inequality
+to the martingale $Z_t$. Writing $n = |\mathcal{T}|$:
+\begin{displaymath}
+  \P\big[|Z_n|\geq \lambda\big]\leq
+  2\exp\left(\frac{-\lambda^2\eta}{2n}\right)
+\end{displaymath}
 
-\begin{proof}
-We show that its expectation is $0$ by conditioning on the seed set $S$:
+Since $\partial_j\mathcal{L}(\theta) = Z_n/n$, applying the previous inequality
+with deviation $n\lambda$ together with a union bound over the $m$ coordinates
+of $\nabla\mathcal{L}(\theta)$ implies the following bound:
+\begin{align*}
+  \P\big[\|\nabla\mathcal{L}(\theta)\|_{\infty}\geq \lambda \big]
+  &\leq 2m\exp\left(\frac{-\lambda^2n\eta}{2}\right)\\
+  &= 2\exp\left(\frac{-\lambda^2n\eta}{2}+\log m\right)
+\end{align*}
-\begin{align}
-\mathbb{E} \left( \nabla_k f(\theta) \right) & = \sum_{i=1}^N \mathbb{E} \left[ x^i_k \left( \frac{b^i}{\mathbb{P}(\alpha \text { infected})} - 1\right) \right] \nonumber \\
-& = \sum_{i=1}^N \mathbb{E}_S \left[ \mathbb{E}\left[x^i_k \left( \frac{b^i}{\mathbb{P}(\alpha \text { infected})} - 1\right) \middle| S \right] \right] \nonumber \\
-& = \sum_{i=1}^N \mathbb{E}\left[x^i_k \left( \frac{ \mathbb{E}_S \left[ b^i \middle| S \right]}{\mathbb{P}(\alpha \text { infected})} - 1\right) \right] \nonumber \\
-& = 0 \nonumber
-\end{align}
-Therefore, $\nabla f(\theta^*)$ is the sum of zero-mean variables, upper-bounded by $1/p_{\min}$. It follows that $\nabla f(\theta^*)$ is $N/p_{\min}$-subgaussian.
-\end{proof}
+Choosing $\lambda\defeq 2\sqrt{\frac{\log m}{\eta n^{1-\delta}}}$ concludes the
+proof of Lemma~\ref{lem:icc_lambda_upper_bound}.
-By union bound and characterization of sub-gaussian variables:
-\begin{equation}
-\mathbb{P}(\| \nabla f(\theta) \|_{\infty} > \lambda) \leq 2 \exp \left( -\frac{\lambda^2 p_{\min}}{2n} + \log m \right)
-\end{equation}
-In conclusion, for $\delta>0$, $\lambda := 2 \sqrt{\frac{n^{\delta + 1} \log m}{p_{\min}}}$ is a valid regularizer with probability $1 - \exp(-n^\delta \log m )$
 
 \subsection{Proposition~\ref{prop:irrepresentability}}
 
 In the words and notation of Theorem 9.1 in \cite{vandegeer:2009}:
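As a check on the final step of the patched proof, substituting the chosen $\lambda$ into the union bound gives the following arithmetic (an editor's verification using only symbols defined in the new text, not part of the patch):

```latex
% Plugging \lambda = 2\sqrt{\log m / (\eta n^{1-\delta})} into the exponent:
\begin{align*}
  \lambda^2 &= \frac{4\log m}{\eta n^{1-\delta}}, \\
  -\frac{\lambda^2 n \eta}{2} + \log m
    &= -2 n^{\delta}\log m + \log m
    \leq -n^{\delta}\log m,
\end{align*}
% where the last inequality uses n^{\delta}\log m \geq \log m
% (valid since n \geq 1 and \delta > 0), so that
% \P[\|\nabla\mathcal{L}(\theta)\|_\infty \geq \lambda]
%   \leq 2\exp(-n^{\delta}\log m).
```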
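Azuma's inequality is the only probabilistic tool the new proof relies on, and its statement is easy to sanity-check numerically. Below is a minimal sketch (not from the paper): it uses independent $\pm 1$ steps as a hypothetical stand-in for the martingale differences $Y_t$, and checks that the simulated tail probability stays below the bound $2\exp(-\lambda^2/(2nc^2))$ with $c = 1$.

```python
# Empirical sanity check of Azuma's inequality:
# for a martingale Z_t with |Z_{t+1} - Z_t| <= c,
#     P[|Z_n| >= lam] <= 2 * exp(-lam**2 / (2 * n * c**2)).
# Independent +/-1 increments (c = 1) are a hypothetical stand-in for
# the martingale differences Y_t of the proof.
import math
import random

random.seed(0)

n = 400        # number of steps (|T| in the paper's notation)
trials = 5000  # Monte Carlo sample size
lam = 40.0     # deviation threshold (= 2*sqrt(n) here)

def final_value(steps):
    """Simulate Z_n as a sum of `steps` independent +/-1 increments."""
    return sum(random.choice((-1, 1)) for _ in range(steps))

tail = sum(abs(final_value(n)) >= lam for _ in range(trials)) / trials
bound = 2 * math.exp(-lam ** 2 / (2 * n))  # Azuma bound with c = 1

print(f"empirical tail = {tail:.4f}, Azuma bound = {bound:.4f}")
assert tail <= bound  # the bound holds, with room to spare
```

The bound is loose here, as expected from a worst-case inequality: for a $\pm 1$ walk at $\lambda = 2\sqrt{n}$ the true tail is roughly the Gaussian two-sided $2\sigma$ probability, well below the Azuma bound $2e^{-2}$.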
