summaryrefslogtreecommitdiffstats
path: root/supplements/main.tex
diff options
context:
space:
mode:
Diffstat (limited to 'supplements/main.tex')
-rw-r--r--supplements/main.tex77
1 files changed, 69 insertions, 8 deletions
diff --git a/supplements/main.tex b/supplements/main.tex
index 04a306d..2c73442 100644
--- a/supplements/main.tex
+++ b/supplements/main.tex
@@ -1,6 +1,6 @@
\documentclass{article}
\usepackage[utf8x]{inputenc}
-\usepackage{amsmath}
+\usepackage{amsmath, amsthm}
\usepackage{algorithm}% http://ctan.org/pkg/algorithms
\usepackage{algpseudocode}% http://ctan.org/pkg/algorithmicx
\usepackage{graphicx}
@@ -8,8 +8,11 @@
\usepackage{verbatim}
\DeclareMathOperator*{\argmax}{argmax}
+\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\providecommand{\e}[1]{\ensuremath{\times 10^{#1}}}
\newcommand{\given}{\,|\,}
+\newtheorem{theorem}{Theorem}
+\newtheorem{proposition}[theorem]{Proposition}
\title{Hawkes contagion model}
\author{Ben Green \and Thibaut Horel \and Andrew Papachristos}
@@ -64,7 +67,7 @@ log-likelihood of events $\mathcal{E} = \{(t_i, k_i)\}_{1\leq i\leq N}$ given
$M$ and $\Phi$ and observation period $[0, T]$:
\begin{equation}
\label{eq:likelihood}
- \mathcal{L}(\mathcal{E}\given M, \Phi) = \sum_{i=1}^N \log\lambda_{k_i}(t_i)
+ \mathcal{L}(M, \Phi\given\mathcal{E}) = \sum_{i=1}^N \log\lambda_{k_i}(t_i)
- \sum_{k=1}^D\int_{0}^T \lambda_k(t) dt
\end{equation}
@@ -140,7 +143,7 @@ death in the second summand of Equation~\eqref{eq:likelihood}. Denoting by
$T_u$ the time of death of vertex $u$ ($T_u = T$ if the individual didn't die
during the study period), we obtain:
\begin{displaymath}
- \mathcal{L}(\mathcal{E}\given \mu, \alpha, \beta) = \sum_{i=1}^N \log\lambda_{u_i}(t_i)
+ \mathcal{L}(\mu, \alpha, \beta\given\mathcal{E}) = \sum_{i=1}^N \log\lambda_{u_i}(t_i)
- \sum_{v\in V}\int_{0}^{T_v} \lambda_v(t) dt
\end{displaymath}
@@ -149,7 +152,7 @@ be expanded to:
\begin{equation}
\label{eq:final-likelihood}
\begin{split}
- \mathcal{L}(\mathcal{E}\given \mu, \alpha, \beta) =&
+ \mathcal{L}(\mu, \alpha, \beta\given\mathcal{E}) =&
\sum_{i=1}^N \log\left(\mu(t_i)
+ \sum_{j:t_j< t_i} g_\alpha(u_i, u_j)\beta e^{-\beta (t_i - t_j)}\right)
\\
@@ -223,16 +226,74 @@ where $\mu_0 = \frac{A}{|V|}$.
\subsection{Kernel Function Parameters}
-We learn parameters using
+Using the exogenous intensity from Section~\ref{sec:background}, the
+log-likelihood now depends on three parameters $(\mu_0, \alpha, \beta)$, and
+finding the maximum likelihood estimate of these parameters amounts to solving
+the following optimization problem:
+\begin{equation}
+ \label{eq:mle}
+ \argmax_{\mu_0,\alpha,\beta}\mathcal{L}(\mu_0,\alpha,\beta\given\mathcal{E})
+\end{equation}
+Unfortunately, the function $\mathcal{L}$ is not jointly concave in its three
+arguments. We will however exploit the following fact.
+
+\begin{proposition}
+ \label{prop:concave}
+ The function $(\mu_0,\alpha)\mapsto
+ \mathcal{L}(\mu_0,\alpha,\beta\given\mathcal{E})$ is concave.
+\end{proposition}
+
+\begin{proof}
+  Using Equation~\eqref{eq:final-likelihood}, it is easy to see that the second
+ and third sums are linear in $(\mu_0, \alpha)$, hence it is sufficient to
+ show that for $1\leq i\leq N$:
+ \begin{displaymath}
+ f(\mu_0,\alpha) = \log\left(
+ \mu(t_i)
+ + \sum_{j:t_j< t_i} g_\alpha(u_i, u_j)\beta e^{-\beta (t_i - t_j)}
+ \right)
+ \end{displaymath}
+  is concave. For this, we observe that the argument of the $\log$ function is
+  linear in $(\mu_0, \alpha)$. By composition of this linear function with the
+  concave function $\log$, we obtain that $f$ is concave, which concludes the proof.
+\end{proof}
-$\mu_0 = 1.1845e-05$, $\alpha = 0.00317$, and $\beta = 0.0039$.
+Numerically, we observed that $\mathcal{L}$ has many local optima, hence we
+solved \eqref{eq:mle} using the following heuristic:
+\begin{itemize}
+ \item first, we did a brute force grid search to locate good starting
+ points for the refining heuristic.
+ \item second, starting from the best gridpoints obtained during the first
+        step, we refined the solution by alternating maximization, that is,
+        alternately:
+ \begin{itemize}
+ \item optimizing over $(\mu_0, \alpha)$ for a fixed value of
+ $\beta$. For this, Proposition~\ref{prop:concave} allows us to
+ use standard convex optimization machinery (gradient descent in
+ our case) to solve this step exactly.
+ \item optimizing over $\beta$ for a fixed value of $(\mu_0,
+            \alpha)$. Simulated annealing was used at this step.
+ \end{itemize}
+\end{itemize}
+Other heuristics were considered: using gradient descent as well for the
+optimization over $\beta$, or using global gradient descent to optimize over
+$(\mu_0,\alpha,\beta)$ at the same time. All heuristics led to the same optimal
+solution, indicating that our initial grid search was fine enough to identify
+good starting points. We obtained the following values of the parameters at
+the optimum:
+\begin{displaymath}
+ \mu_0 = 1.19\cdot 10^{-5},\quad
+ \alpha = 7.82\cdot 10^{-3},\quad
+ \beta = 3.74\cdot 10^{-3}
+\end{displaymath}
+\begin{comment}
\begin{equation}
\lambda_v(t) = 1.1845\e{-5} \left[1 + 0.43 \sin\left(\frac{2\pi}{365.24} t + 4.36\right) \right] + \sum_{u \in V} \frac{0.00317}{\text{dist}(u,v)} 0.0039 e^{-0.0039(t-t_u)}
\end{equation}
+\end{comment}
-\section{Inferring infections}
-[how we determine background vs peer infection]
+\section{Inferring the Patterns of Infections}
We can estimate if a person was primarily infected via peer contagion by comparing the contributions from the background rate and from his or her peers.