theory/warmup.tex


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119

\documentclass[10pt]{article}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{algorithm}
\usepackage{algpseudocode}

\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}
 
\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}
 
\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}

\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\algrenewcommand\algorithmicensure{\textbf{Output:}}
\algrenewcommand\algorithmicrequire{\textbf{Input:}}
\DeclareMathOperator*{\argmax}{arg\,max}

\author{}
\title{}
\date{}

\begin{document}

\paragraph{Notations}
\begin{itemize}
    \item $n$ criminals, $V = \{1,\dots,n\}$
    \item $\alpha_{i,j}$ influence of criminal $i$ on criminal $j$.
    \item the infections form a set of pairs $C = \{(t_i, v_i),\; 1\leq i\leq m\}$
        where $t_i$ is the time and $v_i\in\{1,\dots, n\}$ is the identifier of
        the criminal.
\end{itemize}

\paragraph{Model}
The crimes are modeled as a multivariate Hawkes process. The instantaneous rate
of infection of user $j$ at time $t$ is given by:
\begin{displaymath}
    \lambda_j(t) = \lambda(t) + \sum_{t_i<t}\alpha_{v_i, j}
    \phi(t-t_i)
\end{displaymath}

\emph{Technical concern:} stability of the process, there is a condition on the
$\alpha_{i,j}$ parameters so that the process is stable, i.e does not keep
accelerating forever. We probably do not need to worry about it too much:
empirically the process is stable.

\paragraph{Time kernel}
we choose an exponential kernel for $\phi$ (will allow the recursive trick
later on) $ \phi(t) = \mu e^{-\mu t} $

\paragraph{Base rate} $\lambda(t)$ captures the seasonal effect. TBD, worst
case is to choose a constant approximation, $\lambda(t) = \lambda_0$. We
define $\Lambda(T) \eqdef \int_{0}^T \lambda(t)dt$.

\paragraph{Edge model} TBD

\paragraph{Likelihood} The log-likelihood for observation horizon $T$ is given
by:
\begin{displaymath}
    \mathcal{L}(C) = \sum_{i=1}^m \log \lambda_{v_i}(t_i)
    - \sum_{j=1}^n\int_{0}^T \lambda_j(t)dt
\end{displaymath}
Note that we have:
\begin{displaymath}
    \int_{0}^T \lambda_j(t)dt = \int_{0}^T\lambda(t)dt + \sum_{i=1}^m
        \alpha_{v_i, j}\big[1-e^{-\mu(T-t_i)}\big]
\end{displaymath}

Hence the log-likelihood can be rewritten (after reordering a little bit):
\begin{displaymath}
    \mathcal{L}(C) = \sum_{i=1}^m\log \lambda_{v_i}(t_i)
    - \sum_{i=1}^m A_{v_i} \big[1-e^{-\mu(T-t_i)}\big]
    - n\Lambda(T)
\end{displaymath}
where $A_{v_i} = \sum_{j=1}^n \alpha_{v_i, j}$ (out-weight of criminal $v_i$),
or in complete form:
\begin{displaymath}
    \mathcal{L}(C) = \sum_{i=1}^m\log\left[ \lambda(t_i)
    + \sum_{j=1}^{i-1}\alpha_{v_j, v_i}\mu e^{-\mu(t_i-t_j)}\right]
    - \sum_{i=1}^m A_{v_i} \big[1-e^{-\mu(T-t_i)}\big]
    - n\Lambda(T)
\end{displaymath}

\paragraph{Computational trick} As written above, the log-likelihood can be
computed in time $O(m^2 + n^2)$. This is not linear in the input size, which is
$O(m + n^2)$. The $O(m^2)$ term comes from the first sum in the definition of
the log-likelihood. However, in the case of an exponential time kernel, using
a recursive trick, the complexity of this term can be reduced to $O(mn)$ for an
overall running time of $O(mn + n^2)$.

\emph{Question:} Is it worth it in our case? Yes if the number of victims
is significantly smaller than the number of crimes. Is it the case?


\end{document}