WIP

author: Thibaut Horel <thibaut.horel@gmail.com> 2015-08-18 22:15:45 -0700
committer: Thibaut Horel <thibaut.horel@gmail.com> 2015-08-18 22:15:45 -0700
commit: f21723ec23a6e5960d5a94660193298cd8f0d5d3 (patch)
tree: d102781c0db3192b2a074eb96c697fd49a63cd47 /theory/warmup.tex
parent: 9350ee2c6359562a23cf8efdefdd7de80b2a682e (diff)
download: criminal_cascades-f21723ec23a6e5960d5a94660193298cd8f0d5d3.tar.gz
1 files changed, 119 insertions, 0 deletions
diff --git a/theory/warmup.tex b/theory/warmup.tex
new file mode 100644
index 0000000..7c2da96
--- /dev/null
+++ b/theory/warmup.tex
@@ -0,0 +1,119 @@
+\documentclass[10pt]{article}
+\usepackage[utf8x]{inputenc}
+\usepackage{amsmath,amsfonts,amsthm}
+\usepackage[english]{babel}
+\usepackage[capitalize, noabbrev]{cleveref}
+\usepackage{algorithm}
+\usepackage{algpseudocode}
+
+\newenvironment{enumerate*}%
+  {\vspace{-2ex} \begin{enumerate} %
+     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
+  {\end{enumerate}}
+ 
+\newenvironment{itemize*}%
+  {\vspace{-2ex} \begin{itemize} %
+     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
+  {\end{itemize}}
+ 
+\newenvironment{description*}%
+  {\vspace{-2ex} \begin{description} %
+     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
+  {\end{description}}
+
+\DeclareMathOperator{\E}{\mathbb{E}}
+\let\P\relax
+\DeclareMathOperator{\P}{\mathbb{P}}
+\newcommand{\ex}[1]{\E\left[#1\right]}
+\newcommand{\prob}[1]{\P\left[#1\right]}
+\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
+\newcommand{\R}{\mathbf{R}}
+\newcommand{\N}{\mathbf{N}}
+\newcommand{\C}{\mathcal{C}}
+\newcommand{\eps}{\varepsilon}
+\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
+\newcommand{\llbracket}{[\![}
+
+\newtheorem{theorem}{Theorem}
+\newtheorem{lemma}{Lemma}
+\algrenewcommand\algorithmicensure{\textbf{Output:}}
+\algrenewcommand\algorithmicrequire{\textbf{Input:}}
+\DeclareMathOperator*{\argmax}{arg\,max}
+
+\author{}
+\title{}
+\date{}
+
+\begin{document}
+
+\paragraph{Notations}
+\begin{itemize}
+    \item $n$ criminals, $V = \{1,\dots,n\}$
+    \item $\alpha_{i,j}$ influence of criminal $i$ on criminal $j$.
+    \item the infections form a set of pairs $C = \{(t_i, v_i),\; 1\leq i\leq m\}$
+        where $t_i$ is the time and $v_i\in\{1,\dots, n\}$ is the identifier of
+        the criminal.
+\end{itemize}
+
+\paragraph{Model}
+The crimes are modeled as a multivariate Hawkes process. The instantaneous rate
+of infection of user $j$ at time $t$ is given by:
+\begin{displaymath}
+    \lambda_j(t) = \lambda(t) + \sum_{t_i<t}\alpha_{v_i, j}
+    \phi(t-t_i)
+\end{displaymath}
+
+\emph{Technical concern:} stability of the process, there is a condition on the
+$\alpha_{i,j}$ parameters so that the process is stable, i.e does not keep
+accelerating forever. We probably do not need to worry about it too much:
+empirically the process is stable.
+
+\paragraph{Time kernel}
+we choose an exponential kernel for $\phi$ (will allow the recursive trick
+later on) $ \phi(t) = \mu e^{-\mu t} $
+
+\paragraph{Base rate} $\lambda(t)$ captures the seasonal effect. TBD, worst
+case is to choose a constant approximation, $\lambda(t) = \lambda_0$. We
+define $\Lambda(T) \eqdef \int_{0}^T \lambda(t)dt$.
+
+\paragraph{Edge model} TBD
+
+\paragraph{Likelihood} The log-likelihood for observation horizon $T$ is given
+by:
+\begin{displaymath}
+    \mathcal{L}(C) = \sum_{i=1}^m \log \lambda_{v_i}(t_i)
+    - \sum_{j=1}^n\int_{0}^T \lambda_j(t)dt
+\end{displaymath}
+Note that we have:
+\begin{displaymath}
+    \int_{0}^T \lambda_j(t)dt = \int_{0}^T\lambda(t)dt + \sum_{i=1}^m
+        \alpha_{v_i, j}\big[1-e^{-\mu(T-t_i)}\big]
+\end{displaymath}
+
+Hence the log-likelihood can be rewritten (after reordering a little bit):
+\begin{displaymath}
+    \mathcal{L}(C) = \sum_{i=1}^m\log \lambda_{v_i}(t_i)
+    - \sum_{i=1}^m A_{v_i} \big[1-e^{-\mu(T-t_i)}\big]
+    - n\Lambda(T)
+\end{displaymath}
+where $A_{v_i} = \sum_{j=1}^n \alpha_{v_i, j}$ (out-weight of criminal $v_i$),
+or in complete form:
+\begin{displaymath}
+    \mathcal{L}(C) = \sum_{i=1}^m\log\left[ \lambda(t_i)
+    + \sum_{j=1}^{i-1}\alpha_{v_j, v_i}\mu e^{-\mu(t_i-t_j)}\right]
+    - \sum_{i=1}^m A_{v_i} \big[1-e^{-\mu(T-t_i)}\big]
+    - n\Lambda(T)
+\end{displaymath}
+
+\paragraph{Computational trick} As written above, the log-likelihood can be
+computed in time $O(m^2 + n^2)$. This is not linear in the input size, which is
+$O(m + n^2)$. The $O(m^2)$ term comes from the first sum in the definition of
+the log-likelihood. However, in the case of an exponential time kernel, using
+a recursive trick, the complexity of this term can be reduced to $O(mn)$ for an
+overall running time of $O(mn + n^2)$.
+
+\emph{Question:} Is it worth it in our case? Yes if the number of victims
+is significantly smaller than the number of crimes. Is it the case?
+
+
+\end{document}
author	Thibaut Horel <thibaut.horel@gmail.com>	2015-08-18 22:15:45 -0700
committer	Thibaut Horel <thibaut.horel@gmail.com>	2015-08-18 22:15:45 -0700
commit	f21723ec23a6e5960d5a94660193298cd8f0d5d3 (patch)
tree	d102781c0db3192b2a074eb96c697fd49a63cd47 /theory/warmup.tex
parent	9350ee2c6359562a23cf8efdefdd7de80b2a682e (diff)
download	criminal_cascades-f21723ec23a6e5960d5a94660193298cd8f0d5d3.tar.gz