summaryrefslogtreecommitdiffstats
path: root/theory/main.tex
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2015-06-18 09:58:12 -0700
committerThibaut Horel <thibaut.horel@gmail.com>2015-06-18 09:58:12 -0700
commit4401de9b38b4169efa71ad47c81774e74a2fe3da (patch)
treea7f14b0fff9e998099500a2d89a6a9aa4698b822 /theory/main.tex
parent4ff5e31b715a62a2cc62a1a8f8beab6c4c2234d6 (diff)
downloadcriminal_cascades-4401de9b38b4169efa71ad47c81774e74a2fe3da.tar.gz
Some notes
Diffstat (limited to 'theory/main.tex')
-rw-r--r--theory/main.tex132
1 files changed, 132 insertions, 0 deletions
diff --git a/theory/main.tex b/theory/main.tex
new file mode 100644
index 0000000..d293b6f
--- /dev/null
+++ b/theory/main.tex
@@ -0,0 +1,132 @@
+\documentclass[10pt]{article}
+\usepackage[utf8x]{inputenc}
+\usepackage{amsmath,amsfonts,amsthm}
+\usepackage[english]{babel}
+\usepackage[capitalize, noabbrev]{cleveref}
+\usepackage{algorithm}
+\usepackage{algpseudocode}
+
+\newenvironment{enumerate*}%
+ {\vspace{-2ex} \begin{enumerate} %
+ \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
+ {\end{enumerate}}
+
+\newenvironment{itemize*}%
+ {\vspace{-2ex} \begin{itemize} %
+ \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
+ {\end{itemize}}
+
+\newenvironment{description*}%
+ {\vspace{-2ex} \begin{description} %
+ \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
+ {\end{description}}
+
+\DeclareMathOperator{\E}{\mathbb{E}}
+\let\P\relax
+\DeclareMathOperator{\P}{\mathbb{P}}
+\newcommand{\ex}[1]{\E\left[#1\right]}
+\newcommand{\prob}[1]{\P\left[#1\right]}
+\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
+\newcommand{\R}{\mathbf{R}}
+\newcommand{\N}{\mathbf{N}}
+\newcommand{\C}{\mathcal{C}}
+\newcommand{\eps}{\varepsilon}
+\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
+\newcommand{\llbracket}{[\![}
+
+\newtheorem{theorem}{Theorem}
+\newtheorem{lemma}{Lemma}
+\algrenewcommand\algorithmicensure{\textbf{Output:}}
+\algrenewcommand\algorithmicrequire{\textbf{Input:}}
+
+\author{}
+\title{}
+\date{}
+
+\begin{document}
+
+\section{Probabilistic Model}
+During the last decade, a growing body of work has focused on the graph
+inference problem: how can an unknown graph be recovered from the observation
+of cascades propagating along it? (CITE). While our probabilistic model for
+cascades of violence strongly relies on this line of work, we depart from it in
+the following ways:
+\begin{itemize}
+ \item in our case, the graph along which the cascades of violence propagate
+ (the co-offending network) is known, and we are only interested in
+ recovering (and ultimately predicting) the edges of the cascade, that
+ is, identifying who influenced whom.
+ \item the previous models treat multiple cascades as independent
+ observations which occur sequentially. In our case, cascades can die
+ and spawn at all time and temporally overlap. This need to be carefully
+ accounted for in our generative model and the resulting inference
+ algorithms.
+\end{itemize}
+
+\subsection{Pairwise Propagation}
+
+For a pair $(u,v)$ of nodes, we model the propagation of the cascade
+from $u$ to $v$ as the sequence of the following two probabilistic events:
+\begin{enumerate}
+
+ \item \emph{structural propagation:} a Bernouilli variable dictates whether
+ or not a propagation occurs. The parameter of this Bernouilli variable
+ is computed from the edge weights of the co-offending network as
+ follows:
+ \begin{equation}\label{eq:structural}
+ p_{u,v}^s \eqdef \prod_{e\in P(u,v)}
+ \frac{1}{1+e^{-\frac{w_e}{\lambda}}}
+ \end{equation}
+ where $P(u,v)$ denotes the shortest path from $u$ to $v$ and $\lambda$
+ is the \emph{structural parameter}. That is, the weight $w_e$ of each
+ edge $e$ on the path from $u$ to $v$ is mapped to a probability
+ $p_e\in [0,1]$ through the logistic function:
+ \begin{displaymath}
+ p_e \eqdef \frac{1}{1+e^{-\frac{w_e}{\lambda}}}
+ \end{displaymath}
+ Note that the probability of structural propagation both \emph{(a)}
+ decreases as the distance between $u$ and $v$ in the co-offending
+ network increases, and \emph{(b)} increases as the weights of the edges
+ on the path from $u$ to $v$ increase. The structural parameter
+ $\lambda$ controls how quickly the probability $p_e$ of a given edge
+ converges to one as the weight $w_e$ increases.
+
+ \item \emph{temporal propagation:} if the structural propagation occurs,
+ then the propagation time $\Delta_{u,v}$ between $u$ and $v$ is drawn
+ from a continuous distribution with support in $\R_+$. We denote by
+ $f(t)$ the density function of this probability distribution and by
+ $F(t)$ its cumulative function. Table TODO gives the expression of $f$
+ and $F$ for the different temporal distributions considered in this
+ work.
+
+ In our data, the infection times are only observed with a temporal
+ resolution of one day. As a consequence, for two nodes $u$ and $v$
+ whose \empb{observed} infection times are respectively $t_u$ and $t_v$
+ (days), the \emph{real} propagation time could be anything in the
+ interval $(t_v-t_u-1, t_v-t_u]$. Hence, the probability of observing
+ a propagation time $\Delta_{u,v} = t_v - t_u$ is given by:
+ \begin{equation}\label{eq:temporal}
+ p_{u,v}^t \eqdef \int_{\Delta_{u,v}-1}^{\Delta_{u,v}} f(t)dt
+ \end{equation}
+
+\paragraph{Successful propagation.} For two nodes $u$ and $v$ with observed
+infection times $t_u$ and $t_v$, the probability $p_{u,v}$ that $u$ infected
+$v$ is obtained by multiplying the structural and temporal probabilities given
+by \eqref{eq:structural} and \eqref{eq:temporal}:
+\begin{displaymath}
+ p_{u,v} \eqdef p_{u,v}^s\cdot p_{u,v}^t
+\end{displaymath}
+
+\subsection{Spawning of New Cascades}
+
+\subsection{Cascade Model}
+
+\section{Maximum Likelihood Inference}
+
+\subsection{Finding the Cascades}
+
+\subsection{Finding the Spawning Parameter}
+
+\subsection{Finding the Pairwise Propagation Parameters}
+
+\end{document}