1 files changed, 324 insertions, 0 deletions
diff --git a/theory/main.tex b/theory/main.tex
new file mode 100644
index 0000000..1b4f92a
--- /dev/null
+++ b/theory/main.tex
@@ -0,0 +1,324 @@
+\documentclass[10pt]{article}
+\usepackage[utf8x]{inputenc}
+\usepackage{amsmath,amsfonts,amsthm}
+\usepackage[english]{babel}
+\usepackage[capitalize, noabbrev]{cleveref}
+\usepackage{algorithm}
+\usepackage{algpseudocode}
+
+\newenvironment{enumerate*}%
+  {\vspace{-2ex} \begin{enumerate} %
+     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
+  {\end{enumerate}}
+ 
+\newenvironment{itemize*}%
+  {\vspace{-2ex} \begin{itemize} %
+     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
+  {\end{itemize}}
+ 
+\newenvironment{description*}%
+  {\vspace{-2ex} \begin{description} %
+     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
+  {\end{description}}
+
+\DeclareMathOperator{\E}{\mathbb{E}}
+\let\P\relax
+\DeclareMathOperator{\P}{\mathbb{P}}
+\newcommand{\ex}[1]{\E\left[#1\right]}
+\newcommand{\prob}[1]{\P\left[#1\right]}
+\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
+\newcommand{\R}{\mathbf{R}}
+\newcommand{\N}{\mathbf{N}}
+\newcommand{\C}{\mathcal{C}}
+\newcommand{\eps}{\varepsilon}
+\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
+\newcommand{\llbracket}{[\![}
+
+\newtheorem{theorem}{Theorem}
+\newtheorem{lemma}{Lemma}
+\algrenewcommand\algorithmicensure{\textbf{Output:}}
+\algrenewcommand\algorithmicrequire{\textbf{Input:}}
+\DeclareMathOperator*{\argmax}{arg\,max}
+
+\author{}
+\title{}
+\date{}
+
+\begin{document}
+
+\section{Probabilistic Model}
+During the last decade, a growing body of work has focused on the graph
+inference problem: how can an unknown graph be recovered from the observation
+of cascades propagating along it? (CITE). While our probabilistic model for
+cascades of violence strongly relies on this line of work, we depart from it in
+the following ways:
+\begin{itemize}
+    \item in our case, the graph along which the cascades of violence propagate
+        (the co-offending network) is known, and we are only interested in
+        recovering (and ultimately predicting) the edges of the cascade, that
+        is, identifying who influenced whom.
+    \item the previous models treat multiple cascades as independent
+        observations which occur sequentially. In our case, cascades can die
+        and spawn at all time and temporally overlap. This need to be carefully
+        accounted for in our generative model and the resulting inference
+        algorithms.
+\end{itemize}
+
+\subsection{Pairwise Propagation}
+\label{sec:edge}
+
+For a pair $(u,v)$ of nodes, we model the propagation of the cascade
+from $u$ to $v$ as the sequence of the following two probabilistic events:
+\begin{enumerate}
+
+    \item \emph{structural propagation:} a Bernouilli variable dictates whether
+        or not a propagation occurs. The parameter of this Bernouilli variable
+        is computed from the edge weights of the co-offending network as
+        follows:
+        \begin{equation}\label{eq:structural}
+            p_{u,v}^s \eqdef \prod_{e\in P(u,v)}
+            \frac{1}{1+e^{-\frac{w_e}{\lambda}}}
+        \end{equation}
+        where $P(u,v)$ denotes the shortest path from $u$ to $v$ and $\lambda$
+        is the \emph{structural parameter}. That is, the weight $w_e$ of each
+        edge $e$ on the  path from $u$ to $v$ is mapped to a probability
+        $p_e\in [0,1]$ through the logistic function:
+        \begin{displaymath}
+            p_e \eqdef \frac{1}{1+e^{-\frac{w_e}{\lambda}}}
+        \end{displaymath}
+        Note that the probability of structural propagation both \emph{(a)}
+        decreases as the distance between $u$ and $v$ in the co-offending
+        network increases, and \emph{(b)} increases as the weights of the edges
+        on the path from $u$ to $v$ increase. The structural parameter
+        $\lambda$ controls how quickly the probability $p_e$ of a given edge
+        converges to one as the weight $w_e$ increases.
+
+    \item \emph{temporal propagation:} if the structural propagation occurs,
+        then the propagation time $\Delta_{u,v}$ between $u$ and $v$ is drawn
+        from a continuous distribution with support in $\R_+$. We denote by
+        $f(t)$ the density function of this probability distribution and by
+        $F(t)$ its cumulative function. Table TODO gives the expression of $f$
+        and $F$ for the different temporal distributions considered in this
+        work.
+
+        In our data, the infection times are only observed with a temporal
+        resolution of one day. As a consequence, for two nodes $u$ and $v$
+        whose \emph{observed} infection times are respectively $t_u$ and $t_v$
+        (days), the \emph{real} propagation time could be anything in the
+    interval $(t_v-t_u-1, t_v-t_u]$. Hence, the probability of observing
+    a propagation time $\Delta_{u,v} = t_v - t_u$ is given by:
+    \begin{equation}\label{eq:temporal}
+        p_{u,v}^t(\Delta_{u,v}) \eqdef \int_{\Delta_{u,v}-1}^{\Delta_{u,v}} f(t)dt
+    \end{equation}
+\end{enumerate}
+
+\paragraph{Successful propagation.} For two nodes $u$ and $v$ with observed
+infection times $t_u$ and $t_v$, the probability $p_{u,v}$ that $u$ infected
+$v$ is obtained by multiplying the structural and temporal probabilities given
+by \eqref{eq:structural} and \eqref{eq:temporal}:
+\begin{displaymath}
+    p_{u,v}(t_u, t_v)  \eqdef p_{u,v}^s\cdot p_{u,v}^t(\Delta_{u,v})
+\end{displaymath}
+
+\subsection{Failed Propagations and Spawning of New Cascades}
+\label{sec:roots}
+
+\paragraph{Failed propagation.}
+When an edge fails to propagate the violence cascade, it could be either
+because the structural propagation did not occur, or because the structural
+propagation occurred, but the temporal propagation didn't occur in time.
+
+For an edge $(u,v)$ with infection times $t_u$ and $t_v$, it follows directly
+from Section~\ref{sec:edge} that structural propagation does not occur with
+probability $1-p_{u,v}^s$.
+
+If structural propagation occurs, but temporal propagation does not occur in
+time, it means that node $v$ was infected at time $t_v$ (through other means)
+\emph{before} $u$ succeeds in infecting $v$. That is, the propagation time was
+larger than $\Delta_{u,v}$. This occurs with probability:
+\begin{displaymath}
+    \int_{\Delta_{u,v}}^{+\infty} f(t)dt = 1-F(\Delta_{u,v})
+\end{displaymath}
+
+Overall, the probability of a failed propagation over an edge $(u,v)$ is given
+by:
+\begin{displaymath}
+    \tilde{p}_{u,v}(t_u, t_v)
+    \eqdef 1 - p_{u,v}^s + p_{u,v}^s\left(1-F(\Delta_{u,v})\right)
+    = 1 - p_{u,v}^s F(\Delta_{u,v})
+\end{displaymath}
+
+\paragraph{}For an edge $(u,v)$ where $u$ is a victim node and $v$ is a not
+a victim, the situation is similar: either the structural propagation failed,
+or it succeeded but the temporal propagation did not occur in time. In the
+latter case, since we did not observe an infection at all for $v$, this means
+that the propagation time between $u$ and $v$ was such that the infection time
+of $v$ would have fallen after the end of the study period. Denoting by $T$ the
+observation horizon (the end of the study period), the probability of a failed
+propagation for an edge $(u,v)$ where $v$ is not a victim is:
+\begin{displaymath}
+    \tilde{p}_{u,v}(t_u, T) \eqdef 1-p_{u,v}^s F(T-t_u)
+\end{displaymath}
+
+\paragraph{Spawning of New Cascades.}
+
+Our probabilistic model considers another cause of infection for victim nodes:
+beyond pairwise propagation of the violence cascade, nodes can also
+spontaneously become infected and spawn a new violence cascade. We simply model
+this by a Bernouilli variable of parameter $\beta$: for any node, the
+probability that it becomes infected spontaneously is $\beta$.
+
+\subsection{The Directed Acyclic Vector of Infection}
+\label{sec:dag}
+
+Given a vector of infection times denoted by $\textbf{t}$, it follows from
+Sections~\ref{sec:edge} and \ref{sec:roots} that \emph{(1)} no infection can
+occur between two non victim nodes, \emph{(2)} no infection can occur along an
+edge $(u,v)$ where $t_u > t_v$.
+
+This suggests to orient and trim the co-offending network $G$ to construct the
+following directed graph $D_{\mathbf{t}}$ by applying the following rules:
+\begin{enumerate}
+    \item remove edges between non-victim nodes.
+    \item orient edges from victim nodes to non-victim nodes.
+    \item for any edge $(u,v)$ between two victims, orient it in the direction
+        from smallest infection time to largest infection time.
+\end{enumerate}
+
+The resulting graph $D_{\mathbf{t}}$ only contains edges along with the
+propagation of violence can possibly occur. Furthermore the edges are oriented
+in the direction of propagation.
+
+It has been observed in (CITE) that $D$ is acyclic. For completeness we
+reproduce their proof here and adapt it to account for non-victim nodes (which
+do not exist in the original paper).
+
+\begin{theorem}
+    The graph $D_{\mathbf{t}}$ obtained from $G$ by applying rules 1. to 3. is
+    acyclic.
+\end{theorem}
+
+\begin{proof}
+    Assume by contradiction that $D$ contains a cycle $(u_1,\ldots, u_n)$ where
+    $(u_1, u_2),\dots,(u_{n-1}, u_n), (u_n, u_1)$ are edges of $D$. Because of
+    rule 1., all nodes $u_1,\dots, u_n$ are victim nodes.
+
+    Applying rule 3. to all edges in the cycle implies that
+    $t_1<t_2<\dots<t_n<t_1$. The two extremal terms of this chain of
+    inequalities implie $t_1<t_1$ which is a contradiction.
+\end{proof}
+
+\subsection{Cascade Model}
+
+Using Sections~\ref{sec:edge} to \ref{sec:dag}, we can now fully specify our
+cascade model.
+
+The violence propagation proceeds as follows: every time a node becomes
+infected, it attempts to infect all its neighbors which are not infected yet
+according to the pairwise model of Section~\ref{sec:edge}. In other words, at
+a given time, a node is subjected to the ongoing infection attempts of all its
+neighbors which have been infected in the past. Because of the temporal
+component of the pairwise propagation probabilities, these attempts will
+succeed at different times. The infection is attributed to the first neighbor
+who succeeds.Furthermore, all the infection attempts are independent from each
+other.  
+
+\paragraph{Example.} Let us consider a node $v$ with $n$ neighbors $u_1,\dots,
+u_n$. For simplicity, let us assume that the structural propagation
+probabilities are the same and equal to one for all edges $(u_i, v)$. We also
+assume that all neighbors of $v$ were infected at time $0$ and we use the
+exponential temporal model with parameter $\alpha$. Then, the probability that
+$v$ is infected at time larger than $t$ is given by:
+\begin{displaymath}
+    p_v(t) \eqdef \P\left[\min_{1\leq i\leq n} Z_i \geq t\right]
+    = e^{-n\alpha t}
+\end{displaymath}
+where $Z_i$ is an exponential variable of parameter $\alpha$ and where we used
+that the law of the minimum of exponential variables is also exponential with
+parameter equal to the sum of the individual variables' parameters.
+
+This example shows that the larger the neighborhood the more likely it is that
+a node will be infected quickly. The parameter $\alpha n$ of the resulting
+exponential variable captures the aggregated infection potential of the
+neighborhood.
+
+\paragraph{} Since the infection of a node $v$ is always attributed to the
+first neighbor which succeeds in infecting it, a victim node
+can have either one parent (when it is infected by it) or no parent (when it
+spawns a cascade spontaneously). Because the graph $D$ along which the cascades
+of violence occur is acyclic, it follows that the violence propagates along $D$
+by forming trees.
+
+Formally, if we denote by $(V, E)$ the vertices and edges of $D$, and we write
+$V = R\cup T$ where $R$ are the victim nodes and $T$ the non-victim nodes
+($R\cap T=\emptyset$), then \emph{the violence cascades form a random forest
+spanning $R$ in $D$}.
+
+\paragraph{Likelihood.} Given pairwise parameters $(\alpha, \lambda, \beta)$
+and a random forest $\mathcal{T} = (T_1,\ldots, T_r)$, we can compute the
+probability of observing the infection times given by the vector $\textbf{t}$:
+\begin{equation}
+    \label{eq:model}
+    \P[\mathbf{t}\,|\, \mathcal{T}, \beta, \alpha, \lambda]
+    = \beta^r(1-\beta)^{n-r}\prod_{e\in E_{\mathcal{T}}} p_e
+    \prod_{e\in E_{D_{\mathcal{\mathbf{t}}}}\setminus E_{\mathcal{T}}}
+    \tilde{p}_e
+\end{equation}
+where $E_{\mathcal{T}}$ denotes the edge set of the forest $\mathcal{T}$ and
+$E_{D_{\mathbf{t}}}$ denotes the edge set of the DAG $D_{\mathbf{t}}$ defined
+in Section~\ref{sec:dag}.
+
+\section{Maximum Likelihood Inference}
+
+In this section, we assume that we observe a vector of infection times
+$\mathbf{t}$ and we are interested in recovering the parameters $\mathcal{T},
+\beta, \alpha, \lambda$ maximizing the likelihood of our observations as given
+by the model \eqref{eq:model}. That is, we want to solve the following
+optimization problem:
+\begin{equation}
+    \label{eq:program}
+    \mathcal{T}^*, \beta^*, \alpha^*, \lambda^* \in
+    \argmax_{\mathcal{T},\beta,\alpha,\lambda}
+    \P[\mathbf{t}\,|\, \mathcal{T}, \beta, \alpha, \lambda]
+\end{equation}
+When solving this optimization program, the optimal forest $\mathcal{T}^*$ will
+indicate the most likely edges along which the infection could have propagated,
+while $\beta$, $\alpha$ and $\lambda$ will give us information about the
+temporal and structural scale of pairwise infection.
+
+\subsection{Finding the Cascades}
+
+We first assume that $\beta$, $\alpha$ and $\lambda$ are fixed and solve
+\eqref{eq:program} for $\mathcal{T}$:
+\begin{displaymath}
+    \mathcal{T}^*\in\argmax_{\mathcal{T}}
+    \beta^r(1-\beta)^{n-r}\prod_{e\in E_{\mathcal{T}}} p_e
+    \prod_{e\in E_{D_{\mathcal{\mathbf{t}}}}\setminus E_{\mathcal{T}}}
+    \tilde{p}_e
+\end{displaymath}
+Note that the solution $\mathcal{T}^*$ depends on the value of $(\beta, \alpha,
+\lambda)$. We will then optimize over these parameters in the following
+sections.
+
+By multiplying and dividing each term by $\tilde{p}_e$ in the first product, we
+can rewrite the objective function:
+\begin{displaymath}
+    \mathcal{T}^*\in\argmax_{\mathcal{T}}
+    \left(\frac{\beta}{1-\beta}\right)^r(1-\beta)^n
+    \prod_{e\in E_{\mathcal{T}}} \frac{p_e}{\tilde{p}_e}
+    \prod_{e\in E_{D_{\mathcal{\mathbf{t}}}}} \tilde{p}_e
+\end{displaymath}
+Note that now, the last product, as well as the $(1-\beta)^n$ do not depend on
+the choice of $\mathcal{T}$ and can be dropped without changing the
+optimization program. Then, by taking the $\log$, we obtain the following
+equivalent optimization program:
+\begin{displaymath}
+    \mathcal{T}^*\in\argmax_{\mathcal{T}}\; r\log\frac{\beta}{1-\beta}
+    +\sum_{e\in E_{\mathcal{T}}} \log\frac{p_e}{\tilde{p}_e}
+\end{displaymath}
+
+\subsection{Finding the Spawning Parameter}
+
+\subsection{Finding the Pairwise Propagation Parameters}
+
+\end{document}