\documentclass[10pt]{article}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{algorithm}
\usepackage{algpseudocode}

\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}
 
\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}
 
\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}

\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\algrenewcommand\algorithmicensure{\textbf{Output:}}
\algrenewcommand\algorithmicrequire{\textbf{Input:}}

\author{}
\title{}
\date{}

\begin{document}

\section{Probabilistic Model}
During the last decade, a growing body of work has focused on the graph
inference problem: how can an unknown graph be recovered from the observation
of cascades propagating along it? (CITE). While our probabilistic model for
cascades of violence strongly relies on this line of work, we depart from it in
the following ways:
\begin{itemize}
    \item in our case, the graph along which the cascades of violence propagate
        (the co-offending network) is known, and we are only interested in
        recovering (and ultimately predicting) the edges of the cascade, that
        is, identifying who influenced whom.
    \item the previous models treat multiple cascades as independent
        observations which occur sequentially. In our case, cascades can die
        and spawn at all time and temporally overlap. This need to be carefully
        accounted for in our generative model and the resulting inference
        algorithms.
\end{itemize}

\subsection{Pairwise Propagation}
\label{sec:edge}

For a pair $(u,v)$ of nodes, we model the propagation of the cascade
from $u$ to $v$ as the sequence of the following two probabilistic events:
\begin{enumerate}

    \item \emph{structural propagation:} a Bernouilli variable dictates whether
        or not a propagation occurs. The parameter of this Bernouilli variable
        is computed from the edge weights of the co-offending network as
        follows:
        \begin{equation}\label{eq:structural}
            p_{u,v}^s \eqdef \prod_{e\in P(u,v)}
            \frac{1}{1+e^{-\frac{w_e}{\lambda}}}
        \end{equation}
        where $P(u,v)$ denotes the shortest path from $u$ to $v$ and $\lambda$
        is the \emph{structural parameter}. That is, the weight $w_e$ of each
        edge $e$ on the  path from $u$ to $v$ is mapped to a probability
        $p_e\in [0,1]$ through the logistic function:
        \begin{displaymath}
            p_e \eqdef \frac{1}{1+e^{-\frac{w_e}{\lambda}}}
        \end{displaymath}
        Note that the probability of structural propagation both \emph{(a)}
        decreases as the distance between $u$ and $v$ in the co-offending
        network increases, and \emph{(b)} increases as the weights of the edges
        on the path from $u$ to $v$ increase. The structural parameter
        $\lambda$ controls how quickly the probability $p_e$ of a given edge
        converges to one as the weight $w_e$ increases.

    \item \emph{temporal propagation:} if the structural propagation occurs,
        then the propagation time $\Delta_{u,v}$ between $u$ and $v$ is drawn
        from a continuous distribution with support in $\R_+$. We denote by
        $f(t)$ the density function of this probability distribution and by
        $F(t)$ its cumulative function. Table TODO gives the expression of $f$
        and $F$ for the different temporal distributions considered in this
        work.

        In our data, the infection times are only observed with a temporal
        resolution of one day. As a consequence, for two nodes $u$ and $v$
        whose \emph{observed} infection times are respectively $t_u$ and $t_v$
        (days), the \emph{real} propagation time could be anything in the
    interval $(t_v-t_u-1, t_v-t_u]$. Hence, the probability of observing
    a propagation time $\Delta_{u,v} = t_v - t_u$ is given by:
    \begin{equation}\label{eq:temporal}
        p_{u,v}^t(\Delta_{u,v}) \eqdef \int_{\Delta_{u,v}-1}^{\Delta_{u,v}} f(t)dt
    \end{equation}
\end{enumerate}

\paragraph{Successful propagation.} For two nodes $u$ and $v$ with observed
infection times $t_u$ and $t_v$, the probability $p_{u,v}$ that $u$ infected
$v$ is obtained by multiplying the structural and temporal probabilities given
by \eqref{eq:structural} and \eqref{eq:temporal}:
\begin{displaymath}
    p_{u,v}(t_u, t_v)  \eqdef p_{u,v}^s\cdot p_{u,v}^t(\Delta_{u,v})
\end{displaymath}

\subsection{Failed Propagations and Spawning of New Cascades}
\label{sec:roots}

\paragraph{Failed propagation.}
When an edge fails to propagate the violence cascade, it could be either
because the structural propagation did not occur, or because the structural
propagation occurred, but the temporal propagation didn't occur in time.

For an edge $(u,v)$ with infection times $t_u$ and $t_v$, it follows directly
from Section~\ref{sec:edge} that structural propagation does not occur with
probability $1-p_{u,v}^s$.

If structural propagation occurs, but temporal propagation does not occur in
time, it means that node $v$ was infected at time $t_v$ (through other means)
\emph{before} $u$ succeeds in infecting $v$. That is, the propagation time was
larger than $\Delta_{u,v}$. This occurs with probability:
\begin{displaymath}
    \int_{\Delta_{u,v}}^{+\infty} f(t)dt = 1-F(\Delta_{u,v})
\end{displaymath}

Overall, the probability of a failed propagation over an edge $(u,v)$ is given
by:
\begin{displaymath}
    \tilde{p}_{u,v}(t_u, t_v)
    \eqdef 1 - p_{u,v}^s + p_{u,v}^s\left(1-F(\Delta_{u,v})\right)
    = 1 - p_{u,v}^s F(\Delta_{u,v})
\end{displaymath}

\paragraph{}For an edge $(u,v)$ where $u$ is a victim node and $v$ is a not
a victim, the situation is similar: either the structural propagation failed,
or it succeeded but the temporal propagation did not occur in time. In the
latter case, since we did not observe an infection at all for $v$, this means
that the propagation time between $u$ and $v$ was such that the infection time
of $v$ would have fallen after the end of the study period. Denoting by $T$ the
observation horizon (the end of the study period), the probability of a failed
propagation for an edge $(u,v)$ where $v$ is not a victim is:
\begin{displaymath}
    \tilde{p}_{u,v}(t_u, T) \eqdef 1-p_{u,v}^s F(T-t_u)
\end{displaymath}

\paragraph{Spawning of New Cascades.}

Our probabilistic model considers another cause of infection for victim nodes:
beyond pairwise propagation of the violence cascade, nodes can also
spontaneously become infected and spawn a new violence cascade. We simply model
this by a Bernouilli variable of parameter $\beta$: for any node, the
probability that it becomes infected spontaneously is $\beta$.

\subsection{The Directed Acyclic Vector of Infection}
\label{sec:dag}

It follows from Sections~\ref{sec:edge} and \ref{sec:roots} that \emph{(1)} no
infection can occur between two non victim nodes, \emph{(2)} no infection can
occur along an edge $(u,v)$ where $t_u > t_v$.

This suggests to orient and trim the co-offending network $G$ to construct the
following directed graph $D$ by applying the following rules:
\begin{enumerate}
    \item remove edges between non-victim nodes.
    \item orient edges from victim nodes to non-victim nodes.
    \item for any edge $(u,v)$ between two victims, orient it in the direction
        from smallest infection time to largest infection time.
\end{enumerate}

The resulting graph $D$ only contains edges along with the propagation of
violence can possibly occur. Furthermore the edges are oriented in the
direction of propagation.

It has been observed in (CITE) that $D$ is acyclic. For completeness we
reproduce their proof here and adapt it to account for non-victim nodes (which
do not exist in the original paper).

\begin{theorem}
The graph $D$ obtained from $G$ by applying rules 1. to 3. is acyclic.
\end{theorem}

\begin{proof}
    Assume by contradiction that $D$ contains a cycle $(u_1,\ldots, u_n)$ where
    $(u_1, u_2),\dots,(u_{n-1}, u_n), (u_n, u_1)$ are edges of $D$. Because of
    rule 1., all nodes $u_1,\dots, u_n$ are victim nodes.

    Applying rule 3. to all edges in the cycle implies that
    $t_1<t_2<\dots<t_n<t_1$. The two extremal terms of this chain of
    inequalities implie $t_1<t_1$ which is a contradiction.
\end{proof}

\subsection{Cascade Model}

Using Sections~\ref{sec:edge} to \ref{sec:dag}, we can now fully specify our
cascade model.

The violence propagation proceeds as follows: every time a node becomes
infected, it attempts to infect all its neighbors which are not infected yet
according to the pairwise model of Section~\ref{sec:edge}. In other words, at
a given time, a node is subjected to the ongoing infection attempts of all its
neighbors which have been infected in the past. Because of the temporal
component of the pairwise propagation probabilities, these attempts will
succeed at different times. The infection is attributed to the first neighbor
who succeeds.Furthermore, all the infection attempts are independent from each
other.  

\paragraph{Example.} Let us consider a node $v$ with $n$ neighbors $u_1,\dots,
u_n$. For simplicity, let us assume that the structural propagation
probabilities are the same and equal to one for all edges $(u_i, v)$. We also
assume that all neighbors of $v$ were infected at time $0$ and we use the
exponential temporal model with parameter $\alpha$. Then, the probability that
$v$ is infected at time larger than $t$ is given by:
\begin{displaymath}
    p_v(t) \eqdef \P\left[\min_{1\leq i\leq n} Z_i \geq t\right]
    = e^{-n\alpha t}
\end{displaymath}
where $Z_i$ is an exponential variable of parameter $\alpha$ and where we used
that the law of the minimum of exponential variables is also exponential with
parameter equal to the sum of the individual variables' parameters.

This example shows that the larger the neighborhood the more likely it is that
a node will be infected quickly. The parameter $\alpha n$ of the resulting
exponential variable captures the aggregated infection potential of the
neighborhood.

\paragraph{} Since the infection of a node $v$ is always attributed to the
first neighbor which succeeds in infecting it, a victim node
can have at either one parent (when it is infected by it) or no parent (when it
spawns a cascade spontaneously). Because the graph $D$ along which the cascades
of violence occur is acyclic, it follows that the violence propagates along $D$
by forming trees.

Formally, if we denote by $(V, E)$ the vertices and edges of $D$, and we write
$V = R\cup T$ where $R$ are the victim nodes and $T$ the non-victim nodes
($R\cap T=\emptyset$), then \emph{the violence cascades form a random forest
spanning $R$ in $D$}.

\paragraph{Likelihood.} Given the set of victim nodes $R$ and their infection
times, which we denote by the vector $\textbf{t}$, we can compute the
probability that the violence cascades propagated along the random forest
$(T_1,\dots, T_r)$ spanning $R$, where $T_1,\dots,T_r$ are trees:
\begin{displaymath}
    \P\left[(T_1,\dots, T_r)|\mathbf{t}\right]
\end{displaymath}


\section{Maximum Likelihood Inference}

\subsection{Finding the Cascades}

\subsection{Finding the Spawning Parameter}

\subsection{Finding the Pairwise Propagation Parameters}

\end{document}