summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--theory/main.tex89
1 files changed, 73 insertions, 16 deletions
diff --git a/theory/main.tex b/theory/main.tex
index a20a140..5350d8d 100644
--- a/theory/main.tex
+++ b/theory/main.tex
@@ -38,6 +38,7 @@
\newtheorem{lemma}{Lemma}
\algrenewcommand\algorithmicensure{\textbf{Output:}}
\algrenewcommand\algorithmicrequire{\textbf{Input:}}
+\DeclareMathOperator*{\argmax}{arg\,max}
\author{}
\title{}
@@ -170,12 +171,13 @@ probability that it becomes infected spontaneously is $\beta$.
\subsection{The Directed Acyclic Vector of Infection}
\label{sec:dag}
-It follows from Sections~\ref{sec:edge} and \ref{sec:roots} that \emph{(1)} no
-infection can occur between two non victim nodes, \emph{(2)} no infection can
-occur along an edge $(u,v)$ where $t_u > t_v$.
+Given a vector of infection times denoted by $\textbf{t}$, it follows from
+Sections~\ref{sec:edge} and \ref{sec:roots} that \emph{(1)} no infection can
+occur between two non victim nodes, \emph{(2)} no infection can occur along an
+edge $(u,v)$ where $t_u > t_v$.
This suggests to orient and trim the co-offending network $G$ to construct the
-following directed graph $D$ by applying the following rules:
+following directed graph $D_{\mathbf{t}}$ by applying the following rules:
\begin{enumerate}
\item remove edges between non-victim nodes.
\item orient edges from victim nodes to non-victim nodes.
@@ -183,16 +185,17 @@ following directed graph $D$ by applying the following rules:
from smallest infection time to largest infection time.
\end{enumerate}
-The resulting graph $D$ only contains edges along with the propagation of
-violence can possibly occur. Furthermore the edges are oriented in the
-direction of propagation.
+The resulting graph $D_{\mathbf{t}}$ only contains edges along with the
+propagation of violence can possibly occur. Furthermore the edges are oriented
+in the direction of propagation.
It has been observed in (CITE) that $D$ is acyclic. For completeness we
reproduce their proof here and adapt it to account for non-victim nodes (which
do not exist in the original paper).
\begin{theorem}
-The graph $D$ obtained from $G$ by applying rules 1. to 3. is acyclic.
+ The graph $D_{\mathbf{t}}$ obtained from $G$ by applying rules 1. to 3. is
+ acyclic.
\end{theorem}
\begin{proof}
@@ -241,7 +244,7 @@ neighborhood.
\paragraph{} Since the infection of a node $v$ is always attributed to the
first neighbor which succeeds in infecting it, a victim node
-can have at either one parent (when it is infected by it) or no parent (when it
+can have either one parent (when it is infected by it) or no parent (when it
spawns a cascade spontaneously). Because the graph $D$ along which the cascades
of violence occur is acyclic, it follows that the violence propagates along $D$
by forming trees.
@@ -251,18 +254,72 @@ $V = R\cup T$ where $R$ are the victim nodes and $T$ the non-victim nodes
($R\cap T=\emptyset$), then \emph{the violence cascades form a random forest
spanning $R$ in $D$}.
-\paragraph{Likelihood.} Given the set of victim nodes $R$ and their infection
-times, which we denote by the vector $\textbf{t}$, we can compute the
-probability that the violence cascades propagated along the random forest
-$(T_1,\dots, T_r)$ spanning $R$, where $T_1,\dots,T_r$ are trees:
-\begin{displaymath}
- \P\left[(T_1,\dots, T_r)|\mathbf{t}\right]
-\end{displaymath}
+\paragraph{Likelihood.} Given pairwise parameters $(\alpha, \lambda, \beta)$
+and a random forest $\mathcal{T} = (T_1,\ldots, T_r)$, we can compute the
+probability of observing the infection times given by the vector $\textbf{t}$:
+\begin{equation}
+ \label{eq:model}
+ \P[\mathbf{t}\,|\, \mathcal{T}, \beta, \alpha, \lambda]
+ = \beta^r(1-\beta)^{n-r}\prod_{e\in E_{\mathcal{T}}} p_e
+ \prod_{e\in E_{D_{\mathcal{\mathbf{t}}}}\setminus E_{\mathcal{T}}}
+ \tilde{p}_e
+\end{equation}
+where $E_{\mathcal{T}}$ denotes the edge set of the forest $\mathcal{T}$ and
+$E_{D_{\mathbf{t}}}$ denotes the edge set of the DAG $D_{\mathbf{t}}$ defined
+in Section~\ref{sec:dag}.
\section{Maximum Likelihood Inference}
+In this section, we assume that we observe a vector of infection times
+$\mathbf{t}$ and we are interested in recovering the parameters $\mathcal{T},
+\beta, \alpha, \lambda$ maximizing the likelihood of our observations as given
+by the model \eqref{eq:model}. That is, we want to solve the following
+optimization problem:
+\begin{equation}
+ \label{eq:program}
+ \mathcal{T}^*, \beta^*, \alpha^*, \lambda^* \in
+ \argmax_{\mathcal{T},\beta,\alpha,\lambda}
+ \P[\mathbf{t}\,|\, \mathcal{T}, \beta, \alpha, \lambda]
+\end{equation}
+When solving this optimization program, the optimal forest $\mathcal{T}^*$ will
+indicate the most likely edges along which the infection could have propagated,
+while $\beta$, $\alpha$ and $\lambda$ will give us information about the
+temporal and structural scale of pairwise infection.
+
\subsection{Finding the Cascades}
+We first assume that $\beta$, $\alpha$ and $\lambda$ are fixed and solve
+\eqref{eq:program} for $\mathcal{T}$:
+\begin{displaymath}
+ \mathcal{T}^*\in\argmax_{\mathcal{T}}
+ \beta^r(1-\beta)^{n-r}\prod_{e\in E_{\mathcal{T}}} p_e
+ \prod_{e\in E_{D_{\mathcal{\mathbf{t}}}}\setminus E_{\mathcal{T}}}
+ \tilde{p}_e
+\end{displaymath}
+Note that the solution $\mathcal{T}^*$ depends on the value of $(\beta, \alpha,
+\lambda)$. We will then optimize over these parameters in the following
+sections.
+
+By multiplying and dividing each term by $\tilde{p}_e$ in the first product, we
+can rewrite the objective function:
+\begin{displaymath}
+ \mathcal{T}^*\in\argmax_{\mathcal{T}}
+ \left(\frac{\beta}{1-\beta}\right)^r(1-\beta)^n
+ \prod_{e\in E_{\mathcal{T}}} \frac{p_e}{\tilde{p}_e}
+ \prod_{e\in E_{D_{\mathcal{\mathbf{t}}}}} \tilde{p}_e
+\end{displaymath}
+Note that now, the last product, as well as the $(1-\beta)^n$ do not depend on
+the choice of $\mathcal{T}$ and can be dropped without changing the
+optimization program. Then, by taking the $\log$, we obtain the following
+equivalent optimization program:
+\begin{displaymath}
+ \mathcal{T}^*\in\argmax_{\mathcal{T}}\; r\log\frac{\beta}{1-\beta}
+ +\sum_{e\in E_{\mathcal{T}}} \log\frac{p_e}{\tilde{p}_e}
+\end{displaymath}
+
+
+
+
\subsection{Finding the Spawning Parameter}
\subsection{Finding the Pairwise Propagation Parameters}