1 files changed, 51 insertions, 40 deletions
diff --git a/paper/sections/model.tex b/paper/sections/model.tex
index f4bb4c5..19d7506 100644
--- a/paper/sections/model.tex
+++ b/paper/sections/model.tex
@@ -2,8 +2,9 @@ We consider a graph ${\cal G}= (V, E, \Theta)$, where $\Theta$ is a $|V|\times
 |V|$ matrix of parameters describing the edge weights of $\mathcal{G}$.
 Intuitively, $\Theta_{i,j}$ captures the ``influence'' of node $i$ on node $j$.
 Let $m\defeq |V|$. For each node $j$, let $\theta_{j}$ be the $j^{th}$ column
-vector of $\Theta$.  A \emph{Cascade model} is a Markov process over a finite
-state space $\{0, 1, \dots, K-1\}^V$ with the following properties:
+vector of $\Theta$.  A {\color{red} discrete-time} \emph{Cascade model} is a
+Markov process over a finite state space ${\{0, 1, \dots, K-1\}}^V$ with the
+following properties:
 \begin{enumerate}
     \item Conditioned on the previous time step, the transition events between
         two states in $\{0,1,\dots, K-1\}$ for each $i \in V$ are mutually
@@ -12,20 +13,23 @@ state space $\{0, 1, \dots, K-1\}^V$ with the following properties:
     that all transition probabilities of the Markov process are either constant
     or can be expressed as a function of the graph parameters $\Theta$ and the
     set of ``contagious nodes'' at the previous time step.
-\item The initial probability over $\{0, 1, \dots, K-1\}^V$ is such that all nodes can eventually reach a \emph{contagious state}
-    with non-zero probability. The ``contagious'' nodes at $t=0$ are called
+  \item The initial probability over ${\{0, 1, \dots, K-1\}}^V$ is such that all
+  nodes can eventually reach a \emph{contagious state} with non-zero
+  probability. The ``contagious'' nodes at $t=0$ are called
     \emph{source nodes}.
 \end{enumerate}
 
 In other words, a cascade model describes a diffusion process where a set of
-contagious nodes ``influence'' other nodes in the graph to become contagious. An \emph{influence cascade} is a realisation of this random process, \emph{i.e.} the successive states of the nodes in graph ${\cal G}$. Note that both
-the ``single source'' assumption made in \cite{Daneshmand:2014} and
-\cite{Abrahao:13} as well as the ``uniformly chosen source set'' assumption
-made in \cite{Netrapalli:2012} verify condition 3.
+contagious nodes ``influence'' other nodes in the graph to become contagious. An
+\emph{influence cascade} is a realisation of this random process, \emph{i.e.}
+the successive states of the nodes in graph ${\cal G}$. Note that both the
+``single source'' assumption made in~\cite{Daneshmand:2014} and
+\cite{Abrahao:13} as well as the ``uniformly chosen source set'' assumption made
+in~\cite{Netrapalli:2012} verify condition 3.
 
-In the context of Graph Inference, \cite{Netrapalli:2012} focus
+In the context of Graph Inference,~\cite{Netrapalli:2012} focus
 on the well-known discrete-time independent cascade model recalled below, which
-\cite{Abrahao:13} and \cite{Daneshmand:2014} generalize to continuous time. We
+\cite{Abrahao:13} and~\cite{Daneshmand:2014} generalize to continuous time. We
 extend the independent cascade model in a different direction by considering
 a more general class of transition probabilities while staying in the
 discrete-time setting. We observe that despite their obvious differences, both
@@ -51,14 +55,14 @@ becoming ``contagious'' at time step $t+1$ conditioned on $X^t$ is a Bernoulli
 variable of parameter $f(\theta_j \cdot X^t)$:
 \begin{equation}
     \label{eq:glm}
-    \mathbb{P}(X^{t+1}_j = 1|X^t) 
+    \mathbb{P}(X^{t+1}_j = 1|X^t)
     = f(\theta_j \cdot X^t)
 \end{equation}
 where $f: \reals \rightarrow [0,1]$
 \end{definition}
 
 In other words, each generalized linear cascade provides, for each node $j \in
-V$ a series of measurements $(X^t, X^{t+1}_j)_{t \in {\cal T}_j}$ sampled from
+V$ a series of measurements ${(X^t, X^{t+1}_j)}_{t \in {\cal T}_j}$ sampled from
 a generalized linear model. Note also that $\E[X^{t+1}_i\,|\,X^t]
 = f(\inprod{\theta_i}{X^t})$. As such, $f$ can be interpreted as the inverse
 link function of our generalized linear cascade model.
@@ -70,7 +74,7 @@ link function of our generalized linear cascade model.
 % cascade.
 
 % \begin{definition}
-%     \label{def:glcm} 
+%     \label{def:glcm}
 %     Let us denote by $\{\mathcal{F}_t, t\in\ints\}$ the natural
 %     filtration induced by $\{X_t, t\in\ints\}$. A \emph{generalized linear
 %     cascade} is characterized by the following equation:
@@ -99,17 +103,17 @@ In the independent cascade model, nodes can be either susceptible, contagious
 or immune. At $t=0$, all source nodes are ``contagious'' and all remaining
 nodes are ``susceptible''. At each time step $t$, for each edge $(i,j)$ where
 $j$ is susceptible and $i$ is contagious, $i$ attempts to infect $j$ with
-probability $p_{i,j}\in(0,1]$; the infection attempts are mutually independent.
+probability $p_{i,j}\in]0,1]$; the infection attempts are mutually independent.
 If $i$ succeeds, $j$ will become contagious at time step $t+1$. Regardless of
 $i$'s success, node $i$ will be immune at time $t+1$. In other words, nodes
 stay contagious for only one time step. The cascade process terminates when no
 contagious nodes remain.
 
-If we denote by $X^t$ the indicator variable of the set of contagious nodes at time
-step $t$, then if $j$ is susceptible at time step $t+1$, we have:
+If we denote by $X^t$ the indicator variable of the set of contagious nodes at
+time step $t$, then if $j$ is susceptible at time step $t+1$, we have:
 \begin{displaymath}
     \P\big[X^{t+1}_j = 1\,|\, X^{t}\big]
-    = 1 - \prod_{i = 1}^m (1 - p_{i,j})^{X^t_i}.
+    = 1 - \prod_{i = 1}^m {(1 - p_{i,j})}^{X^t_i}.
 \end{displaymath}
 Defining $\Theta_{i,j} \defeq \log(1-p_{i,j})$, this can be rewritten as:
 \begin{equation}\label{eq:ic}
@@ -124,7 +128,8 @@ with inverse link function $f : z \mapsto 1 - e^z$.
 
 \subsubsection{The Linear Voter Model}
 
-In the Linear Voter Model, nodes can be either \emph{red} or \emph{blue}. Without loss of generality, we can suppose that the
+In the Linear Voter Model, nodes can be either \emph{red} or \emph{blue}.
+Without loss of generality, we can suppose that the
 \emph{blue} nodes are contagious. The parameters of the graph are normalized
 such that $\forall i, \ \sum_j \Theta_{i,j} = 1$ and we assume that
 $\Theta_{i,i}$ is always non-zero, meaning that all nodes have self-loops.
@@ -134,7 +139,8 @@ cascades stops at a fixed horizon time $T$ or if all nodes are of the same color
 If we denote by $X^t$ the indicator variable of the set of blue nodes at time
 step $t$, then we have:
 \begin{equation}
-\mathbb{P}\left[X^{t+1}_j = 1 | X^t \right] = \sum_{i=1}^m \Theta_{i,j} X_i^t = \inprod{\Theta_j}{X^t}
+\mathbb{P}\left[X^{t+1}_j = 1 | X^t \right] = \sum_{i=1}^m \Theta_{i,j} X_i^t =
+\inprod{\Theta_j}{X^t}
 \tag{V}
 \end{equation}
 
@@ -143,25 +149,27 @@ with inverse link function $f: z \mapsto z$.
 
 % \subsection{The Linear Threshold Model}
 
-% In the Linear Threshold Model, each node $j\in V$ has a threshold $t_j$ from the interval $[0,1]$ and for each node, the sum of incoming weights is less than $1$: $\forall j\in V$, $\sum_{i=1}^m \Theta_{i,j} \leq 1$. 
+% In the Linear Threshold Model, each node $j\in V$ has a threshold $t_j$ from
+% the interval $[0,1]$ and for each node, the sum of incoming weights is less
+% than $1$: $\forall j\in V$, $\sum_{i=1}^m \Theta_{i,j} \leq 1$.
 
-% Nodes have two states: susceptible or contagious. At each time step, each susceptible
-% node $j$ becomes contagious if the sum of the incoming weights from contagious parents is greater than $j$'s threshold $t_j$. Nodes remain contagious until the end of the cascade, which is reached when no new susceptible nodes become contagious.
+% Nodes have two states: susceptible or contagious. At each time step, each
+% susceptible node $j$ becomes contagious if the sum of the incoming weights
+% from contagious parents is greater than $j$'s threshold $t_j$. Nodes remain
+% contagious until the end of the cascade, which is reached when no new
+% susceptible nodes become contagious.
 
-% As such, the source nodes are chosen, the process is entirely deterministic. Let $X^t$ be the indicator variable of the set of contagious nodes at time step $t-1$, then:
-% \begin{equation}
-% \nonumber
-%     X^{t+1}_j = \mathbbm{1}_{\sum_{i=1}^m \Theta_{i,j}X^t_i > t_j}
-%     = \mathbbm{1}_{\inprod{\theta_j}{X^t} > t_j}
-% \end{equation}
-% where we defined again $\theta_j\defeq (\Theta_{1,j},\ldots,\Theta_{m,j})$. In other words, for every step in the linear threshold model, we observe the following signal:
+% As such, the source nodes are chosen, the process is entirely deterministic.
+% Let $X^t$ be the indicator variable of the set of contagious nodes at time
+% step $t-1$, then: \begin{equation} \nonumber X^{t+1}_j =
+% \mathbbm{1}_{\sum_{i=1}^m \Theta_{i,j}X^t_i > t_j} =
+% \mathbbm{1}_{\inprod{\theta_j}{X^t} > t_j} \end{equation} where we defined
+% again $\theta_j\defeq (\Theta_{1,j},\ldots,\Theta_{m,j})$. In other words, for
+% every step in the linear threshold model, we observe the following signal:
 
-% \begin{equation}
-%     \label{eq:lt}
-%     \tag{LT}
-%     \mathbb{P} \left[X^{t+1}_j = 1 | X^t\right] = \text{sign} \left(\inprod{\theta_j}{X^t} - t_j \right)
-% \end{equation}
-% where ``sign'' is the function $\mathbbm{1}_{\cdot > 0}$.
+% \begin{equation} \label{eq:lt} \tag{LT} \mathbb{P} \left[X^{t+1}_j = 1 |
+% X^t\right] = \text{sign} \left(\inprod{\theta_j}{X^t} - t_j \right)
+% \end{equation} where ``sign'' is the function $\mathbbm{1}_{\cdot > 0}$.
 
 
 
@@ -182,8 +190,8 @@ $\Theta$ via Maximum Likelihood Estimation (MLE). Denoting by $\mathcal{L}$ the
 log-likelihood function, we consider the following $\ell_1$-regularized MLE
 problem:
 \begin{displaymath}
-    \hat{\Theta} \in \argmax_{\Theta} \frac{1}{n} \mathcal{L}(\Theta\,|\,x^1,\ldots,x^n)
-    - \lambda\|\Theta\|_1
+    \hat{\Theta} \in \argmax_{\Theta} \frac{1}{n}
+    \mathcal{L}(\Theta\,|\,x^1,\ldots,x^n) - \lambda\|\Theta\|_1
 \end{displaymath}
 where $\lambda$ is the regularization factor which helps preventing
 overfitting and controls the sparsity of the solution.
@@ -197,10 +205,12 @@ $\Theta$ can be estimated by a separate optimization program:
     \hat{\theta}_i \in \argmax_{\theta} \mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n)
     - \lambda\|\theta_i\|_1
 \end{equation}
-where we denote by ${\cal T}_i$ the time steps at which node $i$ is susceptible and:
+where we denote by ${\cal T}_i$ the time steps at which node $i$ is susceptible
+and:
 \begin{multline*}
-    \mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n) = \frac{1}{|{\cal T}_i|} \sum_{t\in {\cal T}_i } x_i^{t+1}\log f(\inprod{\theta_i}{x^{t}}) \\
-    + (1 - x_i^{t+1})\log\big(1-f(\inprod{\theta_i}{x^t})\big)
+    \mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n) = \frac{1}{|{\cal T}_i|}
+    \sum_{t\in {\cal T}_i } x_i^{t+1}\log f(\inprod{\theta_i}{x^{t}}) \\ + (1 -
+    x_i^{t+1})\log\big(1-f(\inprod{\theta_i}{x^t})\big)
 \end{multline*}
 
 In the case of the voter model, the measurements include all time steps until
@@ -217,3 +227,4 @@ a twice-differentiable function $f$ is log concave iff. $f''f \leq f'^2$. It is
 easy to verify this property for $f$ and $(1-f)$ in the Independent Cascade
 Model and Voter Model.
 
+{\color{red} TODO: talk about the different constraints}