aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2015-01-24 19:26:37 -0500
committerThibaut Horel <thibaut.horel@gmail.com>2015-01-24 19:26:37 -0500
commit38435205bff612b501f863b78cb91e7322e35594 (patch)
tree270ee93fd383c1fa249eaa3c3d33c0f0e9a20a1a
parent05f79327171003500015da2cc4974c6568d56061 (diff)
downloadcascades-38435205bff612b501f863b78cb91e7322e35594.tar.gz
Beginning of the model section
-rw-r--r--paper/paper.tex11
-rw-r--r--paper/sections/model.tex52
2 files changed, 61 insertions, 2 deletions
diff --git a/paper/paper.tex b/paper/paper.tex
index b677e6c..86ef9d8 100644
--- a/paper/paper.tex
+++ b/paper/paper.tex
@@ -46,6 +46,17 @@
% Therefore, a short form for the running title is supplied here:
\icmltitlerunning{Cracking Cascades: Sparse Recovery for Graph Inference}
+\usepackage{amsmath, amsfonts, amssymb}
+\newcommand{\reals}{\mathbb{R}}
+\newcommand{\ints}{\mathbb{N}}
+\DeclareMathOperator{\E}{\mathbb{E}}
+\let\P\relax
+\DeclareMathOperator{\P}{\mathbb{P}}
+\newcommand{\ex}[1]{\E\left[#1\right]}
+\newcommand{\prob}[1]{\P\left[#1\right]}
+\newcommand{\inprod}[2]{#1 \cdot #2}
+\newcommand{\defeq}{\equiv}
+
\begin{document}
\twocolumn[
diff --git a/paper/sections/model.tex b/paper/sections/model.tex
index 601ddeb..3c0a354 100644
--- a/paper/sections/model.tex
+++ b/paper/sections/model.tex
@@ -1,4 +1,52 @@
-\subsection{Independent Cascade Model}
+\subsection{Influence Cascades}
+
+We consider a weighted graph $G$ over the set of vertices $V$. The edge weights
+are given by an (unknown) weight matrix $\Theta:V^2\to\reals^+$. $\Theta_{i,j}
+= 0$ simply expresses that there is no edge from $i$ to $j$. We define $m=|V|$,
+and for $j\in V$, we write $\theta_j$ the $j$-th column of $\Theta$: this is
+the indicator vector of the parents of $j$ in $G$.
+
+An \emph{influence cascade} is a homogeneous discrete-time Markov process over
+$\{0,1\}^m$. More precisely, we represent the set of nodes infected at time
+step $t-1\in\ints^+$ by a vector $x^{t}\in\{0,1\}^{m}$ where $x^{t}_k = 1$ can
+be interpreted as \emph{node $k$ was infected at time step $t-1$}.
+
+We will restrict ourselves to influence processes such that for $j\in V$,
+$x_j^{t+1}$ is a Bernouilli variable whose parameter can be expressed as
+a function $f$ of $\inprod{\theta_j}{x^t}$, where $\inprod{a}{b}$ is the inner
+product of $a$ and $b$ in $\reals^m$:
+$\theta_j$ and $x^t$:
+\begin{equation}\label{eq:markov}
+ \P\big[x_j^{t+1} = 1\,|\, x^t\big] \defeq f(\inprod{\theta_j}{x^t}).
+\end{equation}
+As we will later see, this type of influence processes encompasses the most
+widely used influence processes: the Voter model (V), the Independent Cascade
+model (IC), and the Linear Threshold model (T).
+
+\subsection{Problem}
+
+Our goal is to infer the weight matrix $\Theta$ by observing influence
+cascades. Given the form of \eqref{eq:markov}, we will focus on a single row
+$\theta_j$ of $\Theta$ and we will write $y^t_j \defeq x_j^{t+1}$. When there
+is no ambiguity, we will simply write $y^t = y^t_j$ and $\theta= \theta_j$.
-\subsection{The Voter Model}
+The input of our problem is a set of observations $(x^1, y^1),\ldots,(x^n,
+y^n)$. If we observe more than one cascade, we simply concatenate the
+observations from each cascade to form a single longer cascade.
+Equation~\eqref{eq:markov} still holds for each observation.
+
+With these notations, the set of observations follows a Generalized Linear
+Model with Bernoulli distribution and link function $g$ such that $g^{-1}
+= f$. The log-likelihood of $\theta$ can be written:
+\begin{displaymath}
+ \mathcal{L}(\theta\,|\, x, y) = \sum_{t=1}^ny^t \log f(\inprod{\theta}{x^t})
+ + (1-y^t)\log \big(1-f(\inprod{\theta}{x^t})\big)
+\end{displaymath}
+
+\paragraph{Sparsity}
+
+
+\subsection{Voter Model}
+
+\subsection{Independent Cascade Model}