aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--paper/sections/model.tex145
-rw-r--r--src/algorithms.py16
-rw-r--r--src/convex_optimization.py48
-rw-r--r--src/make_plots.py10
4 files changed, 121 insertions, 98 deletions
diff --git a/paper/sections/model.tex b/paper/sections/model.tex
index e35aa4a..44236da 100644
--- a/paper/sections/model.tex
+++ b/paper/sections/model.tex
@@ -1,65 +1,55 @@
-We consider a graph ${\cal G}= (V, E, \Theta)$, where $\Theta$ is a $|V|\times
-|V|$ matrix of paremeters describing the edge weights of $\mathcal{G}$. We
-define $m\defeq |V|$. A \emph{cascade model} is a Markov process over the
-finite state space $\{0, 1, \dots, K\}^V$. An \emph{influence cascade} is
-a realisation of the random process described by a cascade model.
+We consider a graph ${\cal G}= (V, E, \Theta)$, where $\Theta$ is a $|V|\times |V|$ matrix of parameters describing the edge weights of $\mathcal{G}$. Let $m\defeq |V|$. For each node $j$, let $\Theta_{j}$ be the $j^{th}$ column vector of $\Theta$. Intuitively, $\Theta_{i,j}$ captures the ``influence'' of node $i$ on node $j$. A \emph{Cascade model} is a Markov process over a finite state space $\{0, 1, \dots, K-1\}^V$ with the following properties:
+\begin{enumerate}
+\item Conditioned on the previous time step, the transition probabilities for each node $i \in V$ are mutually independent.
+\item Of the $K$ possible states, there exists a \emph{contagious state} such that all transition probabilities of the Markov process can be expressed as a function of the graph parameters $\Theta$ and the set of ``contagious nodes'' at the previous time step.
+\item The initial probability over $\{0, 1, \dots, K-1\}^V$ of the Cascade Model is such that all nodes can eventually reach a \emph{contagious state} with non-zero probability. The ``contagious'' nodes at $t=0$ are called \emph{source nodes}.
+\end{enumerate}
-In practice, we restrict ourselves to discrete-time homogeneous cascade models.
-At $t=0$, each node in the graph has a constant probability $p_{init}$ of being
-in the ``active state''. The active nodes at $t=0$ are called the source nodes.
-Our probabilistic model for the source nodes is more realistic and less
-restrictive than the ``single source'' assumption made in \cite{Daneshmand:2014}
-and \cite{Abrahao:13}.
+In other words, a cascade model describes a diffusion process, whereby a set of contagious nodes ``influence'' other nodes in the graph to adopt or not their behavior. An \emph{influence cascade} is a realisation of this random process: the successive states of the nodes in graph ${\cal G}$. Note that both the ``single source'' assumption made in \cite{Daneshmand:2014} and \cite{Abrahao:13} as well as the ``uniformly chosen source set'' assumption made in \cite{Netrapalli:2012} verify condition 3.
+In the context of Graph Inference, \cite{Netrapalli:2012} focuses specifically on the well-known discrete-time independent cascade model recalled below, which \cite{Abrahao:13} and \cite{Daneshmand:2014} generalize to continuous time. Whilst we restrict ourselves to discrete-time processes, we consider both the independent cascade model and the linear voter model, and show they can be solved by a very similar maximum-likelihood program. The linear threshold model is a special case and is discussed in Section~\ref{sec:linear_threshold}.
-\subsection{Generalized Linear Cascade Models}
-
-Denoting by $X^t$ the state of the cascade at time step $t$, we interpret
-$X^t_j = 1$ for node $j\in V$ as ``node $j$ is active'', \emph{i.e}, ``$j$
-exhibits the source nodes' behavior at time step $t$''. We draw inspiration
-from \emph{generalized linear models} (GLM) to define a generalized linear
-cascade.
+% Denoting by $X^t$ the state of the cascade at time step $t$, we interpret
+% $X^t_j = 1$ for node $j\in V$ as ``node $j$ is contagious'', \emph{i.e}, ``$j$
+% exhibits the source nodes' behavior at time step $t$''. We draw inspiration
+% from \emph{generalized linear models} (GLM) to define a generalized linear
+% cascade.
-\begin{definition}
- \label{def:glcm}
- Let us denote by $\{\mathcal{F}_t, t\in\ints\}$ the natural
- filtration induced by $\{X_t, t\in\ints\}$. A \emph{generalized linear
- cascade} is characterized by the following equation:
- \begin{displaymath}
- \P[X^{t+1}=x\,|\, \mathcal{F}_t] =
- \prod_{i=1}^m f(\inprod{\theta_i}{X^{t}})^{x_i}
- \big(1-f(\inprod{\theta_i}{X^{t}}\big)^{1-x_i}
- \end{displaymath}
-where $f:\mathbb{R}\to[0,1]$ and $\theta_i$ is the $i$-th column of $\Theta$.
-\end{definition}
+% \begin{definition}
+% \label{def:glcm}
+% Let us denote by $\{\mathcal{F}_t, t\in\ints\}$ the natural
+% filtration induced by $\{X_t, t\in\ints\}$. A \emph{generalized linear
+% cascade} is characterized by the following equation:
+% \begin{displaymath}
+% \P[X^{t+1}=x\,|\, \mathcal{F}_t] =
+% \prod_{i=1}^m f(\inprod{\theta_i}{X^{t}})^{x_i}
+% \big(1-f(\inprod{\theta_i}{X^{t}}\big)^{1-x_i}
+% \end{displaymath}
+% where $f:\mathbb{R}\to[0,1]$ and $\theta_i$ is the $i$-th column of $\Theta$.
+% \end{definition}
-It follows immediately from this definition that a generalized linear cascade
-satisfies the Markov property:
-\begin{displaymath}
- \P[X^{t+1}=x\,|\mathcal{F}_t] = \P[X^{t+1}=x\,|\, X^t]
-\end{displaymath}
-Note also that $\E[X^{t+1}_i\,|\,X^t] = f(\inprod{\theta_i}{X^t})$. As such,
-$f$ can be interpreted as the inverse link function of our generalized linear
-cascade model.
+% It follows immediately from this definition that a generalized linear cascade
+% satisfies the Markov property:
+% \begin{displaymath}
+% \P[X^{t+1}=x\,|\mathcal{F}_t] = \P[X^{t+1}=x\,|\, X^t]
+% \end{displaymath}
+% Note also that $\E[X^{t+1}_i\,|\,X^t] = f(\inprod{\theta_i}{X^t})$. As such,
+% $f$ can be interpreted as the inverse link function of our generalized linear
+% cascade model.
-\subsection{Examples}
-\label{subsec:examples}
+\subsection{Independent Cascade Model}
-In this section, we show that the well-known Independent Cascade Model and the Voter model are Generalized Linear Cascades. The Linear Threshold model will be discussed in Section~\ref{sec:linear_threshold}.
-
-\subsubsection{Independent Cascade Model}
-
-In the independent cascade model, nodes can be either susceptible, active or
-inactive. At $t=0$, all source nodes are ``active'' and all remaining nodes are
+In the independent cascade model, nodes can be either susceptible, contagious or
+immune. At $t=0$, all source nodes are ``contagious'' and all remaining nodes are
``susceptible''. At each time step $t$, for each edge $(i,j)$ where $j$ is
-susceptible and $i$ is active, $i$ attempts to infect $j$ with probability
+susceptible and $i$ is contagious, $i$ attempts to infect $j$ with probability
$p_{i,j}\in[0,1]$, the infection attempts are independent of each other. If $i$
-succeeds, $j$ will become active at time step $t+1$. Regardless of $i$'s
-success, node $i$ will be inactive at time $t+1$. In other words, nodes stay
-active for only one time step. The cascade process terminates when no active
+succeeds, $j$ will become contagious at time step $t+1$. Regardless of $i$'s
+success, node $i$ will be immune at time $t+1$. In other words, nodes stay
+contagious for only one time step. The cascade process terminates when no contagious
nodes remain.
-If we denote by $X^t$ the indicator variable of the set of active nodes at time
+If we denote by $X^t$ the indicator variable of the set of contagious nodes at time
step $t$, then if $j$ is susceptible at time step $t+1$, we have:
\begin{displaymath}
\P\big[X^{t+1}_j = 1\,|\, X^{t}\big]
@@ -72,24 +62,57 @@ Defining $\Theta_{i,j} \defeq \log(1-p_{i,j})$, this can be rewritten as:
= 1 - \prod_{i = 1}^m e^{\Theta_{i,j}X^t_i}
= 1 - e^{\inprod{\theta_j}{X^t}}
\end{equation}
-which is a Generalized Linear Cascade model with inverse link function $f(z)
-= 1 - e^z$.
-\subsubsection{The Voter Model}
+\subsection{The Linear Voter Model}
-In the Voter Model, nodes can be either red or blue, where ``blue'' is the
-source state. The parameters of the graph are normalized such that $\forall i,
+In the Linear Voter Model, nodes can be either \emph{red} or \emph{blue}. Both states verify the properties of a contagious state, but for ease of presentation, we will suppose that the contagious nodes are the \emph{blue}. The parameters of the graph are normalized such that $\forall i,
\ \sum_j \Theta_{i,j} = 1$. Each round, every node $j$ independently chooses
-one of its neighbors with probability $\Theta_ij$ and adopts their color. The
+one of its neighbors with probability $\Theta_{ij}$ and adopts their color. The
cascades stops at a fixed horizon time T or if all nodes are of the same color.
If we denote by $X^t$ the indicator variable of the set of blue nodes at time
step $t$, then we have:
\begin{equation}
-\mathbb{P}\left[X^{t+1}_j = 1 | X^t \right] = \sum_{i=1}^m \Theta_{i,j} X_i^t = \inprod{\theta_j}{X^t}
+\mathbb{P}\left[X^{t+1}_j = 1 | X^t \right] = \sum_{i=1}^m \Theta_{i,j} X_i^t = \inprod{\Theta_j}{X^t}
\tag{V}
\end{equation}
-which is a Generalized Linear Cascade model with inverse link function
-$f(z) = z$.
+
+% \subsection{The Linear Threshold Model}
+
+% In the Linear Threshold Model, each node $j\in V$ has a threshold $t_j$ from the interval $[0,1]$ and for each node, the sum of incoming weights is less than $1$: $\forall j\in V$, $\sum_{i=1}^m \Theta_{i,j} \leq 1$.
+
+% Nodes have two states: susceptible or contagious. At each time step, each susceptible
+% node $j$ becomes contagious if the sum of the incoming weights from contagious parents is greater than $j$'s threshold $t_j$. Nodes remain contagious until the end of the cascade, which is reached when no new susceptible nodes become contagious.
+
+% As such, the source nodes are chosen, the process is entirely deterministic. Let $X^t$ be the indicator variable of the set of contagious nodes at time step $t-1$, then:
+% \begin{equation}
+% \nonumber
+% X^{t+1}_j = \mathbbm{1}_{\sum_{i=1}^m \Theta_{i,j}X^t_i > t_j}
+% = \mathbbm{1}_{\inprod{\theta_j}{X^t} > t_j}
+% \end{equation}
+% where we defined again $\theta_j\defeq (\Theta_{1,j},\ldots,\Theta_{m,j})$. In other words, for every step in the linear threshold model, we observe the following signal:
+
+% \begin{equation}
+% \label{eq:lt}
+% \tag{LT}
+% \mathbb{P} \left[X^{t+1}_j = 1 | X^t\right] = \text{sign} \left(\inprod{\theta_j}{X^t} - t_j \right)
+% \end{equation}
+% where ``sign'' is the function $\mathbbm{1}_{\cdot > 0}$.
+
+\subsection{Generalized Linear Cascade Models}
+\label{sec:GLC}
+
+Despite their obvious differences, both models share a common similarity: conditioned on node $j$ being susceptible at time step $t$, the probability that node $j$ becomes infected at time step $t+1$ is a bernoulli of parameter $f(\Theta_j \cdot X^t)$, where $f:\mathbb{R} \rightarrow [0,1]$. In the case of the voter model, $f_{\text{V}}$ is the identify function, and in the case of the independent cascade model $f_{\text{ICC}} : z \rightarrow 1 - e^z$. We draw inspiration from generalized linear models to introduce \emph{Generalized Linear Cascades}:
+
+\begin{definition}
+\label{def:glcm}
+Let us denote by $\{\mathcal{F}_t, t\in\ints\}$ the natural filtration induced by the state of ${\cal G}$ up to time step t. A \emph{generalized linear cascade} is a cascade model characterized by the following equation:
+\begin{displaymath}
+ \P[X^{t+1}=x\,|\, \mathcal{F}_t] =
+ \prod_{i=1}^m f_{X^t_i,x_i}(\Theta_i \cdot X^t)
+\end{displaymath}
+where $f_{s,p}:\mathbb{R}\to[0,1]$ for each state pair $(s,p) \in \{0, \dots, K-1 \}$
+\end{definition}
+
\subsection{Maximum Likelihood Estimation}
@@ -124,7 +147,7 @@ $\Theta$ can be estimated by a separate optimization program:
\hat{\theta}_i \in \argmax_{\theta}\frac{1}{n_i}\mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n)
+ \lambda\|\theta_i\|_1
\end{equation}
-where we denote by $n_i$ the first step at which node $i$ becomes active and
+where we denote by $n_i$ the first step at which node $i$ becomes contagious and
where:
\begin{multline}
\mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n) = \frac{1}{n_i}
diff --git a/src/algorithms.py b/src/algorithms.py
index 926d9bc..9ebccf1 100644
--- a/src/algorithms.py
+++ b/src/algorithms.py
@@ -31,7 +31,7 @@ def greedy_prediction(G, cascades):
return G_hat
-def recovery_l1obj_l2constraint(G, cascades, floor_cstt, passed_function,
+def recovery_passed_function(G, cascades, floor_cstt, passed_function,
*args, **kwargs):
"""
Returns estimated graph from convex program specified by passed_function
@@ -40,11 +40,13 @@ def recovery_l1obj_l2constraint(G, cascades, floor_cstt, passed_function,
G_hat = cascade_creation.InfluenceGraph(max_proba=None)
G_hat.add_nodes_from(G.nodes())
+ f_x, f_xz = passed_function(*args, **kwargs)
+
for node in G_hat.nodes():
print(node)
try:
M, w = cascade_creation.icc_matrixvector_for_node(cascades, node)
- p_node, __ = passed_function(M,w, *args, **kwargs)
+ p_node, _ = convex_optimization.diff_and_opt(M, w, f_x, f_xz)
G_hat = cascade_creation.add_edges_from_proba_vector(G=G_hat,
p_node=p_node, node=node, floor_cstt=floor_cstt)
except timeout.TimeoutError:
@@ -86,15 +88,15 @@ def test():
unit test
"""
G = cascade_creation.InfluenceGraph(max_proba = .8)
- G.erdos_init(n = 10, p = .2)
+ G.erdos_init(n = 100, p = .2)
import time
t0 = time.time()
A = cascade_creation.generate_cascades(G, .2, 1000)
- if 1:
- G_hat = greedy_prediction(G, A)
if 0:
- G_hat = recovery_l1obj_l2constraint(G, A,
- passed_function=convex_optimization.l1obj_l2penalization,
+ G_hat = greedy_prediction(G, A)
+ if 1:
+ G_hat = recovery_passed_function(G, A,
+ passed_function=convex_optimization.type_lasso,
floor_cstt=.1, lbda=10)
correctness_measure(G, G_hat, print_values=True)
diff --git a/src/convex_optimization.py b/src/convex_optimization.py
index 1f34508..163b6d5 100644
--- a/src/convex_optimization.py
+++ b/src/convex_optimization.py
@@ -44,55 +44,52 @@ def sparse_recovery(M_val, w_val, lbda):
return diff_and_opt(theta, theta_, M, M_val, w, lbda, y)
-@timeout.timeout(20)
-def type_lasso(M_val, w_val, lbda):
+def type_lasso(lbda):
"""
Solves:
min - sum_j theta_j + lbda*|e^{M*theta} - (1 - w)|_2
s.t theta_j <= 0
"""
- assert len(M_val) == len(w_val)
-
- if M_val.dtype == bool:
- M_val = M_val.astype('float32')
-
if type(lbda) == int or type(lbda) == float:
lbda = np.array(lbda)
+ lbda = theano.shared(lbda.astype(theano.config.floatX))
+
theta = tensor.row().T
theta_ = theta.flatten()
- M = theano.shared(M_val.astype(theano.config.floatX))
- w = theano.shared(w_val.astype(theano.config.floatX))
- lbda = theano.shared(lbda.astype(theano.config.floatX))
+ M = tensor.matrix(name='M', dtype=theano.config.floatX)
+ w = tensor.vector(name='w', dtype=theano.config.floatX)
y = lbda * (theta_).norm(1) + (
tensor.exp(M.dot(theta_)) - (1 - w)).norm(2)
- return diff_and_opt(theta, theta_, M, M_val, w, lbda, y)
-
-
-def diff_and_opt(theta, theta_, M, M_val, w, lbda, y):
z = tensor.row().T
z_ = z.flatten()
- m, n = M_val.shape
-
y_diff = tensor.grad(y, theta_)
y_hess = z[0] * theano.gradient.hessian(y, theta_)
- f_x = theano.function([theta], [y, y_diff], allow_input_downcast=True)
- f_xz = theano.function([theta, z], [y, y_diff, y_hess],
- allow_input_downcast=True)
+ f_x = theano.function([theta, M, w], [y, y_diff],
+ allow_input_downcast=True)
+ f_xz = theano.function([theta, z, M, w], [y, y_diff, y_hess],
+ allow_input_downcast=True)
+
+ return f_x, f_xz
+
+@timeout.timeout(8)
+def diff_and_opt(M_val, w_val, f_x, f_xz):
+
+ m, n = M_val.shape
def F(x=None, z=None):
if x is None:
return 0, cvxopt.matrix(-.001, (n,1))
elif z is None:
- y, y_diff = f_x(x)
+ y, y_diff = f_x(x, M_val, w_val)
return cvxopt.matrix(float(y), (1, 1)),\
cvxopt.matrix(y_diff.astype("float64")).T
else:
- y, y_diff, y_hess = f_xz(x, z)
+ y, y_diff, y_hess = f_xz(x, z, M_val, w_val)
return cvxopt.matrix(float(y), (1, 1)), \
cvxopt.matrix(y_diff.astype("float64")).T, \
cvxopt.matrix(y_hess.astype("float64"))
@@ -104,7 +101,7 @@ def diff_and_opt(theta, theta_, M, M_val, w, lbda, y):
#cvxopt.solvers.options['feastol'] = 2e-5
#cvxopt.solvers.options['abstol'] = 2e-5
#cvxopt.solvers.options['maxiters'] = 100
- cvxopt.solvers.options['show_progress'] = False
+ cvxopt.solvers.options['show_progress'] = True
try:
theta = cvxopt.solvers.cp(F, G, h)['x']
except ArithmeticError:
@@ -128,13 +125,14 @@ def test():
M_val, w_val = cascade_creation.icc_matrixvector_for_node(A, 0)
#Type lasso
- if 0:
- p_vec, theta = type_lasso(M_val, w_val, lbda)
+ if 1:
+ f_x, f_xz = type_lasso(lbda)
+ p_vec, _ = diff_and_opt(M_val, w_val, f_x, f_xz)
print(p_vec)
print(G.mat)
#Sparse recovery
- if 1:
+ if 0:
p_vec, theta = sparse_recovery(M_val, w_val, lbda)
print(p_vec)
print(G.mat)
diff --git a/src/make_plots.py b/src/make_plots.py
index 3ac8362..c1cafe7 100644
--- a/src/make_plots.py
+++ b/src/make_plots.py
@@ -23,7 +23,7 @@ def compare_greedy_and_lagrange_cs284r():
#Lagrange Objective
G_hat = algorithms.recovery_l1obj_l2constraint(G, A,
passed_function=convex_optimization.type_lasso,
- floor_cstt=.1, lbda=10)
+ floor_cstt=.05, lbda=10)
algorithms.correctness_measure(G, G_hat, print_values=True)
@@ -32,11 +32,11 @@ def test():
unit test
"""
G = cascade_creation.InfluenceGraph(max_proba=.8)
- G.erdos_init(n=20, p=.2)
+ G.erdos_init(n=30, p=.2)
A = cascade_creation.generate_cascades(G, p_init=.1, n_cascades=1000)
- G_hat = algorithms.recovery_l1obj_l2constraint(G, A,
- passed_function=convex_optimization.sparse_recovery,
- floor_cstt=.1, lbda=1)
+ G_hat = algorithms.recovery_passed_function(G, A,
+ passed_function=convex_optimization.type_lasso,
+ floor_cstt=.001, lbda=.0001)
algorithms.correctness_measure(G, G_hat, print_values=True)
if __name__=="__main__":