diff options
| author | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-02-02 22:45:07 -0500 |
|---|---|---|
| committer | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-02-02 22:45:07 -0500 |
| commit | da8c267a9d10deb593e98d06c26ad09afd2af3b1 (patch) | |
| tree | fe09aa8099841c209597c913b20a97eed136e3c0 | |
| parent | 683dbbdff11b7aa3b99641c82c3413c9c1bd1d9b (diff) | |
| download | cascades-da8c267a9d10deb593e98d06c26ad09afd2af3b1.tar.gz | |
model section
| -rw-r--r-- | paper/sections/model.tex | 145 | ||||
| -rw-r--r-- | src/algorithms.py | 16 | ||||
| -rw-r--r-- | src/convex_optimization.py | 48 | ||||
| -rw-r--r-- | src/make_plots.py | 10 |
4 files changed, 121 insertions, 98 deletions
diff --git a/paper/sections/model.tex b/paper/sections/model.tex index e35aa4a..44236da 100644 --- a/paper/sections/model.tex +++ b/paper/sections/model.tex @@ -1,65 +1,55 @@ -We consider a graph ${\cal G}= (V, E, \Theta)$, where $\Theta$ is a $|V|\times -|V|$ matrix of paremeters describing the edge weights of $\mathcal{G}$. We -define $m\defeq |V|$. A \emph{cascade model} is a Markov process over the -finite state space $\{0, 1, \dots, K\}^V$. An \emph{influence cascade} is -a realisation of the random process described by a cascade model. +We consider a graph ${\cal G}= (V, E, \Theta)$, where $\Theta$ is a $|V|\times |V|$ matrix of parameters describing the edge weights of $\mathcal{G}$. Let $m\defeq |V|$. For each node $j$, let $\Theta_{j}$ be the $j^{th}$ column vector of $\Theta$. Intuitively, $\Theta_{i,j}$ captures the ``influence'' of node $i$ on node $j$. A \emph{Cascade model} is a Markov process over a finite state space $\{0, 1, \dots, K-1\}^V$ with the following properties: +\begin{enumerate} +\item Conditioned on the previous time step, the transition probabilities for each node $i \in V$ are mutually independent. +\item Of the $K$ possible states, there exists a \emph{contagious state} such that all transition probabilities of the Markov process can be expressed as a function of the graph parameters $\Theta$ and the set of ``contagious nodes'' at the previous time step. +\item The initial probability over $\{0, 1, \dots, K-1\}^V$ of the Cascade Model is such that all nodes can eventually reach a \emph{contagious state} with non-zero probability. The ``contagious'' nodes at $t=0$ are called \emph{source nodes}. +\end{enumerate} -In practice, we restrict ourselves to discrete-time homogeneous cascade models. -At $t=0$, each node in the graph has a constant probability $p_{init}$ of being -in the ``active state''. The active nodes at $t=0$ are called the source nodes. -Our probabilistic model for the source nodes is more realistic and less -restrictive than the ``single source'' assumption made in \cite{Daneshmand:2014} -and \cite{Abrahao:13}. +In other words, a cascade model describes a diffusion process, whereby a set of contagious nodes ``influence'' other nodes in the graph to adopt or not their behavior. An \emph{influence cascade} is a realisation of this random process: the successive states of the nodes in graph ${\cal G}$. Note that both the ``single source'' assumption made in \cite{Daneshmand:2014} and \cite{Abrahao:13} as well as the ``uniformly chosen source set'' assumption made in \cite{Netrapalli:2012} verify condition 3. +In the context of Graph Inference, \cite{Netrapalli:2012} focuses specifically on the well-known discrete-time independent cascade model recalled below, which \cite{Abrahao:13} and \cite{Daneshmand:2014} generalize to continuous time. Whilst we restrict ourselves to discrete-time processes, we consider both the independent cascade model and the linear voter model, and show they can be solved by a very similar maximum-likelihood program. The linear threshold model is a special case and is discussed in Section~\ref{sec:linear_threshold}. -\subsection{Generalized Linear Cascade Models} - -Denoting by $X^t$ the state of the cascade at time step $t$, we interpret -$X^t_j = 1$ for node $j\in V$ as ``node $j$ is active'', \emph{i.e}, ``$j$ -exhibits the source nodes' behavior at time step $t$''. We draw inspiration -from \emph{generalized linear models} (GLM) to define a generalized linear -cascade. +% Denoting by $X^t$ the state of the cascade at time step $t$, we interpret +% $X^t_j = 1$ for node $j\in V$ as ``node $j$ is contagious'', \emph{i.e}, ``$j$ +% exhibits the source nodes' behavior at time step $t$''. We draw inspiration +% from \emph{generalized linear models} (GLM) to define a generalized linear +% cascade. -\begin{definition} - \label{def:glcm} - Let us denote by $\{\mathcal{F}_t, t\in\ints\}$ the natural - filtration induced by $\{X_t, t\in\ints\}$. A \emph{generalized linear - cascade} is characterized by the following equation: - \begin{displaymath} - \P[X^{t+1}=x\,|\, \mathcal{F}_t] = - \prod_{i=1}^m f(\inprod{\theta_i}{X^{t}})^{x_i} - \big(1-f(\inprod{\theta_i}{X^{t}}\big)^{1-x_i} - \end{displaymath} -where $f:\mathbb{R}\to[0,1]$ and $\theta_i$ is the $i$-th column of $\Theta$. -\end{definition} +% \begin{definition} +% \label{def:glcm} +% Let us denote by $\{\mathcal{F}_t, t\in\ints\}$ the natural +% filtration induced by $\{X_t, t\in\ints\}$. A \emph{generalized linear +% cascade} is characterized by the following equation: +% \begin{displaymath} +% \P[X^{t+1}=x\,|\, \mathcal{F}_t] = +% \prod_{i=1}^m f(\inprod{\theta_i}{X^{t}})^{x_i} +% \big(1-f(\inprod{\theta_i}{X^{t}}\big)^{1-x_i} +% \end{displaymath} +% where $f:\mathbb{R}\to[0,1]$ and $\theta_i$ is the $i$-th column of $\Theta$. +% \end{definition} -It follows immediately from this definition that a generalized linear cascade -satisfies the Markov property: -\begin{displaymath} - \P[X^{t+1}=x\,|\mathcal{F}_t] = \P[X^{t+1}=x\,|\, X^t] -\end{displaymath} -Note also that $\E[X^{t+1}_i\,|\,X^t] = f(\inprod{\theta_i}{X^t})$. As such, -$f$ can be interpreted as the inverse link function of our generalized linear -cascade model. +% It follows immediately from this definition that a generalized linear cascade +% satisfies the Markov property: +% \begin{displaymath} +% \P[X^{t+1}=x\,|\mathcal{F}_t] = \P[X^{t+1}=x\,|\, X^t] +% \end{displaymath} +% Note also that $\E[X^{t+1}_i\,|\,X^t] = f(\inprod{\theta_i}{X^t})$. As such, +% $f$ can be interpreted as the inverse link function of our generalized linear +% cascade model. -\subsection{Examples} -\label{subsec:examples} +\subsection{Independent Cascade Model} -In this section, we show that the well-known Independent Cascade Model and the Voter model are Generalized Linear Cascades. The Linear Threshold model will be discussed in Section~\ref{sec:linear_threshold}. - -\subsubsection{Independent Cascade Model} - -In the independent cascade model, nodes can be either susceptible, active or -inactive. At $t=0$, all source nodes are ``active'' and all remaining nodes are +In the independent cascade model, nodes can be either susceptible, contagious or +immune. At $t=0$, all source nodes are ``contagious'' and all remaining nodes are ``susceptible''. At each time step $t$, for each edge $(i,j)$ where $j$ is -susceptible and $i$ is active, $i$ attempts to infect $j$ with probability +susceptible and $i$ is contagious, $i$ attempts to infect $j$ with probability $p_{i,j}\in[0,1]$, the infection attempts are independent of each other. If $i$ -succeeds, $j$ will become active at time step $t+1$. Regardless of $i$'s -success, node $i$ will be inactive at time $t+1$. In other words, nodes stay -active for only one time step. The cascade process terminates when no active +succeeds, $j$ will become contagious at time step $t+1$. Regardless of $i$'s +success, node $i$ will be immune at time $t+1$. In other words, nodes stay +contagious for only one time step. The cascade process terminates when no contagious nodes remain. -If we denote by $X^t$ the indicator variable of the set of active nodes at time +If we denote by $X^t$ the indicator variable of the set of contagious nodes at time step $t$, then if $j$ is susceptible at time step $t+1$, we have: \begin{displaymath} \P\big[X^{t+1}_j = 1\,|\, X^{t}\big] @@ -72,24 +62,57 @@ Defining $\Theta_{i,j} \defeq \log(1-p_{i,j})$, this can be rewritten as: = 1 - \prod_{i = 1}^m e^{\Theta_{i,j}X^t_i} = 1 - e^{\inprod{\theta_j}{X^t}} \end{equation} -which is a Generalized Linear Cascade model with inverse link function $f(z) -= 1 - e^z$. -\subsubsection{The Voter Model} +\subsection{The Linear Voter Model} -In the Voter Model, nodes can be either red or blue, where ``blue'' is the -source state. The parameters of the graph are normalized such that $\forall i, +In the Linear Voter Model, nodes can be either \emph{red} or \emph{blue}. Both states verify the properties of a contagious state, but for ease of presentation, we will suppose that the contagious nodes are the \emph{blue}. The parameters of the graph are normalized such that $\forall i, \ \sum_j \Theta_{i,j} = 1$. Each round, every node $j$ independently chooses -one of its neighbors with probability $\Theta_ij$ and adopts their color. The +one of its neighbors with probability $\Theta_{ij}$ and adopts their color. The cascades stops at a fixed horizon time T or if all nodes are of the same color. If we denote by $X^t$ the indicator variable of the set of blue nodes at time step $t$, then we have: \begin{equation} -\mathbb{P}\left[X^{t+1}_j = 1 | X^t \right] = \sum_{i=1}^m \Theta_{i,j} X_i^t = \inprod{\theta_j}{X^t} +\mathbb{P}\left[X^{t+1}_j = 1 | X^t \right] = \sum_{i=1}^m \Theta_{i,j} X_i^t = \inprod{\Theta_j}{X^t} \tag{V} \end{equation} -which is a Generalized Linear Cascade model with inverse link function -$f(z) = z$. + +% \subsection{The Linear Threshold Model} + +% In the Linear Threshold Model, each node $j\in V$ has a threshold $t_j$ from the interval $[0,1]$ and for each node, the sum of incoming weights is less than $1$: $\forall j\in V$, $\sum_{i=1}^m \Theta_{i,j} \leq 1$. + +% Nodes have two states: susceptible or contagious. At each time step, each susceptible +% node $j$ becomes contagious if the sum of the incoming weights from contagious parents is greater than $j$'s threshold $t_j$. Nodes remain contagious until the end of the cascade, which is reached when no new susceptible nodes become contagious. + +% As such, the source nodes are chosen, the process is entirely deterministic. Let $X^t$ be the indicator variable of the set of contagious nodes at time step $t-1$, then: +% \begin{equation} +% \nonumber +% X^{t+1}_j = \mathbbm{1}_{\sum_{i=1}^m \Theta_{i,j}X^t_i > t_j} +% = \mathbbm{1}_{\inprod{\theta_j}{X^t} > t_j} +% \end{equation} +% where we defined again $\theta_j\defeq (\Theta_{1,j},\ldots,\Theta_{m,j})$. In other words, for every step in the linear threshold model, we observe the following signal: + +% \begin{equation} +% \label{eq:lt} +% \tag{LT} +% \mathbb{P} \left[X^{t+1}_j = 1 | X^t\right] = \text{sign} \left(\inprod{\theta_j}{X^t} - t_j \right) +% \end{equation} +% where ``sign'' is the function $\mathbbm{1}_{\cdot > 0}$. + +\subsection{Generalized Linear Cascade Models} +\label{sec:GLC} + +Despite their obvious differences, both models share a common similarity: conditioned on node $j$ being susceptible at time step $t$, the probability that node $j$ becomes infected at time step $t+1$ is a bernoulli of parameter $f(\Theta_j \cdot X^t)$, where $f:\mathbb{R} \rightarrow [0,1]$. In the case of the voter model, $f_{\text{V}}$ is the identify function, and in the case of the independent cascade model $f_{\text{ICC}} : z \rightarrow 1 - e^z$. We draw inspiration from generalized linear models to introduce \emph{Generalized Linear Cascades}: + +\begin{definition} +\label{def:glcm} +Let us denote by $\{\mathcal{F}_t, t\in\ints\}$ the natural filtration induced by the state of ${\cal G}$ up to time step t. A \emph{generalized linear cascade} is a cascade model characterized by the following equation: +\begin{displaymath} + \P[X^{t+1}=x\,|\, \mathcal{F}_t] = + \prod_{i=1}^m f_{X^t_i,x_i}(\Theta_i \cdot X^t) +\end{displaymath} +where $f_{s,p}:\mathbb{R}\to[0,1]$ for each state pair $(s,p) \in \{0, \dots, K-1 \}$ +\end{definition} + \subsection{Maximum Likelihood Estimation} @@ -124,7 +147,7 @@ $\Theta$ can be estimated by a separate optimization program: \hat{\theta}_i \in \argmax_{\theta}\frac{1}{n_i}\mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n) + \lambda\|\theta_i\|_1 \end{equation} -where we denote by $n_i$ the first step at which node $i$ becomes active and +where we denote by $n_i$ the first step at which node $i$ becomes contagious and where: \begin{multline} \mathcal{L}_i(\theta_i\,|\,x^1,\ldots,x^n) = \frac{1}{n_i} diff --git a/src/algorithms.py b/src/algorithms.py index 926d9bc..9ebccf1 100644 --- a/src/algorithms.py +++ b/src/algorithms.py @@ -31,7 +31,7 @@ def greedy_prediction(G, cascades): return G_hat -def recovery_l1obj_l2constraint(G, cascades, floor_cstt, passed_function, +def recovery_passed_function(G, cascades, floor_cstt, passed_function, *args, **kwargs): """ Returns estimated graph from convex program specified by passed_function @@ -40,11 +40,13 @@ def recovery_l1obj_l2constraint(G, cascades, floor_cstt, passed_function, G_hat = cascade_creation.InfluenceGraph(max_proba=None) G_hat.add_nodes_from(G.nodes()) + f_x, f_xz = passed_function(*args, **kwargs) + for node in G_hat.nodes(): print(node) try: M, w = cascade_creation.icc_matrixvector_for_node(cascades, node) - p_node, __ = passed_function(M,w, *args, **kwargs) + p_node, _ = convex_optimization.diff_and_opt(M, w, f_x, f_xz) G_hat = cascade_creation.add_edges_from_proba_vector(G=G_hat, p_node=p_node, node=node, floor_cstt=floor_cstt) except timeout.TimeoutError: @@ -86,15 +88,15 @@ def test(): unit test """ G = cascade_creation.InfluenceGraph(max_proba = .8) - G.erdos_init(n = 10, p = .2) + G.erdos_init(n = 100, p = .2) import time t0 = time.time() A = cascade_creation.generate_cascades(G, .2, 1000) - if 1: - G_hat = greedy_prediction(G, A) if 0: - G_hat = recovery_l1obj_l2constraint(G, A, - passed_function=convex_optimization.l1obj_l2penalization, + G_hat = greedy_prediction(G, A) + if 1: + G_hat = recovery_passed_function(G, A, + passed_function=convex_optimization.type_lasso, floor_cstt=.1, lbda=10) correctness_measure(G, G_hat, print_values=True) diff --git a/src/convex_optimization.py b/src/convex_optimization.py index 1f34508..163b6d5 100644 --- a/src/convex_optimization.py +++ b/src/convex_optimization.py @@ -44,55 +44,52 @@ def sparse_recovery(M_val, w_val, lbda): return diff_and_opt(theta, theta_, M, M_val, w, lbda, y) -@timeout.timeout(20) -def type_lasso(M_val, w_val, lbda): +def type_lasso(lbda): """ Solves: min - sum_j theta_j + lbda*|e^{M*theta} - (1 - w)|_2 s.t theta_j <= 0 """ - assert len(M_val) == len(w_val) - - if M_val.dtype == bool: - M_val = M_val.astype('float32') - if type(lbda) == int or type(lbda) == float: lbda = np.array(lbda) + lbda = theano.shared(lbda.astype(theano.config.floatX)) + theta = tensor.row().T theta_ = theta.flatten() - M = theano.shared(M_val.astype(theano.config.floatX)) - w = theano.shared(w_val.astype(theano.config.floatX)) - lbda = theano.shared(lbda.astype(theano.config.floatX)) + M = tensor.matrix(name='M', dtype=theano.config.floatX) + w = tensor.vector(name='w', dtype=theano.config.floatX) y = lbda * (theta_).norm(1) + ( tensor.exp(M.dot(theta_)) - (1 - w)).norm(2) - return diff_and_opt(theta, theta_, M, M_val, w, lbda, y) - - -def diff_and_opt(theta, theta_, M, M_val, w, lbda, y): z = tensor.row().T z_ = z.flatten() - m, n = M_val.shape - y_diff = tensor.grad(y, theta_) y_hess = z[0] * theano.gradient.hessian(y, theta_) - f_x = theano.function([theta], [y, y_diff], allow_input_downcast=True) - f_xz = theano.function([theta, z], [y, y_diff, y_hess], - allow_input_downcast=True) + f_x = theano.function([theta, M, w], [y, y_diff], + allow_input_downcast=True) + f_xz = theano.function([theta, z, M, w], [y, y_diff, y_hess], + allow_input_downcast=True) + + return f_x, f_xz + +@timeout.timeout(8) +def diff_and_opt(M_val, w_val, f_x, f_xz): + + m, n = M_val.shape def F(x=None, z=None): if x is None: return 0, cvxopt.matrix(-.001, (n,1)) elif z is None: - y, y_diff = f_x(x) + y, y_diff = f_x(x, M_val, w_val) return cvxopt.matrix(float(y), (1, 1)),\ cvxopt.matrix(y_diff.astype("float64")).T else: - y, y_diff, y_hess = f_xz(x, z) + y, y_diff, y_hess = f_xz(x, z, M_val, w_val) return cvxopt.matrix(float(y), (1, 1)), \ cvxopt.matrix(y_diff.astype("float64")).T, \ cvxopt.matrix(y_hess.astype("float64")) @@ -104,7 +101,7 @@ def diff_and_opt(theta, theta_, M, M_val, w, lbda, y): #cvxopt.solvers.options['feastol'] = 2e-5 #cvxopt.solvers.options['abstol'] = 2e-5 #cvxopt.solvers.options['maxiters'] = 100 - cvxopt.solvers.options['show_progress'] = False + cvxopt.solvers.options['show_progress'] = True try: theta = cvxopt.solvers.cp(F, G, h)['x'] except ArithmeticError: @@ -128,13 +125,14 @@ def test(): M_val, w_val = cascade_creation.icc_matrixvector_for_node(A, 0) #Type lasso - if 0: - p_vec, theta = type_lasso(M_val, w_val, lbda) + if 1: + f_x, f_xz = type_lasso(lbda) + p_vec, _ = diff_and_opt(M_val, w_val, f_x, f_xz) print(p_vec) print(G.mat) #Sparse recovery - if 1: + if 0: p_vec, theta = sparse_recovery(M_val, w_val, lbda) print(p_vec) print(G.mat) diff --git a/src/make_plots.py b/src/make_plots.py index 3ac8362..c1cafe7 100644 --- a/src/make_plots.py +++ b/src/make_plots.py @@ -23,7 +23,7 @@ def compare_greedy_and_lagrange_cs284r(): #Lagrange Objective G_hat = algorithms.recovery_l1obj_l2constraint(G, A, passed_function=convex_optimization.type_lasso, - floor_cstt=.1, lbda=10) + floor_cstt=.05, lbda=10) algorithms.correctness_measure(G, G_hat, print_values=True) @@ -32,11 +32,11 @@ def test(): unit test """ G = cascade_creation.InfluenceGraph(max_proba=.8) - G.erdos_init(n=20, p=.2) + G.erdos_init(n=30, p=.2) A = cascade_creation.generate_cascades(G, p_init=.1, n_cascades=1000) - G_hat = algorithms.recovery_l1obj_l2constraint(G, A, - passed_function=convex_optimization.sparse_recovery, - floor_cstt=.1, lbda=1) + G_hat = algorithms.recovery_passed_function(G, A, + passed_function=convex_optimization.type_lasso, + floor_cstt=.001, lbda=.0001) algorithms.correctness_measure(G, G_hat, print_values=True) if __name__=="__main__": |
