diff options
| author | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-11-05 16:46:34 -0500 |
|---|---|---|
| committer | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-11-05 16:46:34 -0500 |
| commit | 5f1f60a6cbec1140a0fbd90d5fc56dabc2f9888b (patch) | |
| tree | 7b0c2f955a1b3e13dec6d6a50ff65bd1a2e6ec03 /simulation/bayes.py | |
| parent | 15e4871fc224e9e74c93b772b15aea7031f262ab (diff) | |
| download | cascades-5f1f60a6cbec1140a0fbd90d5fc56dabc2f9888b.tar.gz | |
adding graph level bayes stuff
Diffstat (limited to 'simulation/bayes.py')
| -rw-r--r-- | simulation/bayes.py | 76 |
1 files changed, 64 insertions, 12 deletions
diff --git a/simulation/bayes.py b/simulation/bayes.py index 8e50dfd..5dcd48c 100644 --- a/simulation/bayes.py +++ b/simulation/bayes.py @@ -1,25 +1,26 @@ import pymc import numpy as np -def glm_node_setup(cascade, y_obs, g_node, prior=None, *args, **kwargs): +def glm_node_setup(cascade, y_obs, prior=None, *args, **kwargs): """ - Build an IC PyMC model from: + Build an IC PyMC node-level model from: -observed cascades: cascade -outcome vector: y_obs - -graph parameters of node's parents: g_node -desired PyMC prior and parameters: prior, *args - Note: we use the glm modelisation : y = Bernoulli[f(x.dot(g_node))] + Note: we use the glm formulation: y = Bernoulli[f(x.dot(theta))] """ + n_nodes = len(cascade[0]) + # Container class for node's parents - theta = np.empty(len(g_node), dtype=object) - for j, parent in enumerate(g_node): + theta = np.empty(n_nodes, dtype=object) + for j in xrange(n_nodes): if prior is None: theta[j] = pymc.Beta('theta_{}'.format(j), alpha=1, beta=1) else: theta[j] = prior('theta_{}'.format(j), *args, **kwargs) # Observed container class for cascade realization - x = np.empty(len(g_node), dtype=object) + x = np.empty(n_nodes, dtype=object) for i, val in enumerate(cascade.T): x[i] = pymc.Normal('x_{}'.format(i), 0, 1, value=val, observed=True) @@ -34,19 +35,70 @@ def glm_node_setup(cascade, y_obs, g_node, prior=None, *args, **kwargs): return pymc.Model([y, pymc.Container(theta), pymc.Container(x)]) +def mc_graph_setup(infected, susceptible, prior=None, *args, **kwargs): + """ + Build an IC PyMC graph-level model from: + -infected nodes over time: list/tuple of list/tuple of np.array + -susceptible nodes over time: same format as above + Note: we use the Markov Chain formulation: X_{t+1}|X_t,theta = f(X_t.theta) + """ + # Container class for graph parameters + n_nodes = len(infected[0][0]) + theta = np.empty((n_nodes,n_nodes), dtype=object) + for i in xrange(n_nodes): + for j in xrange(n_nodes): + if prior is None: + theta[i, j] = pymc.Beta('theta_{}{}'.format(i, j), alpha=1, + beta=1) + else: + theta[i, j] = prior('theta_{}{}'.format(i, j), *args, **kwargs) + + # Container class for cascade realization + x = {} + for i, cascade in enumerate(infected): + for j, step in enumerate(cascade): + for k, node in enumerate(step): + if j and susceptible[i][j][k]: + p = 1. - pymc.exp(-cascade[j-1].dot(theta[k])) + else: + p = .5 + x[i, j, k] = pymc.Bernoulli('x_{}{}{}'.format(i, j, k), p=p, + value=node, observed=True) + + return pymc.Model([pymc.Container(theta), pymc.Container(x)]) + + if __name__=="__main__": import main import matplotlib.pyplot as plt + import seaborn + seaborn.set_style('whitegrid') g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]]) p = 0.5 g = np.log(1. / (1 - p * g)) - error = [] + + print('running the graph-level MC set-up') + cascades = main.simulate_cascades(100, g) + infected, susc = main.build_cascade_list(cascades) + model = mc_graph_setup(infected, susc) + mcmc = pymc.MCMC(model) + mcmc.sample(10**3, 100) + fig, ax = plt.subplots(len(g), len(g)) + for i in xrange(len(g)): + for j in xrange(len(g)): + ax[i,j].locator_params(nbins=3, axis='x') + ax[i,j].hist(mcmc.trace('theta_{}{}'.format(i,j))[:]) + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=.1) + plt.show() + + + print('running the node level set-up') node = 0 - cascades = main.simulate_cascades(5000, g) + cascades = main.simulate_cascades(100, g) cascade, y_obs = main.build_matrix(cascades, node) - model = glm_node_setup(cascade, y_obs, g[0]) + model = glm_node_setup(cascade, y_obs) mcmc = pymc.MCMC(model) - mcmc.sample(10**5, 10**4) - plt.hist(mcmc.trace('theta_1')[:]) + mcmc.sample(1e5, 1e4) + plt.hist(mcmc.trace('theta_1')[:], bins=1e2) plt.show() |
