import pymc import numpy as np def glm_node_setup(cascade, y_obs, prior=None, *args, **kwargs): """ Build an IC PyMC node-level model from: -observed cascades: cascade -outcome vector: y_obs -desired PyMC prior and parameters: prior, *args Note: we use the glm formulation: y = Bernoulli[f(x.dot(theta))] """ n_nodes = len(cascade[0]) # Container class for node's parents theta = np.empty(n_nodes, dtype=object) for j in xrange(n_nodes): if prior is None: theta[j] = pymc.Beta('theta_{}'.format(j), alpha=1, beta=1) else: theta[j] = prior('theta_{}'.format(j), *args, **kwargs) # Observed container class for cascade realization x = np.empty(n_nodes, dtype=object) for i, val in enumerate(cascade.T): x[i] = pymc.Normal('x_{}'.format(i), 0, 1, value=val, observed=True) @pymc.deterministic def glm_p(x=x, theta=theta): return 1. - np.exp(-x.dot(theta)) @pymc.observed def y(glm_p=glm_p, value=y_obs): return pymc.bernoulli_like(value, glm_p) return pymc.Model([y, pymc.Container(theta), pymc.Container(x)]) def mc_graph_setup(infected, susceptible, prior=None, *args, **kwargs): """ Build an IC PyMC graph-level model from: -infected nodes over time: list/tuple of list/tuple of np.array -susceptible nodes over time: same format as above Note: we use the Markov Chain formulation: X_{t+1}|X_t,theta = f(X_t.theta) """ # Container class for graph parameters n_nodes = len(infected[0][0]) theta = np.empty((n_nodes,n_nodes), dtype=object) for i in xrange(n_nodes): for j in xrange(n_nodes): if prior is None: theta[i, j] = pymc.Beta('theta_{}{}'.format(i, j), alpha=1, beta=1) else: theta[i, j] = prior('theta_{}{}'.format(i, j), *args, **kwargs) # Container class for cascade realization x = {} for i, cascade in enumerate(infected): for j, step in enumerate(cascade): for k, node in enumerate(step): if j and susceptible[i][j][k]: p = 1. - pymc.exp(-cascade[j-1].dot(theta[k])) else: p = .5 x[i, j, k] = pymc.Bernoulli('x_{}{}{}'.format(i, j, k), p=p, value=node, observed=True) return pymc.Model([pymc.Container(theta), pymc.Container(x)]) if __name__=="__main__": import main import matplotlib.pyplot as plt import seaborn seaborn.set_style('whitegrid') g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]]) p = 0.5 g = np.log(1. / (1 - p * g)) print('running the graph-level MC set-up') cascades = main.simulate_cascades(1000, g) infected, susc = main.build_cascade_list(cascades) model = mc_graph_setup(infected, susc) mcmc = pymc.MCMC(model) mcmc.sample(10**4, 1000) fig, ax = plt.subplots(len(g), len(g)) for i in xrange(len(g)): for j in xrange(len(g)): ax[i,j].locator_params(nbins=3, axis='x') ax[i,j].hist(mcmc.trace('theta_{}{}'.format(i,j))[:], normed=True) ax[i, j].set_xlim([0,1]) ax[i, j].plot([g[i, j]]*2, [0, ax[i,j].get_ylim()[-1]], color='red') plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=.1) plt.show() print('running the node level set-up') node = 0 cascades = main.simulate_cascades(100, g) cascade, y_obs = main.build_matrix(cascades, node) model = glm_node_setup(cascade, y_obs) mcmc = pymc.MCMC(model) mcmc.sample(1e5, 1e4) plt.hist(mcmc.trace('theta_1')[:], bins=1e2) plt.show()