simulation/bayes.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

import pymc
import numpy as np

def glm_node_setup(cascade, y_obs, g_node, prior=None, *args, **kwargs):
    """
    Build an IC PyMC model from:
        -observed cascades: cascade
        -outcome vector: y_obs
        -graph parameters of node's parents: g_node
        -desired PyMC prior and parameters: prior, *args
    Note: we use the glm modelisation : y = Bernoulli[f(x.dot(g_node))]
    """
    # Container class for node's parents
    theta = np.empty(len(g_node), dtype=object)
    for j, parent in enumerate(g_node):
        if prior is None:
            theta[j] = pymc.Beta('theta_{}'.format(j), alpha=1, beta=1)
        else:
            theta[j] = prior('theta_{}'.format(j), *args, **kwargs)

    # Observed container class for cascade realization
    x = np.empty(len(g_node), dtype=object)
    for i, val in enumerate(cascade.T):
        x[i] = pymc.Normal('x_{}'.format(i), 0, 1, value=val, observed=True)

    @pymc.deterministic
    def glm_p(x=x, theta=theta):
        return 1. - np.exp(-x.dot(theta))

    @pymc.observed
    def y(glm_p=glm_p, value=y_obs):
        return pymc.bernoulli_like(value, glm_p)

    return pymc.Model([y, pymc.Container(theta), pymc.Container(x)])


if __name__=="__main__":
    import main
    import matplotlib.pyplot as plt
    g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]])
    p = 0.5
    g = np.log(1. / (1 - p * g))
    error = []
    node = 0
    cascades = main.simulate_cascades(5000, g)
    cascade, y_obs = main.build_matrix(cascades, node)
    model = glm_node_setup(cascade, y_obs, g[0])
    mcmc = pymc.MCMC(model)
    mcmc.sample(10**5, 10**4)
    plt.hist(mcmc.trace('theta_1')[:])
    plt.show()