aboutsummaryrefslogtreecommitdiffstats
path: root/simulation/bayes.py
blob: bde9e94c6b45cc4c8326203b875693afa858cb6d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import pymc
import numpy as np

def glm_node_setup(cascade, y_obs, prior=None, *args, **kwargs):
    """
    Build an IC PyMC node-level model from:
        -observed cascades: cascade
        -outcome vector: y_obs
        -desired PyMC prior and parameters: prior, *args
    Note: we use the glm formulation: y = Bernoulli[f(x.dot(theta))]
    """
    n_nodes = len(cascade[0])

    # Container class for node's parents
    theta = np.empty(n_nodes, dtype=object)
    for j in xrange(n_nodes):
        if prior is None:
            theta[j] = pymc.Beta('theta_{}'.format(j), alpha=1, beta=1)
        else:
            theta[j] = prior('theta_{}'.format(j), *args, **kwargs)

    # Observed container class for cascade realization
    x = np.empty(n_nodes, dtype=object)
    for i, val in enumerate(cascade.T):
        x[i] = pymc.Normal('x_{}'.format(i), 0, 1, value=val, observed=True)

    @pymc.deterministic
    def glm_p(x=x, theta=theta):
        return 1. - np.exp(-x.dot(theta))

    @pymc.observed
    def y(glm_p=glm_p, value=y_obs):
        return pymc.bernoulli_like(value, glm_p)

    return pymc.Model([y, pymc.Container(theta), pymc.Container(x)])


def formatLabel(s, n):
    return '0'*(len(str(n)) - len(str(s))) + str(s)


def mc_graph_setup(infected, susceptible, prior=None, *args, **kwargs):
    """
    Build an IC PyMC graph-level model from:
        -infected nodes over time: list/tuple of list/tuple of np.array
        -susceptible nodes over time: same format as above
    Note: we use the Markov Chain formulation: X_{t+1}|X_t,theta = f(X_t.theta)
    """

    # Container class for graph parameters
    n_nodes = len(infected[0][0])
    theta = np.empty((n_nodes,n_nodes), dtype=object)
    if prior is None:
        for i in xrange(n_nodes):
            for j in xrange(n_nodes):
                theta[i, j] = pymc.Beta('theta_{}{}'.format(formatLabel(i,
                                        n_nodes-1), formatLabel(j, n_nodes-1)),
                                        alpha=1, beta=1)
    else:
        theta = prior(theta=theta, *args, **kwargs)

    # Container class for cascade realization
    x = {}
    for i, cascade in enumerate(infected):
        for j, step in enumerate(cascade):
            for k, node in enumerate(step):
                if j and susceptible[i][j][k]:
                    p = 1. - pymc.exp(-cascade[j-1].dot(theta[k]))
                else:
                    p = .5
                x[i, j, k] = pymc.Bernoulli('x_{}{}{}'.format(i, j, k), p=p,
                        value=node, observed=True)

    return pymc.Model([pymc.Container(theta), pymc.Container(x)])


if __name__=="__main__":
    import main
    import matplotlib.pyplot as plt
    import seaborn
    seaborn.set_style('whitegrid')
    g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]])
    p = 0.5
    g = np.log(1. / (1 - p * g))

    print('running the graph-level MC set-up')
    n_nodes = len(g)
    cascades = main.simulate_cascades(1000, g)
    infected, susc = main.build_cascade_list(cascades)
    model = mc_graph_setup(infected, susc)
    mcmc = pymc.MCMC(model)
    mcmc.sample(10**4, 1000)
    fig, ax = plt.subplots(len(g), len(g))
    for i in xrange(n_nodes):
        for j in xrange(n_nodes):
            if n_nodes < 5:
                ax[i,j].locator_params(nbins=3, axis='x')
            else:
                ax[i, j].get_xaxis().set_ticks([])
                ax[i, j].get_yaxis().set_ticks([])
            it, jt = formatLabel(i, n_nodes-1), formatLabel(j, n_nodes-1)
            ax[i,j].hist(mcmc.trace('theta_{}{}'.format(it,jt))[:], normed=True)
            ax[i, j].set_xlim([0,1])
            ax[i, j].plot([g[i, j]]*2, [0, ax[i,j].get_ylim()[-1]], color='red')
    plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=.1)
    plt.show()

    print('running the node level set-up')
    node = 0
    cascades = main.simulate_cascades(100, g)
    cascade, y_obs = main.build_matrix(cascades, node)
    model = glm_node_setup(cascade, y_obs)
    mcmc = pymc.MCMC(model)
    mcmc.sample(1e5, 1e4)
    plt.hist(mcmc.trace('theta_1')[:], bins=1e2)
    plt.show()