diff options
| author | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-11-23 13:50:01 -0500 |
|---|---|---|
| committer | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-11-23 13:50:01 -0500 |
| commit | 4383fbb4179e73e8b4c28eabd64199b3f7a16eee (patch) | |
| tree | 41ac39c33b33ff9b6f89b4ab800b7c50dab818a7 /simulation/vi_theano.py | |
| parent | 65638ea7d886c25b8bf75e43a5ce46db2ebbaf53 (diff) | |
| download | cascades-4383fbb4179e73e8b4c28eabd64199b3f7a16eee.tar.gz | |
adding likelihood to theano implementation
Diffstat (limited to 'simulation/vi_theano.py')
| -rw-r--r-- | simulation/vi_theano.py | 76 |
1 files changed, 56 insertions, 20 deletions
diff --git a/simulation/vi_theano.py b/simulation/vi_theano.py index 562fa67..9e8fdb0 100644 --- a/simulation/vi_theano.py +++ b/simulation/vi_theano.py @@ -8,29 +8,29 @@ n_cascades = 1000 n_nodes = 4 n_samples = 100 srng = tsr.shared_randomstreams.RandomStreams(seed=123) -lr = 1e-2 -n_epochs = 10 +lr = 5*1e-3 +n_epochs = 20 +###############Variational Inference#################### # Declare Theano variables -mu = theano.shared(.5 * np.random.rand(1, n_nodes, n_nodes), name="mu", - broadcastable=(True, False, False)) -sig = theano.shared(.3 * np.random.rand(1, n_nodes, n_nodes), name="sig", - broadcastable=(True, False, False)) -mu0 = theano.shared(.5 * np.random.rand(1, n_nodes, n_nodes), name="mu", - broadcastable=(True, False, False)) -sig0 = theano.shared(.3 * np.random.rand(1, n_nodes, n_nodes), name="sig", - broadcastable=(True, False, False)) +mu = theano.shared(.5 + .2 * np.random.normal(size=(1, n_nodes, n_nodes)), + name="mu", broadcastable=(True, False, False)) +sig = theano.shared(.1 + .04 * np.random.normal(size=(1, n_nodes, n_nodes)), + name="sig", broadcastable=(True, False, False)) +mu0 = theano.shared(.5 + .2 * np.random.normal(size=(1, n_nodes, n_nodes)), + name="mu", broadcastable=(True, False, False)) +sig0 = theano.shared(.1 + .04 * np.random.normal(size=(1, n_nodes, n_nodes)), + name="sig", broadcastable=(True, False, False)) x = tsr.matrix(name='x', dtype='int8') s = tsr.matrix(name='s', dtype='int8') # Construct Theano graph theta = srng.normal((n_samples, n_nodes, n_nodes)) * sig + mu -y = tsr.clip(tsr.dot(x, theta), 1e-3, 1) +y = tsr.maximum(tsr.dot(x, theta), 1e-3) infect = tsr.log(1. - tsr.exp(-y[0:-1])).dimshuffle(1, 0, 2) lkl_pos = tsr.sum(infect * (x[1:] & s[1:])) / n_samples lkl_neg = tsr.sum(-y[0:-1].dimshuffle(1, 0, 2) * (~x[1:] & s[1:])) / n_samples - lkl = lkl_pos + lkl_neg kl = tsr.sum(tsr.log(sig / sig0) + (sig0**2 + (mu0 - mu)**2)/(2*sig)**2) res = lkl + kl @@ -48,8 +48,24 @@ train_kl = theano.function(inputs=[], outputs=[], (sig, tsr.clip(sig + lr * gsigkl, 1e-3, 1)))) +###############Maximum Likelihood##################### + +x = tsr.matrix(name='x', dtype='int8') +s = tsr.matrix(name='s', dtype='int8') +params = theano.shared(.5 + .01*np.random.normal(size=(n_nodes, n_nodes)), + name='params') +y = tsr.maximum(tsr.dot(x, params), 1e-5) +infect = tsr.log(1. - tsr.exp(-y[0:-1])) +lkl_pos = tsr.sum(infect * (x[1:] & s[1:])) +lkl_neg = tsr.sum(-y[0:-1] * (~x[1:] & s[1:])) +lkl_mle = lkl_pos + lkl_neg +gparams = theano.gradient.grad(lkl_mle, params) +train_mle = theano.function(inputs=[x, s], outputs=lkl_mle, updates=[(params, + tsr.clip(params + lr * gparams, 0, 1))]) + + if __name__ == "__main__": - graph = np.random.binomial(2, p=.2, size=(n_nodes, n_nodes)) + graph = .5 * np.random.binomial(2, p=.5, size=(n_nodes, n_nodes)) for k in range(len(graph)): graph[k, k] = 0 p = 0.5 @@ -57,10 +73,30 @@ if __name__ == "__main__": cascades = mn.build_cascade_list(mn.simulate_cascades(n_cascades, graph), collapse=True) x_obs, s_obs = cascades[0], cascades[1] - for i in range(n_epochs): - train_kl() - for k in xrange(len(x_obs)/100): - cost = train(x_obs[k*100:(k+1)*100], s_obs[k*100:(k+1)*100]) - print(cost) - print(mu.get_value()) - print(graph) + + #mle + lkl_plot = [] + if 0: + for i in range(n_epochs): + for xt, st in zip(x_obs, s_obs): + lkl = train_mle(xt, st) + lkl_plot.append(lkl) + print(graph) + w = params.get_value() + for k in range(len(w)): + w[k, k] = 0 + print(w) + import matplotlib.pyplot as plt + plt.plot(lkl_plot) + plt.show() + + #variational inference + if 1: + for i in range(n_epochs): + train_kl() + for k in xrange(len(x_obs)/100): + cost = train(x_obs[k*100:(k+1)*100], s_obs[k*100:(k+1)*100]) + print(cost) + print(graph) + print(mu.get_value()) + print(sig.get_value()) |
