"""Fit cascade edge weights with variational inference and maximum likelihood.

Two Theano training functions are built at import time:

* ``train`` / ``train_kl`` update a per-edge Gaussian (``mu``, ``sig``) using
  Monte-Carlo samples ``theta = noise * sig + mu``.
* ``train_mle`` updates a single point estimate ``params``.

Running the file as a script simulates cascades on a random graph (via the
project module ``main``) and runs the variational training loop.
"""
import numpy as np
import theano
import theano.tensor.shared_randomstreams
from theano import tensor as tsr

import main as mn

n_cascades = 1000
n_nodes = 4
n_samples = 100  # Monte-Carlo samples of theta drawn per evaluation
srng = tsr.shared_randomstreams.RandomStreams(seed=123)
lr = 5 * 1e-3
n_epochs = 20

###############Variational Inference####################

# Declare Theano variables
# The leading axis has length 1 and is marked broadcastable so that mu/sig
# broadcast against the (n_samples, n_nodes, n_nodes) noise tensor below.
_PARAM_SHAPE = (1, n_nodes, n_nodes)
_BROADCAST = (True, False, False)

mu = theano.shared(.5 + .2 * np.random.normal(size=_PARAM_SHAPE),
                   name="mu", broadcastable=_BROADCAST)
sig = theano.shared(.1 + .04 * np.random.normal(size=_PARAM_SHAPE),
                    name="sig", broadcastable=_BROADCAST)
# mu0/sig0 are never updated by the compiled functions below; they only
# appear in the `kl` term.  They get their own debug names (the original
# reused "mu"/"sig", making graph printouts ambiguous).
mu0 = theano.shared(.5 + .2 * np.random.normal(size=_PARAM_SHAPE),
                    name="mu0", broadcastable=_BROADCAST)
sig0 = theano.shared(.1 + .04 * np.random.normal(size=_PARAM_SHAPE),
                     name="sig0", broadcastable=_BROADCAST)
# NOTE(review): x presumably holds 0/1 infection indicators per (time step,
# node) and s a 0/1 mask of the same shape -- confirm against
# mn.build_cascade_list.
x = tsr.matrix(name='x', dtype='int8')
s = tsr.matrix(name='s', dtype='int8')

# Construct Theano graph
# Reparameterised samples: standard normal noise scaled by sig, shifted by mu.
theta = srng.normal((n_samples, n_nodes, n_nodes)) * sig + mu
# Floor at 1e-3 so that log(1 - exp(-y)) below stays finite.
y = tsr.maximum(tsr.dot(x, theta), 1e-3)
# Row t of y is paired with row t + 1 of x and s; the dimshuffle moves the
# sample axis to the front so the 2-D masks broadcast over it.
infect = tsr.log(1. - tsr.exp(-y[0:-1])).dimshuffle(1, 0, 2)
lkl_pos = tsr.sum(infect * (x[1:] & s[1:])) / n_samples
# For 0/1 int8 entries, (~x & s) is 1 exactly where x == 0 and s == 1.
lkl_neg = tsr.sum(-y[0:-1].dimshuffle(1, 0, 2) * (~x[1:] & s[1:])) / n_samples
lkl = lkl_pos + lkl_neg
# NOTE(review): (2*sig)**2 equals 4*sig**2, and this expression is not the
# textbook KL between two Gaussians -- confirm the intended formula.
kl = tsr.sum(tsr.log(sig / sig0) + (sig0**2 + (mu0 - mu)**2) / (2*sig)**2)
res = lkl + kl
gmu, gsig = theano.gradient.grad(lkl, [mu, sig])
gmukl, gsigkl = theano.grad(kl, [mu, sig])

# Compile into functions
loglkl_full = theano.function([x, s], lkl)
# One gradient-ascent step on lkl; mu is clipped to [0, 1], sig to [1e-3, 1].
train = theano.function(
    inputs=[x, s],
    outputs=res,
    updates=((mu, tsr.clip(mu + lr * gmu, 0, 1)),
             (sig, tsr.clip(sig + lr * gsig, 1e-3, 1))))
# NOTE(review): this steps along +grad(kl), i.e. it increases kl -- confirm
# that the sign is intended.
train_kl = theano.function(
    inputs=[],
    outputs=[],
    updates=((mu, tsr.clip(mu + lr * gmukl, 0, 1)),
             (sig, tsr.clip(sig + lr * gsigkl, 1e-3, 1))))

###############Maximum Likelihood#####################

# x, s and the likelihood names are deliberately rebound here; the functions
# compiled above keep referring to the variational graph.
x = tsr.matrix(name='x', dtype='int8')
s = tsr.matrix(name='s', dtype='int8')
params = theano.shared(.5 + .01 * np.random.normal(size=(n_nodes, n_nodes)),
                       name='params')
y = tsr.maximum(tsr.dot(x, params), 1e-5)
infect = tsr.log(1. - tsr.exp(-y[0:-1]))
lkl_pos = tsr.sum(infect * (x[1:] & s[1:]))
lkl_neg = tsr.sum(-y[0:-1] * (~x[1:] & s[1:]))
lkl_mle = lkl_pos + lkl_neg
gparams = theano.gradient.grad(lkl_mle, params)
# One gradient-ascent step on lkl_mle, with params clipped to [0, 1].
train_mle = theano.function(
    inputs=[x, s],
    outputs=lkl_mle,
    updates=[(params, tsr.clip(params + lr * gparams, 0, 1))])


if __name__ == "__main__":
    # Random ground-truth graph with entries in {0, .5, 1} and no self-loops,
    # then mapped through log(1 / (1 - p * w)).
    graph = .5 * np.random.binomial(2, p=.5, size=(n_nodes, n_nodes))
    np.fill_diagonal(graph, 0)
    p = 0.5
    graph = np.log(1. / (1 - p * graph))
    cascades = mn.build_cascade_list(mn.simulate_cascades(n_cascades, graph),
                                     collapse=True)
    x_obs, s_obs = cascades[0], cascades[1]

    # Toggle which experiment runs (these replace the literal `if 0` / `if 1`).
    run_mle = False
    run_vi = True

    #mle
    lkl_plot = []
    if run_mle:
        for i in range(n_epochs):
            for xt, st in zip(x_obs, s_obs):
                lkl = train_mle(xt, st)
            lkl_plot.append(lkl)
        print(graph)
        w = params.get_value()
        np.fill_diagonal(w, 0)
        print(w)
        import matplotlib.pyplot as plt
        plt.plot(lkl_plot)
        plt.show()

    #variational inference
    if run_vi:
        batch_size = 100
        # Floor division plus range() keeps this working on Python 2 and 3
        # (the original xrange(len(x_obs)/100) fails on Python 3).
        n_batches = len(x_obs) // batch_size
        for i in range(n_epochs):
            train_kl()
            for k in range(n_batches):
                start, stop = k * batch_size, (k + 1) * batch_size
                cost = train(x_obs[start:stop], s_obs[start:stop])
            print(cost)
        print(graph)
        print(mu.get_value())
        print(sig.get_value())