From 4383fbb4179e73e8b4c28eabd64199b3f7a16eee Mon Sep 17 00:00:00 2001 From: jeanpouget-abadie Date: Mon, 23 Nov 2015 13:50:01 -0500 Subject: adding likelihood to theano implementation --- simulation/main.py | 27 ++++++++++-------- simulation/vi.py | 18 ++++++------ simulation/vi_theano.py | 76 ++++++++++++++++++++++++++++++++++++------------- 3 files changed, 80 insertions(+), 41 deletions(-) (limited to 'simulation') diff --git a/simulation/main.py b/simulation/main.py index c2446d7..decdf8f 100644 --- a/simulation/main.py +++ b/simulation/main.py @@ -19,9 +19,9 @@ def simulate_cascade(x, graph): - susc: the nodes susceptible at the beginning of this time step - x: the subset of susc who became infected """ + yield x, np.zeros(graph.shape[0], dtype=bool) susc = np.ones(graph.shape[0], dtype=bool) - susc = susc ^ x # t=0, the source is not susceptible - yield x, susc + #yield x, susc while np.any(x): susc = susc ^ x # nodes infected at previous step are now inactive if not np.any(susc): @@ -60,13 +60,16 @@ if __name__ == "__main__": g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]]) p = 0.5 g = np.log(1. / (1 - p * g)) - error = [] - sizes = [10, 10**2, 10**3] - for s in sizes: - cascades = simulate_cascades(s, g) - cascade, y_obs = mn.build_matrix(cascades, 0) - conf = mn.bootstrap(cascade, y_obs, n_iter=100) - estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1) - error.append(mn.confidence_interval(*np.histogram(estimand, bins=50))) - plt.semilogx(sizes, error) - plt.show() + print(g) + sizes = [10**3] + for si in sizes: + cascades = simulate_cascades(si, g) + cascade, y_obs = mn.build_matrix(cascades, 2) + print(mn.infer(cascade, y_obs)) + #conf = mn.bootstrap(cascade, y_obs, n_iter=100) + #estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1) + #plt.hist(estimand, bins=40) + #plt.show() + #error.append(mn.confidence_interval(*np.histogram(estimand, bins=50))) + #plt.plot(sizes, error) + #plt.show() diff --git a/simulation/vi.py b/simulation/vi.py index aeccb69..1e45761 100644 --- a/simulation/vi.py +++ b/simulation/vi.py @@ -50,7 +50,7 @@ def kl(params1, params0): grad_kl = grad(kl) -def sgd(mu1, sig1, mu0, sig0, cascades, n_e=100, lr=lambda t: 1e-2, n_print=10): +def sgd(mu1, sig1, mu0, sig0, cascades, n_e=100, lr=lambda t: 1e-1, n_print=10): g_mu1, g_sig1 = grad_kl((mu1, sig1), (mu0, sig0)) for t in xrange(n_e): lrt = lr(t) # learning rate @@ -61,19 +61,19 @@ def sgd(mu1, sig1, mu0, sig0, cascades, n_e=100, lr=lambda t: 1e-2, n_print=10): sig1 = np.maximum(sig1 + lrt * g_sig1, 1e-3) res = np.sum(ll_full((mu1, sig1), x, s) for x, s in zip(*cascades))\ + kl((mu1, sig1), (mu0, sig0)) - if step % n_print == 0: - logging.info("Epoch:{}\tStep:{}\tLB:{}\t".format(t, step, res)) - print mu1[0:2, 0:2] - print sig1[0:2, 0:2] + #if step % n_print == 0: + logging.info("Epoch:{}\tStep:{}\tLB:{}\t".format(t, step, res)) + print mu1 + print sig1 if __name__ == '__main__': - #graph = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]]) - graph = np.random.binomial(2, p=.2, size=(10, 10)) + graph = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]]) + #graph = np.random.binomial(2, p=.2, size=(4, 4)) p = 0.5 graph = np.log(1. / (1 - p * graph)) - print(graph[0:2, 0:2]) - cascades = mn.build_cascade_list(mn.simulate_cascades(500, graph)) + print(graph) + cascades = mn.build_cascade_list(mn.simulate_cascades(100, graph)) mu0, sig0 = (1. + .2 * np.random.normal(size=graph.shape), 1 + .2 * np.random.normal(size=graph.shape)) mu1, sig1 = (1. + .2 * np.random.normal(size=graph.shape), diff --git a/simulation/vi_theano.py b/simulation/vi_theano.py index 562fa67..9e8fdb0 100644 --- a/simulation/vi_theano.py +++ b/simulation/vi_theano.py @@ -8,29 +8,29 @@ n_cascades = 1000 n_nodes = 4 n_samples = 100 srng = tsr.shared_randomstreams.RandomStreams(seed=123) -lr = 1e-2 -n_epochs = 10 +lr = 5*1e-3 +n_epochs = 20 +###############Variational Inference#################### # Declare Theano variables -mu = theano.shared(.5 * np.random.rand(1, n_nodes, n_nodes), name="mu", - broadcastable=(True, False, False)) -sig = theano.shared(.3 * np.random.rand(1, n_nodes, n_nodes), name="sig", - broadcastable=(True, False, False)) -mu0 = theano.shared(.5 * np.random.rand(1, n_nodes, n_nodes), name="mu", - broadcastable=(True, False, False)) -sig0 = theano.shared(.3 * np.random.rand(1, n_nodes, n_nodes), name="sig", - broadcastable=(True, False, False)) +mu = theano.shared(.5 + .2 * np.random.normal(size=(1, n_nodes, n_nodes)), + name="mu", broadcastable=(True, False, False)) +sig = theano.shared(.1 + .04 * np.random.normal(size=(1, n_nodes, n_nodes)), + name="sig", broadcastable=(True, False, False)) +mu0 = theano.shared(.5 + .2 * np.random.normal(size=(1, n_nodes, n_nodes)), + name="mu", broadcastable=(True, False, False)) +sig0 = theano.shared(.1 + .04 * np.random.normal(size=(1, n_nodes, n_nodes)), + name="sig", broadcastable=(True, False, False)) x = tsr.matrix(name='x', dtype='int8') s = tsr.matrix(name='s', dtype='int8') # Construct Theano graph theta = srng.normal((n_samples, n_nodes, n_nodes)) * sig + mu -y = tsr.clip(tsr.dot(x, theta), 1e-3, 1) +y = tsr.maximum(tsr.dot(x, theta), 1e-3) infect = tsr.log(1. - tsr.exp(-y[0:-1])).dimshuffle(1, 0, 2) lkl_pos = tsr.sum(infect * (x[1:] & s[1:])) / n_samples lkl_neg = tsr.sum(-y[0:-1].dimshuffle(1, 0, 2) * (~x[1:] & s[1:])) / n_samples - lkl = lkl_pos + lkl_neg kl = tsr.sum(tsr.log(sig / sig0) + (sig0**2 + (mu0 - mu)**2)/(2*sig)**2) res = lkl + kl @@ -48,8 +48,24 @@ train_kl = theano.function(inputs=[], outputs=[], (sig, tsr.clip(sig + lr * gsigkl, 1e-3, 1)))) +###############Maximum Likelihood##################### + +x = tsr.matrix(name='x', dtype='int8') +s = tsr.matrix(name='s', dtype='int8') +params = theano.shared(.5 + .01*np.random.normal(size=(n_nodes, n_nodes)), + name='params') +y = tsr.maximum(tsr.dot(x, params), 1e-5) +infect = tsr.log(1. - tsr.exp(-y[0:-1])) +lkl_pos = tsr.sum(infect * (x[1:] & s[1:])) +lkl_neg = tsr.sum(-y[0:-1] * (~x[1:] & s[1:])) +lkl_mle = lkl_pos + lkl_neg +gparams = theano.gradient.grad(lkl_mle, params) +train_mle = theano.function(inputs=[x, s], outputs=lkl_mle, updates=[(params, + tsr.clip(params + lr * gparams, 0, 1))]) + + if __name__ == "__main__": - graph = np.random.binomial(2, p=.2, size=(n_nodes, n_nodes)) + graph = .5 * np.random.binomial(2, p=.5, size=(n_nodes, n_nodes)) for k in range(len(graph)): graph[k, k] = 0 p = 0.5 @@ -57,10 +73,30 @@ if __name__ == "__main__": cascades = mn.build_cascade_list(mn.simulate_cascades(n_cascades, graph), collapse=True) x_obs, s_obs = cascades[0], cascades[1] - for i in range(n_epochs): - train_kl() - for k in xrange(len(x_obs)/100): - cost = train(x_obs[k*100:(k+1)*100], s_obs[k*100:(k+1)*100]) - print(cost) - print(mu.get_value()) - print(graph) + + #mle + lkl_plot = [] + if 0: + for i in range(n_epochs): + for xt, st in zip(x_obs, s_obs): + lkl = train_mle(xt, st) + lkl_plot.append(lkl) + print(graph) + w = params.get_value() + for k in range(len(w)): + w[k, k] = 0 + print(w) + import matplotlib.pyplot as plt + plt.plot(lkl_plot) + plt.show() + + #variational inference + if 1: + for i in range(n_epochs): + train_kl() + for k in xrange(len(x_obs)/100): + cost = train(x_obs[k*100:(k+1)*100], s_obs[k*100:(k+1)*100]) + print(cost) + print(graph) + print(mu.get_value()) + print(sig.get_value()) -- cgit v1.2.3-70-g09d2