diff options
| author | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-11-30 12:29:22 -0500 |
|---|---|---|
| committer | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-11-30 12:29:22 -0500 |
| commit | f9e3d5e4dda32f33e5e5a0e82dda30a23f5dfae6 (patch) | |
| tree | 197b087961aec8f6c75d73a3446f2a00718b75b5 /simulation/vi_blocks.py | |
| parent | 582ea9dade68859e3d863d80a3aeddcb10a4c368 (diff) | |
| download | cascades-f9e3d5e4dda32f33e5e5a0e82dda30a23f5dfae6.tar.gz | |
making variational inference work
Diffstat (limited to 'simulation/vi_blocks.py')
| -rw-r--r-- | simulation/vi_blocks.py | 37 |
1 files changed, 20 insertions, 17 deletions
diff --git a/simulation/vi_blocks.py b/simulation/vi_blocks.py index 58b68d3..dcf6b46 100644 --- a/simulation/vi_blocks.py +++ b/simulation/vi_blocks.py @@ -26,16 +26,16 @@ class ClippedParams(blocks.algorithms.StepRule): def create_vi_model(n_nodes, n_samp=100): """return variational inference theano computation graph""" - def aux(): - rand = .1 + .05 * np.random.normal(size=(n_nodes, n_nodes)) - return rand.astype(theano.config.floatX) + def aux(a, b): + rand = a + b * np.random.normal(size=(n_nodes, n_nodes)) + return np.clip(rand, 1e-3, 1 - 1e-3).astype(theano.config.floatX) x = tsr.matrix(name='x', dtype='int8') s = tsr.matrix(name='s', dtype='int8') - mu = theano.shared(value=aux(), name='mu1') - sig = theano.shared(value=aux(), name='sig1') - mu0 = theano.shared(value=aux(), name='mu0') - sig0 = theano.shared(value=aux(), name='sig0') + mu = theano.shared(value=aux(.5, .1), name='mu1') + sig = theano.shared(value=aux(.5, .1), name='sig1') + mu0 = theano.shared(value=aux(.5, .1), name='mu0') + sig0 = theano.shared(value=aux(.5, .1), name='sig0') srng = tsr.shared_randomstreams.RandomStreams(seed=123) theta = srng.normal((n_samp, n_nodes, n_nodes)) * sig[None, :, :] + mu[None, @@ -45,8 +45,8 @@ def create_vi_model(n_nodes, n_samp=100): lkl_pos = tsr.sum(infect * (x[1:] & s[1:])) / n_samp lkl_neg = tsr.sum(-y[0:-1].dimshuffle(1, 0, 2) * (~x[1:] & s[1:])) / n_samp lkl = lkl_pos + lkl_neg - kl = tsr.sum(tsr.log(sig / sig0) + (sig0**2 + (mu0 - mu)**2)/(2*sig)**2) - cost = lkl + kl + kl = tsr.sum(tsr.log(sig0 / sig) + (sig**2 + (mu0 - mu)**2)/(2*sig0)**2) + cost = - lkl + kl cost.name = 'cost' return x, s, mu, sig, cost @@ -55,25 +55,28 @@ def create_vi_model(n_nodes, n_samp=100): if __name__ == "__main__": n_cascades = 10000 batch_size = 1000 - graph = mn.create_random_graph(n_nodes=3) + n_samples = 50 + graph = mn.create_random_graph(n_nodes=4) print('GRAPH:\n', graph, '\n-------------\n') - x, s, mu, sig, cost = create_vi_model(len(graph)) + x, s, mu, sig, cost = create_vi_model(len(graph), n_samples) + rmse, g_shared = ab.rmse_error(graph, mu) step_rules= blocks.algorithms.CompositeRule([blocks.algorithms.AdaDelta(), ClippedParams(1e-3, 1 - 1e-3)]) - alg = blocks.algorithms.GradientDescent(cost=-cost, parameters=[mu, sig], + alg = blocks.algorithms.GradientDescent(cost=cost, parameters=[mu, sig], step_rule=step_rules) - data_stream = ab.create_fixed_data_stream(n_cascades, graph, batch_size, - shuffle=False) + #data_stream = ab.create_fixed_data_stream(n_cascades, graph, batch_size, + # shuffle=False) + data_stream = ab.create_learned_data_stream(graph, batch_size) loop = blocks.main_loop.MainLoop( alg, data_stream, extensions=[ blocks.extensions.FinishAfter(after_n_batches = 10**4), - blocks.extensions.monitoring.TrainingDataMonitoring([cost, mu, sig], after_batch=True), - blocks.extensions.Printing(every_n_batches = 10), + blocks.extensions.monitoring.TrainingDataMonitoring([cost, mu, sig, + rmse, g_shared], after_batch=True), + blocks.extensions.Printing(every_n_batches = 100), ] ) loop.run() |
