import numpy as np
import theano
import theano.tensor.shared_randomstreams  # makes tsr.shared_randomstreams available below
from theano import tensor as tsr

from blocks import algorithms, main_loop
import blocks.extensions as be
import blocks.extensions.monitoring as bm

import utils
import utils_blocks as ub


class ClippedParams(algorithms.StepRule):
    """A step rule that keeps parameters within a specified range.

    Any step that would move a parameter outside [min_value, max_value]
    is replaced by a zero step, so the parameter keeps its current value.
    """

    def __init__(self, min_value, max_value):
        self.min_value = min_value
        self.max_value = max_value

    def compute_step(self, parameter, previous_step):
        # `parameter - previous_step` is the value the parameter would take
        # if the step proposed by the preceding rule were applied.
        min_clipped = tsr.switch(parameter - previous_step < self.min_value,
                                 0, previous_step)
        return tsr.switch(parameter - previous_step > self.max_value,
                          0, min_clipped), []


def create_vi_model(n_nodes, n_samp=100):
    """Return the variational-inference Theano computation graph."""

    def aux(a, b):
        """Random initial values around `a`, clipped to the open unit interval."""
        rand = a + b * np.random.normal(size=(n_nodes, n_nodes))
        return np.clip(rand, 1e-3, 1 - 1e-3).astype(theano.config.floatX)

    # x, s: binary indicator matrices (time steps x nodes); the bitwise
    # operations below assume 0/1 entries.
    x = tsr.matrix(name='x', dtype='int8')
    s = tsr.matrix(name='s', dtype='int8')

    # Variational posterior (mu, sig) and prior (mu0, sig0), one Gaussian
    # per edge of the n_nodes x n_nodes graph.
    mu = theano.shared(value=aux(.5, .1), name='mu1')
    sig = theano.shared(value=aux(.5, .1), name='sig1')
    mu0 = theano.shared(value=aux(.5, .1), name='mu0')
    sig0 = theano.shared(value=aux(.5, .1), name='sig0')

    # Monte Carlo estimate of the expected log-likelihood, using n_samp
    # reparameterised samples theta ~ N(mu, sig**2).
    srng = tsr.shared_randomstreams.RandomStreams(seed=123)
    theta = (srng.normal((n_samp, n_nodes, n_nodes)) * sig[None, :, :]
             + mu[None, :, :])
    y = tsr.maximum(tsr.dot(x, theta), 1e-3)

    # Log-likelihood of observed infections (positive term) and of nodes
    # that remained uninfected (negative term).
    infect = tsr.log(1. - tsr.exp(-y[0:-1])).dimshuffle(1, 0, 2)
    lkl_pos = tsr.sum(infect * (x[1:] & s[1:])) / n_samp
    lkl_neg = tsr.sum(-y[0:-1].dimshuffle(1, 0, 2) * (~x[1:] & s[1:])) / n_samp
    lkl = lkl_pos + lkl_neg

    # Closed-form KL divergence between the variational posterior and the
    # prior Gaussians (the additive constant -1/2 is dropped since it does
    # not affect the gradients).
    kl = tsr.sum(tsr.log(sig0 / sig)
                 + (sig**2 + (mu0 - mu)**2) / (2 * sig0**2))

    # Negative evidence lower bound.
    cost = -lkl + kl
    cost.name = 'cost'
    return x, s, mu, sig, cost


if __name__ == "__main__":
    batch_size = 100
    frequency = 10
    n_samples = 50

    graph = utils.create_random_graph(n_nodes=10)
    print('GRAPH:\n', graph, '\n-------------\n')

    x, s, mu, sig, cost = create_vi_model(len(graph), n_samples)
    rmse = ub.rmse_error(graph, mu)

    # AdaDelta steps, constrained so that mu and sig stay in (0, 1).
    step_rules = algorithms.CompositeRule([algorithms.AdaDelta(),
                                           ClippedParams(1e-3, 1 - 1e-3)])
    alg = algorithms.GradientDescent(cost=cost, parameters=[mu, sig],
                                     step_rule=step_rules)
    data_stream = ub.dynamic_data_stream(graph, batch_size)

    loop = main_loop.MainLoop(
        alg, data_stream,
        log_backend="sqlite",
        extensions=[
            be.FinishAfter(after_n_batches=10**4),
            bm.TrainingDataMonitoring([cost, rmse, mu],
                                      every_n_batches=frequency),
            be.Printing(every_n_batches=frequency, after_epoch=False),
            ub.JSONDump("logs/tmp.json", every_n_batches=10),
            # ub.ActiveLearning(dataset=data_stream.dataset, params=graph)
        ])
    loop.run()