-rw-r--r--  simulation/active_blocks.py | 150
-rw-r--r--  simulation/main.py          |  23
-rw-r--r--  simulation/vi_blocks.py     |  79
-rw-r--r--  simulation/vi_theano.py     | 110
4 files changed, 248 insertions, 114 deletions
diff --git a/simulation/active_blocks.py b/simulation/active_blocks.py
new file mode 100644
index 0000000..47fce2f
--- /dev/null
+++ b/simulation/active_blocks.py
@@ -0,0 +1,150 @@
+import main as mn
+import theano
+from theano import tensor as tsr
+import blocks
+import blocks.algorithms, blocks.main_loop, blocks.extensions.monitoring
+import picklable_itertools.extras
+import numpy as np
+from six.moves import range
+import fuel
+import fuel.datasets, fuel.schemes, fuel.streams
+import collections
+
+
+class LearnedDataset(fuel.datasets.Dataset):
+    """
+    Dynamically created dataset (for active learning):
+    -compatible with ConstantScheme, with the request corresponding to a
+     batch_size
+    """
+    provides_sources = ('x', 's')
+
+    def __init__(self, node_p, graph, source=mn.var_source, **kwargs):
+        super(LearnedDataset, self).__init__(**kwargs)
+        self.node_p = node_p
+        self.graph = graph
+        self.n_cascades = 1  # nbr of cascades of total size approx. = request
+        self.source = lambda graph, t: source(graph, t, self.node_p)
+
+    def get_data(self, state=None, request=None):
+        floatX = 'int8'
+        x_obs = np.empty((request, len(self.graph)), dtype=floatX)
+        s_obs = np.empty((request, len(self.graph)), dtype=floatX)
+        i = 0
+        while i < request:
+            x_tmp, s_tmp = mn.build_cascade_list(
+                mn.simulate_cascades(self.n_cascades, self.graph, self.source),
+                collapse=True
+            )
+            x_obs[i:i + len(x_tmp)] = x_tmp[:request - i]
+            s_obs[i:i + len(x_tmp)] = s_tmp[:request - i]
+            i += len(x_tmp)
+            self.n_cascades += 1  # learn the optimal nbr in the loop
+        self.n_cascades = max(1, self.n_cascades - 2)
+        return (x_obs, s_obs)
+
+
+class ActiveLearning(blocks.extensions.SimpleExtension):
+    """
+    Extension which updates the node_p array passed to the get_data method
+    of LearnedDataset
+    """
+    def __init__(self, dataset, **kwargs):
+        super(ActiveLearning, self).__init__(**kwargs)
+        self.dataset = dataset
+
+    def do(self, which_callback, *args):
+        pass
+
+
+class ShuffledBatchesScheme(fuel.schemes.ShuffledScheme):
+    """
+    Iteration scheme over a finite dataset:
+    -shuffles batches but not within a batch
+    -arguments: dataset_size (int); batch_size (int)
+    """
+    def get_request_iterator(self):
+        indices = list(self.indices)  # self.indices = xrange(dataset_size)
+        start = np.random.randint(self.batch_size)
+        batches = list(map(
+            list,
+            picklable_itertools.extras.partition_all(self.batch_size,
+                                                     indices[start:])
+        ))
+        if indices[:start]:
+            batches.append(indices[:start])
+        batches = np.asarray(batches)
+        return iter(batches[np.random.permutation(len(batches))])
+
+
+def create_mle_model(n_nodes):
+    """
+    Return the cascade log-likelihood theano computation graph.
+    """
+    x = tsr.matrix(name='x', dtype='int8')
+    s = tsr.matrix(name='s', dtype='int8')
+    params = theano.shared(
+        .5 + .01 *
+        np.random.normal(size=(n_nodes, n_nodes)).astype(theano.config.floatX),
+        name='params'
+    )
+    y = tsr.maximum(tsr.dot(x, params), 1e-5)
+    infect = tsr.log(1. - tsr.exp(-y[0:-1]))
+    lkl_pos = tsr.sum(infect * (x[1:] & s[1:]))
+    lkl_neg = tsr.sum(-y[0:-1] * (~x[1:] & s[1:]))
+    lkl_mle = lkl_pos + lkl_neg
+    lkl_mle.name = 'cost'
+    return x, s, params, lkl_mle
+
+
+def create_fixed_data_stream(n_cascades, graph, batch_size, shuffle=True):
+    """
+    Create a datastream for a fixed (not learned) dataset:
+    -shuffle (bool): shuffle minibatches but not within a minibatch;
+     otherwise sequential (non-shuffled) batches are used
+    """
+    cascades = mn.build_cascade_list(mn.simulate_cascades(n_cascades, graph),
+                                     collapse=True)
+    x_obs, s_obs = cascades[0], cascades[1]
+    data_set = fuel.datasets.base.IndexableDataset(collections.OrderedDict(
+        [('x', x_obs), ('s', s_obs)]
+    ))
+    if shuffle:
+        scheme = ShuffledBatchesScheme(len(x_obs), batch_size=batch_size)
+    else:
+        scheme = fuel.schemes.SequentialScheme(len(x_obs),
+                                               batch_size=batch_size)
+    return fuel.streams.DataStream(dataset=data_set, iteration_scheme=scheme)
+
+
+def create_learned_data_stream(graph, batch_size):
+    node_p = np.ones(len(graph)) / len(graph)
+    data_set = LearnedDataset(node_p, graph)
+    scheme = fuel.schemes.ConstantScheme(batch_size)
+    return fuel.streams.DataStream(dataset=data_set, iteration_scheme=scheme)
+
+
+if __name__ == "__main__":
+    batch_size = 1000
+    graph = mn.create_random_graph(n_nodes=1000)
+    print('GRAPH:\n', graph, '\n-------------\n')
+
+    x, s, params, cost = create_mle_model(len(graph))
+
+    alg = blocks.algorithms.GradientDescent(
+        cost=-cost, parameters=[params], step_rule=blocks.algorithms.AdaDelta()
+    )
+    data_stream = create_learned_data_stream(graph, batch_size)
+    #data_stream = create_fixed_data_stream(n_cascades, graph, batch_size,
+    #                                       shuffle=False)
+    loop = blocks.main_loop.MainLoop(
+        alg, data_stream,
+        extensions=[
+            blocks.extensions.FinishAfter(after_n_batches=10**4),
+            blocks.extensions.monitoring.TrainingDataMonitoring([cost, params],
+                after_batch=True),
+            blocks.extensions.Printing(every_n_batches=10),
+            #ActiveLearning(active_dataset)
+        ]
+    )
+    loop.run()
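Note: on my reading of create_mle_model (this restatement is mine, not part of the commit), the cost it returns is the discrete-time independent-cascade log-likelihood with an exponential link. Writing x_t for the infection indicators at step t, s_t for the susceptibility mask, and y_t = \Theta^\top x_t (the code's tsr.dot(x, params), row by row), lkl_pos + lkl_neg amounts to

    \log L(\Theta) = \sum_t \sum_{j \colon s_{t+1,j} = 1}
        \Big[ x_{t+1,j}\,\log\big(1 - e^{-y_{t,j}}\big)
              - \big(1 - x_{t+1,j}\big)\, y_{t,j} \Big],

with y clamped to at least 1e-5 so the log stays finite when no active parent points at node j.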
diff --git a/simulation/main.py b/simulation/main.py
index 1c0b3e8..a916034 100644
--- a/simulation/main.py
+++ b/simulation/main.py
@@ -12,6 +12,13 @@ from six.moves import range
 seaborn.set_style("white")
 
 
+def create_random_graph(n_nodes, p=.5):
+    graph = .5 * np.random.binomial(2, p=.5, size=(n_nodes, n_nodes))
+    for k in range(len(graph)):
+        graph[k, k] = 0
+    return np.log(1. / (1 - p * graph))
+
+
 def simulate_cascade(x, graph):
     """
     Simulate an IC cascade given a graph and initial state.
@@ -22,7 +29,6 @@ def simulate_cascade(x, graph):
     """
     yield x, np.zeros(graph.shape[0], dtype=bool)
     susc = np.ones(graph.shape[0], dtype=bool)
-    #yield x, susc
     while np.any(x):
         susc = susc ^ x  # nodes infected at previous step are now inactive
         if not np.any(susc):
@@ -39,6 +45,14 @@ def uniform_source(graph, *args, **kwargs):
     return x0
 
 
+def var_source(graph, t, node_p=None, *args, **kwargs):
+    if node_p is None:
+        node_p = np.ones(len(graph)) / len(graph)
+    x0 = np.zeros(graph.shape[0], dtype=bool)
+    x0[nr.choice(a=len(graph), p=node_p)] = True
+    return x0
+
+
 def simulate_cascades(n, graph, source=uniform_source):
     for t in range(n):
         x0 = source(graph, t)
@@ -58,9 +72,10 @@ def build_cascade_list(cascades, collapse=False):
 
 
 if __name__ == "__main__":
-    g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
-    p = 0.5
-    g = np.log(1. / (1 - p * g))
+    #g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
+    #p = 0.5
+    #g = np.log(1. / (1 - p * g))
+    g = create_random_graph(n_nodes=3)
     print(g)
     sizes = [10**3]
     for si in sizes:
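A minimal usage sketch of the new main.py helpers (my own example, not part of the commit; it assumes you run it from the simulation/ directory so that `import main` resolves):

    import main as mn

    # Random weighted graph with a zeroed diagonal; entries are exponential
    # rates theta = log(1 / (1 - p*w)), so that 1 - exp(-theta) = p*w.
    g = mn.create_random_graph(n_nodes=5)

    # Ten cascades from uniformly drawn sources, collapsed into flat
    # (infections, susceptibility) observation arrays, one row per step.
    x_obs, s_obs = mn.build_cascade_list(mn.simulate_cascades(10, g),
                                         collapse=True)
    print(len(x_obs), len(s_obs))

Passing source=mn.var_source to simulate_cascades instead draws each cascade's seed from the node_p distribution, which is what LearnedDataset relies on above.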
diff --git a/simulation/vi_blocks.py b/simulation/vi_blocks.py
new file mode 100644
index 0000000..3f88611
--- /dev/null
+++ b/simulation/vi_blocks.py
@@ -0,0 +1,79 @@
+import main as mn
+import theano
+from theano import tensor as tsr
+import blocks
+import blocks.algorithms, blocks.main_loop, blocks.extensions.monitoring
+import theano.tensor.shared_randomstreams
+import numpy as np
+from six.moves import range
+import fuel
+import fuel.datasets
+import active_blocks as ab
+
+
+class ClippedParams(blocks.algorithms.StepRule):
+    """A rule to keep the parameters within a specified range"""
+    def __init__(self, min_value, max_value):
+        self.min_value = min_value
+        self.max_value = max_value
+
+    def compute_step(self, parameter, previous_step):
+        min_clipped = tsr.switch(parameter - previous_step < self.min_value,
+                                 parameter - self.min_value, previous_step)
+        return tsr.switch(parameter - previous_step > self.max_value,
+                          parameter - self.max_value, min_clipped)
+
+
+def create_vi_model(n_nodes, n_samp=100):
+    """Return the variational inference theano computation graph"""
+    def aux():
+        rand = .1 + .05 * np.random.normal(size=(n_nodes, n_nodes))
+        return rand.astype(theano.config.floatX)
+
+    x = tsr.matrix(name='x', dtype='int8')
+    s = tsr.matrix(name='s', dtype='int8')
+    mu = theano.shared(value=aux(), name='mu1')
+    sig = theano.shared(value=aux(), name='sig1')
+    mu0 = theano.shared(value=aux(), name='mu0')
+    sig0 = theano.shared(value=aux(), name='sig0')
+
+    srng = tsr.shared_randomstreams.RandomStreams(seed=123)
+    theta = (srng.normal((n_samp, n_nodes, n_nodes)) * sig[None, :, :]
+             + mu[None, :, :])
+    y = tsr.maximum(tsr.dot(x, theta), 1e-3)
+    infect = tsr.log(1. - tsr.exp(-y[0:-1])).dimshuffle(1, 0, 2)
+    lkl_pos = tsr.sum(infect * (x[1:] & s[1:])) / n_samp
+    lkl_neg = tsr.sum(-y[0:-1].dimshuffle(1, 0, 2) * (~x[1:] & s[1:])) / n_samp
+    lkl = lkl_pos + lkl_neg
+    kl = tsr.sum(tsr.log(sig / sig0) + (sig0**2 + (mu0 - mu)**2) / (2 * sig**2))
+    cost = lkl + kl
+    cost.name = 'cost'
+
+    return x, s, mu, sig, cost
+
+
+if __name__ == "__main__":
+    n_cascades = 10000
+    batch_size = 1000
+    graph = mn.create_random_graph(n_nodes=3)
+    print('GRAPH:\n', graph, '\n-------------\n')
+
+    x, s, mu, sig, cost = create_vi_model(len(graph))
+
+    step_rules = blocks.algorithms.CompositeRule([blocks.algorithms.AdaDelta(),
+                                                  ClippedParams(1e-3, 1 - 1e-3)])
+
+    alg = blocks.algorithms.GradientDescent(cost=-cost, parameters=[mu, sig],
+                                            step_rule=step_rules)
+    data_stream = ab.create_fixed_data_stream(n_cascades, graph, batch_size,
+                                              shuffle=False)
+    loop = blocks.main_loop.MainLoop(
+        alg, data_stream,
+        extensions=[
+            blocks.extensions.FinishAfter(after_n_batches=10**4),
+            blocks.extensions.monitoring.TrainingDataMonitoring([cost, mu, sig],
+                after_batch=True),
+            blocks.extensions.Printing(every_n_batches=10),
+        ]
+    )
+    loop.run()
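Two reference points for the file above, stated as my reading rather than anything documented in the commit. First, blocks' GradientDescent applies new_parameter = parameter - step, so a clipping rule must return the distance to the bound (parameter - bound) as the step once an update would cross it, which is what ClippedParams does. Second, the kl term matches the closed-form KL divergence between the two diagonal Gaussians, summed elementwise over the n x n parameter entries, up to the additive constant -1/2 that has no effect on the gradients:

    \mathrm{KL}\big(\mathcal{N}(\mu_0,\sigma_0^2)\,\|\,\mathcal{N}(\mu,\sigma^2)\big)
        = \log\frac{\sigma}{\sigma_0}
          + \frac{\sigma_0^2 + (\mu_0-\mu)^2}{2\sigma^2} - \frac{1}{2}.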
diff --git a/simulation/vi_theano.py b/simulation/vi_theano.py
deleted file mode 100644
index cfd1dcf..0000000
--- a/simulation/vi_theano.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import main as mn
-import theano
-from theano import tensor as tsr
-import theano.tensor.shared_randomstreams
-import numpy as np
-
-n_cascades = 1000
-n_nodes = 3
-n_samples = 100
-srng = tsr.shared_randomstreams.RandomStreams(seed=123)
-lr = 1e-1
-n_epochs = 1
-
-###############Variational Inference####################
-
-# Declare Theano variables
-mu = theano.shared(.5 + .2 * np.random.normal(size=(1, n_nodes, n_nodes)),
-                   name="mu", broadcastable=(True, False, False))
-sig = theano.shared(.1 + .04 * np.random.normal(size=(1, n_nodes, n_nodes)),
-                    name="sig", broadcastable=(True, False, False))
-mu0 = theano.shared(.5 + .2 * np.random.normal(size=(1, n_nodes, n_nodes)),
-                    name="mu", broadcastable=(True, False, False))
-sig0 = theano.shared(.1 + .04 * np.random.normal(size=(1, n_nodes, n_nodes)),
-                     name="sig", broadcastable=(True, False, False))
-x = tsr.matrix(name='x', dtype='int8')
-s = tsr.matrix(name='s', dtype='int8')
-
-# Construct Theano graph
-theta = srng.normal((n_samples, n_nodes, n_nodes)) * sig + mu
-y = tsr.maximum(tsr.dot(x, theta), 1e-3)
-infect = tsr.log(1. - tsr.exp(-y[0:-1])).dimshuffle(1, 0, 2)
-lkl_pos = tsr.sum(infect * (x[1:] & s[1:])) / n_samples
-lkl_neg = tsr.sum(-y[0:-1].dimshuffle(1, 0, 2) * (~x[1:] & s[1:])) / n_samples
-lkl = lkl_pos + lkl_neg
-kl = tsr.sum(tsr.log(sig / sig0) + (sig0**2 + (mu0 - mu)**2)/(2*sig)**2)
-res = lkl + kl
-
-gmu, gsig = theano.gradient.grad(lkl, [mu, sig])
-gmukl, gsigkl = theano.grad(kl, [mu, sig])
-
-# Compile into functions
-loglkl_full = theano.function([x, s], lkl)
-train = theano.function(inputs=[x, s], outputs=res,
-                        updates=((mu, tsr.clip(mu + lr * gmu, 0, 1)),
-                                 (sig, tsr.clip(sig + lr * gsig, 1e-3, 1))))
-train_kl = theano.function(inputs=[], outputs=[],
-                           updates=((mu, tsr.clip(mu + lr * gmukl, 0, 1)),
-                                    (sig, tsr.clip(sig + lr * gsigkl, 1e-3, 1))))
-
-
-###############Maximum Likelihood#####################
-
-x = tsr.matrix(name='x', dtype='int8')
-s = tsr.matrix(name='s', dtype='int8')
-params = theano.shared(.5 + .01*np.random.normal(size=(n_nodes, n_nodes)),
-                       name='params')
-y = tsr.maximum(tsr.dot(x, params), 1e-5)
-infect = tsr.log(1. - tsr.exp(-y[0:-1]))
-lkl_pos = tsr.sum(infect * (x[1:] & s[1:]))
-lkl_neg = tsr.sum(-y[0:-1] * (~x[1:] & s[1:]))
-lkl_mle = lkl_pos + lkl_neg
-gparams = theano.gradient.grad(lkl_mle, params)
-train_mle = theano.function(inputs=[x, s], outputs=lkl_mle, updates=[(params,
-                            tsr.clip(params + lr * gparams, 0, 1))])
-
-
-if __name__ == "__main__":
-    graph = .5 * np.random.binomial(2, p=.5, size=(n_nodes, n_nodes))
-    for k in range(len(graph)):
-        graph[k, k] = 0
-    p = 0.5
-    graph = np.log(1. / (1 - p * graph))
-
-    graph = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
-    p = 0.5
-    graph = np.log(1. / (1 - p * graph))
-
-    cascades = mn.build_cascade_list(mn.simulate_cascades(n_cascades, graph),
-                                     collapse=True)
-    x_obs, s_obs = cascades[0], cascades[1]
-
-    #mle
-    lkl_plot = []
-    if 1:
-        for i in range(n_epochs):
-            for k in xrange(len(x_obs)/100):
-                xt = x_obs[k*100:(k+1)*100]
-                st = s_obs[k*100:(k+1)*100]
-                lkl = train_mle(xt, st)
-                lkl_plot.append(lkl)
-        print(params.get_value())
-        print(graph)
-        w = params.get_value()
-        for k in range(len(w)):
-            w[k, k] = 0
-        print(w)
-    #import matplotlib.pyplot as plt
-    #plt.plot(lkl_plot)
-    #plt.show()
-
-    #variational inference
-    if 0:
-        for i in range(n_epochs):
-            train_kl()
-            for k in xrange(len(x_obs)/100):
-                cost = train(x_obs[k*100:(k+1)*100], s_obs[k*100:(k+1)*100])
-                print(cost)
-        print(graph)
-        print(mu.get_value())
-        print(sig.get_value())
