Diffstat (limited to 'simulation/active_blocks.py')
| -rw-r--r-- | simulation/active_blocks.py | 31 |
1 file changed, 22 insertions, 9 deletions
diff --git a/simulation/active_blocks.py b/simulation/active_blocks.py
index 47fce2f..e3924c6 100644
--- a/simulation/active_blocks.py
+++ b/simulation/active_blocks.py
@@ -54,7 +54,9 @@ class ActiveLearning(blocks.extensions.SimpleExtension):
         self.dataset = dataset
 
     def do(self, which_callback, *args):
-        pass
+        out_degree = np.sum(self.dataset.graph, axis=1)
+        self.dataset.node_p = out_degree / np.sum(out_degree)
+        print(self.dataset.node_p)
 
 
 class ShuffledBatchesScheme(fuel.schemes.ShuffledScheme):
@@ -77,10 +79,13 @@ class ShuffledBatchesScheme(fuel.schemes.ShuffledScheme):
         return iter(batches[np.random.permutation(len(batches))])
 
 
-def create_mle_model(n_nodes):
+def create_mle_model(graph):
     """
     return cascade likelihood theano computation graph
     """
+    n_nodes = len(graph)
+    g_shared = theano.shared(value=graph, name='graph')
+
     x = tsr.matrix(name='x', dtype='int8')
     s = tsr.matrix(name='s', dtype='int8')
     params = theano.shared(
@@ -94,7 +99,13 @@ def create_mle_model(n_nodes):
     lkl_neg = tsr.sum(-y[0:-1] * (~x[1:] & s[1:]))
     lkl_mle = lkl_pos + lkl_neg
     lkl_mle.name = 'cost'
-    return x, s, params, lkl_mle
+
+    diff = (g_shared - params) ** 2
+    subarray = tsr.arange(g_shared.shape[0])
+    tsr.set_subtensor(diff[subarray, subarray], 0)
+    rmse = tsr.sum(diff) / (n_nodes ** 2)
+    rmse.name = 'rmse'
+    return x, s, params, lkl_mle, rmse
 
 
 def create_fixed_data_stream(n_cascades, graph, batch_size, shuffle=True):
@@ -126,25 +137,27 @@ def create_learned_data_stream(graph, batch_size):
 
 if __name__ == "__main__":
     batch_size = 1000
-    graph = mn.create_random_graph(n_nodes=1000)
+    #graph = mn.create_random_graph(n_nodes=1000)
+    graph = mn.create_star(1000)
     print('GRAPH:\n', graph, '\n-------------\n')
-    x, s, params, cost = create_mle_model(len(graph))
+    x, s, params, cost, rmse = create_mle_model(graph)
     alg = blocks.algorithms.GradientDescent(
         cost=-cost, parameters=[params],
         step_rule=blocks.algorithms.AdaDelta()
     )
     data_stream = create_learned_data_stream(graph, batch_size)
+
     #n_cascades = 10000
     #data_stream = create_fixed_data_stream(n_cascades, graph, batch_size,
     #                                       shuffle=False)
     loop = blocks.main_loop.MainLoop(
         alg, data_stream,
         extensions=[
             blocks.extensions.FinishAfter(after_n_batches = 10**4),
-            blocks.extensions.monitoring.TrainingDataMonitoring([cost, params],
-                after_batch=True),
-            blocks.extensions.Printing(every_n_batches = 10),
-            #ActiveLearning(active_dataset)
+            blocks.extensions.monitoring.TrainingDataMonitoring([cost, params,
+                rmse], after_batch=True),
+            blocks.extensions.Printing(every_n_batches = 10)#,
+            #ActiveLearning(data_stream.dataset)
         ]
     )
     loop.run()
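
Note on the new ActiveLearning.do() body above: it weights each node by its out-degree and normalizes into a sampling distribution over nodes. A minimal NumPy restatement of that computation (the helper name node_probabilities is illustrative, not part of the module):

    import numpy as np

    def node_probabilities(graph):
        # Row sums of the adjacency/weight matrix give each node's
        # outgoing degree; normalizing yields a probability distribution.
        out_degree = np.sum(graph, axis=1)
        return out_degree / np.sum(out_degree)

    # Example: a 3-node chain 0 -> 1 -> 2; node 2 has no outgoing edges.
    chain = np.array([[0., 1., 0.],
                      [0., 0., 1.],
                      [0., 0., 0.]])
    print(node_probabilities(chain))  # [0.5 0.5 0. ]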

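Note on the new rmse term in create_mle_model(): it averages the squared differences between the true graph and the learned edge parameters over all n_nodes**2 entries, excluding the diagonal (self-edges are not modeled); despite the name, no square root is taken. One caveat: tsr.set_subtensor returns a new symbolic variable rather than modifying diff in place, so the diagonal is only actually zeroed if the result is reassigned, e.g. diff = tsr.set_subtensor(diff[subarray, subarray], 0). A plain-NumPy sketch of the intended quantity (offdiag_mse is an illustrative name, not part of the module):

    import numpy as np

    def offdiag_mse(graph, params, n_nodes):
        # Elementwise squared error between true and learned edge weights.
        diff = (graph - params) ** 2
        # Self-edges are not modeled, so drop the diagonal terms.
        np.fill_diagonal(diff, 0.0)
        # Mean over all n_nodes**2 entries (no square root despite 'rmse').
        return np.sum(diff) / (n_nodes ** 2)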