Diffstat (limited to 'simulation')
| -rw-r--r-- | simulation/active_blocks.py                            |  50 |
| -rw-r--r-- | simulation/main.py                                     | 101 |
| -rw-r--r-- | simulation/mcmc.py (renamed from simulation/bayes.py)  |   0 |
| -rw-r--r-- | simulation/mle.py (renamed from simulation/mleNode.py) |   0 |
| -rw-r--r-- | simulation/utils.py                                    |  64 |
5 files changed, 84 insertions, 131 deletions
diff --git a/simulation/active_blocks.py b/simulation/active_blocks.py
index 569cb6c..a1f6e76 100644
--- a/simulation/active_blocks.py
+++ b/simulation/active_blocks.py
@@ -1,11 +1,12 @@
-import main as mn
+import utils
 import theano
 from theano import tensor as tsr
 import blocks
-import blocks.algorithms, blocks.main_loop, blocks.extensions.monitoring
+from blocks import algorithms, main_loop
+import blocks.extensions as be
+import blocks.extensions.monitoring as bm
 import picklable_itertools
 import numpy as np
-from six.moves import range
 import fuel
 import fuel.datasets
 import collections
@@ -19,28 +20,16 @@ class LearnedDataset(fuel.datasets.Dataset):
     """
     provides_sources = ('x', 's')

-    def __init__(self, node_p, graph, source=mn.var_source, **kwargs):
+    def __init__(self, node_p, graph, **kwargs):
         super(LearnedDataset, self).__init__(**kwargs)
         self.node_p = node_p
         self.graph = graph
-        self.n_cascades = 1  # nbr of cascades of total size approx = request
-        self.source = lambda graph, t : source(graph, t, self.node_p)
+        self.source = lambda graph: utils.random_source(graph, self.node_p)

     def get_data(self, state=None, request=None):
-        floatX = 'int8'
-        x_obs = np.empty((request, len(self.graph)), dtype=floatX)
-        s_obs = np.empty((request, len(self.graph)), dtype=floatX)
-        i = 0
-        while i < request:
-            x_tmp, s_tmp = mn.build_cascade_list(
-                mn.simulate_cascades(self.n_cascades, self.graph, self.source),
-                collapse=True
-            )
-            x_obs[i:i + len(x_tmp)] = x_tmp[:request - i]
-            s_obs[i:i + len(x_tmp)] = s_tmp[:request - i]
-            i += len(x_tmp)
-            self.n_cascades += 1  # learn optimal nbr in loop
-        self.n_cascades = max(1, self.n_cascades - 2)
+        # floatX = 'int8'
+        x_obs, s_obs = utils.simulate_cascades(request, self.graph, self.source)
+
         return (x_obs, s_obs)
@@ -115,8 +104,9 @@ def create_fixed_data_stream(n_cascades, graph, batch_size, shuffle=True):
     -shuffle (bool): shuffle minibatches but not within minibatch, else
        sequential (non-shuffled) batches are used
     """
-    cascades = mn.build_cascade_list(mn.simulate_cascades(n_cascades, graph),
-                                     collapse=True)
+    cascades = utils.build_cascade_list(
+        utils.simulate_cascades(n_cascades, graph),
+        collapse=True)
     x_obs, s_obs = cascades[0], cascades[1]
     data_set = fuel.datasets.base.IndexableDataset(collections.OrderedDict(
         [('x', x_obs), ('s', s_obs)]
@@ -138,23 +128,23 @@ def create_learned_data_stream(graph, batch_size):

 if __name__ == "__main__":
     batch_size = 1000
-    graph = mn.create_star(1000)
+    graph = utils.create_wheel(1000)
     print('GRAPH:\n', graph, '\n-------------\n')
     x, s, params, cost = create_mle_model(graph)
     rmse, g_shared = rmse_error(graph, params)
-    alg = blocks.algorithms.GradientDescent(
-        cost=-cost, parameters=[params], step_rule=blocks.algorithms.AdaDelta()
+    alg = algorithms.GradientDescent(
+        cost=-cost, parameters=[params], step_rule=blocks.algorithms.AdaDelta()
     )
     data_stream = create_learned_data_stream(graph, batch_size)
-    loop = blocks.main_loop.MainLoop(
+    loop = main_loop.MainLoop(
         alg, data_stream,
         extensions=[
-            blocks.extensions.FinishAfter(after_n_batches = 10**4),
-            blocks.extensions.monitoring.TrainingDataMonitoring([cost, params,
-                rmse, g_shared], after_batch=True),
-            blocks.extensions.Printing(every_n_batches = 10),
+            be.FinishAfter(after_n_batches=10**4),
+            bm.TrainingDataMonitoring([cost, params,
+                rmse, g_shared], after_batch=True),
+            be.Printing(every_n_batches=10),
             ActiveLearning(data_stream.dataset),
         ]
     )
diff --git a/simulation/main.py b/simulation/main.py
deleted file mode 100644
index 81133c7..0000000
--- a/simulation/main.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import mleNode as mn
-
-import numpy as np
-from numpy.linalg import norm
-import numpy.random as nr
-from scipy.optimize import minimize
-import matplotlib.pyplot as plt
-import seaborn
-from random import random, randint
-from six.moves import range
-
-seaborn.set_style("white")
-
-
-def create_random_graph(n_nodes, p=.5):
-    graph = .5 * np.random.binomial(2, p=.5, size=(n_nodes, n_nodes))
-    for k in range(len(graph)):
-        graph[k, k] = 0
-    return np.log(1. / (1 - p * graph))
-
-
-def create_star(n_nodes, p=.5):
-    graph = np.zeros((n_nodes, n_nodes))
-    graph[0] = np.ones((n_nodes,))
-    graph[0, 0] = 0
-    for index, row in enumerate(graph[1:-1]):
-        row[index + 1] = 1
-    graph[-1, 1] = 1
-    return np.log(1. / (1 - p * graph))
-
-
-def simulate_cascade(x, graph):
-    """
-    Simulate an IC cascade given a graph and initial state.
-
-    For each time step we yield:
-    - susc: the nodes susceptible at the beginning of this time step
-    - x: the subset of susc who became infected
-    """
-    yield x, np.zeros(graph.shape[0], dtype=bool)
-    susc = np.ones(graph.shape[0], dtype=bool)
-    while np.any(x):
-        susc = susc ^ x  # nodes infected at previous step are now inactive
-        if not np.any(susc):
-            break
-        x = 1 - np.exp(-np.dot(graph.T, x))
-        y = nr.random(x.shape[0])
-        x = (x >= y) & susc
-        yield x, susc
-
-
-def uniform_source(graph, *args, **kwargs):
-    x0 = np.zeros(graph.shape[0], dtype=bool)
-    x0[nr.randint(0, graph.shape[0])] = True
-    return x0
-
-
-def var_source(graph, t, node_p=None, *args, **kwargs):
-    if node_p is None:
-        node_p = np.ones(len(graph)) / len(graph)
-    x0 = np.zeros(graph.shape[0], dtype=bool)
-    x0[nr.choice(a=len(graph), p=node_p)] = True
-    return x0
-
-
-def simulate_cascades(n, graph, source=uniform_source):
-    for t in range(n):
-        x0 = source(graph, t)
-        yield simulate_cascade(x0, graph)
-
-
-def build_cascade_list(cascades, collapse=False):
-    x, s = [], []
-    for cascade in cascades:
-        xlist, slist = zip(*cascade)
-        x.append(np.vstack(xlist))
-        s.append(np.vstack(slist))
-    if not collapse:
-        return x, s
-    else:
-        return np.vstack(x), np.vstack(s)
-
-
-if __name__ == "__main__":
-    #g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
-    #p = 0.5
-    #g = np.log(1. / (1 - p * g))
-    g = create_random_graph(n_nodes=3)
-    print(g)
-    sizes = [10**3]
-    for si in sizes:
-        cascades = simulate_cascades(si, g)
-        cascade, y_obs = mn.build_matrix(cascades, 2)
-        print(mn.infer(cascade, y_obs))
-        #conf = mn.bootstrap(cascade, y_obs, n_iter=100)
-        #estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1)
-        #plt.hist(estimand, bins=40)
-        #plt.show()
-        #error.append(mn.confidence_interval(*np.histogram(estimand, bins=50)))
-    #plt.plot(sizes, error)
-    #plt.show()
diff --git a/simulation/bayes.py b/simulation/mcmc.py
index bde9e94..bde9e94 100644
--- a/simulation/bayes.py
+++ b/simulation/mcmc.py
diff --git a/simulation/mleNode.py b/simulation/mle.py
index c6b2e85..c6b2e85 100644
--- a/simulation/mleNode.py
+++ b/simulation/mle.py
diff --git a/simulation/utils.py b/simulation/utils.py
new file mode 100644
index 0000000..aad7771
--- /dev/null
+++ b/simulation/utils.py
@@ -0,0 +1,64 @@
+import numpy as np
+import numpy.random as nr
+from six.moves import range
+
+
+def create_random_graph(n_nodes, p=.5):
+    graph = .5 * np.random.binomial(2, p=.5, size=(n_nodes, n_nodes))
+    for k in range(len(graph)):
+        graph[k, k] = 0
+    return np.log(1. / (1 - p * graph))
+
+
+def create_wheel(n_nodes, p=.5):
+    graph = np.zeros((n_nodes, n_nodes))
+    graph[0] = np.ones(n_nodes)
+    graph[0, 0] = 0
+    for i in range(1, n_nodes-1):
+        graph[i, i + 1] = 1
+    graph[n_nodes-1, 1] = 1
+    return np.log(1. / (1 - p * graph))
+
+
+def simulate_cascade(x, graph):
+    """
+    Simulate an IC cascade given a graph and initial state.
+
+    For each time step we yield:
+    - susc: the nodes susceptible at the beginning of this time step
+    - x: the subset of susc who became infected
+    """
+    yield x, np.zeros(graph.shape[0], dtype=bool)
+    susc = np.ones(graph.shape[0], dtype=bool)
+    while np.any(x):
+        susc = susc ^ x  # nodes infected at previous step are now inactive
+        if not np.any(susc):
+            break
+        x = 1 - np.exp(-np.dot(graph.T, x))
+        y = nr.random(x.shape[0])
+        x = (x >= y) & susc
+        yield x, susc
+
+
+def random_source(graph, node_p=None):
+    n_nodes = graph.shape[0]
+    if node_p is None:
+        node_p = np.ones(n_nodes) / n_nodes
+    x0 = np.zeros(graph.shape[0], dtype=bool)
+    x0[nr.choice(n_nodes, p=node_p)] = True
+    return x0
+
+
+def simulate_cascades(n_obs, graph, source=random_source):
+    n_nodes = graph.shape[0]
+    x_obs = np.zeros((n_obs, n_nodes), dtype=bool)
+    s_obs = np.zeros((n_obs, n_nodes), dtype=bool)
+    i = 0
+    while i < n_obs:
+        for x, s in simulate_cascade(source(graph), graph):
+            x_obs[i] = x
+            s_obs[i] = s
+            i += 1
+            if i >= n_obs:
+                break
+    return x_obs, s_obs
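
For reference, the new simulation/utils.py added above is plain NumPy and can be exercised on its own. A minimal sketch (not part of the commit; it assumes the simulation/ directory is on the Python path and uses only the functions shown in the diff, with graph size and sample count chosen arbitrarily for illustration):

    # illustrative usage of the new utils module -- assumptions: utils.py importable as added above
    import utils

    graph = utils.create_wheel(10)                      # 10-node wheel; edges carry weight log(1 / (1 - p)), p = .5
    x_obs, s_obs = utils.simulate_cascades(500, graph)  # 500 rows of (infected, susceptible) indicators
    print(x_obs.shape, s_obs.shape)                     # (500, 10) (500, 10), both boolean arrays
    print(x_obs.mean(axis=0))                           # per-node infection frequency across the sampled rows

With node_p left at None, random_source seeds each cascade at a uniformly chosen node; simulate_cascades keeps drawing fresh cascades until exactly n_obs observation rows are filled.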
