about | summary | refs | log | tree | commit | diff | stats
path: root/simulation
diff options
context:
space:
mode:
author: Thibaut Horel <thibaut.horel@gmail.com> 2015-11-30 19:57:58 -0500
committer: Thibaut Horel <thibaut.horel@gmail.com> 2015-11-30 19:57:58 -0500
commit: f1762904c648b2089031ba6ce46ccaaac4f3514c (patch)
tree: 78b13559034985d8f2d16314a4fce340f2070aba /simulation
parent: 52cf8293061a1e35b5b443ef6dc70aa51727cf00 (diff)
download: cascades-f1762904c648b2089031ba6ce46ccaaac4f3514c.tar.gz
Big code cleanup
Diffstat (limited to 'simulation')
-rw-r--r-- simulation/active_blocks.py | 50
-rw-r--r-- simulation/main.py | 101
-rw-r--r-- simulation/mcmc.py (renamed from simulation/bayes.py) | 0
-rw-r--r-- simulation/mle.py (renamed from simulation/mleNode.py) | 0
-rw-r--r-- simulation/utils.py | 64
5 files changed, 84 insertions, 131 deletions
diff --git a/simulation/active_blocks.py b/simulation/active_blocks.py
index 569cb6c..a1f6e76 100644
--- a/simulation/active_blocks.py
+++ b/simulation/active_blocks.py
@@ -1,11 +1,12 @@
-import main as mn
+import utils
import theano
from theano import tensor as tsr
import blocks
-import blocks.algorithms, blocks.main_loop, blocks.extensions.monitoring
+from blocks import algorithms, main_loop
+import blocks.extensions as be
+import blocks.extensions.monitoring as bm
import picklable_itertools
import numpy as np
-from six.moves import range
import fuel
import fuel.datasets
import collections
@@ -19,28 +20,16 @@ class LearnedDataset(fuel.datasets.Dataset):
"""
provides_sources = ('x', 's')
- def __init__(self, node_p, graph, source=mn.var_source, **kwargs):
+ def __init__(self, node_p, graph, **kwargs):
super(LearnedDataset, self).__init__(**kwargs)
self.node_p = node_p
self.graph = graph
- self.n_cascades = 1 # nbr of cascades of total size approx = request
- self.source = lambda graph, t : source(graph, t, self.node_p)
+ self.source = lambda graph: utils.random_source(graph, self.node_p)
def get_data(self, state=None, request=None):
- floatX = 'int8'
- x_obs = np.empty((request, len(self.graph)), dtype=floatX)
- s_obs = np.empty((request, len(self.graph)), dtype=floatX)
- i = 0
- while i < request:
- x_tmp, s_tmp = mn.build_cascade_list(
- mn.simulate_cascades(self.n_cascades, self.graph, self.source),
- collapse=True
- )
- x_obs[i:i + len(x_tmp)] = x_tmp[:request - i]
- s_obs[i:i + len(x_tmp)] = s_tmp[:request - i]
- i += len(x_tmp)
- self.n_cascades += 1 # learn optimal nbr in loop
- self.n_cascades = max(1, self.n_cascades - 2)
+ # floatX = 'int8'
+ x_obs, s_obs = utils.simulate_cascades(request, self.graph, self.source)
+
return (x_obs, s_obs)
@@ -115,8 +104,9 @@ def create_fixed_data_stream(n_cascades, graph, batch_size, shuffle=True):
-shuffle (bool): shuffle minibatches but not within minibatch, else
sequential (non-shuffled) batches are used
"""
- cascades = mn.build_cascade_list(mn.simulate_cascades(n_cascades, graph),
- collapse=True)
+ cascades = utils.build_cascade_list(
+ utils.simulate_cascades(n_cascades, graph),
+ collapse=True)
x_obs, s_obs = cascades[0], cascades[1]
data_set = fuel.datasets.base.IndexableDataset(collections.OrderedDict(
[('x', x_obs), ('s', s_obs)]
@@ -138,23 +128,23 @@ def create_learned_data_stream(graph, batch_size):
if __name__ == "__main__":
batch_size = 1000
- graph = mn.create_star(1000)
+ graph = utils.create_wheel(1000)
print('GRAPH:\n', graph, '\n-------------\n')
x, s, params, cost = create_mle_model(graph)
rmse, g_shared = rmse_error(graph, params)
- alg = blocks.algorithms.GradientDescent(
- cost=-cost, parameters=[params], step_rule=blocks.algorithms.AdaDelta()
+ alg = algorithms.GradientDescent(
+ cost=-cost, parameters=[params], step_rule=blocks.algorithms.AdaDelta()
)
data_stream = create_learned_data_stream(graph, batch_size)
- loop = blocks.main_loop.MainLoop(
+ loop = main_loop.MainLoop(
alg, data_stream,
extensions=[
- blocks.extensions.FinishAfter(after_n_batches = 10**4),
- blocks.extensions.monitoring.TrainingDataMonitoring([cost, params,
- rmse, g_shared], after_batch=True),
- blocks.extensions.Printing(every_n_batches = 10),
+ be.FinishAfter(after_n_batches=10**4),
+ bm.TrainingDataMonitoring([cost, params,
+ rmse, g_shared], after_batch=True),
+ be.Printing(every_n_batches=10),
ActiveLearning(data_stream.dataset),
]
)
diff --git a/simulation/main.py b/simulation/main.py
deleted file mode 100644
index 81133c7..0000000
--- a/simulation/main.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import mleNode as mn
-
-import numpy as np
-from numpy.linalg import norm
-import numpy.random as nr
-from scipy.optimize import minimize
-import matplotlib.pyplot as plt
-import seaborn
-from random import random, randint
-from six.moves import range
-
-seaborn.set_style("white")
-
-
-def create_random_graph(n_nodes, p=.5):
- graph = .5 * np.random.binomial(2, p=.5, size=(n_nodes, n_nodes))
- for k in range(len(graph)):
- graph[k, k] = 0
- return np.log(1. / (1 - p * graph))
-
-
-def create_star(n_nodes, p=.5):
- graph = np.zeros((n_nodes, n_nodes))
- graph[0] = np.ones((n_nodes,))
- graph[0, 0] = 0
- for index, row in enumerate(graph[1:-1]):
- row[index + 1] = 1
- graph[-1, 1] = 1
- return np.log(1. / (1 - p * graph))
-
-
-def simulate_cascade(x, graph):
- """
- Simulate an IC cascade given a graph and initial state.
-
- For each time step we yield:
- - susc: the nodes susceptible at the beginning of this time step
- - x: the subset of susc who became infected
- """
- yield x, np.zeros(graph.shape[0], dtype=bool)
- susc = np.ones(graph.shape[0], dtype=bool)
- while np.any(x):
- susc = susc ^ x # nodes infected at previous step are now inactive
- if not np.any(susc):
- break
- x = 1 - np.exp(-np.dot(graph.T, x))
- y = nr.random(x.shape[0])
- x = (x >= y) & susc
- yield x, susc
-
-
-def uniform_source(graph, *args, **kwargs):
- x0 = np.zeros(graph.shape[0], dtype=bool)
- x0[nr.randint(0, graph.shape[0])] = True
- return x0
-
-
-def var_source(graph, t, node_p=None, *args, **kwargs):
- if node_p is None:
- node_p = np.ones(len(graph)) / len(graph)
- x0 = np.zeros(graph.shape[0], dtype=bool)
- x0[nr.choice(a=len(graph), p=node_p)] = True
- return x0
-
-
-def simulate_cascades(n, graph, source=uniform_source):
- for t in range(n):
- x0 = source(graph, t)
- yield simulate_cascade(x0, graph)
-
-
-def build_cascade_list(cascades, collapse=False):
- x, s = [], []
- for cascade in cascades:
- xlist, slist = zip(*cascade)
- x.append(np.vstack(xlist))
- s.append(np.vstack(slist))
- if not collapse:
- return x, s
- else:
- return np.vstack(x), np.vstack(s)
-
-
-if __name__ == "__main__":
- #g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
- #p = 0.5
- #g = np.log(1. / (1 - p * g))
- g = create_random_graph(n_nodes=3)
- print(g)
- sizes = [10**3]
- for si in sizes:
- cascades = simulate_cascades(si, g)
- cascade, y_obs = mn.build_matrix(cascades, 2)
- print(mn.infer(cascade, y_obs))
- #conf = mn.bootstrap(cascade, y_obs, n_iter=100)
- #estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1)
- #plt.hist(estimand, bins=40)
- #plt.show()
- #error.append(mn.confidence_interval(*np.histogram(estimand, bins=50)))
- #plt.plot(sizes, error)
- #plt.show()
diff --git a/simulation/bayes.py b/simulation/mcmc.py
index bde9e94..bde9e94 100644
--- a/simulation/bayes.py
+++ b/simulation/mcmc.py
diff --git a/simulation/mleNode.py b/simulation/mle.py
index c6b2e85..c6b2e85 100644
--- a/simulation/mleNode.py
+++ b/simulation/mle.py
diff --git a/simulation/utils.py b/simulation/utils.py
new file mode 100644
index 0000000..aad7771
--- /dev/null
+++ b/simulation/utils.py
@@ -0,0 +1,64 @@
+import numpy as np
+import numpy.random as nr
+from six.moves import range
+
+
+def create_random_graph(n_nodes, p=.5):
+ graph = .5 * np.random.binomial(2, p=.5, size=(n_nodes, n_nodes))
+ for k in range(len(graph)):
+ graph[k, k] = 0
+ return np.log(1. / (1 - p * graph))
+
+
+def create_wheel(n_nodes, p=.5):
+ graph = np.zeros((n_nodes, n_nodes))
+ graph[0] = np.ones(n_nodes)
+ graph[0, 0] = 0
+ for i in range(1, n_nodes-1):
+ graph[i, i + 1] = 1
+ graph[n_nodes-1, 1] = 1
+ return np.log(1. / (1 - p * graph))
+
+
+def simulate_cascade(x, graph):
+ """
+ Simulate an IC cascade given a graph and initial state.
+
+ For each time step we yield:
+ - susc: the nodes susceptible at the beginning of this time step
+ - x: the subset of susc who became infected
+ """
+ yield x, np.zeros(graph.shape[0], dtype=bool)
+ susc = np.ones(graph.shape[0], dtype=bool)
+ while np.any(x):
+ susc = susc ^ x # nodes infected at previous step are now inactive
+ if not np.any(susc):
+ break
+ x = 1 - np.exp(-np.dot(graph.T, x))
+ y = nr.random(x.shape[0])
+ x = (x >= y) & susc
+ yield x, susc
+
+
+def random_source(graph, node_p=None):
+ n_nodes = graph.shape[0]
+ if node_p is None:
+ node_p = np.ones(n_nodes) / n_nodes
+ x0 = np.zeros(graph.shape[0], dtype=bool)
+ x0[nr.choice(n_nodes, p=node_p)] = True
+ return x0
+
+
+def simulate_cascades(n_obs, graph, source=random_source):
+ n_nodes = graph.shape[0]
+ x_obs = np.zeros((n_obs, n_nodes), dtype=bool)
+ s_obs = np.zeros((n_obs, n_nodes), dtype=bool)
+ i = 0
+ while i < n_obs:
+ for x, s in simulate_cascade(source(graph), graph):
+ x_obs[i] = x
+ s_obs[i] = s
+ i += 1
+ if i >= n_obs:
+ break
+ return x_obs, s_obs