author    jeanpouget-abadie <jean.pougetabadie@gmail.com>    2015-11-29 17:03:22 -0500
committer jeanpouget-abadie <jean.pougetabadie@gmail.com>    2015-11-29 17:03:22 -0500
commit    7322c00eafcde38dadbf9d4f05a1572d627355bf (patch)
tree      45db3a6e4d71b0a17be2a87b49a225d0d430cb1b
parent    041aa021657a3c290952b222e3141449638bad19 (diff)
download  cascades-7322c00eafcde38dadbf9d4f05a1572d627355bf.tar.gz
active learning for mle + variational inf. (not bug-free)
-rw-r--r--  simulation/active_blocks.py  150
-rw-r--r--  simulation/main.py            23
-rw-r--r--  simulation/vi_blocks.py       79
-rw-r--r--  simulation/vi_theano.py      110
4 files changed, 248 insertions, 114 deletions
diff --git a/simulation/active_blocks.py b/simulation/active_blocks.py
new file mode 100644
index 0000000..47fce2f
--- /dev/null
+++ b/simulation/active_blocks.py
@@ -0,0 +1,150 @@
+import main as mn
+import theano
+from theano import tensor as tsr
+import blocks
+import blocks.algorithms
+import blocks.main_loop
+import blocks.extensions.monitoring
+import picklable_itertools.extras
+import numpy as np
+from six.moves import range
+import fuel
+import fuel.datasets
+import fuel.schemes
+import fuel.streams
+import collections
+
+
+class LearnedDataset(fuel.datasets.Dataset):
+    """
+    Dynamically generated dataset (for active learning).
+    -compatible with ConstantScheme, where each request corresponds to a
+     batch_size
+    """
+    provides_sources = ('x', 's')
+
+    def __init__(self, node_p, graph, source=mn.var_source, **kwargs):
+        super(LearnedDataset, self).__init__(**kwargs)
+        self.node_p = node_p
+        self.graph = graph
+        self.n_cascades = 1  # number of cascades per request; adapted in get_data
+        self.source = lambda graph, t: source(graph, t, self.node_p)
+
+    def get_data(self, state=None, request=None):
+        dtype = 'int8'
+        x_obs = np.empty((request, len(self.graph)), dtype=dtype)
+        s_obs = np.empty((request, len(self.graph)), dtype=dtype)
+        i = 0
+        while i < request:
+            x_tmp, s_tmp = mn.build_cascade_list(
+                mn.simulate_cascades(self.n_cascades, self.graph, self.source),
+                collapse=True
+            )
+            # numpy clips the slice at the array end, so both sides match
+            x_obs[i:i + len(x_tmp)] = x_tmp[:request - i]
+            s_obs[i:i + len(x_tmp)] = s_tmp[:request - i]
+            i += len(x_tmp)
+            self.n_cascades += 1  # grow until one request is filled
+        # settle near the number of cascades needed for a single request
+        self.n_cascades = max(1, self.n_cascades - 2)
+        return (x_obs, s_obs)
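+    # Usage sketch (hypothetical sizes): with ConstantScheme(batch_size), the
+    # request equals batch_size, e.g.
+    #   ds = LearnedDataset(np.ones(len(graph)) / len(graph), graph)
+    #   x_batch, s_batch = ds.get_data(request=128)
+    # would return two (128, len(graph)) int8 arrays of cascade observations.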
+
+
+class ActiveLearning(blocks.extensions.SimpleExtension):
+    """
+    Extension that updates the node_p array passed to the get_data method
+    of LearnedDataset.
+    """
+    def __init__(self, dataset, **kwargs):
+        super(ActiveLearning, self).__init__(**kwargs)
+        self.dataset = dataset
+
+    def do(self, which_callback, *args):
+        # placeholder: the node_p update rule is not implemented yet
+        pass
+
+
+class ShuffledBatchesScheme(fuel.schemes.ShuffledScheme):
+    """
+    Iteration scheme over a finite dataset:
+    -shuffles batches but not examples within a batch
+    -arguments: dataset_size (int); batch_size (int)
+    """
+    def get_request_iterator(self):
+        indices = list(self.indices)  # self.indices = xrange(dataset_size)
+        # random offset so batch boundaries differ between epochs
+        start = np.random.randint(self.batch_size)
+        batches = list(map(
+            list,
+            picklable_itertools.extras.partition_all(self.batch_size,
+                                                     indices[start:])
+        ))
+        if indices[:start]:
+            batches.append(indices[:start])
+        # permute batch order only; batches may have unequal lengths, so keep
+        # them in a list instead of stacking into a numpy array
+        return iter([batches[p] for p in np.random.permutation(len(batches))])
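+    # Example (hypothetical sizes): with dataset_size=10, batch_size=4 and a
+    # drawn offset start=2, the batches are [2, 3, 4, 5], [6, 7, 8, 9] and
+    # [0, 1], yielded in random order with the order within each batch kept.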
+
+
+def create_mle_model(n_nodes):
+    """
+    Return the Theano computation graph of the cascade log-likelihood (MLE).
+    """
+    x = tsr.matrix(name='x', dtype='int8')  # x[t]: infection indicator at step t
+    s = tsr.matrix(name='s', dtype='int8')  # s[t]: susceptibility mask at step t
+    params = theano.shared(
+        .5 + .01 *
+        np.random.normal(size=(n_nodes, n_nodes)).astype(theano.config.floatX),
+        name='params'
+    )
+    y = tsr.maximum(tsr.dot(x, params), 1e-5)
+    infect = tsr.log(1. - tsr.exp(-y[0:-1]))
+    lkl_pos = tsr.sum(infect * (x[1:] & s[1:]))
+    lkl_neg = tsr.sum(-y[0:-1] * (~x[1:] & s[1:]))
+    lkl_mle = lkl_pos + lkl_neg
+    lkl_mle.name = 'cost'
+    return x, s, params, lkl_mle
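+# Likelihood sketch (restating the graph above): for the transition from step
+# t to t+1, node j accumulates hazard y_j = sum_i x_i(t) * params_ij and is
+# infected with probability 1 - exp(-y_j); lkl_pos sums log(1 - exp(-y_j))
+# over susceptible nodes that became infected, while lkl_neg sums
+# -y_j = log(exp(-y_j)) over susceptible nodes that did not.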
+
+
+def create_fixed_data_stream(n_cascades, graph, batch_size, shuffle=True):
+    """
+    Create a data stream over a fixed (not actively learned) dataset:
+    -shuffle (bool): shuffle minibatches but not within each minibatch;
+     otherwise sequential (non-shuffled) batches are used
+    """
+    cascades = mn.build_cascade_list(mn.simulate_cascades(n_cascades, graph),
+                                     collapse=True)
+    x_obs, s_obs = cascades[0], cascades[1]
+    data_set = fuel.datasets.base.IndexableDataset(collections.OrderedDict(
+        [('x', x_obs), ('s', s_obs)]
+    ))
+    if shuffle:
+        scheme = ShuffledBatchesScheme(len(x_obs), batch_size=batch_size)
+    else:
+        scheme = fuel.schemes.SequentialScheme(len(x_obs),
+                                               batch_size=batch_size)
+    return fuel.streams.DataStream(dataset=data_set, iteration_scheme=scheme)
+
+
+def create_learned_data_stream(graph, batch_size):
+    node_p = np.ones(len(graph)) / len(graph)  # uniform initial source distribution
+    data_set = LearnedDataset(node_p, graph)
+    scheme = fuel.schemes.ConstantScheme(batch_size)
+    return fuel.streams.DataStream(dataset=data_set, iteration_scheme=scheme)
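+# Intended use: the stream simulates fresh data on demand; because
+# LearnedDataset reads self.node_p on every call, an ActiveLearning extension
+# can update data_set.node_p between batches to steer where cascades start.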
+
+
+if __name__ == "__main__":
+    batch_size = 1000
+    graph = mn.create_random_graph(n_nodes=1000)
+    print('GRAPH:\n', graph, '\n-------------\n')
+
+    x, s, params, cost = create_mle_model(len(graph))
+
+    alg = blocks.algorithms.GradientDescent(
+        cost=-cost, parameters=[params], step_rule=blocks.algorithms.AdaDelta()
+    )
+    data_stream = create_learned_data_stream(graph, batch_size)
+    #data_stream = create_fixed_data_stream(n_cascades, graph, batch_size,
+    #                                       shuffle=False)
+    loop = blocks.main_loop.MainLoop(
+        alg, data_stream,
+        extensions=[
+            blocks.extensions.FinishAfter(after_n_batches=10**4),
+            blocks.extensions.monitoring.TrainingDataMonitoring(
+                [cost, params], after_batch=True),
+            blocks.extensions.Printing(every_n_batches=10),
+            #ActiveLearning(active_dataset)
+        ]
+    )
+    loop.run()
diff --git a/simulation/main.py b/simulation/main.py
index 1c0b3e8..a916034 100644
--- a/simulation/main.py
+++ b/simulation/main.py
@@ -12,6 +12,13 @@ from six.moves import range
seaborn.set_style("white")
+def create_random_graph(n_nodes, p=.5):
+    """
+    Draw a random graph with edge strengths g_ij in {0, .5, 1} and no
+    self-loops, returned as rates w_ij = log(1 / (1 - p * g_ij)), so that
+    1 - exp(-w_ij) = p * g_ij is the single-step infection probability.
+    """
+    graph = .5 * np.random.binomial(2, p=.5, size=(n_nodes, n_nodes))
+    for k in range(len(graph)):
+        graph[k, k] = 0
+    return np.log(1. / (1 - p * graph))
+
+
def simulate_cascade(x, graph):
"""
Simulate an IC cascade given a graph and initial state.
@@ -22,7 +29,6 @@ def simulate_cascade(x, graph):
"""
yield x, np.zeros(graph.shape[0], dtype=bool)
susc = np.ones(graph.shape[0], dtype=bool)
- #yield x, susc
while np.any(x):
susc = susc ^ x # nodes infected at previous step are now inactive
if not np.any(susc):
@@ -39,6 +45,14 @@ def uniform_source(graph, *args, **kwargs):
    return x0
+def var_source(graph, t, node_p=None, *args, **kwargs):
+    """Draw a single cascade source from the distribution node_p (uniform if None)."""
+    if node_p is None:
+        node_p = np.ones(len(graph)) / len(graph)
+    x0 = np.zeros(graph.shape[0], dtype=bool)
+    x0[nr.choice(a=len(graph), p=node_p)] = True
+    return x0
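+# Example (hypothetical input): with node_p = np.array([1., 0., 0.]) the draw
+# is deterministic and var_source always seeds the cascade at node 0,
+# returning the indicator vector [True, False, False].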
+
+
def simulate_cascades(n, graph, source=uniform_source):
    for t in range(n):
        x0 = source(graph, t)
@@ -58,9 +72,10 @@ def build_cascade_list(cascades, collapse=False):
if __name__ == "__main__":
-    g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
-    p = 0.5
-    g = np.log(1. / (1 - p * g))
+    #g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
+    #p = 0.5
+    #g = np.log(1. / (1 - p * g))
+    g = create_random_graph(n_nodes=3)
    print(g)
    sizes = [10**3]
    for si in sizes:
diff --git a/simulation/vi_blocks.py b/simulation/vi_blocks.py
new file mode 100644
index 0000000..3f88611
--- /dev/null
+++ b/simulation/vi_blocks.py
@@ -0,0 +1,79 @@
+import main as mn
+import theano
+from theano import tensor as tsr
+import blocks
+import blocks.algorithms
+import blocks.main_loop
+import blocks.extensions.monitoring
+import theano.tensor.shared_randomstreams
+import numpy as np
+from six.moves import range
+import fuel
+import fuel.datasets
+import active_blocks as ab
+
+
+class ClippedParams(blocks.algorithms.StepRule):
+    """A step rule that keeps parameters within a specified range."""
+    def __init__(self, min_value, max_value):
+        self.min_value = min_value
+        self.max_value = max_value
+
+    def compute_step(self, parameter, previous_step):
+        # GradientDescent applies parameter := parameter - step, so shrink
+        # the step just enough that the update lands in [min_value, max_value];
+        # Blocks expects compute_step to return (step, updates)
+        clipped = tsr.clip(parameter - previous_step,
+                           self.min_value, self.max_value)
+        return parameter - clipped, []
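+# Usage sketch (mirrors __main__ below): compose after the main step rule so
+# the clipping applies to the step proposed by AdaDelta, e.g.
+#   blocks.algorithms.CompositeRule([blocks.algorithms.AdaDelta(),
+#                                    ClippedParams(1e-3, 1 - 1e-3)])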
+
+
+def create_vi_model(n_nodes, n_samp=100):
+    """Return the variational inference Theano computation graph."""
+    def aux():
+        rand = .1 + .05 * np.random.normal(size=(n_nodes, n_nodes))
+        return rand.astype(theano.config.floatX)
+
+    x = tsr.matrix(name='x', dtype='int8')
+    s = tsr.matrix(name='s', dtype='int8')
+    mu = theano.shared(value=aux(), name='mu1')
+    sig = theano.shared(value=aux(), name='sig1')
+    mu0 = theano.shared(value=aux(), name='mu0')
+    sig0 = theano.shared(value=aux(), name='sig0')
+
+    srng = tsr.shared_randomstreams.RandomStreams(seed=123)
+    theta = (srng.normal((n_samp, n_nodes, n_nodes)) * sig[None, :, :]
+             + mu[None, :, :])
+    y = tsr.maximum(tsr.dot(x, theta), 1e-3)
+    infect = tsr.log(1. - tsr.exp(-y[0:-1])).dimshuffle(1, 0, 2)
+    lkl_pos = tsr.sum(infect * (x[1:] & s[1:])) / n_samp
+    lkl_neg = tsr.sum(-y[0:-1].dimshuffle(1, 0, 2) * (~x[1:] & s[1:])) / n_samp
+    lkl = lkl_pos + lkl_neg
+    # KL(q || p) between q = N(mu, sig^2) and the prior p = N(mu0, sig0^2);
+    # the ELBO to maximize is the expected log-likelihood minus this term
+    kl = tsr.sum(tsr.log(sig0 / sig)
+                 + (sig**2 + (mu - mu0)**2) / (2 * sig0**2) - .5)
+    cost = lkl - kl
+    cost.name = 'cost'
+
+    return x, s, mu, sig, cost
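+# Note on the estimator: theta is sampled via the reparameterization
+# theta = mu + sig * eps with eps ~ N(0, 1), so lkl is an n_samp-sample Monte
+# Carlo estimate of the expected log-likelihood, cost estimates the ELBO, and
+# gradients flow directly to mu and sig.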
+
+
+if __name__ == "__main__":
+    n_cascades = 10000
+    batch_size = 1000
+    graph = mn.create_random_graph(n_nodes=3)
+    print('GRAPH:\n', graph, '\n-------------\n')
+
+    x, s, mu, sig, cost = create_vi_model(len(graph))
+
+    step_rules = blocks.algorithms.CompositeRule([blocks.algorithms.AdaDelta(),
+                                                  ClippedParams(1e-3, 1 - 1e-3)])
+
+    alg = blocks.algorithms.GradientDescent(cost=-cost, parameters=[mu, sig],
+                                            step_rule=step_rules)
+    data_stream = ab.create_fixed_data_stream(n_cascades, graph, batch_size,
+                                              shuffle=False)
+    loop = blocks.main_loop.MainLoop(
+        alg, data_stream,
+        extensions=[
+            blocks.extensions.FinishAfter(after_n_batches=10**4),
+            blocks.extensions.monitoring.TrainingDataMonitoring(
+                [cost, mu, sig], after_batch=True),
+            blocks.extensions.Printing(every_n_batches=10),
+        ]
+    )
+    loop.run()
diff --git a/simulation/vi_theano.py b/simulation/vi_theano.py
deleted file mode 100644
index cfd1dcf..0000000
--- a/simulation/vi_theano.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import main as mn
-import theano
-from theano import tensor as tsr
-import theano.tensor.shared_randomstreams
-import numpy as np
-
-n_cascades = 1000
-n_nodes = 3
-n_samples = 100
-srng = tsr.shared_randomstreams.RandomStreams(seed=123)
-lr = 1e-1
-n_epochs = 1
-
-###############Variational Inference####################
-
-# Declare Theano variables
-mu = theano.shared(.5 + .2 * np.random.normal(size=(1, n_nodes, n_nodes)),
-                   name="mu", broadcastable=(True, False, False))
-sig = theano.shared(.1 + .04 * np.random.normal(size=(1, n_nodes, n_nodes)),
-                    name="sig", broadcastable=(True, False, False))
-mu0 = theano.shared(.5 + .2 * np.random.normal(size=(1, n_nodes, n_nodes)),
-                    name="mu", broadcastable=(True, False, False))
-sig0 = theano.shared(.1 + .04 * np.random.normal(size=(1, n_nodes, n_nodes)),
-                     name="sig", broadcastable=(True, False, False))
-x = tsr.matrix(name='x', dtype='int8')
-s = tsr.matrix(name='s', dtype='int8')
-
-# Construct Theano graph
-theta = srng.normal((n_samples, n_nodes, n_nodes)) * sig + mu
-y = tsr.maximum(tsr.dot(x, theta), 1e-3)
-infect = tsr.log(1. - tsr.exp(-y[0:-1])).dimshuffle(1, 0, 2)
-lkl_pos = tsr.sum(infect * (x[1:] & s[1:])) / n_samples
-lkl_neg = tsr.sum(-y[0:-1].dimshuffle(1, 0, 2) * (~x[1:] & s[1:])) / n_samples
-lkl = lkl_pos + lkl_neg
-kl = tsr.sum(tsr.log(sig / sig0) + (sig0**2 + (mu0 - mu)**2)/(2*sig)**2)
-res = lkl + kl
-
-gmu, gsig = theano.gradient.grad(lkl, [mu, sig])
-gmukl, gsigkl = theano.grad(kl, [mu, sig])
-
-# Compile into functions
-loglkl_full = theano.function([x, s], lkl)
-train = theano.function(inputs=[x, s], outputs=res,
-                        updates=((mu, tsr.clip(mu + lr * gmu, 0, 1)),
-                                 (sig, tsr.clip(sig + lr * gsig, 1e-3, 1))))
-train_kl = theano.function(inputs=[], outputs=[],
-                           updates=((mu, tsr.clip(mu + lr * gmukl, 0, 1)),
-                                    (sig, tsr.clip(sig + lr * gsigkl, 1e-3, 1))))
-
-
-###############Maximum Likelihood#####################
-
-x = tsr.matrix(name='x', dtype='int8')
-s = tsr.matrix(name='s', dtype='int8')
-params = theano.shared(.5 + .01*np.random.normal(size=(n_nodes, n_nodes)),
-                       name='params')
-y = tsr.maximum(tsr.dot(x, params), 1e-5)
-infect = tsr.log(1. - tsr.exp(-y[0:-1]))
-lkl_pos = tsr.sum(infect * (x[1:] & s[1:]))
-lkl_neg = tsr.sum(-y[0:-1] * (~x[1:] & s[1:]))
-lkl_mle = lkl_pos + lkl_neg
-gparams = theano.gradient.grad(lkl_mle, params)
-train_mle = theano.function(inputs=[x, s], outputs=lkl_mle,
-                            updates=[(params, tsr.clip(params + lr * gparams, 0, 1))])
-
-
-if __name__ == "__main__":
- graph = .5 * np.random.binomial(2, p=.5, size=(n_nodes, n_nodes))
- for k in range(len(graph)):
- graph[k, k] = 0
- p = 0.5
- graph = np.log(1. / (1 - p * graph))
-
- graph = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
- p = 0.5
- graph = np.log(1. / (1 - p * graph))
-
- cascades = mn.build_cascade_list(mn.simulate_cascades(n_cascades, graph),
- collapse=True)
- x_obs, s_obs = cascades[0], cascades[1]
-
- #mle
- lkl_plot = []
- if 1:
- for i in range(n_epochs):
- for k in xrange(len(x_obs)/100):
- xt = x_obs[k*100:(k+1)*100]
- st = s_obs[k*100:(k+1)*100]
- lkl = train_mle(xt, st)
- lkl_plot.append(lkl)
- print(params.get_value())
- print(graph)
- w = params.get_value()
- for k in range(len(w)):
- w[k, k] = 0
- print(w)
- #import matplotlib.pyplot as plt
- #plt.plot(lkl_plot)
- #plt.show()
-
- #variational inference
- if 0:
- for i in range(n_epochs):
- train_kl()
- for k in xrange(len(x_obs)/100):
- cost = train(x_obs[k*100:(k+1)*100], s_obs[k*100:(k+1)*100])
- print(cost)
- print(graph)
- print(mu.get_value())
- print(sig.get_value())