import main as mn
import theano
from theano import tensor as tsr
import theano.tensor.shared_randomstreams
import numpy as np

# Problem size
n_nodes = 4        # parameter tensors below are (1, n_nodes, n_nodes)
n_cascades = 1000  # number of cascades requested from the simulator
n_samples = 100    # number of random draws of theta per graph evaluation

# Optimisation settings
n_epochs = 10      # passes over the observations in the main loop
lr = 1e-2          # step size applied to the gradients in the updates

# Seeded symbolic random stream used to draw the noise for theta
srng = tsr.shared_randomstreams.RandomStreams(seed=123)


# Declare Theano variables
def _init_param(scale, name):
    """Create a shared (1, n_nodes, n_nodes) tensor of scale * U[0, 1) values.

    The leading axis is declared broadcastable so the parameter can be
    combined with a stack of samples along that axis.
    """
    return theano.shared(scale * np.random.rand(1, n_nodes, n_nodes),
                         name=name, broadcastable=(True, False, False))


# Parameters that the compiled training functions update.
mu = _init_param(.5, "mu")
sig = _init_param(.3, "sig")
# Reference parameters that only appear in the KL term. They carry their own
# names so graph printouts and error messages do not confuse them with
# mu / sig.
mu0 = _init_param(.5, "mu0")
sig0 = _init_param(.3, "sig0")

# Symbolic int8 matrix inputs.
# NOTE(review): presumably x holds the observed node states and s a mask of
# the entries that count towards the likelihood -- confirm against `main`.
x = tsr.matrix(name='x', dtype='int8')
s = tsr.matrix(name='s', dtype='int8')

# Construct Theano graph
# Draw n_samples parameter tensors as theta = mu + sig * eps, eps ~ N(0, 1);
# the broadcastable leading axis of mu / sig expands over the draws.
theta = srng.normal((n_samples, n_nodes, n_nodes)) * sig + mu
# Clipping to [1e-3, 1] keeps y strictly positive, so log(1 - exp(-y)) below
# stays finite.
y = tsr.clip(tsr.dot(x, theta), 1e-3, 1)
# Row t of y is paired with row t + 1 of x and s, hence y[0:-1] vs x[1:].
# The dimshuffle swaps the first two axes of the 3-d tensor so the trailing
# two axes line up with the 2-d masks built from x and s.
infect = tsr.log(1. - tsr.exp(-y[0:-1])).dimshuffle(1, 0, 2)
pos_mask = x[1:] & s[1:]    # entries where both x and s are set
neg_mask = ~x[1:] & s[1:]   # entries where s is set but x is not
# Both terms are averaged over the n_samples draws of theta.
lkl_pos = tsr.sum(infect * pos_mask) / n_samples
lkl_neg = tsr.sum(-y[0:-1].dimshuffle(1, 0, 2) * neg_mask) / n_samples

lkl = lkl_pos + lkl_neg
# Element-wise KL(N(mu0, sig0^2) || N(mu, sig^2)), summed over all entries
# and left without the additive constant -1/2 per entry (it does not affect
# the gradients). The denominator is 2 * sig**2; the previous (2*sig)**2
# evaluated to 4 * sig**2.
kl = tsr.sum(tsr.log(sig / sig0)
             + (sig0**2 + (mu0 - mu)**2) / (2 * sig**2))
# NOTE(review): kl is *added* to lkl here, and the updates built from
# gmukl / gsigkl step along +grad(kl); a variational lower bound would
# subtract the KL term -- confirm the intended sign.
res = lkl + kl

gmu, gsig = theano.grad(lkl, [mu, sig])
gmukl, gsigkl = theano.grad(kl, [mu, sig])

# Compile into functions
def _clipped_step(param, grad, lower):
    """Return the symbolic update param + lr * grad, clipped to [lower, 1]."""
    return tsr.clip(param + lr * grad, lower, 1)


# Evaluates lkl for the given (x, s); performs no parameter updates.
loglkl_full = theano.function([x, s], lkl)

# Returns res for the given (x, s) and moves mu / sig one step along the
# gradients of lkl (the KL gradients are not used here).
_lkl_updates = ((mu, _clipped_step(mu, gmu, 0)),
                (sig, _clipped_step(sig, gsig, 1e-3)))
train = theano.function(inputs=[x, s], outputs=res, updates=_lkl_updates)

# Takes no inputs and returns nothing; moves mu / sig one step along the
# gradients of kl.
# NOTE(review): the step is along +grad(kl), i.e. it increases kl -- confirm
# this is the intended direction.
_kl_updates = ((mu, _clipped_step(mu, gmukl, 0)),
               (sig, _clipped_step(sig, gsigkl, 1e-3)))
train_kl = theano.function(inputs=[], outputs=[], updates=_kl_updates)


if __name__ == "__main__":
    # Random integer adjacency matrix with the diagonal (self-loops) zeroed.
    # NOTE(review): binomial(2, ...) can draw the value 2; with p = 0.5 that
    # makes 1 - p * graph == 0 and therefore an infinite weight below --
    # confirm that n=2 (rather than n=1) is intended.
    graph = np.random.binomial(2, p=.2, size=(n_nodes, n_nodes))
    np.fill_diagonal(graph, 0)
    p = 0.5
    # Map each entry g to the weight -log(1 - p * g).
    graph = np.log(1. / (1 - p * graph))
    cascades = mn.build_cascade_list(mn.simulate_cascades(n_cascades, graph),
                                     collapse=True)
    x_obs, s_obs = cascades[0], cascades[1]

    batch_size = 100
    # Floor division keeps the batch count an integer on Python 2 and 3;
    # a trailing partial batch is dropped, as before.
    n_batches = len(x_obs) // batch_size
    # Stays None if there are fewer than batch_size observations, so the
    # print below cannot hit an unbound name.
    cost = None
    for epoch in range(n_epochs):
        train_kl()
        for k in range(n_batches):
            lo, hi = k * batch_size, (k + 1) * batch_size
            cost = train(x_obs[lo:hi], s_obs[lo:hi])
        # Cost returned by the last mini-batch of this epoch.
        print(cost)
    # Learned means next to the weights used to simulate the cascades.
    print(mu.get_value())
    print(graph)