author     jeanpouget-abadie <jean.pougetabadie@gmail.com>  2015-11-15 20:16:25 -0500
committer  jeanpouget-abadie <jean.pougetabadie@gmail.com>  2015-11-15 20:16:25 -0500
commit     ec129983fbac109000d436ce526763206b81aa76 (patch)
tree       ecc83ddf6752e8f4c20cb843ff3173524bf8ec79
parent     730b35ee58cd616ea626bde7171144c326d7cfd8 (diff)
download   cascades-ec129983fbac109000d436ce526763206b81aa76.tar.gz
variational inference sgd function
-rw-r--r--  simulation/main.py | 42
-rw-r--r--  simulation/vi.py   | 54
2 files changed, 54 insertions(+), 42 deletions(-)
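
The code added to simulation/vi.py performs stochastic-gradient variational inference for the cascade model: a Gaussian variational posterior q = N(mu1, sig1^2) over the edge-weight parameters theta is fit against a Gaussian prior N(mu0, sig0^2). For reference (this is the standard objective assumed here, not spelled out in the commit), the quantity being ascended is the evidence lower bound

    ELBO(mu1, sig1) = E_{theta ~ q}[ log p(cascades | theta) ] - KL( q || N(mu0, sig0^2) ),

where the expectation is approximated by Monte Carlo draws of theta (ll_full) and the KL term has a closed form for diagonal Gaussians (kl).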
diff --git a/simulation/main.py b/simulation/main.py
index 402aa8d..4fa8f6c 100644
--- a/simulation/main.py
+++ b/simulation/main.py
@@ -19,7 +19,8 @@ def simulate_cascade(x, graph):
- susc: the nodes susceptible at the beginning of this time step
- x: the subset of susc who became infected
"""
- susc = np.ones(graph.shape[0], dtype=bool) # t=0, everyone is susceptible
+ susc = np.ones(graph.shape[0], dtype=bool)
+ susc = susc ^ x # t=0, the source is not susceptible
yield x, susc
while np.any(x):
susc = susc ^ x # nodes infected at previous step are now inactive
@@ -66,41 +67,14 @@ def cascadeLkl(graph, infect, sus):
if __name__ == "__main__":
- # g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]])
g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
p = 0.5
g = np.log(1. / (1 - p * g))
- # error = []
+    cascades = simulate_cascades(100, g)
+    x, y = mn.build_matrix(cascades, 0)
+    sizes = np.arange(10, 100, step=10)  # number of observations used per estimate
+    error = []
- def source(graph, t):
- x0 = np.zeros(graph.shape[0], dtype=bool)
- a = randint(0, 1)
- x0[a] = True
- if random() > t:
- x0[1-a] = True
- return x0
-
- thresh = np.arange(0., 1.1, step=0.2)
- sizes = np.arange(10, 100, step=10)
- nsimul = 10
- r = np.zeros(len(sizes), len(thresh))
- for t in thresh:
- for i in nsimul:
- cascades = simulate_cascades(np.max(sizes), g,
- source=lambda graph: source(graph, t))
- e = np.zeros(g.shape[0])
- for j, s in enumerate(sizes):
- x, y = mn.build_matrix(cascades, 2)
- e += mn.infer(x[:s], y[:s])
-
- for i, t in enumerate(thresh):
- plt.plot(sizes, e[:, i], label=str(t))
- plt.legend()
+    for s in sizes:
+        conf = mn.bootstrap(x[:s], y[:s], n_iter=100)
+        estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1)
+        error.append(mn.confidence_interval(*np.histogram(estimand, bins=50)))
+    plt.semilogx(sizes, error)
plt.show()
-
-
- # conf = mn.bootstrap(x, y, n_iter=100)
- # estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1)
- # error.append(mn.confidence_interval(*np.histogram(estimand, bins=50)))
- # plt.semilogx(sizes, error)
- # plt.show()
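
For reference, the bootstrap error curve computed above can be read as follows (an interpretation of the code, not stated in the commit): each bootstrap replicate conf_b estimates the outgoing edge weights of node 0, np.delete drops the column for node 0 itself, and the histogrammed quantity is the per-replicate L2 error

    estimand_b = || conf_b - g[0] ||_2   (self-edge column removed),

which mn.confidence_interval then summarizes into the value plotted against the number of observations used.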
diff --git a/simulation/vi.py b/simulation/vi.py
index 9f2ed40..3ba6b96 100644
--- a/simulation/vi.py
+++ b/simulation/vi.py
@@ -1,3 +1,5 @@
+import time
+import main as mn
import autograd.numpy as np
from autograd import grad
@@ -10,20 +12,56 @@ def h(m):
return -m
-def ll(cascades, theta):
+def ll(x, s, theta):
+ """
+ x : infected
+ s : susceptible
+ """
res = 0
- for x, s in cascades:
- for t in range(1, x.shape[1] + 1):
- w = np.dot(x[t-1], theta)
- res += g(w)[x[t]].sum() + h(w)[~x[t] & s[t]].sum()
+    for t in range(1, x.shape[1] + 1):
+        w = np.dot(x[t-1], theta)
+        res += g(w)[x[t]].sum() + h(w)[~x[t] & s[t]].sum()
+    return res
def sample(params):
return np.random.normal(*params)
-def ll_full(params, cascades):
- nsamples = 50
- return np.mean([ll(cascades, sample(params)) for _ in xrange(nsamples)])
+def ll_full(params, x, s, nsamples=50):
+ return np.mean([ll(x, s, sample(params)) for _ in xrange(nsamples)])
+
grad_ll_full = grad(ll_full)
+
+
+def kl(params1, params0):
+    """Closed-form KL( N(mu1, sig1^2) || N(mu0, sig0^2) ) between the diagonal-Gaussian
+    variational posterior (params1) and the prior (params0)."""
+    mu0, sig0 = params0
+    mu1, sig1 = params1
+    return np.sum(np.log(sig0 / sig1) + (sig1**2 + (mu1 - mu0)**2) / (2 * sig0**2) - 0.5)
+
+
+grad_kl = grad(kl)
+
+
+def sgd(mu1, sig1, mu0, sig0, cascades, n_e, lr=lambda t: 1e-3):
+    for t in xrange(n_e):
+        lrt = lr(t)  # learning rate for this epoch
+        # descend the KL penalty so the bound E[ll] - KL increases
+        g_mu1, g_sig1 = grad_kl((mu1, sig1), (mu0, sig0))
+        mu1, sig1 = mu1 - lrt * g_mu1, sig1 - lrt * g_sig1
+        # ascend the Monte-Carlo expected log-likelihood, one cascade at a time
+        for x, s in cascades:
+            g_mu1, g_sig1 = grad_ll_full((mu1, sig1), x, s)
+            mu1, sig1 = mu1 + lrt * g_mu1, sig1 + lrt * g_sig1
+        # evidence lower bound: expected log-likelihood minus KL
+        res = sum(ll_full((mu1, sig1), x, s) for x, s in cascades) - \
+            kl((mu1, sig1), (mu0, sig0))
+        print("Epoch: {}\t LB: {}\t Time: {}".format(t, res, time.time()))
+    return mu1, sig1
+
+
+if __name__ == '__main__':
+ g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
+ p = 0.5
+ g = np.log(1. / (1 - p * g))
+ cascades = mn.build_cascade_list(mn.simulate_cascades(100, g))
+    n = len(g)
+    # theta enters ll as an n x n edge-weight matrix, so the Gaussian parameters are n x n
+    params0 = (1. + .2 * np.random.normal(size=(n, n)),
+               1. + .2 * np.random.normal(size=(n, n)))
+    params1 = (1. + .2 * np.random.normal(size=(n, n)),
+               1. + .2 * np.random.normal(size=(n, n)))
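
The committed __main__ stops after initializing the two parameter pairs. Below is a minimal sketch of how the new sgd routine might be driven from here; it is not part of the commit. Two assumptions are flagged explicitly: sample() is swapped for a hypothetical reparameterized sampler (theta = mu + sig * eps) so that grad_ll_full can propagate gradients through the Monte Carlo draws, and the epoch count and learning-rate schedule are placeholder values.

    # hypothetical continuation, not part of this commit
    def reparam_sample(params):
        mu, sig = params
        eps = np.random.normal(size=mu.shape)
        return mu + sig * eps  # differentiable in (mu, sig)

    sample = reparam_sample  # ll_full resolves `sample` through the module globals

    mu1, sig1 = sgd(params1[0], params1[1],        # variational mean / std
                    params0[0], params0[1],        # prior mean / std
                    cascades, n_e=20,              # placeholder epoch count
                    lr=lambda t: 1e-2 / (1. + t))  # placeholder decaying step size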