moving node-specific methods to new file, adding cascade-level log-likelihood, committing active learning ipython notebook

author: jeanpouget-abadie <jean.pougetabadie@gmail.com> 2015-11-11 13:39:53 -0500
committer: jeanpouget-abadie <jean.pougetabadie@gmail.com> 2015-11-11 13:39:53 -0500
commit: eab2cc05424475a1b14bf950decde1bae8d8cc9a (patch)
tree: d93ff5bd5d2aacc7f91f78816a898143e0933a15 /simulation/main.py
parent: 43b07f68505ee10f2fe9fa94f365b27fb72f953c (diff)
download: cascades-eab2cc05424475a1b14bf950decde1bae8d8cc9a.tar.gz
1 files changed, 25 insertions, 84 deletions
diff --git a/simulation/main.py b/simulation/main.py
index 3e3de4b..3e458a7 100644
--- a/simulation/main.py
+++ b/simulation/main.py
@@ -1,3 +1,5 @@
+import mleNode as mn
+
 import numpy as np
 from numpy.linalg import norm
 import numpy.random as nr
@@ -9,85 +11,6 @@ from random import random, randint
 seaborn.set_style("white")
 
 
-def likelihood(p, x, y):
-    a = np.dot(x, p)
-    return np.log(1. - np.exp(-a[y])).sum() - a[~y].sum()
-
-
-def likelihood_gradient(p, x, y):
-    a = np.dot(x, p)
-    l = np.log(1. - np.exp(-a[y])).sum() - a[~y].sum()
-    g1 = 1. / (np.exp(a[y]) - 1.)
-    g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0)
-    return l, g
-
-
-def test_gradient(x, y):
-    eps = 1e-10
-    for i in xrange(x.shape[1]):
-        p = 0.5 * np.ones(x.shape[1])
-        a = np.dot(x, p)
-        g1 = 1. / (np.exp(a[y]) - 1.)
-        g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0)
-        p[i] += eps
-        f1 = likelihood(p, x, y)
-        p[i] -= 2 * eps
-        f2 = likelihood(p, x, y)
-        print g[i], (f1 - f2) / (2 * eps)
-
-
-def infer(x, y):
-    def f(p):
-        l, g = likelihood_gradient(p, x, y)
-        return -l, -g
-    x0 = np.ones(x.shape[1])
-    bounds = [(1e-10, None)] * x.shape[1]
-    return minimize(f, x0, jac=True, bounds=bounds, method="L-BFGS-B").x
-
-
-def bootstrap(x, y, n_iter=100):
-    rval = np.zeros((n_iter, x.shape[1]))
-    for i in xrange(n_iter):
-        indices = np.random.choice(len(y), replace=False, size=int(len(y)*.9))
-        rval[i] = infer(x[indices], y[indices])
-    return rval
-
-
-def confidence_interval(counts, bins):
-    k = 0
-    while np.sum(counts[len(counts)/2-k:len(counts)/2+k]) <= .95*np.sum(counts):
-        k += 1
-    return bins[len(bins)/2-k], bins[len(bins)/2+k]
-
-
-def build_matrix(cascades, node):
-
-    def aux(cascade, node):
-        xlist, slist = zip(*cascade)
-        indices = [i for i, s in enumerate(slist) if s[node] and i >= 1]
-        if indices:
-            x = np.vstack(xlist[i-1] for i in indices)
-            y = np.array([xlist[i][node] for i in indices])
-            return x, y
-        else:
-            return None
-
-    pairs = (aux(cascade, node) for cascade in cascades)
-    xs, ys = zip(*(pair for pair in pairs if pair))
-    x = np.vstack(xs)
-    y = np.concatenate(ys)
-    return x, y
-
-
-def build_cascade_list(cascades):
-    x, s = [], []
-    for cascade in cascades:
-        xlist, slist = zip(*cascade)
-        x.append(xlist)
-        s.append(slist)
-    return x, s
-
-
 def simulate_cascade(x, graph):
     """
     Simulate an IC cascade given a graph and initial state.
@@ -120,6 +43,24 @@ def simulate_cascades(n, graph, source=uniform_source):
         yield simulate_cascade(x0, graph)
 
 
+def build_cascade_list(cascades, collapse=False):
+    x, s = [], []
+    for cascade in cascades:
+        xlist, slist = zip(*cascade)
+        x.append(np.vstack(xlist))
+        s.append(np.vstack(slist))
+    if not collapse:
+        return x, s
+    else:
+        return np.vstack(x), np.vstack(s)
+
+
+def cascadeLkl(graph, infect, sus):
+    # There is a problem with the current implementation
+    a = np.dot(infect, graph)
+    return np.log(1. - np.exp(-a[(infect)*sus])).sum() - a[(~infect)*sus].sum()
+
+
 if __name__ == "__main__":
     # g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]])
     g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
@@ -145,17 +86,17 @@ if __name__ == "__main__":
                                          source=lambda graph: source(graph, t))
             e = np.zeros(g.shape[0])
             for j, s in enumerate(sizes):
-                x, y = build_matrix(cascades, 2)
-                e += infer(x[:s], y[:s])
+                x, y = mn.build_matrix(cascades, 2)
+                e += mn.infer(x[:s], y[:s])
 
     for i, t in enumerate(thresh):
-        plt.plot(sizes, m[:, i], label=str(t))
+        plt.plot(sizes, e[:, i], label=str(t))
     plt.legend()
     plt.show()
 
 
-        # conf = bootstrap(x, y, n_iter=100)
+        # conf = mn.bootstrap(x, y, n_iter=100)
         # estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1)
-        # error.append(confidence_interval(*np.histogram(estimand, bins=50)))
+        # error.append(mn.confidence_interval(*np.histogram(estimand, bins=50)))
     # plt.semilogx(sizes, error)
     # plt.show()
author	jeanpouget-abadie <jean.pougetabadie@gmail.com>	2015-11-11 13:39:53 -0500
committer	jeanpouget-abadie <jean.pougetabadie@gmail.com>	2015-11-11 13:39:53 -0500
commit	eab2cc05424475a1b14bf950decde1bae8d8cc9a (patch)
tree	d93ff5bd5d2aacc7f91f78816a898143e0933a15 /simulation/main.py
parent	43b07f68505ee10f2fe9fa94f365b27fb72f953c (diff)
download	cascades-eab2cc05424475a1b14bf950decde1bae8d8cc9a.tar.gz