Graph inference, first working version

author: Thibaut Horel <thibaut.horel@gmail.com> 2015-10-22 00:14:48 -0400
committer: Thibaut Horel <thibaut.horel@gmail.com> 2015-10-22 00:14:48 -0400
commit: 19ecb7730796f52c341745f85e02af1877ca85ff (patch)
tree: 973f1d300277d74d6142c24948dc04760f3397af /simulation
parent: fd4b442d8e429c97f2d72929f5c211f5f5e4d2b3 (diff)
download: cascades-19ecb7730796f52c341745f85e02af1877ca85ff.tar.gz
1 files changed, 70 insertions, 17 deletions
diff --git a/simulation/main.py b/simulation/main.py
index b7b3eaa..8ef4fd1 100644
--- a/simulation/main.py
+++ b/simulation/main.py
@@ -1,23 +1,59 @@
 import numpy as np
 import numpy.random as nr
+from scipy.optimize import minimize
+import matplotlib.pyplot as plt
+import seaborn
 
+seaborn.set_style("white")
 
-def likelihood(p, cascades, node):
-    x, y = build_matrix(cascades, node)
-    a = 1 - np.exp(-np.dot(x, p))
-    return np.inner(y, np.log(a)) + np.inner((1 - y), np.log(1-a))
+
+def likelihood(p, x, y):
+    a = np.dot(x, p)
+    return np.log(1. - np.exp(-a[y])).sum() - a[~y].sum()
+
+
+def likelihood_gradient(p, x, y):
+    a = np.dot(x, p)
+    l = np.log(1. - np.exp(-a[y])).sum() - a[~y].sum()
+    g1 = 1. / (np.exp(a[y]) - 1.)
+    g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0)
+    return l, g
+
+
+def test_gradient(x, y):
+    eps = 1e-10
+    for i in xrange(x.shape[1]):
+        p = 0.5 * np.ones(x.shape[1])
+        a = np.dot(x, p)
+        g1 = 1. / (np.exp(a[y]) - 1.)
+        g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0)
+        p[i] += eps
+        f1 = likelihood(p, x, y)
+        p[i] -= 2 * eps
+        f2 = likelihood(p, x, y)
+        print g[i], (f1 - f2) / (2 * eps)
+
+
+def infer(x, y):
+    def f(p):
+        l, g = likelihood_gradient(p, x, y)
+        return -l, -g
+    x0 = np.ones(x.shape[1])
+    bounds = [(0, None)] * x.shape[1]
+    return minimize(f, x0, jac=True, bounds=bounds, method="L-BFGS-B").x
 
 
 def build_matrix(cascades, node):
 
     def aux(cascade, node):
-        try:
-            m = np.vstack(x for x, s in cascade if s[node])
-        except ValueError:
+        xlist, slist = zip(*cascade)
+        indices = [i for i, s in enumerate(slist) if s[node] and i >= 1]
+        if indices:
+            x = np.vstack(xlist[i-1] for i in indices)
+            y = np.array([xlist[i][node] for i in indices])
+            return x, y
+        else:
             return None
-        x = m[:-1, :]
-        y = m[1:, node]
-        return x, y
 
     pairs = (aux(cascade, node) for cascade in cascades)
     xs, ys = zip(*(pair for pair in pairs if pair))
@@ -27,26 +63,43 @@ def build_matrix(cascades, node):
 
 
 def simulate_cascade(x, graph):
-    susc = x ^ np.ones(graph.shape[0]).astype(bool)
+    """
+    Simulate an IC cascade given a graph and initial state.
+
+    For each time step we yield:
+        - susc: the nodes susceptible at the beginning of this time step
+        - x: the subset of susc who became infected
+    """
+    susc = np.ones(graph.shape[0], dtype=bool)  # t=0, everyone is susceptible
     yield x, susc
     while np.any(x):
+        susc = susc ^ x  # nodes infected at previous step are now inactive
+        if not np.any(susc):
+            break
         x = 1 - np.exp(-np.dot(graph.T, x))
         y = nr.random(x.shape[0])
         x = (x >= y) & susc
         yield x, susc
-        susc ^= x
 
 
 def simulate_cascades(n, graph):
     for _ in xrange(n):
-        x = np.zeros(g.shape[0], dtype=bool)
-        x[nr.randint(0, g.shape[0])] = True
-        yield simulate_cascade(x, graph)
+        x0 = np.zeros(g.shape[0], dtype=bool)
+        x0[nr.randint(0, g.shape[0])] = True
+        yield simulate_cascade(x0, graph)
 
 
 if __name__ == "__main__":
     g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]])
     p = 0.5
     g = np.log(1. / (1 - p * g))
-    cascades = simulate_cascades(10, g)
-    print likelihood(p * np.ones(g.shape[0]), cascades, 3)
+    sizes = [100, 500, 1000, 5000, 10000,  50000, 100000, 1000000]
+    error = []
+    for i in sizes:
+        cascades = simulate_cascades(i, g)
+        x, y = build_matrix(cascades, 0)
+        r = infer(x, y)
+        r[0] = 0.
+        error.append(np.linalg.norm(r - g[0]))
+    plt.plot(sizes, error)
+    plt.show()
author	Thibaut Horel <thibaut.horel@gmail.com>	2015-10-22 00:14:48 -0400
committer	Thibaut Horel <thibaut.horel@gmail.com>	2015-10-22 00:14:48 -0400
commit	19ecb7730796f52c341745f85e02af1877ca85ff (patch)
tree	973f1d300277d74d6142c24948dc04760f3397af /simulation
parent	fd4b442d8e429c97f2d72929f5c211f5f5e4d2b3 (diff)
download	cascades-19ecb7730796f52c341745f85e02af1877ca85ff.tar.gz