aboutsummaryrefslogtreecommitdiffstats
path: root/simulation/mle.py
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2015-11-30 19:57:58 -0500
committerThibaut Horel <thibaut.horel@gmail.com>2015-11-30 19:57:58 -0500
commitf1762904c648b2089031ba6ce46ccaaac4f3514c (patch)
tree78b13559034985d8f2d16314a4fce340f2070aba /simulation/mle.py
parent52cf8293061a1e35b5b443ef6dc70aa51727cf00 (diff)
downloadcascades-f1762904c648b2089031ba6ce46ccaaac4f3514c.tar.gz
Big code cleanup
Diffstat (limited to 'simulation/mle.py')
-rw-r--r--simulation/mle.py72
1 files changed, 72 insertions, 0 deletions
diff --git a/simulation/mle.py b/simulation/mle.py
new file mode 100644
index 0000000..c6b2e85
--- /dev/null
+++ b/simulation/mle.py
@@ -0,0 +1,72 @@
+import numpy as np
+from scipy.optimize import minimize
+
+
def likelihood(p, x, y):
    """Log-likelihood of rate parameters ``p`` given observations.

    Each row of ``x`` is a feature vector; the event probability of a row
    is ``1 - exp(-x . p)``.  Rows flagged True in the boolean mask ``y``
    (observed events) contribute ``log(1 - exp(-a))``; the remaining rows
    contribute ``-a``.
    """
    rates = x.dot(p)
    positive = np.log(1. - np.exp(-rates[y])).sum()
    negative = rates[~y].sum()
    return positive - negative
+
+
def likelihood_gradient(p, x, y):
    """Return ``(log-likelihood, gradient)`` of ``p`` given ``x`` and mask ``y``.

    Same model as ``likelihood``.  The gradient of ``log(1 - exp(-a))``
    with respect to ``p`` is the row's features scaled by
    ``1 / (exp(a) - 1)``; rows outside the mask contribute ``-x``.
    """
    activations = np.dot(x, p)
    pos = activations[y]
    value = np.log(1. - np.exp(-pos)).sum() - activations[~y].sum()
    weights = 1. / (np.exp(pos) - 1.)
    gradient = (x[y] * weights[:, np.newaxis]).sum(0) - x[~y].sum(0)
    return value, gradient
+
+
def test_gradient(x, y):
    """Diagnostic gradient check: print analytical vs. numerical gradient.

    For each coordinate ``i``, evaluates the analytical gradient (same
    formula as ``likelihood_gradient``) at ``p = 0.5`` and a central
    finite difference of ``likelihood``, printing both for manual
    comparison.  Prints only; asserts nothing.
    """
    # NOTE(review): eps=1e-10 is extremely small for central differences;
    # floating-point round-off can dominate the numerical estimate.
    # Consider ~1e-6 if the printed pairs disagree — TODO confirm.
    eps = 1e-10
    # BUG FIX: `xrange` is Python 2 only (NameError on Python 3) -> `range`.
    for i in range(x.shape[1]):
        p = 0.5 * np.ones(x.shape[1])
        a = np.dot(x, p)
        g1 = 1. / (np.exp(a[y]) - 1.)
        # Analytical gradient at p (duplicated from likelihood_gradient).
        g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0)
        p[i] += eps
        f1 = likelihood(p, x, y)
        p[i] -= 2 * eps
        f2 = likelihood(p, x, y)
        print(g[i], (f1 - f2) / (2 * eps))
+
+
def infer(x, y):
    """Maximum-likelihood estimate of the rate parameters.

    Minimizes the negated log-likelihood with L-BFGS-B, using the
    analytical gradient and box constraints that keep every coordinate
    strictly positive.
    """
    def objective(p):
        value, grad = likelihood_gradient(p, x, y)
        return -value, -grad

    n_params = x.shape[1]
    start = np.ones(n_params)
    box = [(1e-10, None)] * n_params
    result = minimize(objective, start, jac=True, bounds=box,
                      method="L-BFGS-B")
    return result.x
+
+
def bootstrap(x, y, n_iter=100):
    """Re-estimate the parameters on ``n_iter`` random 90% subsamples.

    Each iteration draws 90% of the observations without replacement and
    runs ``infer`` on them; row ``i`` of the returned
    ``(n_iter, n_params)`` array is that iteration's estimate, giving an
    empirical distribution of the MLE for confidence intervals.
    """
    rval = np.zeros((n_iter, x.shape[1]))
    # BUG FIX: `xrange` is Python 2 only (NameError on Python 3) -> `range`.
    for i in range(n_iter):
        indices = np.random.choice(len(y), replace=False,
                                   size=int(len(y) * .9))
        rval[i] = infer(x[indices], y[indices])
    return rval
+
+
def confidence_interval(counts, bins):
    """Return a central ~95% interval from a histogram ``(counts, bins)``.

    Grows a symmetric window of ``k`` bins around the middle of the
    histogram until it holds more than 95% of the total mass, then
    returns the bin edges bounding that window.  Assumes the histogram
    is centered, as produced by a symmetric binning of the bootstrap
    estimates — TODO confirm against caller.
    """
    # BUG FIX: `len(...) / 2` is a float on Python 3 and a TypeError when
    # used as an index/slice bound; use floor division.
    mid = len(counts) // 2
    total = np.sum(counts)  # hoisted: invariant across iterations
    k = 0
    while np.sum(counts[mid - k:mid + k]) <= .95 * total:
        k += 1
    return bins[len(bins) // 2 - k], bins[len(bins) // 2 + k]
+
+
def build_matrix(cascades, node):
    """Build the (design matrix, outcome vector) for one node.

    Each cascade is a sequence of ``(x, s)`` pairs, where ``x`` is the
    step's feature/state vector and ``s`` flags which nodes are
    susceptible at that step.  For every step ``i >= 1`` where ``node``
    is susceptible, the previous step's vector ``x[i-1]`` becomes one
    training row and ``x[i][node]`` the corresponding outcome.

    Cascades in which the node is never susceptible after step 0
    contribute nothing.  Returns the vertically stacked rows and the
    concatenated outcomes over all remaining cascades.
    """

    def aux(cascade, node):
        # Rows contributed by a single cascade, or None if there are none.
        xlist, slist = zip(*cascade)
        indices = [i for i, s in enumerate(slist) if s[node] and i >= 1]
        if not indices:
            return None
        # BUG FIX: np.vstack requires a sequence, not a generator
        # (generator support was deprecated and later removed in numpy);
        # build an explicit list.
        x = np.vstack([xlist[i - 1] for i in indices])
        y = np.array([xlist[i][node] for i in indices])
        return x, y

    pairs = (aux(cascade, node) for cascade in cascades)
    xs, ys = zip(*(pair for pair in pairs if pair is not None))
    x = np.vstack(xs)
    y = np.concatenate(ys)
    return x, y