aboutsummaryrefslogtreecommitdiffstats
path: root/simulation/mle.py
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2015-11-30 19:57:58 -0500
committerThibaut Horel <thibaut.horel@gmail.com>2015-11-30 19:57:58 -0500
commitf1762904c648b2089031ba6ce46ccaaac4f3514c (patch)
tree78b13559034985d8f2d16314a4fce340f2070aba /simulation/mle.py
parent52cf8293061a1e35b5b443ef6dc70aa51727cf00 (diff)
downloadcascades-f1762904c648b2089031ba6ce46ccaaac4f3514c.tar.gz
Big code cleanup
Diffstat (limited to 'simulation/mle.py')
-rw-r--r--simulation/mle.py72
1 files changed, 72 insertions, 0 deletions
diff --git a/simulation/mle.py b/simulation/mle.py
new file mode 100644
index 0000000..c6b2e85
--- /dev/null
+++ b/simulation/mle.py
@@ -0,0 +1,72 @@
+import numpy as np
+from scipy.optimize import minimize
+
+
def likelihood(p, x, y):
    """Log-likelihood of rate parameters ``p`` given observations.

    Each row of ``x`` is a feature vector; the event probability of a row
    is ``1 - exp(-x . p)``.  Rows flagged True in the boolean mask ``y``
    (observed events) contribute ``log(1 - exp(-a))``; the remaining rows
    contribute ``-a``.
    """
    rates = x.dot(p)
    positive = np.log(1. - np.exp(-rates[y])).sum()
    negative = rates[~y].sum()
    return positive - negative
+
+
def likelihood_gradient(p, x, y):
    """Return ``(log-likelihood, gradient)`` of ``p`` given ``x`` and mask ``y``.

    Same model as ``likelihood``.  The gradient of ``log(1 - exp(-a))``
    with respect to ``p`` is the row's features scaled by
    ``1 / (exp(a) - 1)``; rows outside the mask contribute ``-x``.
    """
    activations = np.dot(x, p)
    pos = activations[y]
    value = np.log(1. - np.exp(-pos)).sum() - activations[~y].sum()
    weights = 1. / (np.exp(pos) - 1.)
    gradient = (x[y] * weights[:, np.newaxis]).sum(0) - x[~y].sum(0)
    return value, gradient
+
+
def test_gradient(x, y):
    """Diagnostic gradient check: print analytical vs. numerical gradient.

    For each coordinate ``i``, evaluates the analytical gradient (same
    formula as ``likelihood_gradient``) at ``p = 0.5`` and a central
    finite difference of ``likelihood``, printing both for manual
    comparison.  Prints only; asserts nothing.
    """
    # NOTE(review): eps=1e-10 is extremely small for central differences;
    # floating-point round-off can dominate the numerical estimate.
    # Consider ~1e-6 if the printed pairs disagree — TODO confirm.
    eps = 1e-10
    # BUG FIX: `xrange` is Python 2 only (NameError on Python 3) -> `range`.
    for i in range(x.shape[1]):
        p = 0.5 * np.ones(x.shape[1])
        a = np.dot(x, p)
        g1 = 1. / (np.exp(a[y]) - 1.)
        # Analytical gradient at p (duplicated from likelihood_gradient).
        g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0)
        p[i] += eps
        f1 = likelihood(p, x, y)
        p[i] -= 2 * eps
        f2 = likelihood(p, x, y)
        print(g[i], (f1 - f2) / (2 * eps))
+
+
def infer(x, y):
    """Maximum-likelihood estimate of the rate parameters.

    Minimizes the negated log-likelihood with L-BFGS-B, using the
    analytical gradient and box constraints that keep every coordinate
    strictly positive.
    """
    def objective(p):
        value, grad = likelihood_gradient(p, x, y)
        return -value, -grad

    n_params = x.shape[1]
    start = np.ones(n_params)
    box = [(1e-10, None)] * n_params
    result = minimize(objective, start, jac=True, bounds=box,
                      method="L-BFGS-B")
    return result.x
+
+
def bootstrap(x, y, n_iter=100):
    """Re-estimate the parameters on ``n_iter`` random 90% subsamples.

    Each iteration draws 90% of the observations without replacement and
    runs ``infer`` on them; row ``i`` of the returned
    ``(n_iter, n_params)`` array is that iteration's estimate, giving an
    empirical distribution of the MLE for confidence intervals.
    """
    rval = np.zeros((n_iter, x.shape[1]))
    # BUG FIX: `xrange` is Python 2 only (NameError on Python 3) -> `range`.
    for i in range(n_iter):
        indices = np.random.choice(len(y), replace=False,
                                   size=int(len(y) * .9))
        rval[i] = infer(x[indices], y[indices])
    return rval
+
+
def confidence_interval(counts, bins):
    """Return a central ~95% interval from a histogram ``(counts, bins)``.

    Grows a symmetric window of ``k`` bins around the middle of the
    histogram until it holds more than 95% of the total mass, then
    returns the bin edges bounding that window.  Assumes the histogram
    is centered, as produced by a symmetric binning of the bootstrap
    estimates — TODO confirm against caller.
    """
    # BUG FIX: `len(...) / 2` is a float on Python 3 and a TypeError when
    # used as an index/slice bound; use floor division.
    mid = len(counts) // 2
    total = np.sum(counts)  # hoisted: invariant across iterations
    k = 0
    while np.sum(counts[mid - k:mid + k]) <= .95 * total:
        k += 1
    return bins[len(bins) // 2 - k], bins[len(bins) // 2 + k]
+
+
def build_matrix(cascades, node):
    """Build the (design matrix, outcome vector) for one node.

    Each cascade is a sequence of ``(x, s)`` pairs, where ``x`` is the
    step's feature/state vector and ``s`` flags which nodes are
    susceptible at that step.  For every step ``i >= 1`` where ``node``
    is susceptible, the previous step's vector ``x[i-1]`` becomes one
    training row and ``x[i][node]`` the corresponding outcome.

    Cascades in which the node is never susceptible after step 0
    contribute nothing.  Returns the vertically stacked rows and the
    concatenated outcomes over all remaining cascades.
    """

    def aux(cascade, node):
        # Rows contributed by a single cascade, or None if there are none.
        xlist, slist = zip(*cascade)
        indices = [i for i, s in enumerate(slist) if s[node] and i >= 1]
        if not indices:
            return None
        # BUG FIX: np.vstack requires a sequence, not a generator
        # (generator support was deprecated and later removed in numpy);
        # build an explicit list.
        x = np.vstack([xlist[i - 1] for i in indices])
        y = np.array([xlist[i][node] for i in indices])
        return x, y

    pairs = (aux(cascade, node) for cascade in cascades)
    xs, ys = zip(*(pair for pair in pairs if pair is not None))
    x = np.vstack(xs)
    y = np.concatenate(ys)
    return x, y