diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2015-11-30 19:57:58 -0500 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-11-30 19:57:58 -0500 |
| commit | f1762904c648b2089031ba6ce46ccaaac4f3514c (patch) | |
| tree | 78b13559034985d8f2d16314a4fce340f2070aba /simulation/mle.py | |
| parent | 52cf8293061a1e35b5b443ef6dc70aa51727cf00 (diff) | |
| download | cascades-f1762904c648b2089031ba6ce46ccaaac4f3514c.tar.gz | |
Big code cleanup
Diffstat (limited to 'simulation/mle.py')
| -rw-r--r-- | simulation/mle.py | 72 |
1 files changed, 72 insertions, 0 deletions
diff --git a/simulation/mle.py b/simulation/mle.py new file mode 100644 index 0000000..c6b2e85 --- /dev/null +++ b/simulation/mle.py @@ -0,0 +1,72 @@ +import numpy as np +from scipy.optimize import minimize + + +def likelihood(p, x, y): + a = np.dot(x, p) + return np.log(1. - np.exp(-a[y])).sum() - a[~y].sum() + + +def likelihood_gradient(p, x, y): + a = np.dot(x, p) + l = np.log(1. - np.exp(-a[y])).sum() - a[~y].sum() + g1 = 1. / (np.exp(a[y]) - 1.) + g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0) + return l, g + + +def test_gradient(x, y): + eps = 1e-10 + for i in xrange(x.shape[1]): + p = 0.5 * np.ones(x.shape[1]) + a = np.dot(x, p) + g1 = 1. / (np.exp(a[y]) - 1.) + g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0) + p[i] += eps + f1 = likelihood(p, x, y) + p[i] -= 2 * eps + f2 = likelihood(p, x, y) + print(g[i], (f1 - f2) / (2 * eps)) + + +def infer(x, y): + def f(p): + l, g = likelihood_gradient(p, x, y) + return -l, -g + x0 = np.ones(x.shape[1]) + bounds = [(1e-10, None)] * x.shape[1] + return minimize(f, x0, jac=True, bounds=bounds, method="L-BFGS-B").x + + +def bootstrap(x, y, n_iter=100): + rval = np.zeros((n_iter, x.shape[1])) + for i in xrange(n_iter): + indices = np.random.choice(len(y), replace=False, size=int(len(y)*.9)) + rval[i] = infer(x[indices], y[indices]) + return rval + + +def confidence_interval(counts, bins): + k = 0 + while np.sum(counts[len(counts)/2-k:len(counts)/2+k]) <= .95*np.sum(counts): + k += 1 + return bins[len(bins)/2-k], bins[len(bins)/2+k] + + +def build_matrix(cascades, node): + + def aux(cascade, node): + xlist, slist = zip(*cascade) + indices = [i for i, s in enumerate(slist) if s[node] and i >= 1] + if indices: + x = np.vstack(xlist[i-1] for i in indices) + y = np.array([xlist[i][node] for i in indices]) + return x, y + else: + return None + + pairs = (aux(cascade, node) for cascade in cascades) + xs, ys = zip(*(pair for pair in pairs if pair)) + x = np.vstack(xs) + y = np.concatenate(ys) + return x, y |
