From eab2cc05424475a1b14bf950decde1bae8d8cc9a Mon Sep 17 00:00:00 2001 From: jeanpouget-abadie Date: Wed, 11 Nov 2015 13:39:53 -0500 Subject: moving node-specific methods to new file, adding cascade-level log-likelihood, committing active learning ipython notebook --- simulation/main.py | 109 ++++++++++++----------------------------------------- 1 file changed, 25 insertions(+), 84 deletions(-) (limited to 'simulation/main.py') diff --git a/simulation/main.py b/simulation/main.py index 3e3de4b..3e458a7 100644 --- a/simulation/main.py +++ b/simulation/main.py @@ -1,3 +1,5 @@ +import mleNode as mn + import numpy as np from numpy.linalg import norm import numpy.random as nr @@ -9,85 +11,6 @@ from random import random, randint seaborn.set_style("white") -def likelihood(p, x, y): - a = np.dot(x, p) - return np.log(1. - np.exp(-a[y])).sum() - a[~y].sum() - - -def likelihood_gradient(p, x, y): - a = np.dot(x, p) - l = np.log(1. - np.exp(-a[y])).sum() - a[~y].sum() - g1 = 1. / (np.exp(a[y]) - 1.) - g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0) - return l, g - - -def test_gradient(x, y): - eps = 1e-10 - for i in xrange(x.shape[1]): - p = 0.5 * np.ones(x.shape[1]) - a = np.dot(x, p) - g1 = 1. / (np.exp(a[y]) - 1.) - g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0) - p[i] += eps - f1 = likelihood(p, x, y) - p[i] -= 2 * eps - f2 = likelihood(p, x, y) - print g[i], (f1 - f2) / (2 * eps) - - -def infer(x, y): - def f(p): - l, g = likelihood_gradient(p, x, y) - return -l, -g - x0 = np.ones(x.shape[1]) - bounds = [(1e-10, None)] * x.shape[1] - return minimize(f, x0, jac=True, bounds=bounds, method="L-BFGS-B").x - - -def bootstrap(x, y, n_iter=100): - rval = np.zeros((n_iter, x.shape[1])) - for i in xrange(n_iter): - indices = np.random.choice(len(y), replace=False, size=int(len(y)*.9)) - rval[i] = infer(x[indices], y[indices]) - return rval - - -def confidence_interval(counts, bins): - k = 0 - while np.sum(counts[len(counts)/2-k:len(counts)/2+k]) <= .95*np.sum(counts): - k += 1 - return bins[len(bins)/2-k], bins[len(bins)/2+k] - - -def build_matrix(cascades, node): - - def aux(cascade, node): - xlist, slist = zip(*cascade) - indices = [i for i, s in enumerate(slist) if s[node] and i >= 1] - if indices: - x = np.vstack(xlist[i-1] for i in indices) - y = np.array([xlist[i][node] for i in indices]) - return x, y - else: - return None - - pairs = (aux(cascade, node) for cascade in cascades) - xs, ys = zip(*(pair for pair in pairs if pair)) - x = np.vstack(xs) - y = np.concatenate(ys) - return x, y - - -def build_cascade_list(cascades): - x, s = [], [] - for cascade in cascades: - xlist, slist = zip(*cascade) - x.append(xlist) - s.append(slist) - return x, s - - def simulate_cascade(x, graph): """ Simulate an IC cascade given a graph and initial state. @@ -120,6 +43,24 @@ def simulate_cascades(n, graph, source=uniform_source): yield simulate_cascade(x0, graph) +def build_cascade_list(cascades, collapse=False): + x, s = [], [] + for cascade in cascades: + xlist, slist = zip(*cascade) + x.append(np.vstack(xlist)) + s.append(np.vstack(slist)) + if not collapse: + return x, s + else: + return np.vstack(x), np.vstack(s) + + +def cascadeLkl(graph, infect, sus): + # There is a problem with the current implementation + a = np.dot(infect, graph) + return np.log(1. - np.exp(-a[(infect)*sus])).sum() - a[(~infect)*sus].sum() + + if __name__ == "__main__": # g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]]) g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]]) @@ -145,17 +86,17 @@ if __name__ == "__main__": source=lambda graph: source(graph, t)) e = np.zeros(g.shape[0]) for j, s in enumerate(sizes): - x, y = build_matrix(cascades, 2) - e += infer(x[:s], y[:s]) + x, y = mn.build_matrix(cascades, 2) + e += mn.infer(x[:s], y[:s]) for i, t in enumerate(thresh): - plt.plot(sizes, m[:, i], label=str(t)) + plt.plot(sizes, e[:, i], label=str(t)) plt.legend() plt.show() - # conf = bootstrap(x, y, n_iter=100) + # conf = mn.bootstrap(x, y, n_iter=100) # estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1) - # error.append(confidence_interval(*np.histogram(estimand, bins=50))) + # error.append(mn.confidence_interval(*np.histogram(estimand, bins=50))) # plt.semilogx(sizes, error) # plt.show() -- cgit v1.2.3-70-g09d2