diff options
| -rw-r--r-- | simulation/main.py | 29 |
1 files changed, 22 insertions, 7 deletions
diff --git a/simulation/main.py b/simulation/main.py
index 8ef4fd1..7aa107f 100644
--- a/simulation/main.py
+++ b/simulation/main.py
@@ -43,6 +43,21 @@ def infer(x, y):
     return minimize(f, x0, jac=True, bounds=bounds, method="L-BFGS-B").x
 
 
+def bootstrap(x, y, n_iter=100):
+    rval = np.zeros((n_iter, x.shape[1]))
+    for i in xrange(n_iter):
+        indices = np.random.choice(len(y), replace=False, size=int(len(y)*.9))
+        rval[i] = infer(x[indices], y[indices])
+    return rval
+
+
+def confidence_interval(counts, bins):
+    k = 0
+    while np.sum(counts[len(counts)/2-k:len(counts)/2+k]) <= .95*np.sum(counts):
+        k += 1
+    return bins[len(bins)/2-k], bins[len(bins)/2+k]
+
+
 def build_matrix(cascades, node):
 
     def aux(cascade, node):
@@ -84,8 +99,8 @@ def simulate_cascade(x, graph):
 
 def simulate_cascades(n, graph):
     for _ in xrange(n):
-        x0 = np.zeros(g.shape[0], dtype=bool)
-        x0[nr.randint(0, g.shape[0])] = True
+        x0 = np.zeros(graph.shape[0], dtype=bool)
+        x0[nr.randint(0, graph.shape[0])] = True
         yield simulate_cascade(x0, graph)
 
 
@@ -93,13 +108,13 @@ if __name__ == "__main__":
     g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]])
     p = 0.5
     g = np.log(1. / (1 - p * g))
-    sizes = [100, 500, 1000, 5000, 10000, 50000, 100000, 1000000]
+    sizes = [100, 500, 1000, 5000, 10000]
     error = []
     for i in sizes:
         cascades = simulate_cascades(i, g)
         x, y = build_matrix(cascades, 0)
-        r = infer(x, y)
-        r[0] = 0.
-        error.append(np.linalg.norm(r - g[0]))
-    plt.plot(sizes, error)
+        conf = bootstrap(x, y, n_iter=100)
+        estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1)
+        error.append(confidence_interval(*np.histogram(estimand, bins=50)))
+    plt.semilogx(sizes, error)
     plt.show()
