diff options
Diffstat (limited to 'simulation')
| -rw-r--r-- | simulation/main.py | 87 |
1 files changed, 70 insertions, 17 deletions
diff --git a/simulation/main.py b/simulation/main.py index b7b3eaa..8ef4fd1 100644 --- a/simulation/main.py +++ b/simulation/main.py @@ -1,23 +1,59 @@ import numpy as np import numpy.random as nr +from scipy.optimize import minimize +import matplotlib.pyplot as plt +import seaborn +seaborn.set_style("white") -def likelihood(p, cascades, node): - x, y = build_matrix(cascades, node) - a = 1 - np.exp(-np.dot(x, p)) - return np.inner(y, np.log(a)) + np.inner((1 - y), np.log(1-a)) + +def likelihood(p, x, y): + a = np.dot(x, p) + return np.log(1. - np.exp(-a[y])).sum() - a[~y].sum() + + +def likelihood_gradient(p, x, y): + a = np.dot(x, p) + l = np.log(1. - np.exp(-a[y])).sum() - a[~y].sum() + g1 = 1. / (np.exp(a[y]) - 1.) + g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0) + return l, g + + +def test_gradient(x, y): + eps = 1e-10 + for i in xrange(x.shape[1]): + p = 0.5 * np.ones(x.shape[1]) + a = np.dot(x, p) + g1 = 1. / (np.exp(a[y]) - 1.) + g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0) + p[i] += eps + f1 = likelihood(p, x, y) + p[i] -= 2 * eps + f2 = likelihood(p, x, y) + print g[i], (f1 - f2) / (2 * eps) + + +def infer(x, y): + def f(p): + l, g = likelihood_gradient(p, x, y) + return -l, -g + x0 = np.ones(x.shape[1]) + bounds = [(0, None)] * x.shape[1] + return minimize(f, x0, jac=True, bounds=bounds, method="L-BFGS-B").x def build_matrix(cascades, node): def aux(cascade, node): - try: - m = np.vstack(x for x, s in cascade if s[node]) - except ValueError: + xlist, slist = zip(*cascade) + indices = [i for i, s in enumerate(slist) if s[node] and i >= 1] + if indices: + x = np.vstack(xlist[i-1] for i in indices) + y = np.array([xlist[i][node] for i in indices]) + return x, y + else: return None - x = m[:-1, :] - y = m[1:, node] - return x, y pairs = (aux(cascade, node) for cascade in cascades) xs, ys = zip(*(pair for pair in pairs if pair)) @@ -27,26 +63,43 @@ def build_matrix(cascades, node): def simulate_cascade(x, graph): - susc = x ^ np.ones(graph.shape[0]).astype(bool) + """ + Simulate an IC cascade given a graph and initial state. + + For each time step we yield: + - susc: the nodes susceptible at the beginning of this time step + - x: the subset of susc who became infected + """ + susc = np.ones(graph.shape[0], dtype=bool) # t=0, everyone is susceptible yield x, susc while np.any(x): + susc = susc ^ x # nodes infected at previous step are now inactive + if not np.any(susc): + break x = 1 - np.exp(-np.dot(graph.T, x)) y = nr.random(x.shape[0]) x = (x >= y) & susc yield x, susc - susc ^= x def simulate_cascades(n, graph): for _ in xrange(n): - x = np.zeros(g.shape[0], dtype=bool) - x[nr.randint(0, g.shape[0])] = True - yield simulate_cascade(x, graph) + x0 = np.zeros(g.shape[0], dtype=bool) + x0[nr.randint(0, g.shape[0])] = True + yield simulate_cascade(x0, graph) if __name__ == "__main__": g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]]) p = 0.5 g = np.log(1. / (1 - p * g)) - cascades = simulate_cascades(10, g) - print likelihood(p * np.ones(g.shape[0]), cascades, 3) + sizes = [100, 500, 1000, 5000, 10000, 50000, 100000, 1000000] + error = [] + for i in sizes: + cascades = simulate_cascades(i, g) + x, y = build_matrix(cascades, 0) + r = infer(x, y) + r[0] = 0. + error.append(np.linalg.norm(r - g[0])) + plt.plot(sizes, error) + plt.show() |
