"""Maximum-likelihood estimation of edge weights in an independent-cascade
(IC) diffusion model from simulated cascades."""
import numpy as np
from numpy.linalg import norm
import numpy.random as nr
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import seaborn
from random import random, randint

seaborn.set_style("white")


def likelihood(p, x, y):
    # Log-likelihood of the incoming edge weights p for one node: each row of
    # x is the infection state at the previous step, y says whether the node
    # became infected, and the infection probability is 1 - exp(-<x, p>).
    a = np.dot(x, p)
    return np.log(1. - np.exp(-a[y])).sum() - a[~y].sum()


def likelihood_gradient(p, x, y):
    # Log-likelihood together with its gradient with respect to p.
    a = np.dot(x, p)
    l = np.log(1. - np.exp(-a[y])).sum() - a[~y].sum()
    g1 = 1. / (np.exp(a[y]) - 1.)
    g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0)
    return l, g


def test_gradient(x, y):
    # Compare the analytic gradient with a central finite difference,
    # coordinate by coordinate, at p = 0.5.
    eps = 1e-10
    for i in range(x.shape[1]):
        p = 0.5 * np.ones(x.shape[1])
        a = np.dot(x, p)
        g1 = 1. / (np.exp(a[y]) - 1.)
        g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0)
        p[i] += eps
        f1 = likelihood(p, x, y)
        p[i] -= 2 * eps
        f2 = likelihood(p, x, y)
        print(g[i], (f1 - f2) / (2 * eps))


def infer(x, y):
    # Maximum-likelihood estimate of the incoming edge weights, constrained
    # to be (essentially) non-negative.
    def f(p):
        l, g = likelihood_gradient(p, x, y)
        return -l, -g
    x0 = np.ones(x.shape[1])
    bounds = [(1e-10, None)] * x.shape[1]
    return minimize(f, x0, jac=True, bounds=bounds, method="L-BFGS-B").x


def bootstrap(x, y, n_iter=100):
    # Re-estimate the weights on random 90% subsamples of the observations.
    rval = np.zeros((n_iter, x.shape[1]))
    for i in range(n_iter):
        indices = np.random.choice(len(y), replace=False, size=int(len(y) * .9))
        rval[i] = infer(x[indices], y[indices])
    return rval


def confidence_interval(counts, bins):
    # Smallest interval, symmetric around the middle bin, that contains at
    # least 95% of the histogram mass.
    k = 0
    while np.sum(counts[len(counts) // 2 - k:len(counts) // 2 + k]) <= .95 * np.sum(counts):
        k += 1
    return bins[len(bins) // 2 - k], bins[len(bins) // 2 + k]


def build_matrix(cascades, node):
    # Stack, over all cascades, the (previous state, did `node` get infected)
    # observations for the steps where `node` was still susceptible.
    def aux(cascade, node):
        xlist, slist = zip(*cascade)
        indices = [i for i, s in enumerate(slist) if s[node] and i >= 1]
        if indices:
            x = np.vstack([xlist[i - 1] for i in indices])
            y = np.array([xlist[i][node] for i in indices])
            return x, y
        else:
            return None
    pairs = (aux(cascade, node) for cascade in cascades)
    xs, ys = zip(*(pair for pair in pairs if pair is not None))
    x = np.vstack(xs)
    y = np.concatenate(ys)
    return x, y


def build_cascade_list(cascades):
    # Split each cascade into its list of infection states and susceptibility
    # masks.
    x, s = [], []
    for cascade in cascades:
        xlist, slist = zip(*cascade)
        x.append(xlist)
        s.append(slist)
    return x, s


def simulate_cascade(x, graph):
    """
    Simulate an IC cascade given a graph and an initial infection state.

    For each time step we yield (x, susc):
    - x: the subset of susc that became infected at this step
    - susc: the nodes susceptible at the beginning of this time step
    """
    susc = np.ones(graph.shape[0], dtype=bool)  # t=0, everyone is susceptible
    yield x, susc
    while np.any(x):
        susc = susc ^ x  # nodes infected at previous step are now inactive
        if not np.any(susc):
            break
        x = 1 - np.exp(-np.dot(graph.T, x))
        y = nr.random(x.shape[0])
        x = (x >= y) & susc
        yield x, susc


def uniform_source(graph):
    # Seed a single source node chosen uniformly at random.
    x0 = np.zeros(graph.shape[0], dtype=bool)
    x0[nr.randint(0, graph.shape[0])] = True
    return x0


def simulate_cascades(n, graph, source=uniform_source):
    # Generate n cascades, each starting from an independently drawn source.
    for _ in range(n):
        x0 = source(graph)
        yield simulate_cascade(x0, graph)
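

# Illustrative usage sketch, not part of the original script: run the
# finite-difference check in test_gradient() on a small synthetic problem to
# show the expected input format (a float design matrix and a boolean outcome
# vector).  The sizes and the seed below are arbitrary choices.
def _demo_gradient_check(n_samples=50, n_nodes=4, seed=0):
    rng = np.random.RandomState(seed)
    x = rng.rand(n_samples, n_nodes)  # stand-in "previous state" features
    y = rng.rand(n_samples) < 0.5     # stand-in infection outcomes
    test_gradient(x, y)               # prints analytic vs. numerical gradient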


if __name__ == "__main__":
    # g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]])
    g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
    # Convert the IC infection probabilities p * g into the exponential
    # parametrization used by the simulator and the likelihood.
    p = 0.5
    g = np.log(1. / (1 - p * g))
    # error = []

    def source(graph, t):
        # Seed node 0 or 1 at random; with probability 1 - t also seed the
        # other one, so larger t means more single-source cascades.
        x0 = np.zeros(graph.shape[0], dtype=bool)
        a = randint(0, 1)
        x0[a] = True
        if random() > t:
            x0[1 - a] = True
        return x0

    thresh = np.arange(0., 1.1, step=0.2)
    sizes = np.arange(10, 100, step=10)
    nsimul = 10
    r = np.zeros((len(sizes), len(thresh)))
    for k, t in enumerate(thresh):
        for _ in range(nsimul):
            # Materialize the cascades: they are generators and would
            # otherwise be consumed after the first pass.
            cascades = [list(c) for c in simulate_cascades(
                np.max(sizes), g, source=lambda graph: source(graph, t))]
            x, y = build_matrix(cascades, 2)
            for j, s in enumerate(sizes):
                e = infer(x[:s], y[:s])
                # Average (over simulations) estimation error for the weights
                # into node 2, as a function of the number of observations.
                r[j, k] += norm(e - g[:, 2]) / nsimul
    for k, t in enumerate(thresh):
        plt.plot(sizes, r[:, k], label=str(t))
    plt.legend()
    plt.show()

    # conf = bootstrap(x, y, n_iter=100)
    # estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1)
    # error.append(confidence_interval(*np.histogram(estimand, bins=50)))
    # plt.semilogx(sizes, error)
    # plt.show()
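

# Illustrative sketch, not from the original script: one way the bootstrap()
# and confidence_interval() helpers could be combined, in the spirit of the
# commented-out code above, to put an interval around the estimation error of
# the weights into one node.  The default node index and bin count are
# arbitrary choices.
def bootstrap_error_interval(cascades, graph, node=2, n_iter=100, bins=50):
    x, y = build_matrix(cascades, node)
    estimates = bootstrap(x, y, n_iter=n_iter)          # one row per resample
    errors = norm(estimates - graph[:, node], axis=1)   # L2 error per resample
    counts, edges = np.histogram(errors, bins=bins)
    return confidence_interval(counts, edges)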