import mleNode as mn import numpy as np from numpy.linalg import norm import numpy.random as nr from scipy.optimize import minimize import matplotlib.pyplot as plt import seaborn from random import random, randint seaborn.set_style("white") def simulate_cascade(x, graph): """ Simulate an IC cascade given a graph and initial state. For each time step we yield: - susc: the nodes susceptible at the beginning of this time step - x: the subset of susc who became infected """ susc = np.ones(graph.shape[0], dtype=bool) susc = susc ^ x # t=0, the source is not susceptible yield x, susc while np.any(x): susc = susc ^ x # nodes infected at previous step are now inactive if not np.any(susc): break x = 1 - np.exp(-np.dot(graph.T, x)) y = nr.random(x.shape[0]) x = (x >= y) & susc yield x, susc def uniform_source(graph, *args, **kwargs): x0 = np.zeros(graph.shape[0], dtype=bool) x0[nr.randint(0, graph.shape[0])] = True return x0 def simulate_cascades(n, graph, source=uniform_source): for t in xrange(n): x0 = source(graph, t) yield simulate_cascade(x0, graph) def build_cascade_list(cascades, collapse=False): x, s = [], [] for cascade in cascades: xlist, slist = zip(*cascade) x.append(np.vstack(xlist)) s.append(np.vstack(slist)) if not collapse: return x, s else: return np.vstack(x), np.vstack(s) def cascadeLkl(graph, infect, sus): # There is a problem with the current implementation # Note that you need to take into account the time diff between the label # and the values being conditioned. Note also that the matrix if stacked as # such will require to keep track of the state 0 of each cascade. a = np.dot(infect, graph) return np.log(1. - np.exp(-a[(infect[1:])*sus[1:]])).sum() \ - a[(~infect[1:])*sus].sum() if __name__ == "__main__": g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]]) p = 0.5 g = np.log(1. / (1 - p * g)) cascades = simulate_cascades(100, g) cascade, y_obs = mn.build_matrix(cascades, 0) conf = mn.bootstrap(x, y, n_iter=100) estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1) error.append(mn.confidence_interval(*np.histogram(estimand, bins=50))) plt.semilogx(sizes, error) plt.show()