"""Classify events as background or triggered and summarise the cascades.

Usage: the script takes one command-line argument, the path of a CSV file
read by ``get_fatals``, and expects ``data-all.pickle`` in the working
directory. It prints summary statistics and writes ``components_dist.csv``
and ``edges.csv``.
"""
try:
    from cPickle import load
except ImportError:  # Python 3 has no cPickle module; pickle provides load()
    from pickle import load
from collections import Counter
from csv import reader, writer
from math import exp, sin
import sys

import networkx as nx
import numpy as np

from data import parse


def get_fatals():
    """Read the CSV named by ``sys.argv[1]`` and return its flagged records.

    The first line of the file is skipped. Every following row is numbered
    from 1 in file order and its column at index 7 is passed to ``parse``.

    Returns:
        A list of ``(row_number, parsed_value)`` pairs, restricted to the rows
        whose parsed value is truthy.
    """
    with open(sys.argv[1]) as fh:
        fh.readline()  # skip the header line
        numbered = ((i + 1, parse(row[7])) for i, row in enumerate(reader(fh)))
        # Materialise inside the ``with`` so the file is still open while the
        # generator is consumed.
        return [(number, value) for number, value in numbered if value]


def cause(lamb, alpha, mu):
    """Attribute every event to the background or to its strongest parent.

    Iterates over the module-level ``event_edges`` mapping (assigned in the
    ``__main__`` section), whose keys are ``(n1, t1)`` pairs and whose values
    are collections of ``(n2, t2, d)`` candidate parents.

    For an event with candidates the background rate is
    ``lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))`` and each candidate
    contributes ``alpha / d ** 2 * mu * exp(-mu * (t1 - t2))``. When the
    background rate exceeds the *sum* of the candidate rates the event is
    counted as a root; otherwise it is counted as an infection and an edge is
    added from the candidate with the largest individual rate.

    Args:
        lamb: scale of the background rate.
        alpha: scale of each candidate parent's rate.
        mu: decay constant of each candidate parent's rate.

    Returns:
        ``(droots, roots, infections, fatal_droots, fatal_roots,
        fatal_infections, G)`` where ``droots`` counts events with no
        candidate parents, the ``fatal_*`` values count the events of each
        class that also appear in ``get_fatals()``, and ``G`` is a
        ``networkx.DiGraph`` holding every event as a node and one edge
        (with ``weight`` set to the candidate's ``d``) per infection.
    """
    G = nx.DiGraph()
    droots = roots = infections = 0
    fatal_droots = fatal_roots = fatal_infections = 0
    # Set membership replaces a linear scan of the list for every event.
    # Assumes parse() returns hashable values (event keys are dict keys, so
    # anything that can compare equal to them should be) -- TODO confirm.
    fatals = frozenset(get_fatals())

    for (n1, t1), s in event_edges.items():
        event = (n1, t1)
        is_fatal = event in fatals
        G.add_node(event)

        if not s:
            # No candidate parents at all.
            droots += 1
            if is_fatal:
                fatal_droots += 1
            continue

        background_rate = lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))
        # Strongest candidate first; the stable sort keeps the input order
        # among candidates with equal rates.
        parents = sorted(
            [(n2, t2, alpha / d ** 2 * mu * exp(-mu * (t1 - t2)), d)
             for (n2, t2, d) in s],
            key=lambda parent: parent[2],
            reverse=True)
        parent_rate = sum(parent[2] for parent in parents)

        if background_rate > parent_rate:
            roots += 1
            if is_fatal:
                fatal_roots += 1
        else:
            strongest = parents[0]
            G.add_edge(strongest[:2], event, weight=strongest[3])
            infections += 1
            if is_fatal:
                fatal_infections += 1

    return (droots, roots, infections,
            fatal_droots, fatal_roots, fatal_infections, G)


def analyze_graph(G):
    """Print summary statistics of ``G`` and dump two CSV files.

    Writes ``components_dist.csv`` with ``(component_size, count)`` rows for
    the weakly connected components of ``G`` (most common size first), and
    ``edges.csv`` with one ``(n_src, t_src, n_dst, t_dst, weight)`` row per
    edge. Prints the number of components and edges together with
    min/max/mean/median of the component sizes, of the per-edge time
    differences (destination time minus source time) and of the edge weights.

    Args:
        G: directed graph whose nodes are ``(n, t)`` pairs and whose edges
            carry a ``weight`` attribute.
    """
    csizes = [len(c) for c in nx.weakly_connected_components(G)]
    print("cascades: {0}, min: {1}, max: {2}, mean: {3}, median: {4}".format(
        len(csizes), np.min(csizes), np.max(csizes), np.mean(csizes),
        np.median(csizes)))

    with open("components_dist.csv", "w") as fh:
        writer(fh).writerows(Counter(csizes).most_common())

    # G.edges(data=True) is available in both networkx 1.x and 2.x, whereas
    # edges_iter() was removed in 2.0.
    edges = list(G.edges(data=True))
    print("edges: {0}".format(len(edges)))

    times = [dst[1] - src[1] for src, dst, _ in edges]
    distances = [attrs["weight"] for _, _, attrs in edges]
    print("times, min: {0}, max: {1}, mean: {2}, median: {3}".format(
        np.min(times), np.max(times), np.mean(times), np.median(times)))
    print("distances, min: {0}, max: {1}, mean: {2}, median: {3}".format(
        np.min(distances), np.max(distances), np.mean(distances),
        np.median(distances)))

    with open("edges.csv", "w") as fh:
        writer(fh).writerows(
            (src[0], src[1], dst[0], dst[1], attrs["weight"])
            for src, dst, attrs in edges)


if __name__ == "__main__":
    # These names must stay module-level: cause() reads ``event_edges`` as a
    # global.
    # NOTE(review): unpickling can execute arbitrary code -- only run this on
    # a data file from a trusted source.
    with open("data-all.pickle", "rb") as fh:
        nodes, edges, events, event_edges = load(fh)

    lamb, alpha, mu = 1.18909761267e-05, 0.00781529533133, 0.00373882477787
    # NOTE(review): the labels below read mu/alpha/beta while the values are
    # lamb/alpha/mu -- confirm the labelling is intentional.
    print("mu: {0}, alpha: {1}, beta: {2}".format(lamb, alpha, mu))

    (droots, roots, infections,
     fatal_droots, fatal_roots, fatal_infections, G) = cause(lamb, alpha, mu)
    summary = ("events: {0}, droots: {1}, roots: {2}, infections: {3}, "
               "fatal_droots: {4}, fatal_roots: {5}, "
               "fatal_infections: {6}").format(
                   len(event_edges), droots, roots, infections,
                   fatal_droots, fatal_roots, fatal_infections)
    print(summary)
    analyze_graph(G)