from cPickle import load from math import exp, sin from collections import Counter from csv import reader, writer from data import parse import sys import networkx as nx import numpy as np def get_fatals(): with open(sys.argv[1]) as fh: fh.readline() r = reader(fh) d = {i + 1: parse(row[7]) for (i, row) in enumerate(r)} d = {k: v for k, v in d.iteritems() if v} return d.items() def cause(lamb, alpha, mu): G = nx.DiGraph() roots, droots, infections = 0, 0, 0 fatal_droots, fatal_infections, fatal_roots = 0, 0, 0 fatals = get_fatals() for ((n1, t1), s) in event_edges.iteritems(): G.add_node((n1, t1)) if not s: droots += 1 if (n1, t1) in fatals: fatal_droots += 1 continue background_rate = lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36)) parents = sorted([(n2, t2, alpha / d * mu * exp(-mu * (t1 - t2)), d) for (n2, t2, d) in s], reverse=True, key=lambda x: x[2]) parent_rate = sum(e[2] for e in parents) # if sum(e[2] for e in prl[:1]) > br: # G.add_edge((n1, t1), tuple(prl[0][:2])) if background_rate > parent_rate: roots += 1 if (n1, t1) in fatals: fatal_roots += 1 else: G.add_edge(tuple(parents[0][:2]), (n1, t1), weight=parents[0][3]) # l.append(prl[0][2] / br) infections += 1 if (n1, t1) in fatals: fatal_infections += 1 return (droots, roots, infections, fatal_droots, fatal_roots, fatal_infections, G) def analyze_graph(G): csizes = [len(c) for c in nx.weakly_connected_components(G)] print "cascades: {0}, min: {1}, max: {2}, mean: {3}, median: {4}".format( len(csizes), np.min(csizes), np.max(csizes), np.mean(csizes), np.median(csizes)) # counts = Counter(l) # w = writer(open("components_dist.csv", "w")) # w.writerows(counts.most_common()) edges = list(G.edges_iter(data=True)) print "edges: {0}".format(len(edges)) times = [e[1][1] - e[0][1] for e in edges] distances = [e[2]["weight"] for e in edges] print "times, min: {0}, max: {1}, mean: {2}, median: {3}".format( np.min(times), np.max(times), np.mean(times), np.median(times)) print "distances, min: {0}, max: {1}, mean: {2}, median: {3}".format( np.min(distances), np.max(distances), np.mean(distances), np.median(distances)) # e = writer(open("edges.csv", "w")) # e.writerows(edges) if __name__ == "__main__": nodes, edges, events, event_edges = load(open("data-dist1.pickle", "rb")) lamb, alpha, mu = 1.86602117779e-05, 0.0433473674726, 0.00109325510695 # lamb, alpha, mu = 1.87717287808e-05, 5.12006113875e+14, 4.20918377797e-20 (droots, roots, infections, fatal_droots, fatal_roots, fatal_infections, G) = cause(lamb, alpha, mu) r = "events: {0}, droots: {1}, roots: {2}, infections: {3}, "\ "fatal_droots: {4}, fatal_roots: {5}, "\ "fatal_infections: {6}".format(len(event_edges), droots, roots, infections, fatal_droots, fatal_roots, fatal_infections) print r analyze_graph(G)