1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
from cPickle import load
from math import exp, sin
from collections import Counter
from csv import reader, writer
from data import parse
import sys
import networkx as nx
import numpy as np
def get_fatals():
    """Collect the CSV rows whose column 7 parses to a truthy value.

    Reads the file named by ``sys.argv[1]``, discards its first line and
    applies ``parse`` to column index 7 of every remaining row.

    Returns:
        A list of ``(row_number, parsed_value)`` pairs.  ``row_number`` counts
        the rows after the discarded first line, starting at 1; only rows
        whose parsed value is truthy are included.
    """
    with open(sys.argv[1]) as csv_file:
        csv_file.readline()  # drop the first line (presumably a header row)
        parsed_by_row = {}
        for row_number, row in enumerate(reader(csv_file), 1):
            parsed_by_row[row_number] = parse(row[7])
    # Second pass keeps only the rows for which parse() produced something.
    kept = dict((number, value)
                for number, value in parsed_by_row.items() if value)
    return list(kept.items())
def cause(lamb, alpha, mu):
    """Classify every event as root or infection and build the cascade graph.

    Iterates over the module-level ``event_edges`` mapping, whose keys are
    ``(n1, t1)`` event tuples and whose values are collections of
    ``(n2, t2, d)`` candidate parents.  For each event:

    * with no candidate parents it is counted as a "droot";
    * otherwise the background rate
      ``lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))`` is compared with the
      sum of the parent rates ``alpha / d * mu * exp(-mu * (t1 - t2))``.
      A strictly larger background rate makes the event a "root"; otherwise
      it is an "infection" and an edge from the highest-rate parent to the
      event is added to the graph with ``weight`` set to that parent's ``d``.

    Events found in the result of ``get_fatals()`` are additionally counted
    in the matching ``fatal_*`` counter.

    Args:
        lamb: Scale factor of the background rate.
        alpha: Scale factor of each parent's rate.
        mu: Decay constant applied to the time gap ``t1 - t2``.

    Returns:
        Tuple ``(droots, roots, infections, fatal_droots, fatal_roots,
        fatal_infections, G)`` where ``G`` is the ``networkx.DiGraph`` holding
        every event as a node and one edge per infection.
    """
    G = nx.DiGraph()
    droots = roots = infections = 0
    fatal_droots = fatal_roots = fatal_infections = 0

    # get_fatals() returns a list; testing membership against it for every
    # event is a linear scan each time.  Build a hash set once when possible.
    # NOTE(review): this compares event keys with the (row_number,
    # parsed_value) pairs produced by get_fatals() -- confirm both use the
    # same representation.
    fatals = get_fatals()
    try:
        fatals = frozenset(fatals)
    except TypeError:
        # Unhashable entries: keep the original list so behaviour is unchanged.
        pass

    for (n1, t1), s in event_edges.items():
        event = (n1, t1)
        G.add_node(event)
        is_fatal = event in fatals

        if not s:
            # No candidate parents at all.
            droots += 1
            if is_fatal:
                fatal_droots += 1
            continue

        # Presumably a yearly seasonal modulation (0.0172 is close to
        # 2*pi/365) -- TODO confirm the meaning of these constants.
        background_rate = lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))

        # Strongest parent first; the sort is stable, so ties keep the order
        # in which the candidates appear in ``s``.
        parents = sorted(
            [(n2, t2, alpha / d * mu * exp(-mu * (t1 - t2)), d)
             for (n2, t2, d) in s],
            key=lambda parent: parent[2], reverse=True)
        parent_rate = sum(parent[2] for parent in parents)

        if background_rate > parent_rate:
            roots += 1
            if is_fatal:
                fatal_roots += 1
        else:
            strongest = parents[0]
            G.add_edge(tuple(strongest[:2]), event, weight=strongest[3])
            infections += 1
            if is_fatal:
                fatal_infections += 1

    return (droots, roots, infections, fatal_droots,
            fatal_roots, fatal_infections, G)
def _summary_stats(values):
    """Return ``(min, max, mean, median)`` of *values*, or "n/a" x4 if empty."""
    if not values:
        # np.min / np.max raise ValueError on an empty sequence.
        return ("n/a",) * 4
    return (np.min(values), np.max(values),
            np.mean(values), np.median(values))


def analyze_graph(G):
    """Print summary statistics of the cascade graph *G*.

    Three groups of statistics (min, max, mean, median) are printed:

    * sizes of the weakly connected components ("cascades");
    * per-edge "times": second element of the target node tuple minus the
      second element of the source node tuple;
    * per-edge "distances": the ``"weight"`` attribute of each edge.

    An empty group prints ``n/a`` for its statistics instead of raising.

    Args:
        G: A ``networkx`` directed graph whose nodes are tuples with a numeric
            second element and whose edges carry a ``"weight"`` attribute.
    """
    csizes = [len(c) for c in nx.weakly_connected_components(G)]
    print("cascades: {0}, min: {1}, max: {2}, mean: {3}, median: {4}".format(
        len(csizes), *_summary_stats(csizes)))

    # G.edges(data=True) is available in both NetworkX 1.x and 2.x, whereas
    # edges_iter() was removed in 2.0.
    edges = list(G.edges(data=True))
    print("edges: {0}".format(len(edges)))

    times = [target[1] - source[1] for (source, target, _) in edges]
    distances = [attrs["weight"] for (_, _, attrs) in edges]
    print("times, min: {0}, max: {1}, mean: {2}, median: {3}".format(
        *_summary_stats(times)))
    print("distances, min: {0}, max: {1}, mean: {2}, median: {3}".format(
        *_summary_stats(distances)))
if __name__ == "__main__":
    # Load the precomputed data.  The file is opened in a ``with`` block so
    # the handle is closed as soon as unpickling finishes.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open("data-dist1.pickle", "rb") as pickle_file:
        nodes, edges, events, event_edges = load(pickle_file)
    # ``event_edges`` must stay a module-level name: cause() reads it as a
    # global rather than receiving it as an argument.

    lamb, alpha, mu = 1.86602117779e-05, 0.0433473674726, 0.00109325510695
    # Alternative parameter set (disabled):
    # lamb, alpha, mu = 1.87717287808e-05, 5.12006113875e+14, 4.20918377797e-20

    (droots, roots, infections, fatal_droots,
     fatal_roots, fatal_infections, G) = cause(lamb, alpha, mu)

    r = ("events: {0}, droots: {1}, roots: {2}, infections: {3}, "
         "fatal_droots: {4}, fatal_roots: {5}, "
         "fatal_infections: {6}").format(len(event_edges), droots, roots,
                                         infections, fatal_droots,
                                         fatal_roots, fatal_infections)
    print(r)
    analyze_graph(G)
|