diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2015-09-13 15:40:02 -0400 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-09-13 15:40:13 -0400 |
| commit | fffb68b515a5cae198d8b756b562ddea6f2c814c (patch) | |
| tree | 0ecd1c27d5f52c7e27bbf36b39491c984274a06a | |
| parent | 754bc37b60d5efbcf0ddad7b991f6effafe2237d (diff) | |
| download | criminal_cascades-fffb68b515a5cae198d8b756b562ddea6f2c814c.tar.gz | |
Digging into the data
| -rw-r--r-- | hawkes/cause.py | 83 | ||||
| -rw-r--r-- | hawkes/main.py | 4 | ||||
| -rw-r--r-- | hawkes/sanity.py | 3 |
3 files changed, 71 insertions, 19 deletions
```diff
diff --git a/hawkes/cause.py b/hawkes/cause.py
index 0cce9d2..fddbfa9 100644
--- a/hawkes/cause.py
+++ b/hawkes/cause.py
@@ -1,24 +1,79 @@
 from cPickle import load
-from math import exp
+from math import exp, sin
+from csv import reader
+from data2 import parse
+import sys
+import networkx as nx
+import matplotlib.pyplot as plt
 
 
-def main(a):
-    lamb, alpha, mu = a
-    dr = 0
-    r = 0
+def fatal():
+    with open(sys.argv[1]) as fh:
+        fh.readline()
+        r = reader(fh)
+        d = {i + 1: parse(row[7]) for (i, row) in enumerate(r)}
+    d = {k: v for k, v in d.iteritems() if v}
+    return d
+
+
+def main(lamb, alpha, mu):
+    G = nx.DiGraph()
+    r, dr, i = 0, 0, 0
+    drf, iff, rf = 0, 0, 0
+    dnf, rnf, inf = 0, 0, 0
+    si = 0
+    f = fatal().items()
+    l = []
     for ((n1, t1), s) in event_edges.iteritems():
+        G.add_node((n1, t1))
         if not s:
             dr += 1
-        if lamb > sum(alpha * w * mu * exp(-mu * (t1 - t2))
-                      for (n2, t2, w) in s):
+            if (n1, t1) in f:
+                drf += 1
+            else:
+                dnf += 1
+            continue
+        br = lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))
+        prl = sorted([(n2, t2, alpha / d * mu * exp(-mu * (t1 - t2)))
+                      for (n2, t2, d) in s], reverse=True)
+        pr = sum(e[2] for e in prl)
+        #if sum(e[2] for e in prl[:1]) > br:
+        #    G.add_edge((n1, t1), tuple(prl[0][:2]))
+        if br > pr:
             r += 1
-    return lamb, alpha, mu, dr, r
+            if (n1, t1) in f:
+                rf += 1
+            else:
+                rnf += 1
+        else:
+            G.add_edge((n1, t1), tuple(prl[0][:2]))
+            l.append(prl[0][2] / br)
+            i += 1
+            if (n1, t1) in f:
+                iff += 1
+            else:
+                inf += 1
+    print "nedges:", G.number_of_edges()
+    cs = {}
+    for c in nx.weakly_connected_components(G):
+        cs[len(c)] = cs.get(len(c), 0) + 1
+    cs = sorted(cs.iteritems(), key=lambda x: x[0])
+    x, y = zip(*cs)
+    print cs
+    plt.loglog(x, y, "-")
+    plt.xlabel("Cascade size")
+    plt.ylabel("Number of cascades")
+    plt.savefig("dist.pdf")
+    l.sort(reverse=True)
+    plt.plot(l)
+    plt.show()
+    return (lamb, alpha, mu, dr, r, i, drf, rf, iff,
+            dnf, rnf, inf, si, len(event_edges))
 
 
 if __name__ == "__main__":
-    nodes, edges, events, event_edges = load(open("data.pickle", "rb"))
-    for i, line in enumerate(open("values-sorted.txt")):
-        if i > 100:
-            break
-        lamb, alpha, mu, v = map(float, line.strip().split())
-        print main((lamb, alpha, mu))
+    nodes, edges, events, event_edges = load(open("data2.pickle", "rb"))
+    lamb, alpha, mu = 1.1847510744e-05, 0.00316718040144, 0.00393069204339
+    # print len(event_edges), sum(len(e) for e in events.itervalues())
+    # print len(fatal())
+    print main(lamb, alpha, mu)
diff --git a/hawkes/main.py b/hawkes/main.py
index cddc85b..40993a1 100644
--- a/hawkes/main.py
+++ b/hawkes/main.py
@@ -110,8 +110,8 @@ def optimize_with_gss(x, y, z, niter=100):
         return ll(x, y, z)
 
     for _ in xrange(niter):
-        y, fc = gss(g, 0, 1, tol=1e-10)
-        z, fc = gss(h, 0, 1, tol=1e-10)
+        y, fc = gss(g, 0, 100, tol=1e-10)
+        z, fc = gss(h, 0, 100, tol=1e-10)
         x, fc = gss(f, 0, 1e-3, tol=1e-10)
         print x, y, z, fc
         sys.stdout.flush()
diff --git a/hawkes/sanity.py b/hawkes/sanity.py
index 529203c..b6f25eb 100644
--- a/hawkes/sanity.py
+++ b/hawkes/sanity.py
@@ -7,7 +7,6 @@ import sys
 def parse_row(row):
     return set(e for e in map(parse, row[2:]) if e)
 
-
 if __name__ == "__main__":
     nodes, edges, events, event_edges = load(open("data2.pickle", "rb"))
     with open(sys.argv[1]) as fh:
@@ -15,5 +14,3 @@ if __name__ == "__main__":
         reader = reader(fh)
         d = {parse(row[1]): parse_row(row) for row in reader}
     d = {k: v for (k, v) in d.iteritems() if v}
-    for k in d:
-        print len(d[k])
```
