summary | refs | log | tree | commit | diff | stats
path: root/hawkes/cause.py
diff options
context:
space:
mode:
author: Thibaut Horel <thibaut.horel@gmail.com> 2015-09-13 15:40:02 -0400
committer: Thibaut Horel <thibaut.horel@gmail.com> 2015-09-13 15:40:13 -0400
commit: fffb68b515a5cae198d8b756b562ddea6f2c814c (patch)
tree: 0ecd1c27d5f52c7e27bbf36b39491c984274a06a /hawkes/cause.py
parent: 754bc37b60d5efbcf0ddad7b991f6effafe2237d (diff)
download: criminal_cascades-fffb68b515a5cae198d8b756b562ddea6f2c814c.tar.gz
Digging into the data
Diffstat (limited to 'hawkes/cause.py')
-rw-r--r-- hawkes/cause.py 83
1 files changed, 69 insertions, 14 deletions
diff --git a/hawkes/cause.py b/hawkes/cause.py
index 0cce9d2..fddbfa9 100644
--- a/hawkes/cause.py
+++ b/hawkes/cause.py
@@ -1,24 +1,79 @@
from cPickle import load
-from math import exp
+from math import exp, sin
+from csv import reader
+from data2 import parse
+import sys
+import networkx as nx
+import matplotlib.pyplot as plt
-def main(a):
- lamb, alpha, mu = a
- dr = 0
- r = 0
# fatal(): read the CSV file named by sys.argv[1] and return a dict of
# parsed column values keyed by 1-based data-row number.
#   - The first line of the file is consumed and discarded before the csv
#     reader starts (presumably a header row -- confirm against the data).
#   - Keys are i + 1, where i is the 0-based index of the row as seen by
#     the csv reader; values are parse(row[7]), i.e. the eighth column.
#   - The second comprehension drops every entry whose parsed value is falsy.
# NOTE(review): `parse` is imported from the project module data2 and is not
# visible here; what it returns for row[7] must be confirmed there.
# NOTE(review): indentation was collapsed in this rendering of the patch, so
# which statements sit inside the `with` block is inferred, not shown.
+def fatal():
+ with open(sys.argv[1]) as fh:
+ fh.readline()
+ r = reader(fh)
+ d = {i + 1: parse(row[7]) for (i, row) in enumerate(r)}
+ d = {k: v for k, v in d.iteritems() if v}
+ return d
+
+
# main(lamb, alpha, mu): walk over every entry of the module-level
# `event_edges` mapping, split the events into three groups, build a directed
# graph linking some events to one earlier event, then print and plot summary
# statistics.
#
# Parameters: lamb, alpha, mu are numeric model parameters used in the `br`
# and `prl` expressions below.
# Returns: the 14-tuple (lamb, alpha, mu, dr, r, i, drf, rf, iff,
#                        dnf, rnf, inf, si, len(event_edges)).
# Side effects: prints to stdout, writes "dist.pdf", and calls plt.show().
#
# Counters (the f-suffixed / nf-suffixed variants count the same events
# split by whether `(n1, t1) in f` is true or false):
#   dr / drf / dnf -- events whose edge list `s` is empty
#   r  / rf  / rnf -- events where br > pr
#   i  / iff / inf -- all remaining events (these get a graph edge)
#
# NOTE(review): `event_edges` is not a parameter; it is read as a global that
# the __main__ section assigns before calling main().
# NOTE(review): indentation was collapsed in this rendering of the patch; the
# nesting described in the comments below is inferred from the statement
# order and should be checked against the real file.
# NOTE(review): `si` is initialised to 0, never modified, and returned as-is.
+def main(lamb, alpha, mu):
+ G = nx.DiGraph()
+ r, dr, i = 0, 0, 0
+ drf, iff, rf = 0, 0, 0
+ dnf, rnf, inf = 0, 0, 0
+ si = 0
# NOTE(review): under Python 2 (this file uses print statements and
# iteritems) dict.items() returns a list of (key, value) pairs, so every
# `(n1, t1) in f` test below is a linear scan of that list. It is true only
# when (n1, t1) equals a (row_number, parsed_value) pair produced by fatal().
+ f = fatal().items()
# `l` collects prl[0][2] / br for every event that receives a graph edge.
+ l = []
for ((n1, t1), s) in event_edges.iteritems():
+ G.add_node((n1, t1))
# Events with an empty edge list are only counted, then skipped.
if not s:
dr += 1
- if lamb > sum(alpha * w * mu * exp(-mu * (t1 - t2))
- for (n2, t2, w) in s):
+ if (n1, t1) in f:
+ drf += 1
+ else:
+ dnf += 1
+ continue
# br: lamb scaled by a sinusoidal function of t1.
# NOTE(review): 0.0172 is close to 2*pi/365, so this is presumably a yearly
# seasonal modulation with t1 measured in days -- confirm units.
+ br = lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))
# prl: one (n2, t2, weight) triple per entry of s, with
# weight = alpha / d * mu * exp(-mu * (t1 - t2)).
# NOTE(review): sorted(..., reverse=True) with no key compares the tuples
# lexicographically, i.e. by n2 first, then t2, then weight. prl[0] is
# therefore the triple with the largest n2, not necessarily the one with the
# largest weight. If "highest-weight entry" was intended, a
# key on element 2 is needed -- confirm intent.
+ prl = sorted([(n2, t2, alpha / d * mu * exp(-mu * (t1 - t2)))
+ for (n2, t2, d) in s], reverse=True)
# pr: total of the weights over all entries of s.
+ pr = sum(e[2] for e in prl)
+ #if sum(e[2] for e in prl[:1]) > br:
+ # G.add_edge((n1, t1), tuple(prl[0][:2]))
# br strictly greater than the summed weights: count under r / rf / rnf and
# add no edge.
+ if br > pr:
r += 1
- return lamb, alpha, mu, dr, r
+ if (n1, t1) in f:
+ rf += 1
+ else:
+ rnf += 1
# Otherwise: add an edge from this event to (n2, t2) of prl[0], record the
# ratio of that entry's weight to br, and count under i / iff / inf.
+ else:
+ G.add_edge((n1, t1), tuple(prl[0][:2]))
+ l.append(prl[0][2] / br)
+ i += 1
+ if (n1, t1) in f:
+ iff += 1
+ else:
+ inf += 1
+ print "nedges:", G.number_of_edges()
# cs: histogram mapping weakly-connected-component size -> number of
# components of that size, then converted to a list of pairs sorted by size.
+ cs = {}
+ for c in nx.weakly_connected_components(G):
+ cs[len(c)] = cs.get(len(c), 0) + 1
+ cs = sorted(cs.iteritems(), key=lambda x: x[0])
# NOTE(review): zip(*cs) cannot be unpacked into x, y when cs is empty
# (i.e. when event_edges has no entries).
+ x, y = zip(*cs)
+ print cs
+ plt.loglog(x, y, "-")
+ plt.xlabel("Cascade size")
+ plt.ylabel("Number of cascades")
+ plt.savefig("dist.pdf")
# Plot the recorded ratios in descending order and open the plot window.
+ l.sort(reverse=True)
+ plt.plot(l)
+ plt.show()
+ return (lamb, alpha, mu, dr, r, i, drf, rf, iff,
+ dnf, rnf, inf, si, len(event_edges))
# Script entry point. The removed (-) lines are the previous version, which
# read parameter triples from "values-sorted.txt" and called main() on up to
# the first 101 lines; the added (+) lines call main() once with fixed values.
if __name__ == "__main__":
- nodes, edges, events, event_edges = load(open("data.pickle", "rb"))
- for i, line in enumerate(open("values-sorted.txt")):
- if i > 100:
- break
- lamb, alpha, mu, v = map(float, line.strip().split())
- print main((lamb, alpha, mu))
# Unpickle a 4-tuple; main() reads `event_edges` from module scope, so this
# assignment must run before main() is called.
# NOTE(review): the file object passed to load() is never explicitly closed.
# NOTE(review): unpickling executes arbitrary code from the file; only load
# data2.pickle from a trusted source.
+ nodes, edges, events, event_edges = load(open("data2.pickle", "rb"))
# NOTE(review): hard-coded parameter values -- presumably a fitted estimate
# taken from an earlier run; their origin is not shown here.
+ lamb, alpha, mu = 1.1847510744e-05, 0.00316718040144, 0.00393069204339
+ # print len(event_edges), sum(len(e) for e in events.itervalues())
+ # print len(fatal())
+ print main(lamb, alpha, mu)