Diffstat (limited to 'hawkes')
-rw-r--r--  hawkes/cause.py    24
-rw-r--r--  hawkes/data.py     65
-rw-r--r--  hawkes/main.py     43
-rw-r--r--  hawkes/plot.py     26
-rw-r--r--  hawkes/refine.py   43
-rw-r--r--  hawkes/sanity.py    5
6 files changed, 206 insertions(+), 0 deletions(-)
diff --git a/hawkes/cause.py b/hawkes/cause.py
new file mode 100644
index 0000000..0cce9d2
--- /dev/null
+++ b/hawkes/cause.py
@@ -0,0 +1,24 @@
+from cPickle import load
+from math import exp
+
+
+def main(a):
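+    # For a parameter triple (lamb, alpha, mu): dr counts events with no
+    # earlier events on incoming edges, r counts events whose summed
+    # excitation alpha * w * mu * exp(-mu * dt) stays below the base rate lamb.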
+    lamb, alpha, mu = a
+    dr = 0
+    r = 0
+    for ((n1, t1), s) in event_edges.iteritems():
+        if not s:
+            dr += 1
+        if lamb > sum(alpha * w * mu * exp(-mu * (t1 - t2))
+                      for (n2, t2, w) in s):
+            r += 1
+    return lamb, alpha, mu, dr, r
+
+
+if __name__ == "__main__":
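+    # event_edges is read by main() at module scope; evaluate the counts for
+    # the first ~100 parameter triples in values-sorted.txt.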
+    nodes, edges, events, event_edges = load(open("data.pickle", "rb"))
+    for i, line in enumerate(open("values-sorted.txt")):
+        if i > 100:
+            break
+        lamb, alpha, mu, v = map(float, line.strip().split())
+        print main((lamb, alpha, mu))
diff --git a/hawkes/data.py b/hawkes/data.py
new file mode 100644
index 0000000..4d7744e
--- /dev/null
+++ b/hawkes/data.py
@@ -0,0 +1,65 @@
+from csv import DictReader
+import sys
+from itertools import product
+from cPickle import dump
+
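+# Nodes with no recorded fatal_day get MAX_TIME as their end time (see load_nodes).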
+MAX_TIME = 3012
+
+
+def parse(s):
+    return None if s == "NA" else int(float(s))
+
+
+def load_nodes(filename):
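+    # Maps node name -> fatal_day, substituting MAX_TIME when the day is missing.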
+    with open(filename) as fh:
+        reader = DictReader(fh)
+        d = {parse(row["name"]): parse(row["fatal_day"]) for row in reader}
+    for n, t in d.iteritems():
+        if t is None:
+            d[n] = MAX_TIME
+    return d
+
+
+def load_edges(filename):
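+    # edges: fro -> {to: weight}; events: fro -> set of event times t1.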
+    events = {}
+    edges = {}
+    with open(filename) as fh:
+        reader = DictReader(fh)
+        for row in reader:
+            fro, to, t, weight = map(parse, [row["from"], row["to"],
+                                             row["t1"], row["w1"]])
+            d = edges.get(fro, dict())
+            d[to] = weight
+            edges[fro] = d
+            s = events.get(fro, set())
+            s.add(t)
+            events[fro] = s
+    return edges, events
+
+
+def compute_event_edges(events, edges):
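+    # For every event (node, time), collect the (source, earlier time, weight)
+    # triples on incoming edges that can contribute excitation to it.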
+    event_edges = {}
+
+    for fro in events:
+        for t in events[fro]:
+            event_edges[(fro, t)] = set()
+
+    for fro in edges:
+        for to in edges[fro]:
+            try:
+                e1, e2 = events[fro], events[to]
+            except KeyError:
+                continue
+            for t1, t2 in product(e1, e2):
+                if t1 < t2:
+                    s = event_edges[(to, t2)]
+                    s.add((fro, t1, edges[fro][to]))
+                    event_edges[(to, t2)] = s
+    return event_edges
+
+
+if __name__ == "__main__":
+    nodes = load_nodes(sys.argv[1])
+    edges, events = load_edges(sys.argv[2])
+    event_edges = compute_event_edges(events, edges)
+    dump((nodes, edges, events, event_edges), open("data.pickle", "wb"))
diff --git a/hawkes/main.py b/hawkes/main.py
new file mode 100644
index 0000000..65586ad
--- /dev/null
+++ b/hawkes/main.py
@@ -0,0 +1,43 @@
+from cPickle import load
+from math import log, exp
+#import numpy as np
+#from scipy.optimize import basinhopping
+from itertools import product
+
+
+def iter_events(events):
+    for n, s in events.iteritems():
+        for t in s:
+            yield (n, t)
+
+
+def ll(a):
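+    # Negative log-likelihood of a Hawkes process with exponential kernel:
+    # r1 sums the log-intensities at the observed events, r2 integrates the
+    # excitation each event contributes up to its targets' end times nodes[n2],
+    # and r3 integrates the base rate lamb over every node's window.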
+    lamb, alpha, mu = a
+
+    r1 = sum(log(lamb + sum(alpha * w * mu * exp(-mu * (t1 - t2))
+                            for (n2, t2, w) in s))
+             for ((n1, t1), s) in event_edges.iteritems())
+    r2 = sum(sum(alpha * w * (1 - exp(-mu * (nodes[n2] - t1)))
+                 for n2, w in edges[n1].iteritems() if nodes[n2] > t1)
+             for (n1, t1) in iter_events(events))
+    r3 = lamb * sum(nodes.itervalues())
+    return -(r1 - r2 - r3)
+
+
+if __name__ == "__main__":
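+    # values.txt caches previously evaluated rows of the form: lamb alpha mu ll-value.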
+    nodes, edges, events, event_edges = load(open("data.pickle", "rb"))
+    d = {}
+    for line in open("values.txt"):
+        v = map(float, line.strip().split())
+        d[tuple(v[:3])] = v[3]
+
+    # lamb = [20. ** i for i in range(-15, 15)]
+    # alpha = [20. ** i for i in range(-15, 15)]
+    # mu = [20. ** i for i in range(-15, 15)]
+    # for l, a, m in product(lamb, alpha, mu):
+    #     if (l, a, m) in d:
+    #         continue
+    #     print l, a, m, ll((l, a, m))
+    print ll((2, 10000000000., 0.000000000000000000001))
+    #r = basinhopping(ll, init, disp=True, stepsize=0.1, T=10000., niter=500)
+    #print r.x
diff --git a/hawkes/plot.py b/hawkes/plot.py
new file mode 100644
index 0000000..24d3f35
--- /dev/null
+++ b/hawkes/plot.py
@@ -0,0 +1,26 @@
+from mpl_toolkits.mplot3d import Axes3D
+from matplotlib import cm
+import matplotlib.pyplot as plt
+
+fig = plt.figure()
+ax = fig.gca(projection='3d')
+d = {}
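+# Keep, for each (v[0], v[2]) pair from values.txt, the smallest v[3] seen,
+# i.e. the best objective value over the remaining parameter.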
+for line in open("values.txt"):
+    v = map(float, line.strip().split())
+    try:
+        if (v[0], v[2]) in d:
+            d[(v[0], v[2])] = min(d[(v[0], v[2])], v[3])
+        else:
+            d[(v[0], v[2])] = v[3]
+    except:
+        continue
+
+x, y, z = [], [], []
+for k, v in d.iteritems():
+    x.append(k[0] / 1000000.)
+    y.append(k[1] / 1000000.)
+    z.append(v / 100000.)
+
+print x
+surf = ax.plot_trisurf(x, y, z)
+plt.show()
diff --git a/hawkes/refine.py b/hawkes/refine.py
new file mode 100644
index 0000000..81fd547
--- /dev/null
+++ b/hawkes/refine.py
@@ -0,0 +1,43 @@
+from cPickle import load
+from itertools import product
+from math import exp, log
+
+
+def iter_events(events):
+    for n, s in events.iteritems():
+        for t in s:
+            yield (n, t)
+
+
+def inprod(a, b):
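+    # Element-wise product of two parameter tuples.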
+    return tuple(float(x * y) for x, y in zip(a, b))
+
+
+def ll(a):
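+    # Same negative log-likelihood as in main.py.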
+    lamb, alpha, mu = a
+
+    r1 = sum(log(lamb + sum(alpha * w * mu * exp(-mu * (t1 - t2))
+                            for (n2, t2, w) in s))
+             for ((n1, t1), s) in event_edges.iteritems())
+    r2 = sum(sum(alpha * w * (1 - exp(-mu * (nodes[n2] - t1)))
+                 for n2, w in edges[n1].iteritems() if nodes[n2] > t1)
+             for (n1, t1) in iter_events(events))
+    r3 = lamb * sum(nodes.itervalues())
+    return -(r1 - r2 - r3)
+
+
+if __name__ == "__main__":
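+    # Take the 10 parameter triples with the lowest cached objective in
+    # values.txt and re-evaluate ll on a multiplicative grid around each.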
+    nodes, edges, events, event_edges = load(open("data.pickle", "rb"))
+    d = {}
+    for line in open("values.txt"):
+        v = map(float, line.strip().split())
+        d[tuple(v[:3])] = v[3]
+    l = d.items()
+    l.sort(key=lambda x: x[1])
+    for a, _ in l[:10]:
+        t = [1. / i for i in range(2, 6)] + [float(i) for i in range(1, 6)]
+        for b in product(t, repeat=3):
+            l, al, m = inprod(a, b)
+            if (l, al, m) in d:
+                continue
+            print l, al, m, ll((l, al, m))
diff --git a/hawkes/sanity.py b/hawkes/sanity.py
new file mode 100644
index 0000000..c837757
--- /dev/null
+++ b/hawkes/sanity.py
@@ -0,0 +1,5 @@
+from cPickle import load
+
+if __name__ == "__main__":
+    nodes, edges, events, event_edges = load(open("data.pickle", "rb"))
+    print len(nodes)