diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2015-08-23 03:47:58 -0700 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-08-23 03:47:58 -0700 |
| commit | 0b0d26b2c0ed7d6f0b69f550f6584dabae184ba1 (patch) | |
| tree | b21a4b25d907d8106ce4189d8a770acd6abe457f /hawkes | |
| parent | ef61ece9773e8a865b57f60ca1e1b9faa903af23 (diff) | |
| download | criminal_cascades-0b0d26b2c0ed7d6f0b69f550f6584dabae184ba1.tar.gz | |
Code for hawkes model
Diffstat (limited to 'hawkes')
| -rw-r--r-- | hawkes/cause.py | 24 | ||||
| -rw-r--r-- | hawkes/data.py | 65 | ||||
| -rw-r--r-- | hawkes/main.py | 43 | ||||
| -rw-r--r-- | hawkes/plot.py | 26 | ||||
| -rw-r--r-- | hawkes/refine.py | 43 | ||||
| -rw-r--r-- | hawkes/sanity.py | 5 |
6 files changed, 206 insertions, 0 deletions
diff --git a/hawkes/cause.py b/hawkes/cause.py new file mode 100644 index 0000000..0cce9d2 --- /dev/null +++ b/hawkes/cause.py @@ -0,0 +1,24 @@ +from cPickle import load +from math import exp + + +def main(a): + lamb, alpha, mu = a + dr = 0 + r = 0 + for ((n1, t1), s) in event_edges.iteritems(): + if not s: + dr += 1 + if lamb > sum(alpha * w * mu * exp(-mu * (t1 - t2)) + for (n2, t2, w) in s): + r += 1 + return lamb, alpha, mu, dr, r + + +if __name__ == "__main__": + nodes, edges, events, event_edges = load(open("data.pickle", "rb")) + for i, line in enumerate(open("values-sorted.txt")): + if i > 100: + break + lamb, alpha, mu, v = map(float, line.strip().split()) + print main((lamb, alpha, mu)) diff --git a/hawkes/data.py b/hawkes/data.py new file mode 100644 index 0000000..4d7744e --- /dev/null +++ b/hawkes/data.py @@ -0,0 +1,65 @@ +from csv import DictReader +import sys +from itertools import product +from cPickle import dump + +MAX_TIME = 3012 + + +def parse(s): + return None if s == "NA" else int(float(s)) + + +def load_nodes(filename): + with open(filename) as fh: + reader = DictReader(fh) + d = {parse(row["name"]): parse(row["fatal_day"]) for row in reader} + for n, t in d.iteritems(): + if t is None: + d[n] = MAX_TIME + return d + + +def load_edges(filename): + events = {} + edges = {} + with open(filename) as fh: + reader = DictReader(fh) + for row in reader: + fro, to, t, weight = map(parse, [row["from"], row["to"], + row["t1"], row["w1"]]) + d = edges.get(fro, dict()) + d[to] = weight + edges[fro] = d + s = events.get(fro, set()) + s.add(t) + events[fro] = s + return edges, events + + +def compute_event_edges(events, edges): + event_edges = {} + + for fro in events: + for t in events[fro]: + event_edges[(fro, t)] = set() + + for fro in edges: + for to in edges[fro]: + try: + e1, e2 = events[fro], events[to] + except KeyError: + continue + for t1, t2 in product(e1, e2): + if t1 < t2: + s = event_edges[(to, t2)] + s.add((fro, t1, edges[fro][to])) + event_edges[(to, t2)] = s + return event_edges + + +if __name__ == "__main__": + nodes = load_nodes(sys.argv[1]) + edges, events = load_edges(sys.argv[2]) + event_edges = compute_event_edges(events, edges) + dump((nodes, edges, events, event_edges), open("data.pickle", "wb")) diff --git a/hawkes/main.py b/hawkes/main.py new file mode 100644 index 0000000..65586ad --- /dev/null +++ b/hawkes/main.py @@ -0,0 +1,43 @@ +from cPickle import load +from math import log, exp +#import numpy as np +#from scipy.optimize import basinhopping +from itertools import product + + +def iter_events(events): + for n, s in events.iteritems(): + for t in s: + yield (n, t) + + +def ll(a): + lamb, alpha, mu = a + + r1 = sum(log(lamb + sum(alpha * w * mu * exp(-mu * (t1 - t2)) + for (n2, t2, w) in s)) + for ((n1, t1), s) in event_edges.iteritems()) + r2 = sum(sum(alpha * w * (1 - exp(-mu * (nodes[n2] - t1))) + for n2, w in edges[n1].iteritems() if nodes[n2] > t1) + for (n1, t1) in iter_events(events)) + r3 = lamb * sum(nodes.itervalues()) + return -(r1 - r2 - r3) + + +if __name__ == "__main__": + nodes, edges, events, event_edges = load(open("data.pickle", "rb")) + d = {} + for line in open("values.txt"): + v = map(float, line.strip().split()) + d[tuple(v[:3])] = v[3] + + # lamb = [20. ** i for i in range(-15, 15)] + # alpha = [20. ** i for i in range(-15, 15)] + # mu = [20. ** i for i in range(-15, 15)] + # for l, a, m in product(lamb, alpha, mu): + # if (l, a, m) in d: + # continue + # print l, a, m, ll((l, a, m)) + print ll((2, 10000000000., 0.000000000000000000001)) + #r = basinhopping(ll, init, disp=True, stepsize=0.1, T=10000., niter=500) + #print r.x diff --git a/hawkes/plot.py b/hawkes/plot.py new file mode 100644 index 0000000..24d3f35 --- /dev/null +++ b/hawkes/plot.py @@ -0,0 +1,26 @@ +from mpl_toolkits.mplot3d import Axes3D +from matplotlib import cm +import matplotlib.pyplot as plt + +fig = plt.figure() +ax = fig.gca(projection='3d') +d = {} +for line in open("values.txt"): + v = map(float, line.strip().split()) + try: + if (v[0], v[2]) in d: + d[(v[0], v[2])] = min(d[(v[0], v[2])], v[3]) + else: + d[(v[0], v[2])] = v[3] + except: + continue + +x, y, z = [], [], [] +for k, v in d.iteritems(): + x.append(k[0] / 1000000.) + y.append(k[1] / 1000000.) + z.append(v / 100000.) + +print x +surf = ax.plot_trisurf(x, y, z) +plt.show() diff --git a/hawkes/refine.py b/hawkes/refine.py new file mode 100644 index 0000000..81fd547 --- /dev/null +++ b/hawkes/refine.py @@ -0,0 +1,43 @@ +from cPickle import load +from itertools import product +from math import exp, log + + +def iter_events(events): + for n, s in events.iteritems(): + for t in s: + yield (n, t) + + +def inprod(a, b): + return tuple(float(x * y) for x, y in zip(a, b)) + + +def ll(a): + lamb, alpha, mu = a + + r1 = sum(log(lamb + sum(alpha * w * mu * exp(-mu * (t1 - t2)) + for (n2, t2, w) in s)) + for ((n1, t1), s) in event_edges.iteritems()) + r2 = sum(sum(alpha * w * (1 - exp(-mu * (nodes[n2] - t1))) + for n2, w in edges[n1].iteritems() if nodes[n2] > t1) + for (n1, t1) in iter_events(events)) + r3 = lamb * sum(nodes.itervalues()) + return -(r1 - r2 - r3) + + +if __name__ == "__main__": + nodes, edges, events, event_edges = load(open("data.pickle", "rb")) + d = {} + for line in open("values.txt"): + v = map(float, line.strip().split()) + d[tuple(v[:3])] = v[3] + l = d.items() + l.sort(key=lambda x: x[1]) + for a, _ in l[:10]: + t = [1. / i for i in range(2, 6)] + [float(i) for i in range(1, 6)] + for b in product(t, repeat=3): + l, al, m = inprod(a, b) + if (l, al, m) in d: + continue + print l, al, m, ll((l, al, m)) diff --git a/hawkes/sanity.py b/hawkes/sanity.py new file mode 100644 index 0000000..c837757 --- /dev/null +++ b/hawkes/sanity.py @@ -0,0 +1,5 @@ +from cPickle import load + +if __name__ == "__main__": + nodes, edges, events, event_edges = load(open("data.pickle", "rb")) + print len(nodes) |
