-rw-r--r--   hawkes/data2.py    65
-rw-r--r--   hawkes/main.py    133
-rw-r--r--   hawkes/refine.py    2
-rw-r--r--   hawkes/sanity.py   18
4 files changed, 198 insertions, 20 deletions
diff --git a/hawkes/data2.py b/hawkes/data2.py
new file mode 100644
index 0000000..c091e7a
--- /dev/null
+++ b/hawkes/data2.py
@@ -0,0 +1,65 @@
+from csv import DictReader
+import sys
+from itertools import product
+from cPickle import dump
+
+MAX_TIME = 3012
+
+
+def parse(s):
+    return None if s == "NA" else int(float(s))
+
+
+def load_nodes(filename):
+    with open(filename) as fh:
+        reader = DictReader(fh)
+        d = {parse(row["name"]): parse(row["fatal_day"]) for row in reader}
+    for n, t in d.iteritems():
+        if t is None:
+            d[n] = MAX_TIME
+    return d
+
+
+def load_edges(filename):
+    events = {}
+    edges = {}
+    with open(filename) as fh:
+        reader = DictReader(fh)
+        for row in reader:
+            fro, to, t, dist = map(parse, [row["from"], row["to"],
+                                           row["t1"], row["dist"]])
+            d = edges.get(fro, dict())
+            d[to] = dist
+            edges[fro] = d
+            s = events.get(fro, set())
+            s.add(t)
+            events[fro] = s
+    return edges, events
+
+
+def compute_event_edges(events, edges):
+    event_edges = {}
+
+    for fro in events:
+        for t in events[fro]:
+            event_edges[(fro, t)] = set()
+
+    for fro in edges:
+        for to in edges[fro]:
+            try:
+                e1, e2 = events[fro], events[to]
+            except KeyError:
+                continue
+            for t1, t2 in product(e1, e2):
+                if t1 < t2:
+                    s = event_edges[(to, t2)]
+                    s.add((fro, t1, edges[fro][to]))
+                    event_edges[(to, t2)] = s
+    return event_edges
+
+
+if __name__ == "__main__":
+    nodes = load_nodes(sys.argv[1])
+    edges, events = load_edges(sys.argv[2])
+    event_edges = compute_event_edges(events, edges)
+    dump((nodes, edges, events, event_edges), open("data2.pickle", "wb"))
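The interesting part of data2.py is compute_event_edges, which attaches to every event (to, t2) the set of earlier events (fro, t1, dist) arriving over incoming edges. A minimal spot check on a toy graph (the node IDs, times, and distance below are made up, not taken from the real dataset):

    # Toy check of compute_event_edges: node 1 fires at t=5, node 2 at t=9,
    # and there is an edge 1 -> 2 with distance 3, so the event (2, 9)
    # should pick up the triggering candidate (1, 5, 3).
    from data2 import compute_event_edges

    events = {1: set([5]), 2: set([9])}   # node -> event times
    edges = {1: {2: 3}}                   # from -> {to: dist}

    print(compute_event_edges(events, edges))
    # expected: {(1, 5): set([]), (2, 9): set([(1, 5, 3)])}

Only the later event picks up a triggering candidate, since t1 < t2 is required; the earlier event keeps an empty set.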
diff --git a/hawkes/main.py b/hawkes/main.py
index 65586ad..d98f6a0 100644
--- a/hawkes/main.py
+++ b/hawkes/main.py
@@ -1,8 +1,31 @@
 from cPickle import load
-from math import log, exp
-#import numpy as np
-#from scipy.optimize import basinhopping
+from math import log, exp, sqrt
+from random import random, gauss
+import sys
 from itertools import product
+from multiprocessing import Pool
+# import numpy as np
+# from scipy.optimize import basinhopping
+
+GR = (sqrt(5) - 1) / 2
+
+
+def gss(f, a, b, tol=1e-20):
+    """golden section search to minimize along one coordinate"""
+    c = b - GR * (b - a)
+    d = a + GR * (b - a)
+    while abs(c - d) > tol:
+        fc = f(c)
+        fd = f(d)
+        if fc < fd:
+            b = d
+            d = c
+            c = b - GR * (b - a)
+        else:
+            a = c
+            c = d
+            d = a + GR * (b - a)
+    return (b + a) / 2
 
 
 def iter_events(events):
@@ -11,9 +34,7 @@ def iter_events(events):
             yield (n, t)
 
 
-def ll(a):
-    lamb, alpha, mu = a
-
+def ll_old(lamb, alpha, mu):
     r1 = sum(log(lamb + sum(alpha * w * mu * exp(-mu * (t1 - t2))
                             for (n2, t2, w) in s))
              for ((n1, t1), s) in event_edges.iteritems())
@@ -24,20 +45,98 @@
     return -(r1 - r2 - r3)
 
 
-if __name__ == "__main__":
-    nodes, edges, events, event_edges = load(open("data.pickle", "rb"))
+def ll(lamb, alpha, mu):
+    r1 = sum(log(lamb + sum(alpha / d * mu * exp(-mu * (t1 - t2))
+                            for (n2, t2, d) in s))
+             for ((n1, t1), s) in event_edges.iteritems())
+    r2 = sum(sum(alpha / d * (1 - exp(-mu * (nodes[n2] - t1)))
+                 for n2, d in edges[n1].iteritems() if nodes[n2] > t1)
+             for (n1, t1) in iter_events(events))
+    r3 = lamb * sum(nodes.itervalues())
+    return -(r1 - r2 - r3)
+
+
+def sa(x, y, z, sigma=0.1, niter=100):
+    T = 1
+    e = 1.01
+    for _ in xrange(niter):
+        fo = ll(x, y, z)
+        #print T, sigma, x, y, z, fo
+        yn = max(y + gauss(0, sigma * y + 1e-10), 0)
+        zn = max(z + gauss(0, sigma * z + 1e-10), 0)
+        fn = ll(x, yn, zn)
+        if fn < fo or exp((fo - fn) / T) > random():
+            y = yn
+            z = zn
+            sigma *= e
+        else:
+            sigma /= e
+            if sigma < 1e-5:
+                break
+        T *= 0.99
+    return y, z
+
+
+def optimize_with_sa(x, y, z):
+    #y, z = sa(x, y, z)
+    while True:
+        x = gss(f, 0, 1, tol=1e-10)
+        y, z = sa(x, y, z)
+        print x, y, z, ll(x, y, z)
+        sys.stdout.flush()
+
+
+def optimize_with_gss(x, y, z):
+    while True:
+        x = gss(f, 0, 1, tol=1e-20)
+        y = gss(g, 0, 1, tol=1e-20)
+        z = gss(h, 0, 1, tol=1e-20)
+        print x, y, z, ll(x, y, z)
+        sys.stdout.flush()
+
+
+def coarse_search():
     d = {}
     for line in open("values.txt"):
         v = map(float, line.strip().split())
         d[tuple(v[:3])] = v[3]
+    p = Pool(5)
+    lamb = [20. ** i for i in range(-15, 15)]
+    alpha = [20. ** i for i in range(-15, 15)]
+    mu = [20. ** i for i in range(-15, 15)]
+
+    def aux(x):
+        if x in d:
+            return
+        l, a, m = x
+        print l, a, m, ll(l, a, m)
 
-    # lamb = [20. ** i for i in range(-15, 15)]
-    # alpha = [20. ** i for i in range(-15, 15)]
-    # mu = [20. ** i for i in range(-15, 15)]
+    p.map(aux, product(lamb, alpha, mu))
     # for l, a, m in product(lamb, alpha, mu):
-    #     if (l, a, m) in d:
-    #         continue
-    #     print l, a, m, ll((l, a, m))
-    print ll((2, 10000000000., 0.000000000000000000001))
-    #r = basinhopping(ll, init, disp=True, stepsize=0.1, T=10000., niter=500)
-    #print r.x
+    #     print l, a, m, ll(*(l, a, m))
+
+
+if __name__ == "__main__":
+    nodes, edges, events, event_edges = load(open("data2.pickle", "rb"))
+
+    def f(x):
+        global y, z
+        return ll(x, y, z)
+
+    def g(y):
+        global x, z
+        return ll(x, y, z)
+
+    def h(z):
+        global x, y
+        return ll(x, y, z)
+
+    # x, y, z = map(float, sys.argv[1:])
+    x = 1.8e-5
+    y = 1.
+    z = 1.
+    #x, y, z = 1.8387870804e-05, 0.0383954874502, 0.00108402386463
+    x, y, z = 0.000125, 0.000125, 0.000125
+    #optimize_with_sa(x, y, z)
+    #optimize_with_gss(x, y, z)
+    coarse_search()
diff --git a/hawkes/refine.py b/hawkes/refine.py
index 81fd547..675bee3 100644
--- a/hawkes/refine.py
+++ b/hawkes/refine.py
@@ -29,7 +29,7 @@ def ll(a):
 if __name__ == "__main__":
     nodes, edges, events, event_edges = load(open("data.pickle", "rb"))
     d = {}
-    for line in open("values.txt"):
+    for line in open("test.txt"):
         v = map(float, line.strip().split())
         d[tuple(v[:3])] = v[3]
     l = d.items()
diff --git a/hawkes/sanity.py b/hawkes/sanity.py
index c837757..529203c 100644
--- a/hawkes/sanity.py
+++ b/hawkes/sanity.py
@@ -1,5 +1,19 @@
 from cPickle import load
+from csv import reader
+from data2 import parse
+import sys
+
+
+def parse_row(row):
+    return set(e for e in map(parse, row[2:]) if e)
+
 
 if __name__ == "__main__":
-    nodes, edges, events, event_edges = load(open("data.pickle", "rb"))
-    print len(nodes)
+    nodes, edges, events, event_edges = load(open("data2.pickle", "rb"))
+    with open(sys.argv[1]) as fh:
+        fh.readline()
+        reader = reader(fh)
+        d = {parse(row[1]): parse_row(row) for row in reader}
+        d = {k: v for (k, v) in d.iteritems() if v}
+        for k in d:
+            print len(d[k])
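In main.py, optimize_with_gss is plain cyclic coordinate descent: it minimizes ll along one parameter at a time with the golden section search, the closures f, g, and h holding the other two coordinates fixed via globals. gss itself only assumes the objective is unimodal on [a, b]; a standalone sketch against a known minimum (the quadratic is a placeholder objective, not the Hawkes likelihood):

    # gss shrinks the bracket [a, b] by the golden ratio each step until
    # the two interior probes coincide; (x - 2)^2 has its minimum at x = 2.
    from main import gss

    x_min = gss(lambda x: (x - 2) ** 2, 0., 5., tol=1e-12)
    print(x_min)  # ~2.0

Importing main is safe here because the data loading and the search live under the __main__ guard.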

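The rewritten ll replaces the per-edge weight w used by ll_old with 1/dist, so nearer neighbours excite more strongly. Because it reads nodes, edges, events, and event_edges as module globals, the formula is easiest to check in self-contained form; the sketch below evaluates the same three terms on the toy structures from the data2.py example, with placeholder parameter values:

    # Self-contained evaluation of main.py's negative log-likelihood ll()
    # on toy structures (all numbers are placeholders, not fitted values).
    from math import log, exp

    nodes = {1: 10, 2: 10}                       # node -> fatal_day
    events = {1: set([5]), 2: set([9])}          # node -> event times
    edges = {1: {2: 3}, 2: {}}                   # from -> {to: dist}
    event_edges = {(1, 5): set(), (2, 9): set([(1, 5, 3)])}

    lamb, alpha, mu = 0.1, 0.5, 0.2

    # r1: log-intensity at each observed event
    r1 = sum(log(lamb + sum(alpha / d * mu * exp(-mu * (t1 - t2))
                            for (n2, t2, d) in s))
             for ((n1, t1), s) in event_edges.items())
    # r2: integrated excitation each event sends to its neighbours
    r2 = sum(alpha / d * (1 - exp(-mu * (nodes[n2] - t1)))
             for n1 in events for t1 in events[n1]
             for n2, d in edges[n1].items() if nodes[n2] > t1)
    # r3: integrated background rate over every node's observation window
    r3 = lamb * sum(nodes.values())

    print(-(r1 - r2 - r3))  # negative log-likelihood, to be minimized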