author     Thibaut Horel <thibaut.horel@gmail.com>  2015-09-09 19:47:07 -0400
committer  Thibaut Horel <thibaut.horel@gmail.com>  2015-09-09 19:47:07 -0400
commit     26c65bfeeb22a46e6b688ea3765b9a6e0479f748 (patch)
tree       2fa4a9ecfc2cd6e817642a2fbc79adf02e313f95
parent     485ebd6d5b34ff2d8f6761f2dda44ea3e2b32aa6 (diff)
download   criminal_cascades-26c65bfeeb22a46e6b688ea3765b9a6e0479f748.tar.gz
Add optimization code
-rw-r--r--  hawkes/data2.py    65
-rw-r--r--  hawkes/main.py    133
-rw-r--r--  hawkes/refine.py    2
-rw-r--r--  hawkes/sanity.py   18
4 files changed, 198 insertions(+), 20 deletions(-)
diff --git a/hawkes/data2.py b/hawkes/data2.py
new file mode 100644
index 0000000..c091e7a
--- /dev/null
+++ b/hawkes/data2.py
@@ -0,0 +1,65 @@
+from csv import DictReader
+import sys
+from itertools import product
+from cPickle import dump
+
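+# Censoring horizon: nodes whose fatal_day is "NA" are assigned MAX_TIME.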
+MAX_TIME = 3012
+
+
+def parse(s):
+ return None if s == "NA" else int(float(s))
+
+
+def load_nodes(filename):
+ with open(filename) as fh:
+ reader = DictReader(fh)
+ d = {parse(row["name"]): parse(row["fatal_day"]) for row in reader}
+ for n, t in d.iteritems():
+ if t is None:
+ d[n] = MAX_TIME
+ return d
+
+
+def load_edges(filename):
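+    """Return (edges, events): edges[u][v] is the distance on the edge
+    u -> v, and events[u] is the set of t1 values of rows whose source
+    node is u."""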
+ events = {}
+ edges = {}
+ with open(filename) as fh:
+ reader = DictReader(fh)
+ for row in reader:
+ fro, to, t, dist = map(parse, [row["from"], row["to"],
+ row["t1"], row["dist"]])
+            edges.setdefault(fro, {})[to] = dist
+            events.setdefault(fro, set()).add(t)
+ return edges, events
+
+
+def compute_event_edges(events, edges):
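+    """Map each event (n, t) to the set of strictly earlier events
+    (n2, t2, dist) at in-neighbors n2 (edge n2 -> n): the candidate
+    parents of (n, t) under the Hawkes model."""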
+ event_edges = {}
+
+ for fro in events:
+ for t in events[fro]:
+ event_edges[(fro, t)] = set()
+
+ for fro in edges:
+ for to in edges[fro]:
+ try:
+ e1, e2 = events[fro], events[to]
+ except KeyError:
+ continue
+ for t1, t2 in product(e1, e2):
+ if t1 < t2:
+                    event_edges[(to, t2)].add((fro, t1, edges[fro][to]))
+ return event_edges
+
+
+if __name__ == "__main__":
+ nodes = load_nodes(sys.argv[1])
+ edges, events = load_edges(sys.argv[2])
+ event_edges = compute_event_edges(events, edges)
+    with open("data2.pickle", "wb") as fh:
+        dump((nodes, edges, events, event_edges), fh)
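The pickle bundles four structures: nodes (node -> censoring time), edges (source -> {target: distance}), events (node -> set of event times), and event_edges ((node, t) -> candidate parent events). A minimal sketch for inspecting them (a hypothetical helper, not part of the commit; assumes data2.py has already been run):

    from cPickle import load

    with open("data2.pickle", "rb") as fh:
        nodes, edges, events, event_edges = load(fh)
    print len(nodes), "nodes"
    print sum(len(targets) for targets in edges.itervalues()), "edges"
    print sum(len(ts) for ts in events.itervalues()), "events"
    # one event together with its candidate parents (n2, t2, dist)
    print next(iter(event_edges.iteritems()))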
diff --git a/hawkes/main.py b/hawkes/main.py
index 65586ad..d98f6a0 100644
--- a/hawkes/main.py
+++ b/hawkes/main.py
@@ -1,8 +1,31 @@
from cPickle import load
-from math import log, exp
-#import numpy as np
-#from scipy.optimize import basinhopping
+from math import log, exp, sqrt
+from random import random, gauss
+import sys
from itertools import product
+from multiprocessing import Pool
+# import numpy as np
+# from scipy.optimize import basinhopping
+
+GR = (sqrt(5) - 1) / 2
+
+
+def gss(f, a, b, tol=1e-20):
+ """golden section search to minimize along one coordinate"""
+ c = b - GR * (b - a)
+ d = a + GR * (b - a)
+ while abs(c - d) > tol:
+ fc = f(c)
+ fd = f(d)
+ if fc < fd:
+ b = d
+ d = c
+ c = b - GR * (b - a)
+ else:
+ a = c
+ c = d
+ d = a + GR * (b - a)
+    return (b + a) / 2


def iter_events(events):
@@ -11,9 +34,7 @@ def iter_events(events):
        yield (n, t)


-def ll(a):
- lamb, alpha, mu = a
-
+def ll_old(lamb, alpha, mu):
r1 = sum(log(lamb + sum(alpha * w * mu * exp(-mu * (t1 - t2))
for (n2, t2, w) in s))
for ((n1, t1), s) in event_edges.iteritems())
@@ -24,20 +45,98 @@ def ll(a):
    return -(r1 - r2 - r3)


-if __name__ == "__main__":
- nodes, edges, events, event_edges = load(open("data.pickle", "rb"))
+def ll(lamb, alpha, mu):
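+    """Negative log-likelihood of the Hawkes model: an event at t1 sees
+    baseline intensity lamb plus alpha / d * mu * exp(-mu * (t1 - t2)) from
+    each earlier neighboring event (n2, t2, d); r2 and r3 are the
+    integrated excitation and baseline intensities."""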
+ r1 = sum(log(lamb + sum(alpha / d * mu * exp(-mu * (t1 - t2))
+ for (n2, t2, d) in s))
+ for ((n1, t1), s) in event_edges.iteritems())
+ r2 = sum(sum(alpha / d * (1 - exp(-mu * (nodes[n2] - t1)))
+ for n2, d in edges[n1].iteritems() if nodes[n2] > t1)
+ for (n1, t1) in iter_events(events))
+ r3 = lamb * sum(nodes.itervalues())
+ return -(r1 - r2 - r3)
+
+
+def sa(x, y, z, sigma=0.1, niter=100):
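+    """Simulated annealing over (y, z) with x held fixed: the proposal
+    scale sigma widens on accepted moves and shrinks on rejections, while
+    the temperature T decays geometrically."""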
+ T = 1
+ e = 1.01
+ for _ in xrange(niter):
+ fo = ll(x, y, z)
+ #print T, sigma, x, y, z, fo
+ yn = max(y + gauss(0, sigma * y + 1e-10), 0)
+ zn = max(z + gauss(0, sigma * z + 1e-10), 0)
+ fn = ll(x, yn, zn)
+ if fn < fo or exp((fo - fn) / T) > random():
+ y = yn
+ z = zn
+ sigma *= e
+ else:
+ sigma /= e
+ if sigma < 1e-5:
+ break
+ T *= 0.99
+ return y, z
+
+
+def optimize_with_sa(x, y, z):
+    # Alternate a golden-section search on lamb with simulated annealing
+    # on (alpha, mu).  The lambda closes over the current y and z, so each
+    # gss call sees the freshly updated values.
+    while True:
+        x = gss(lambda v: ll(v, y, z), 0, 1, tol=1e-10)
+        y, z = sa(x, y, z)
+        print x, y, z, ll(x, y, z)
+        sys.stdout.flush()
+
+
+def optimize_with_gss(x, y, z):
+    # Cyclic coordinate descent: minimize along each coordinate in turn
+    # with golden-section search, closing over the other two values.
+    while True:
+        x = gss(lambda v: ll(v, y, z), 0, 1, tol=1e-20)
+        y = gss(lambda v: ll(x, v, z), 0, 1, tol=1e-20)
+        z = gss(lambda v: ll(x, y, v), 0, 1, tol=1e-20)
+        print x, y, z, ll(x, y, z)
+        sys.stdout.flush()
+
+
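+def eval_point(point):
+    # Module-level worker so that multiprocessing can pickle it; the Pool
+    # workers inherit nodes/edges/events/event_edges from the parent process.
+    l, a, m = point
+    return l, a, m, ll(l, a, m)
+
+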
+def coarse_search():
d = {}
for line in open("values.txt"):
v = map(float, line.strip().split())
d[tuple(v[:3])] = v[3]
+    lamb = [20. ** i for i in range(-15, 15)]
+    alpha = [20. ** i for i in range(-15, 15)]
+    mu = [20. ** i for i in range(-15, 15)]
+    # Skip grid points whose likelihood was already computed in values.txt.
+    grid = [x for x in product(lamb, alpha, mu) if x not in d]
+    p = Pool(5)
- # lamb = [20. ** i for i in range(-15, 15)]
- # alpha = [20. ** i for i in range(-15, 15)]
- # mu = [20. ** i for i in range(-15, 15)]
+    for l, a, m, v in p.map(eval_point, grid):
+        print l, a, m, v
# for l, a, m in product(lamb, alpha, mu):
- # if (l, a, m) in d:
- # continue
- # print l, a, m, ll((l, a, m))
- print ll((2, 10000000000., 0.000000000000000000001))
- #r = basinhopping(ll, init, disp=True, stepsize=0.1, T=10000., niter=500)
- #print r.x
+ # print l, a, m, ll(*(l, a, m))
+
+
+if __name__ == "__main__":
+    with open("data2.pickle", "rb") as fh:
+        nodes, edges, events, event_edges = load(fh)
+
+    # Starting point for the searches; earlier values kept for reference.
+    # x, y, z = map(float, sys.argv[1:])
+    # x, y, z = 1.8387870804e-05, 0.0383954874502, 0.00108402386463
+    x, y, z = 0.000125, 0.000125, 0.000125
+    # optimize_with_sa(x, y, z)
+    # optimize_with_gss(x, y, z)
+    coarse_search()
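Before trusting the coordinate searches on the full likelihood, gss can be sanity-checked on a toy function with a known minimizer (a standalone sketch, assuming this file is importable as main; the quadratic is arbitrary):

    from main import gss

    f = lambda v: (v - 0.3) ** 2     # minimum at v = 0.3
    print gss(f, 0., 1., tol=1e-12)  # should print ~0.3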
diff --git a/hawkes/refine.py b/hawkes/refine.py
index 81fd547..675bee3 100644
--- a/hawkes/refine.py
+++ b/hawkes/refine.py
@@ -29,7 +29,7 @@ def ll(a):
if __name__ == "__main__":
nodes, edges, events, event_edges = load(open("data.pickle", "rb"))
d = {}
- for line in open("values.txt"):
+ for line in open("test.txt"):
v = map(float, line.strip().split())
d[tuple(v[:3])] = v[3]
l = d.items()
diff --git a/hawkes/sanity.py b/hawkes/sanity.py
index c837757..529203c 100644
--- a/hawkes/sanity.py
+++ b/hawkes/sanity.py
@@ -1,5 +1,19 @@
from cPickle import load
+from csv import reader
+from data2 import parse
+import sys
+
+
+def parse_row(row):
+ return set(e for e in map(parse, row[2:]) if e)
+
if __name__ == "__main__":
- nodes, edges, events, event_edges = load(open("data.pickle", "rb"))
- print len(nodes)
+ nodes, edges, events, event_edges = load(open("data2.pickle", "rb"))
+ with open(sys.argv[1]) as fh:
+ fh.readline()
+        rows = reader(fh)
+        d = {parse(row[1]): parse_row(row) for row in rows}
+ d = {k: v for (k, v) in d.iteritems() if v}
+ for k in d:
+ print len(d[k])
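Assuming CSV inputs matching the columns read above (the file names here are placeholders), the pipeline would be driven as:

    python data2.py nodes.csv edges.csv   # writes data2.pickle
    python main.py                        # coarse 30x30x30 grid search over (lamb, alpha, mu)
    python sanity.py groups.csv           # per-group event counts against data2.pickle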