Diffstat (limited to 'hawkes_experiments')
-rw-r--r--  hawkes_experiments/README.txt |   7 +
-rw-r--r--  hawkes_experiments/cause.py   |  72 +++
-rw-r--r--  hawkes_experiments/data.py    |  71 +++
-rw-r--r--  hawkes_experiments/main.py    | 151 ++++++
-rw-r--r--  hawkes_experiments/plot.py    |  26 ++
-rw-r--r--  hawkes_experiments/refine.py  |  70 +++
-rw-r--r--  hawkes_experiments/sanity.py  |  16 +
7 files changed, 413 insertions(+), 0 deletions(-)
diff --git a/hawkes_experiments/README.txt b/hawkes_experiments/README.txt
new file mode 100644
index 0000000..3991d3b
--- /dev/null
+++ b/hawkes_experiments/README.txt
@@ -0,0 +1,7 @@
+data and data2 - reformat the raw data files into the pickles used below
+
+main - most of the optimization (golden-section search and simulated annealing)
+
+cause - assign responsibility for each infection
+
+plot, refine, sanity - plot the likelihood surface, refine the grid search, check the pickles
\ No newline at end of file
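[Editorial note, not part of the commit: the objective that main.py and
refine.py optimize appears to be the log-likelihood of a Hawkes process with
a seasonal background rate and an exponential triggering kernel weighted by
inverse edge distance. Writing d_{ji} for the distance stored on the edge
j -> i, the intensity at node i is

    \lambda_i(t) = \lambda \bigl(1 + 0.43 \sin(0.0172\,t + 4.36)\bigr)
                 + \sum_{j \to i,\; t_j < t} \frac{\alpha}{d_{ji}}\, \mu\, e^{-\mu (t - t_j)}

where 0.0172 \approx 2\pi/365, i.e. a one-year seasonal period. The ll()
functions below return the negative log-likelihood

    -\log L = -\sum_{(i, t_i)} \log \lambda_i(t_i)
              + \sum_{(j, t_j)} \sum_{i:\, j \to i,\; T_i > t_j}
                \frac{\alpha}{d_{ji}} \bigl(1 - e^{-\mu (T_i - t_j)}\bigr)
              + \lambda \sum_i \int_0^{T_i} \bigl(1 + 0.43 \sin(0.0172\,s + 4.36)\bigr)\, ds

with T_i node i's fatal day, or MAX_TIME if it has none. The three terms are
r1, r2 and r3 in ll(); fluctuation_int() in data.py precomputes the integral
in r3 in closed form.]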
diff --git a/hawkes_experiments/cause.py b/hawkes_experiments/cause.py
new file mode 100644
index 0000000..17ec884
--- /dev/null
+++ b/hawkes_experiments/cause.py
@@ -0,0 +1,72 @@
+from cPickle import load
+from math import exp, sin
+from collections import Counter
+from csv import reader, writer
+from data2 import parse
+import sys
+import networkx as nx
+
+
+def get_fatals():
+ with open(sys.argv[1]) as fh:
+ fh.readline()
+ r = reader(fh)
+ d = {i + 1: parse(row[7]) for (i, row) in enumerate(r)}
+    # keep only nodes with a recorded fatal day; a set of (node, fatal_day)
+    # pairs makes the membership tests in cause() O(1)
+    return set((k, v) for k, v in d.iteritems() if v)
+
+
+def cause(lamb, alpha, mu):
+    # reads the module-level event_edges loaded in __main__:
+    # (node, t) -> set of (parent node, parent time, edge distance)
+    G = nx.DiGraph()
+ roots, droots, infections = 0, 0, 0
+ fatal_droots, fatal_infections, fatal_roots = 0, 0, 0
+ fatals = get_fatals()
+ for ((n1, t1), s) in event_edges.iteritems():
+ G.add_node((n1, t1))
+ if not s:
+ droots += 1
+ if (n1, t1) in fatals:
+ fatal_droots += 1
+ continue
+        # seasonal background intensity; 0.0172 ~= 2*pi/365 (one-year period)
+        background_rate = lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))
+        # triggering rate of each candidate parent, largest rate first so
+        # that neighbors[0] is the most likely source
+        neighbors = sorted([(n2, t2, alpha / d * mu * exp(-mu * (t1 - t2)))
+                            for (n2, t2, d) in s],
+                           key=lambda e: e[2], reverse=True)
+        neighbor_rate = sum(e[2] for e in neighbors)
+ if background_rate > neighbor_rate:
+ roots += 1
+ if (n1, t1) in fatals:
+ fatal_roots += 1
+ else:
+ G.add_edge((n1, t1), tuple(neighbors[0][:2]))
+ infections += 1
+ if (n1, t1) in fatals:
+ fatal_infections += 1
+ return (droots, roots, infections, fatal_droots,
+ fatal_roots, fatal_infections, G)
+
+
+def analyze_graph(G):
+    # distribution of cascade sizes (weakly connected components)
+    counts = Counter(len(c) for c in nx.weakly_connected_components(G))
+    with open("components_dist.csv", "w") as fh:
+        writer(fh).writerows(counts.most_common())
+    # flatten the inferred infection edges for export
+    with open("edges.csv", "w") as fh:
+        writer(fh).writerows((n1, t1, n2, t2)
+                             for ((n1, t1), (n2, t2)) in G.edges_iter())
+
+
+if __name__ == "__main__":
+ nodes, edges, events, event_edges = load(open("data2.pickle", "rb"))
+ lamb, alpha, mu = 1.1847510744e-05, 0.00316718040144, 0.00393069204339
+    (droots, roots, infections, fatal_droots,
+ fatal_roots, fatal_infections, G) = cause(lamb, alpha, mu)
+ analyze_graph(G)
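[Editorial note: a toy run of the attribution rule in cause(), to show how an
infection gets assigned. The parameter values are the ones hard-coded in
cause.py's __main__; the event times, node ids and distances are invented for
illustration:

    from math import exp, sin

    lamb, alpha, mu = 1.1847510744e-05, 0.00316718040144, 0.00393069204339
    t1 = 500.0                               # event we want to explain
    background = lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))
    # candidate parents as (node, event time, edge distance)
    candidates = [(7, 480.0, 2.0), (9, 300.0, 1.0)]
    rates = sorted(((n2, t2, alpha / d * mu * exp(-mu * (t1 - t2)))
                    for (n2, t2, d) in candidates),
                   key=lambda e: e[2], reverse=True)
    if background > sum(e[2] for e in rates):
        print("background (root) event")
    else:
        print("most likely parent: node %d at t=%.0f" % rates[0][:2])

With these numbers the seasonal background rate narrowly wins, so the event
would be counted as a root rather than linked to node 7.]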
diff --git a/hawkes_experiments/data.py b/hawkes_experiments/data.py
new file mode 100644
index 0000000..0f6135b
--- /dev/null
+++ b/hawkes_experiments/data.py
@@ -0,0 +1,71 @@
+from csv import DictReader
+import sys
+from itertools import product
+from cPickle import dump
+from math import cos
+
+MAX_TIME = 3012
+
+
+def parse(s):
+ return None if s == "NA" else int(float(s))
+
+
+def fluctuation_int(t):
+    # returns (T, integral over [0, T] of the seasonal factor
+    # 1 + 0.43*sin(0.0172*s + 4.36)), with T the node's fatal day,
+    # or MAX_TIME if it has none
+    if t is None:
+        t = MAX_TIME
+    return (t, t + 0.43 / 0.0172 * (cos(4.36) - cos(0.0172 * t + 4.36)))
+
+
+def load_nodes(filename):
+ with open(filename) as fh:
+ reader = DictReader(fh)
+ d = {parse(row["name"]): parse(row["fatal_day"]) for row in reader}
+ for n, t in d.iteritems():
+ d[n] = fluctuation_int(t)
+ return d
+
+
+def load_edges(filename):
+ events = {}
+ edges = {}
+ with open(filename) as fh:
+ reader = DictReader(fh)
+ for row in reader:
+ fro, to, t, dist = map(parse, [row["from"], row["to"],
+ row["t1"], row["dist"]])
+            edges.setdefault(fro, {})[to] = dist
+            events.setdefault(fro, set()).add(t)
+ return edges, events
+
+
+def compute_event_edges(events, edges):
+ event_edges = {}
+
+ for fro in events:
+ for t in events[fro]:
+ event_edges[(fro, t)] = set()
+
+ for fro in edges:
+ for to in edges[fro]:
+ try:
+ e1, e2 = events[fro], events[to]
+ except KeyError:
+ continue
+            for t1, t2 in product(e1, e2):
+                if t1 < t2:
+                    # (to, t2) was seeded above, so the key always exists
+                    event_edges[(to, t2)].add((fro, t1, edges[fro][to]))
+ return event_edges
+
+
+if __name__ == "__main__":
+ nodes = load_nodes(sys.argv[1])
+ edges, events = load_edges(sys.argv[2])
+ event_edges = compute_event_edges(events, edges)
+ dump((nodes, edges, events, event_edges), open("data-all.pickle", "wb"))
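[Editorial note: a minimal, made-up example of the structure that
compute_event_edges() builds (data.py is importable; the file loading in its
__main__ block does not run on import):

    from data import compute_event_edges

    # node 1 fires at t=10 and t=50, node 2 at t=30; one edge 1 -> 2, distance 4
    events = {1: set([10, 50]), 2: set([30])}
    edges = {1: {2: 4}}
    ee = compute_event_edges(events, edges)
    print(ee[(2, 30)])   # {(1, 10, 4)} -- only the earlier event can trigger it
    print(ee[(1, 10)])   # empty set: node 1 has no in-edges

Each key is an event (node, time); each value is the set of earlier events on
incoming edges that could have triggered it, tagged with the edge distance.
This is the shape that cause.py and the ll() functions consume.]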
diff --git a/hawkes_experiments/main.py b/hawkes_experiments/main.py
new file mode 100644
index 0000000..8f30dcf
--- /dev/null
+++ b/hawkes_experiments/main.py
@@ -0,0 +1,151 @@
+from cPickle import load
+from math import log, exp, sqrt, sin
+from random import random, gauss
+import sys
+from itertools import product
+from multiprocessing import Pool
+
+GR = (sqrt(5) - 1) / 2
+
+
+def gss(f, a, b, tol=1e-20):
+    """Golden-section search to minimize f along one coordinate on [a, b].
+
+    Both interior points are re-evaluated on every iteration: simpler than
+    the textbook variant at the cost of one extra function call per step."""
+ c = b - GR * (b - a)
+ d = a + GR * (b - a)
+ while abs(c - d) > tol:
+ fc = f(c)
+ fd = f(d)
+ if fc < fd:
+ b = d
+ d = c
+ c = b - GR * (b - a)
+ else:
+ a = c
+ c = d
+ d = a + GR * (b - a)
+ sys.stderr.write("gss:" + " ".join(map(str, [a, b, fc, fd])) + "\n")
+ sys.stderr.flush()
+ return (b + a) / 2, fc
+
+
+def iter_events(events):
+ for n, s in events.iteritems():
+ for t in s:
+ yield (n, t)
+
+
+def approx(x):
+    # numerically safe 1 - exp(-x): below 1e-10 the subtraction loses all
+    # precision, and 1 - exp(-x) ~= x to first order anyway
+    if x > 1e-10:
+        return 1 - exp(-x)
+    return x
+
+
+def ll(lamb, alpha, mu):
+    # negative log-likelihood of the Hawkes model (see the note after
+    # README.txt above): r1 sums the log-intensity at each observed event,
+    # r2 is the triggering kernel integrated over each node's observation
+    # window, and r3 is the integrated seasonal background rate
+    r1 = sum(log(lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))
+                 + sum(alpha / d * mu * exp(-mu * (t1 - t2))
+                       for (n2, t2, d) in s))
+             for ((n1, t1), s) in event_edges.iteritems())
+    r2 = sum(sum(alpha / d * approx(mu * (nodes[n2][0] - t1))
+                 for n2, d in edges[n1].iteritems()
+                 if nodes[n2][0] > t1)
+             for (n1, t1) in iter_events(events))
+    r3 = lamb * sum(node[1] for node in nodes.itervalues())
+    return -(r1 - r2 - r3)
+
+
+def sa(x, y, z, sigma=0.5, niter=1000, fc=None):
+    # simulated annealing over (alpha, mu) = (y, z) with lambda = x held
+    # fixed; sigma is a relative step that doubles on accepted moves and
+    # shrinks on rejections, and T is the annealing temperature
+    T = 0.1
+    e = 1.1
+    fo = fc if fc is not None else ll(x, y, z)
+ for _ in xrange(niter):
+ sys.stderr.write("sa: " + " ".join(map(str, [T, sigma, x, y, z, fo]))
+ + "\n")
+ sys.stderr.flush()
+ yn = max(y + gauss(0, sigma * y), 0)
+ zn = max(z + gauss(0, sigma * z), 0)
+ fn = ll(x, yn, zn)
+ if fn < fo or exp((fo - fn) / T) > random():
+ y = yn
+ z = zn
+ sigma *= 2
+ fo = fn
+ else:
+ sigma /= e
+ if sigma < 1e-5:
+ break
+ T *= 0.99
+ return y, z, fo
+
+
+def optimize_with_sa(x, y, z, niter=10):
+    # alternate golden-section search on lambda with simulated annealing on
+    # (alpha, mu), warm-starting each sa pass from the current best value
+
+ def f(x):
+ return ll(x, y, z)
+
+ y, z, fc = sa(x, y, z)
+ for _ in xrange(niter):
+ x, fc = gss(f, 0, 1e-3, tol=1e-10)
+ y, z, fc = sa(x, y, z, fc=fc)
+ print x, y, z, fc
+ sys.stdout.flush()
+
+
+def optimize_with_gss(x, y, z, niter=5):
+    # coordinate descent: one golden-section pass per parameter, each closure
+    # reading the current values of the other two coordinates
+
+ def f(x):
+ return ll(x, y, z)
+
+ def g(y):
+ return ll(x, y, z)
+
+ def h(z):
+ return ll(x, y, z)
+
+ for _ in xrange(niter):
+ y, fc = gss(g, 0, 1e50, tol=1e-10)
+ z, fc = gss(h, 0, 1e50, tol=1e-10)
+ x, fc = gss(f, 0, 1e-3, tol=1e-10)
+ print x, y, z, fc
+ sys.stdout.flush()
+
+
+def eval_point(x):
+    # worker for Pool.map; it must live at module level because Pool
+    # pickles workers by name and cannot pickle a nested function
+    l, a, m = x
+    print l, a, m, ll(l, a, m)
+    sys.stdout.flush()
+
+
+def coarse_search():
+    # grid over powers of 20, skipping points already in values.txt
+    done = set()
+    for line in open("values.txt"):
+        v = map(float, line.strip().split())
+        done.add(tuple(v[:3]))
+    lamb = [20. ** i for i in range(-10, 0)]
+    alpha = [20. ** i for i in range(-15, 15)]
+    mu = [20. ** i for i in range(-15, 15)]
+    p = Pool(5)
+    p.map(eval_point,
+          [x for x in product(lamb, alpha, mu) if x not in done])
+
+
+if __name__ == "__main__":
+ nodes, edges, events, event_edges = load(open("data-dist1.pickle", "rb"))
+ x = 1.25e-5
+ y = 1.2e16
+ z = 1.5e-20
+ sa(x, y, z)
+
+ # with open(sys.argv[1]) as fh:
+ # l = [map(float, line.strip().split()[:3]) for line in fh]
+ # for e in l:
+ # optimize_with_gss(*e)
+
+ # print ll(x, y, z)
+ # coarse_search()
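[Editorial note: gss() is generic and easy to check in isolation with a
made-up quadratic. The import works because main.py's pickle load is guarded
by __main__, though the per-iteration trace still goes to stderr:

    from main import gss

    xmin, fmin = gss(lambda x: (x - 0.3) ** 2, 0.0, 1.0, tol=1e-12)
    print(xmin)   # ~0.3
    print(fmin)   # ~0.0

The default tol=1e-20 is below double precision for brackets of order 1; the
loop still terminates, because the bracket shrinks geometrically until the
interior points collapse onto the same float, but an explicit tolerance like
the one above is safer.]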
diff --git a/hawkes_experiments/plot.py b/hawkes_experiments/plot.py
new file mode 100644
index 0000000..24d3f35
--- /dev/null
+++ b/hawkes_experiments/plot.py
@@ -0,0 +1,26 @@
+from mpl_toolkits.mplot3d import Axes3D
+from matplotlib import cm
+import matplotlib.pyplot as plt
+
+fig = plt.figure()
+ax = fig.add_subplot(111, projection='3d')
+d = {}
+for line in open("values.txt"):
+    try:
+        v = map(float, line.strip().split())
+        key = (v[0], v[2])  # (lambda, mu); keep the minimum over alpha
+        d[key] = min(d.get(key, v[3]), v[3])
+    except (ValueError, IndexError):
+        continue  # skip malformed or incomplete lines
+
+x, y, z = [], [], []
+for k, v in d.iteritems():
+    # rescale parameters and objective so the axis ticks stay readable
+    x.append(k[0] / 1000000.)
+    y.append(k[1] / 1000000.)
+    z.append(v / 100000.)
+
+surf = ax.plot_trisurf(x, y, z)
+plt.show()
diff --git a/hawkes_experiments/refine.py b/hawkes_experiments/refine.py
new file mode 100644
index 0000000..02bbe07
--- /dev/null
+++ b/hawkes_experiments/refine.py
@@ -0,0 +1,70 @@
+from cPickle import load
+from itertools import product
+from math import exp, log, sin
+import sys
+from multiprocessing import Pool
+
+
+def iter_events(events):
+ for n, s in events.iteritems():
+ for t in s:
+ yield (n, t)
+
+
+def inprod(a, b):
+    # element-wise product of two 3-tuples (used to scale a parameter point)
+    return tuple(float(x * y) for x, y in zip(a, b))
+
+
+def approx(x):
+    # numerically safe 1 - exp(-x): below 1e-10 the subtraction loses all
+    # precision, and 1 - exp(-x) ~= x to first order anyway
+    if x > 1e-10:
+        return 1 - exp(-x)
+    return x
+
+
+def ll(lamb, alpha, mu):
+    # negative log-likelihood; identical to ll() in main.py
+ r1 = sum(log(lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))
+ + sum(alpha / d * mu * exp(-mu * (t1 - t2))
+ for (n2, t2, d) in s))
+ for ((n1, t1), s) in event_edges.iteritems())
+ r2 = sum(sum(alpha / d * approx(mu * (nodes[n2][0] - t1))
+ for n2, d in edges[n1].iteritems()
+ if nodes[n2][0] > t1)
+ for (n1, t1) in iter_events(events))
+ r3 = lamb * sum(node[1] for node in nodes.itervalues())
+ return -(r1 - r2 - r3)
+
+
+def get_values():
+    d = {}
+    for line in open(sys.argv[1]):
+        v = map(float, line.strip().split())
+        d[tuple(v[:3])] = v[3]
+    best = sorted(d.items(), key=lambda x: x[1])
+    # points already covered by a previous refinement pass are also skipped
+    for line in open("refine.txt"):
+        v = map(float, line.strip().split())
+        d[tuple(v[:3])] = v[3]
+    for a, _ in best[:100]:
+        # scale each coordinate of the point by 1/3, 1/2, 1, 2 or 3
+        t = [1. / i for i in range(2, 4)] + [float(i) for i in range(1, 4)]
+        for b in product(t, repeat=3):
+            lam, al, m = inprod(a, b)
+            if (lam, al, m) in d:
+                continue
+            yield (lam, al, m)
+
+
+def eval_point(x):
+    # worker for Pool.map; it must live at module level because Pool
+    # pickles workers by name and cannot pickle a nested function
+    l, a, m = x
+    print l, a, m, ll(l, a, m)
+    sys.stdout.flush()
+
+
+def refine():
+    p = Pool(5)
+    p.map(eval_point, list(get_values()))
+
+
+if __name__ == "__main__":
+ nodes, edges, events, event_edges = load(open("data-all.pickle", "rb"))
+ refine()
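[Editorial note: one refinement neighbourhood from get_values(), shown on a
made-up point. Each of the 100 best points is scaled coordinate-wise by
1/3, 1/2, 1, 2 and 3, giving up to 5^3 = 125 candidates per point:

    from itertools import product

    point = (1.25e-05, 3.2e-03, 3.9e-03)    # (lambda, alpha, mu), invented
    scales = [1. / 2, 1. / 3, 1., 2., 3.]
    grid = set(tuple(p * s for p, s in zip(point, b))
               for b in product(scales, repeat=3))
    print(len(grid))   # 125

Already-evaluated points (from values.txt and refine.txt) are filtered out
before the pool evaluates the rest.]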
diff --git a/hawkes_experiments/sanity.py b/hawkes_experiments/sanity.py
new file mode 100644
index 0000000..b6f25eb
--- /dev/null
+++ b/hawkes_experiments/sanity.py
@@ -0,0 +1,16 @@
+from cPickle import load
+from csv import reader
+from data2 import parse
+import sys
+
+
+def parse_row(row):
+    # parse the remaining columns, dropping NA (None) and zero entries
+    return set(e for e in map(parse, row[2:]) if e)
+
+if __name__ == "__main__":
+ nodes, edges, events, event_edges = load(open("data2.pickle", "rb"))
+    with open(sys.argv[1]) as fh:
+        fh.readline()  # skip the header row
+        rows = reader(fh)  # avoid shadowing csv.reader itself
+        d = {parse(row[1]): parse_row(row) for row in rows}
+    d = {k: v for (k, v) in d.iteritems() if v}
+    # neither d nor the pickle is used further; this script only checks
+    # that both load and parse without errors