Diffstat (limited to 'hawkes_experiments')
-rw-r--r--   hawkes_experiments/README.txt |   7
-rw-r--r--   hawkes_experiments/cause.py   |  72
-rw-r--r--   hawkes_experiments/data.py    |  71
-rw-r--r--   hawkes_experiments/main.py    | 151
-rw-r--r--   hawkes_experiments/plot.py    |  26
-rw-r--r--   hawkes_experiments/refine.py  |  70
-rw-r--r--   hawkes_experiments/sanity.py  |  16
7 files changed, 413 insertions, 0 deletions
diff --git a/hawkes_experiments/README.txt b/hawkes_experiments/README.txt
new file mode 100644
index 0000000..3991d3b
--- /dev/null
+++ b/hawkes_experiments/README.txt
@@ -0,0 +1,7 @@
+
+
+data and data2 - reformatting the data files
+
+main - most of the optimization
+
+cause - assign responsibility for each infection
\ No newline at end of file
diff --git a/hawkes_experiments/cause.py b/hawkes_experiments/cause.py
new file mode 100644
index 0000000..17ec884
--- /dev/null
+++ b/hawkes_experiments/cause.py
@@ -0,0 +1,72 @@
+from cPickle import load
+from math import exp, sin
+from collections import Counter
+from csv import reader, writer
+from data2 import parse
+import sys
+import networkx as nx
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def get_fatals():
+    with open(sys.argv[1]) as fh:
+        fh.readline()
+        r = reader(fh)
+        d = {i + 1: parse(row[7]) for (i, row) in enumerate(r)}
+    d = {k: v for k, v in d.iteritems() if v}
+    return d.items()
+
+
+def cause(lamb, alpha, mu):
+    G = nx.DiGraph()
+    roots, droots, infections = 0, 0, 0
+    fatal_droots, fatal_infections, fatal_roots = 0, 0, 0
+    fatals = get_fatals()
+    for ((n1, t1), s) in event_edges.iteritems():
+        G.add_node((n1, t1))
+        if not s:
+            droots += 1
+            if (n1, t1) in fatals:
+                fatal_droots += 1
+            continue
+        background_rate = lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))
+        neighbors = sorted([(n2, t2, alpha / d * mu * exp(-mu * (t1 - t2)))
+                            for (n2, t2, d) in s], reverse=True)
+        neighbor_rate = sum(e[2] for e in neighbors)
+        # if sum(e[2] for e in prl[:1]) > br:
+        #     G.add_edge((n1, t1), tuple(prl[0][:2]))
+        if background_rate > neighbor_rate:
+            roots += 1
+            if (n1, t1) in fatals:
+                fatal_roots += 1
+        else:
+            G.add_edge((n1, t1), tuple(neighbors[0][:2]))
+            # l.append(prl[0][2] / br)
+            infections += 1
+            if (n1, t1) in fatals:
+                fatal_infections += 1
+    # l.sort(reverse=True)
+    # plt.plot(l)
+    # plt.show()
+    return (droots, roots, infections, fatal_droots,
+            fatal_roots, fatal_infections, G)
+
+
+def analyze_graph(G):
+    counts = Counter(len(c) for c in nx.weakly_connected_components(G))
+    w = writer(open("components_dist.csv", "w"))
+    w.writerows(counts.most_common())
+    edges = ((n1, t1, n2, t2) for ((n1, t1), (n2, t2)) in G.edges_iter())
+    e = writer(open("edges.csv", "w"))
+    e.writerows(edges)
+
+
+if __name__ == "__main__":
+    nodes, edges, events, event_edges = load(open("data2.pickle", "rb"))
+    lamb, alpha, mu = 1.1847510744e-05, 0.00316718040144, 0.00393069204339
+    # print len(event_edges), sum(len(e) for e in events.itervalues())
+    # print len(fatal())
+    (droots, roots, infections, fatal_droots,
+     fatal_roots, fatal_infections, G) = cause(lamb, alpha, mu)
+    analyze_graph(G)
diff --git a/hawkes_experiments/data.py b/hawkes_experiments/data.py
new file mode 100644
index 0000000..0f6135b
--- /dev/null
+++ b/hawkes_experiments/data.py
@@ -0,0 +1,71 @@
+from csv import DictReader
+import sys
+from itertools import product
+from cPickle import dump
+from math import cos
+
+MAX_TIME = 3012
+
+
+def parse(s):
+    return None if s == "NA" else int(float(s))
+
+
+def fluctuation_int(t):
+    if t is None:
+        t = MAX_TIME
+    return (t, t + 0.43 / 0.0172 * (cos(4.36) - cos(0.0172 * t + 4.36)))
+
+
+def load_nodes(filename):
+    with open(filename) as fh:
+        reader = DictReader(fh)
+        d = {parse(row["name"]): parse(row["fatal_day"]) for row in reader}
+    for n, t in d.iteritems():
+        d[n] = fluctuation_int(t)
+    return d
+
+
+def load_edges(filename):
+    events = {}
+    edges = {}
+    with open(filename) as fh:
+        reader = DictReader(fh)
+        for row in reader:
+            fro, to, t, dist = map(parse, [row["from"], row["to"],
+                                           row["t1"], row["dist"]])
+            d = edges.get(fro, dict())
+            d[to] = dist
+            edges[fro] = d
+            s = events.get(fro, set())
+            s.add(t)
+            events[fro] = s
+    return edges, events
+
+
+def compute_event_edges(events, edges):
+    event_edges = {}
+
+    for fro in events:
+        for t in events[fro]:
+            event_edges[(fro, t)] = set()
+
+    for fro in edges:
+        for to in edges[fro]:
+            try:
+                e1, e2 = events[fro], events[to]
+            except KeyError:
+                continue
+            for t1, t2 in product(e1, e2):
+                if t1 < t2:
+                    s = event_edges[(to, t2)]
+                    s.add((fro, t1, edges[fro][to]))
+                    event_edges[(to, t2)] = s
+    return event_edges
+
+
+if __name__ == "__main__":
+    nodes = load_nodes(sys.argv[1])
+    edges, events = load_edges(sys.argv[2])
+    event_edges = compute_event_edges(events, edges)
+    dump((nodes, edges, events, event_edges), open("data-all.pickle", "wb"))
diff --git a/hawkes_experiments/main.py b/hawkes_experiments/main.py
new file mode 100644
index 0000000..8f30dcf
--- /dev/null
+++ b/hawkes_experiments/main.py
@@ -0,0 +1,151 @@
+from cPickle import load
+from math import log, exp, sqrt, sin
+from random import random, gauss
+import sys
+from itertools import product
+from multiprocessing import Pool
+
+GR = (sqrt(5) - 1) / 2
+
+
+def gss(f, a, b, tol=1e-20):
+    """golden section search to minimize along one coordinate"""
+    c = b - GR * (b - a)
+    d = a + GR * (b - a)
+    while abs(c - d) > tol:
+        fc = f(c)
+        fd = f(d)
+        if fc < fd:
+            b = d
+            d = c
+            c = b - GR * (b - a)
+        else:
+            a = c
+            c = d
+            d = a + GR * (b - a)
+        sys.stderr.write("gss:" + " ".join(map(str, [a, b, fc, fd])) + "\n")
+        sys.stderr.flush()
+    return (b + a) / 2, fc
+
+
+def iter_events(events):
+    for n, s in events.iteritems():
+        for t in s:
+            yield (n, t)
+
+
+def approx(x):
+    if x > 1e-10:
+        return 1 - exp(-x)
+    else:
+        return x
+
+
+def ll(lamb, alpha, mu):
+    r1 = sum(log(lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36)) +
+                 sum(alpha / d * mu * exp(-mu * (t1 - t2))
+                     for (n2, t2, d) in s))
+             for ((n1, t1), s) in event_edges.iteritems())
+    r2 = sum(sum(alpha / d * approx(mu * (nodes[n2][0] - t1))
+                 for n2, d in edges[n1].iteritems()
+                 if nodes[n2][0] > t1)
+             for (n1, t1) in iter_events(events))
+    r3 = lamb * sum(node[1] for node in nodes.itervalues())
+    print r1, r2, r3
+    return -(r1 - r2 - r3)
+
+
+def sa(x, y, z, sigma=0.5, niter=1000, fc=None):
+    T = 0.1
+    e = 1.1
+    if fc:
+        fo = fc
+    else:
+        fo = ll(x, y, z)
+    for _ in xrange(niter):
+        sys.stderr.write("sa: " + " ".join(map(str, [T, sigma, x, y, z, fo])) +
+                         "\n")
+        sys.stderr.flush()
+        yn = max(y + gauss(0, sigma * y), 0)
+        zn = max(z + gauss(0, sigma * z), 0)
+        fn = ll(x, yn, zn)
+        if fn < fo or exp((fo - fn) / T) > random():
+            y = yn
+            z = zn
+            sigma *= 2
+            fo = fn
+        else:
+            sigma /= e
+            if sigma < 1e-5:
+                break
+        T *= 0.99
+    return y, z, fo
+
+
+def optimize_with_sa(x, y, z, niter=10):
+
+    def f(x):
+        return ll(x, y, z)
+
+    # y, z = sa(x, y, z)
+    y, z, fc = sa(x, y, z)
+    for _ in xrange(niter):
+        x, fc = gss(f, 0, 1e-3, tol=1e-10)
+        y, z, fc = sa(x, y, z, fc=fc)
+        print x, y, z, fc
+        sys.stdout.flush()
+
+
+def optimize_with_gss(x, y, z, niter=5):
+
+    def f(x):
+        return ll(x, y, z)
+
+    def g(y):
+        return ll(x, y, z)
+
+    def h(z):
+        return ll(x, y, z)
+
+    for _ in xrange(niter):
+        y, fc = gss(g, 0, 1e50, tol=1e-10)
+        z, fc = gss(h, 0, 1e50, tol=1e-10)
+        x, fc = gss(f, 0, 1e-3, tol=1e-10)
+        print x, y, z, fc
+        sys.stdout.flush()
+
+
+def coarse_search():
+    d = {}
+    for line in open("values.txt"):
+        v = map(float, line.strip().split())
+        d[tuple(v[:3])] = v[3]
+    p = Pool(5)
+    lamb = [20. ** i for i in range(-10, 0)]
+    alpha = [20. ** i for i in range(-15, 15)]
+    mu = [20. ** i for i in range(-15, 15)]
+
+    def aux(x):
+        if x in d:
+            return
+        l, a, m = x
+        print l, a, m, ll(l, a, m)
+        sys.stdout.flush()
+
+    p.map(aux, product(lamb, alpha, mu))
+
+
+if __name__ == "__main__":
+    nodes, edges, events, event_edges = load(open("data-dist1.pickle", "rb"))
+    x = 1.25e-5
+    y = 1.2e16
+    z = 1.5e-20
+    sa(x, y, z)
+
+    # with open(sys.argv[1]) as fh:
+    #     l = [map(float, line.strip().split()[:3]) for line in fh]
+    # for e in l:
+    #     optimize_with_gss(*e)
+
+    # print ll(x, y, z)
+    # coarse_search()
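The scripts above and below all share one conditional intensity: a seasonal background rate plus a distance-damped exponential excitation kernel. A minimal sketch of that intensity, assuming nothing beyond what ll() in main.py/refine.py and cause() in cause.py already encode (the helper names are mine, not part of the commit):

# intensity_sketch.py -- illustrative only, not part of this commit; the
# constants and kernel form mirror ll() in main.py/refine.py and cause.py
from math import exp, sin


def background_rate(lamb, t):
    # seasonal background intensity: lamb modulated by a yearly cycle
    # (angular frequency 0.0172, roughly 2*pi/365; amplitude 0.43; phase 4.36)
    return lamb * (1 + 0.43 * sin(0.0172 * t + 4.36))


def excitation(alpha, mu, t, parents):
    # parents: (node, t_parent, distance) triples as stored in event_edges;
    # each earlier event contributes an exponentially decaying kernel,
    # damped by the edge distance d
    return sum(alpha / d * mu * exp(-mu * (t - t2)) for (_, t2, d) in parents)


def intensity(lamb, alpha, mu, t, parents):
    # total conditional intensity at an event: r1 in ll() sums the log of
    # this, and cause() attributes each event to whichever term dominates
    return background_rate(lamb, t) + excitation(alpha, mu, t, parents)

Note that cause() compares the background term against the summed excitation and, when excitation wins, credits the single strongest parent (neighbors[0]) rather than splitting responsibility among them.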
diff --git a/hawkes_experiments/plot.py b/hawkes_experiments/plot.py
new file mode 100644
index 0000000..24d3f35
--- /dev/null
+++ b/hawkes_experiments/plot.py
@@ -0,0 +1,26 @@
+from mpl_toolkits.mplot3d import Axes3D
+from matplotlib import cm
+import matplotlib.pyplot as plt
+
+fig = plt.figure()
+ax = fig.gca(projection='3d')
+d = {}
+for line in open("values.txt"):
+    v = map(float, line.strip().split())
+    try:
+        if (v[0], v[2]) in d:
+            d[(v[0], v[2])] = min(d[(v[0], v[2])], v[3])
+        else:
+            d[(v[0], v[2])] = v[3]
+    except:
+        continue
+
+x, y, z = [], [], []
+for k, v in d.iteritems():
+    x.append(k[0] / 1000000.)
+    y.append(k[1] / 1000000.)
+    z.append(v / 100000.)
+
+print x
+surf = ax.plot_trisurf(x, y, z)
+plt.show()
diff --git a/hawkes_experiments/refine.py b/hawkes_experiments/refine.py
new file mode 100644
index 0000000..02bbe07
--- /dev/null
+++ b/hawkes_experiments/refine.py
@@ -0,0 +1,70 @@
+from cPickle import load
+from itertools import product
+from math import exp, log, sin
+import sys
+from multiprocessing import Pool
+
+
+def iter_events(events):
+    for n, s in events.iteritems():
+        for t in s:
+            yield (n, t)
+
+
+def inprod(a, b):
+    return tuple(float(x * y) for x, y in zip(a, b))
+
+
+def approx(x):
+    if x > 1e-10:
+        return 1 - exp(-x)
+    else:
+        return x
+
+
+def ll(lamb, alpha, mu):
+    r1 = sum(log(lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36)) +
+                 sum(alpha / d * mu * exp(-mu * (t1 - t2))
+                     for (n2, t2, d) in s))
+             for ((n1, t1), s) in event_edges.iteritems())
+    r2 = sum(sum(alpha / d * approx(mu * (nodes[n2][0] - t1))
+                 for n2, d in edges[n1].iteritems()
+                 if nodes[n2][0] > t1)
+             for (n1, t1) in iter_events(events))
+    r3 = lamb * sum(node[1] for node in nodes.itervalues())
+    return -(r1 - r2 - r3)
+
+
+def get_values():
+    d = {}
+    for line in open(sys.argv[1]):
+        v = map(float, line.strip().split())
+        d[tuple(v[:3])] = v[3]
+    l = d.items()
+    l.sort(key=lambda x: x[1])
+    for line in open("refine.txt"):
+        v = map(float, line.strip().split())
+        d[tuple(v[:3])] = v[3]
+    for a, _ in l[:100]:
+        t = [1. / i for i in range(2, 4)] + [float(i) for i in range(1, 4)]
+        for b in product(t, repeat=3):
+            l, al, m = inprod(a, b)
+            if (l, al, m) in d:
+                continue
+            yield (l, al, m)
+
+
+def refine():
+    p = Pool(5)
+
+    def aux(x):
+        l, a, m = x
+        print l, a, m, ll(l, a, m)
+        sys.stdout.flush()
+
+    p.map(aux, get_values())
+
+
+if __name__ == "__main__":
+    nodes, edges, events, event_edges = load(open("data-all.pickle", "rb"))
+    refine()
diff --git a/hawkes_experiments/sanity.py b/hawkes_experiments/sanity.py
new file mode 100644
index 0000000..b6f25eb
--- /dev/null
+++ b/hawkes_experiments/sanity.py
@@ -0,0 +1,16 @@
+from cPickle import load
+from csv import reader
+from data2 import parse
+import sys
+
+
+def parse_row(row):
+    return set(e for e in map(parse, row[2:]) if e)
+
+if __name__ == "__main__":
+    nodes, edges, events, event_edges = load(open("data2.pickle", "rb"))
+    with open(sys.argv[1]) as fh:
+        fh.readline()
+        reader = reader(fh)
+        d = {parse(row[1]): parse_row(row) for row in reader}
+        d = {k: v for (k, v) in d.iteritems() if v}
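A related detail: the closed form in data.py's fluctuation_int is the integral of the same seasonal modulation over [0, t], and its second element feeds the r3 compensator term of ll() via node[1]. A quick numerical cross-check, as a hypothetical script that is not part of this commit:

# check_fluctuation.py -- hypothetical sanity check, not part of this commit
from math import cos, sin


def closed_form(t):
    # second element of data.py's fluctuation_int(t): the integral of
    # 1 + 0.43*sin(0.0172*u + 4.36) over u in [0, t]
    return t + 0.43 / 0.0172 * (cos(4.36) - cos(0.0172 * t + 4.36))


def numeric(t, steps=200000):
    # midpoint Riemann sum of the same integrand, for comparison only
    h = float(t) / steps
    return sum((1 + 0.43 * sin(0.0172 * (i + 0.5) * h + 4.36)) * h
               for i in xrange(steps))


if __name__ == "__main__":
    for t in (10, 365, 3012):  # 3012 is MAX_TIME in data.py
        print t, closed_form(t), numeric(t)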
