diff options
Diffstat (limited to 'hawkes')
| -rw-r--r-- | hawkes/README.txt | 7 | ||||
| -rw-r--r-- | hawkes/cause.py | 72 | ||||
| -rw-r--r-- | hawkes/data.py | 71 | ||||
| -rw-r--r-- | hawkes/main.py | 151 | ||||
| -rw-r--r-- | hawkes/plot.py | 26 | ||||
| -rw-r--r-- | hawkes/refine.py | 70 | ||||
| -rw-r--r-- | hawkes/sanity.py | 16 |
7 files changed, 0 insertions, 413 deletions
diff --git a/hawkes/README.txt b/hawkes/README.txt deleted file mode 100644 index 3991d3b..0000000 --- a/hawkes/README.txt +++ /dev/null @@ -1,7 +0,0 @@ - - -data and data2 - reformatting the data files - -main - most of the optimization - -cause - assign responsibility for each infection
\ No newline at end of file diff --git a/hawkes/cause.py b/hawkes/cause.py deleted file mode 100644 index 17ec884..0000000 --- a/hawkes/cause.py +++ /dev/null @@ -1,72 +0,0 @@ -from cPickle import load -from math import exp, sin -from collections import Counter -from csv import reader, writer -from data2 import parse -import sys -import networkx as nx -import matplotlib.pyplot as plt -import numpy as np - - -def get_fatals(): - with open(sys.argv[1]) as fh: - fh.readline() - r = reader(fh) - d = {i + 1: parse(row[7]) for (i, row) in enumerate(r)} - d = {k: v for k, v in d.iteritems() if v} - return d.items() - - -def cause(lamb, alpha, mu): - G = nx.DiGraph() - roots, droots, infections = 0, 0, 0 - fatal_droots, fatal_infections, fatal_roots = 0, 0, 0 - fatals = get_fatals() - for ((n1, t1), s) in event_edges.iteritems(): - G.add_node((n1, t1)) - if not s: - droots += 1 - if (n1, t1) in fatals: - fatal_droots += 1 - continue - background_rate = lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36)) - neighbors = sorted([(n2, t2, alpha / d * mu * exp(-mu * (t1 - t2))) - for (n2, t2, d) in s], reverse=True) - neighbor_rate = sum(e[2] for e in neighbors) - # if sum(e[2] for e in prl[:1]) > br: - # G.add_edge((n1, t1), tuple(prl[0][:2])) - if background_rate > neighbor_rate: - roots += 1 - if (n1, t1) in fatals: - fatal_roots += 1 - else: - G.add_edge((n1, t1), tuple(neighbors[0][:2])) - # l.append(prl[0][2] / br) - infections += 1 - if (n1, t1) in fatals: - fatal_infections += 1 - # l.sort(reverse=True) - # plt.plot(l) - # plt.show() - return (droots, roots, infections, fatal_droots, - fatal_roots, fatal_infections, G) - - -def analyze_graph(G): - counts = Counter(len(c) for c in nx.weakly_connected_components(G)) - w = writer(open("components_dist.csv", "w")) - w.writerows(counts.most_common()) - edges = ((n1, t1, n2, t2) for ((n1, t1), (n2, t2)) in G.edges_iter()) - e = writer(open("edges.csv", "w")) - e.writerows(edges) - - -if __name__ == "__main__": - nodes, edges, events, event_edges = load(open("data2.pickle", "rb")) - lamb, alpha, mu = 1.1847510744e-05, 0.00316718040144, 0.00393069204339 - # print len(event_edges), sum(len(e) for e in events.itervalues()) - # print len(fatal()) - (doors, roots, infections, fatal_droots, - fatal_roots, fatal_infections, G) = cause(lamb, alpha, mu) - analyze_graph(G) diff --git a/hawkes/data.py b/hawkes/data.py deleted file mode 100644 index 0f6135b..0000000 --- a/hawkes/data.py +++ /dev/null @@ -1,71 +0,0 @@ -from csv import DictReader -import sys -from itertools import product -from cPickle import dump -from math import cos - -MAX_TIME = 3012 - - -def parse(s): - return None if s == "NA" else int(float(s)) - - -def fluctuation_int(t): - if t is None: - t = MAX_TIME - return (t, t + 0.43 / 0.0172 * (cos(4.36) - cos(0.0172 * t + 4.36))) - - -def load_nodes(filename): - with open(filename) as fh: - reader = DictReader(fh) - d = {parse(row["name"]): parse(row["fatal_day"]) for row in reader} - for n, t in d.iteritems(): - d[n] = fluctuation_int(t) - return d - - -def load_edges(filename): - events = {} - edges = {} - with open(filename) as fh: - reader = DictReader(fh) - for row in reader: - fro, to, t, dist = map(parse, [row["from"], row["to"], - row["t1"], row["dist"]]) - d = edges.get(fro, dict()) - d[to] = dist - edges[fro] = d - s = events.get(fro, set()) - s.add(t) - events[fro] = s - return edges, events - - -def compute_event_edges(events, edges): - event_edges = {} - - for fro in events: - for t in events[fro]: - event_edges[(fro, t)] = set() - - for fro in edges: - for to in edges[fro]: - try: - e1, e2 = events[fro], events[to] - except KeyError: - continue - for t1, t2 in product(e1, e2): - if t1 < t2: - s = event_edges[(to, t2)] - s.add((fro, t1, edges[fro][to])) - event_edges[(to, t2)] = s - return event_edges - - -if __name__ == "__main__": - nodes = load_nodes(sys.argv[1]) - edges, events = load_edges(sys.argv[2]) - event_edges = compute_event_edges(events, edges) - dump((nodes, edges, events, event_edges), open("data-all.pickle", "wb")) diff --git a/hawkes/main.py b/hawkes/main.py deleted file mode 100644 index 8f30dcf..0000000 --- a/hawkes/main.py +++ /dev/null @@ -1,151 +0,0 @@ -from cPickle import load -from math import log, exp, sqrt, sin -from random import random, gauss -import sys -from itertools import product -from multiprocessing import Pool - -GR = (sqrt(5) - 1) / 2 - - -def gss(f, a, b, tol=1e-20): - """golden section search to minimize along one coordinate""" - c = b - GR * (b - a) - d = a + GR * (b - a) - while abs(c - d) > tol: - fc = f(c) - fd = f(d) - if fc < fd: - b = d - d = c - c = b - GR * (b - a) - else: - a = c - c = d - d = a + GR * (b - a) - sys.stderr.write("gss:" + " ".join(map(str, [a, b, fc, fd])) + "\n") - sys.stderr.flush() - return (b + a) / 2, fc - - -def iter_events(events): - for n, s in events.iteritems(): - for t in s: - yield (n, t) - - -def approx(x): - if x > 1e-10: - return 1 - exp(-x) - else: - return x - - -def ll(lamb, alpha, mu): - r1 = sum(log(lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36)) - + sum(alpha / d * mu * exp(-mu * (t1 - t2)) - for (n2, t2, d) in s)) - for ((n1, t1), s) in event_edges.iteritems()) - r2 = sum(sum(alpha / d * approx(mu * (nodes[n2][0] - t1)) - for n2, d in edges[n1].iteritems() - if nodes[n2][0] > t1) - for (n1, t1) in iter_events(events)) - r3 = lamb * sum(node[1] for node in nodes.itervalues()) - print r1, r2, r3 - return -(r1 - r2 - r3) - - -def sa(x, y, z, sigma=0.5, niter=1000, fc=None): - T = 0.1 - e = 1.1 - if fc: - fo = fc - else: - fo = ll(x, y, z) - for _ in xrange(niter): - sys.stderr.write("sa: " + " ".join(map(str, [T, sigma, x, y, z, fo])) - + "\n") - sys.stderr.flush() - yn = max(y + gauss(0, sigma * y), 0) - zn = max(z + gauss(0, sigma * z), 0) - fn = ll(x, yn, zn) - if fn < fo or exp((fo - fn) / T) > random(): - y = yn - z = zn - sigma *= 2 - fo = fn - else: - sigma /= e - if sigma < 1e-5: - break - T *= 0.99 - return y, z, fo - - -def optimize_with_sa(x, y, z, niter=10): - - def f(x): - return ll(x, y, z) - - # y, z = sa(x, y, z) - y, z, fc = sa(x, y, z) - for _ in xrange(niter): - x, fc = gss(f, 0, 1e-3, tol=1e-10) - y, z, fc = sa(x, y, z, fc=fc) - print x, y, z, fc - sys.stdout.flush() - - -def optimize_with_gss(x, y, z, niter=5): - - def f(x): - return ll(x, y, z) - - def g(y): - return ll(x, y, z) - - def h(z): - return ll(x, y, z) - - for _ in xrange(niter): - y, fc = gss(g, 0, 1e50, tol=1e-10) - z, fc = gss(h, 0, 1e50, tol=1e-10) - x, fc = gss(f, 0, 1e-3, tol=1e-10) - print x, y, z, fc - sys.stdout.flush() - - -def coarse_search(): - d = {} - for line in open("values.txt"): - v = map(float, line.strip().split()) - d[tuple(v[:3])] = v[3] - p = Pool(5) - lamb = [20. ** i for i in range(-10, 0)] - alpha = [20. ** i for i in range(-15, 15)] - mu = [20. ** i for i in range(-15, 15)] - - def aux(x): - if x in d: - return - l, a, m = x - print l, a, m, ll(l, a, m) - sys.stdout.flush() - - p.map(aux, product(lamb, alpha, mu)) - - -if __name__ == "__main__": - nodes, edges, events, event_edges = load(open("data-dist1.pickle", "rb")) - x = 1.25e-5 - y = 1.2e16 - z = 1.5e-20 - sa(x, y, z) - - # with open(sys.argv[1]) as fh: - # l = [map(float, line.strip().split()[:3]) for line in fh] - # for e in l: - # optimize_with_gss(*e) - - # print ll(x, y, z) - # coarse_search() diff --git a/hawkes/plot.py b/hawkes/plot.py deleted file mode 100644 index 24d3f35..0000000 --- a/hawkes/plot.py +++ /dev/null @@ -1,26 +0,0 @@ -from mpl_toolkits.mplot3d import Axes3D -from matplotlib import cm -import matplotlib.pyplot as plt - -fig = plt.figure() -ax = fig.gca(projection='3d') -d = {} -for line in open("values.txt"): - v = map(float, line.strip().split()) - try: - if (v[0], v[2]) in d: - d[(v[0], v[2])] = min(d[(v[0], v[2])], v[3]) - else: - d[(v[0], v[2])] = v[3] - except: - continue - -x, y, z = [], [], [] -for k, v in d.iteritems(): - x.append(k[0] / 1000000.) - y.append(k[1] / 1000000.) - z.append(v / 100000.) - -print x -surf = ax.plot_trisurf(x, y, z) -plt.show() diff --git a/hawkes/refine.py b/hawkes/refine.py deleted file mode 100644 index 02bbe07..0000000 --- a/hawkes/refine.py +++ /dev/null @@ -1,70 +0,0 @@ -from cPickle import load -from itertools import product -from math import exp, log, sin -import sys -from multiprocessing import Pool - - -def iter_events(events): - for n, s in events.iteritems(): - for t in s: - yield (n, t) - - -def inprod(a, b): - return tuple(float(x * y) for x, y in zip(a, b)) - - -def approx(x): - if x > 1e-10: - return 1 - exp(-x) - else: - return x - - -def ll(lamb, alpha, mu): - r1 = sum(log(lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36)) - + sum(alpha / d * mu * exp(-mu * (t1 - t2)) - for (n2, t2, d) in s)) - for ((n1, t1), s) in event_edges.iteritems()) - r2 = sum(sum(alpha / d * approx(mu * (nodes[n2][0] - t1)) - for n2, d in edges[n1].iteritems() - if nodes[n2][0] > t1) - for (n1, t1) in iter_events(events)) - r3 = lamb * sum(node[1] for node in nodes.itervalues()) - return -(r1 - r2 - r3) - - -def get_values(): - d = {} - for line in open(sys.argv[1]): - v = map(float, line.strip().split()) - d[tuple(v[:3])] = v[3] - l = d.items() - l.sort(key=lambda x: x[1]) - for line in open("refine.txt"): - v = map(float, line.strip().split()) - d[tuple(v[:3])] = v[3] - for a, _ in l[:100]: - t = [1. / i for i in range(2, 4)] + [float(i) for i in range(1, 4)] - for b in product(t, repeat=3): - l, al, m = inprod(a, b) - if (l, al, m) in d: - continue - yield (l, al, m) - - -def refine(): - p = Pool(5) - - def aux(x): - l, a, m = x - print l, a, m, ll(l, a, m) - sys.stdout.flush() - - p.map(aux, get_values()) - - -if __name__ == "__main__": - nodes, edges, events, event_edges = load(open("data-all.pickle", "rb")) - refine() diff --git a/hawkes/sanity.py b/hawkes/sanity.py deleted file mode 100644 index b6f25eb..0000000 --- a/hawkes/sanity.py +++ /dev/null @@ -1,16 +0,0 @@ -from cPickle import load -from csv import reader -from data2 import parse -import sys - - -def parse_row(row): - return set(e for e in map(parse, row[2:]) if e) - -if __name__ == "__main__": - nodes, edges, events, event_edges = load(open("data2.pickle", "rb")) - with open(sys.argv[1]) as fh: - fh.readline() - reader = reader(fh) - d = {parse(row[1]): parse_row(row) for row in reader} - d = {k: v for (k, v) in d.iteritems() if v} |
