summaryrefslogtreecommitdiffstats
path: root/hawkes
diff options
context:
space:
mode:
Diffstat (limited to 'hawkes')
-rw-r--r--hawkes/README.txt7
-rw-r--r--hawkes/cause.py72
-rw-r--r--hawkes/data.py71
-rw-r--r--hawkes/main.py151
-rw-r--r--hawkes/plot.py26
-rw-r--r--hawkes/refine.py70
-rw-r--r--hawkes/sanity.py16
7 files changed, 0 insertions, 413 deletions
diff --git a/hawkes/README.txt b/hawkes/README.txt
deleted file mode 100644
index 3991d3b..0000000
--- a/hawkes/README.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-
-
-data and data2 - reformatting the data files
-
-main - most of the optimization
-
-cause - assign responsibility for each infection \ No newline at end of file
diff --git a/hawkes/cause.py b/hawkes/cause.py
deleted file mode 100644
index 17ec884..0000000
--- a/hawkes/cause.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from cPickle import load
-from math import exp, sin
-from collections import Counter
-from csv import reader, writer
-from data2 import parse
-import sys
-import networkx as nx
-import matplotlib.pyplot as plt
-import numpy as np
-
-
-def get_fatals():
- with open(sys.argv[1]) as fh:
- fh.readline()
- r = reader(fh)
- d = {i + 1: parse(row[7]) for (i, row) in enumerate(r)}
- d = {k: v for k, v in d.iteritems() if v}
- return d.items()
-
-
-def cause(lamb, alpha, mu):
- G = nx.DiGraph()
- roots, droots, infections = 0, 0, 0
- fatal_droots, fatal_infections, fatal_roots = 0, 0, 0
- fatals = get_fatals()
- for ((n1, t1), s) in event_edges.iteritems():
- G.add_node((n1, t1))
- if not s:
- droots += 1
- if (n1, t1) in fatals:
- fatal_droots += 1
- continue
- background_rate = lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))
- neighbors = sorted([(n2, t2, alpha / d * mu * exp(-mu * (t1 - t2)))
- for (n2, t2, d) in s], reverse=True)
- neighbor_rate = sum(e[2] for e in neighbors)
- # if sum(e[2] for e in prl[:1]) > br:
- # G.add_edge((n1, t1), tuple(prl[0][:2]))
- if background_rate > neighbor_rate:
- roots += 1
- if (n1, t1) in fatals:
- fatal_roots += 1
- else:
- G.add_edge((n1, t1), tuple(neighbors[0][:2]))
- # l.append(prl[0][2] / br)
- infections += 1
- if (n1, t1) in fatals:
- fatal_infections += 1
- # l.sort(reverse=True)
- # plt.plot(l)
- # plt.show()
- return (droots, roots, infections, fatal_droots,
- fatal_roots, fatal_infections, G)
-
-
-def analyze_graph(G):
- counts = Counter(len(c) for c in nx.weakly_connected_components(G))
- w = writer(open("components_dist.csv", "w"))
- w.writerows(counts.most_common())
- edges = ((n1, t1, n2, t2) for ((n1, t1), (n2, t2)) in G.edges_iter())
- e = writer(open("edges.csv", "w"))
- e.writerows(edges)
-
-
-if __name__ == "__main__":
- nodes, edges, events, event_edges = load(open("data2.pickle", "rb"))
- lamb, alpha, mu = 1.1847510744e-05, 0.00316718040144, 0.00393069204339
- # print len(event_edges), sum(len(e) for e in events.itervalues())
- # print len(fatal())
- (doors, roots, infections, fatal_droots,
- fatal_roots, fatal_infections, G) = cause(lamb, alpha, mu)
- analyze_graph(G)
diff --git a/hawkes/data.py b/hawkes/data.py
deleted file mode 100644
index 0f6135b..0000000
--- a/hawkes/data.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from csv import DictReader
-import sys
-from itertools import product
-from cPickle import dump
-from math import cos
-
-MAX_TIME = 3012
-
-
-def parse(s):
- return None if s == "NA" else int(float(s))
-
-
-def fluctuation_int(t):
- if t is None:
- t = MAX_TIME
- return (t, t + 0.43 / 0.0172 * (cos(4.36) - cos(0.0172 * t + 4.36)))
-
-
-def load_nodes(filename):
- with open(filename) as fh:
- reader = DictReader(fh)
- d = {parse(row["name"]): parse(row["fatal_day"]) for row in reader}
- for n, t in d.iteritems():
- d[n] = fluctuation_int(t)
- return d
-
-
-def load_edges(filename):
- events = {}
- edges = {}
- with open(filename) as fh:
- reader = DictReader(fh)
- for row in reader:
- fro, to, t, dist = map(parse, [row["from"], row["to"],
- row["t1"], row["dist"]])
- d = edges.get(fro, dict())
- d[to] = dist
- edges[fro] = d
- s = events.get(fro, set())
- s.add(t)
- events[fro] = s
- return edges, events
-
-
-def compute_event_edges(events, edges):
- event_edges = {}
-
- for fro in events:
- for t in events[fro]:
- event_edges[(fro, t)] = set()
-
- for fro in edges:
- for to in edges[fro]:
- try:
- e1, e2 = events[fro], events[to]
- except KeyError:
- continue
- for t1, t2 in product(e1, e2):
- if t1 < t2:
- s = event_edges[(to, t2)]
- s.add((fro, t1, edges[fro][to]))
- event_edges[(to, t2)] = s
- return event_edges
-
-
-if __name__ == "__main__":
- nodes = load_nodes(sys.argv[1])
- edges, events = load_edges(sys.argv[2])
- event_edges = compute_event_edges(events, edges)
- dump((nodes, edges, events, event_edges), open("data-all.pickle", "wb"))
diff --git a/hawkes/main.py b/hawkes/main.py
deleted file mode 100644
index 8f30dcf..0000000
--- a/hawkes/main.py
+++ /dev/null
@@ -1,151 +0,0 @@
-from cPickle import load
-from math import log, exp, sqrt, sin
-from random import random, gauss
-import sys
-from itertools import product
-from multiprocessing import Pool
-
-GR = (sqrt(5) - 1) / 2
-
-
-def gss(f, a, b, tol=1e-20):
- """golden section search to minimize along one coordinate"""
- c = b - GR * (b - a)
- d = a + GR * (b - a)
- while abs(c - d) > tol:
- fc = f(c)
- fd = f(d)
- if fc < fd:
- b = d
- d = c
- c = b - GR * (b - a)
- else:
- a = c
- c = d
- d = a + GR * (b - a)
- sys.stderr.write("gss:" + " ".join(map(str, [a, b, fc, fd])) + "\n")
- sys.stderr.flush()
- return (b + a) / 2, fc
-
-
-def iter_events(events):
- for n, s in events.iteritems():
- for t in s:
- yield (n, t)
-
-
-def approx(x):
- if x > 1e-10:
- return 1 - exp(-x)
- else:
- return x
-
-
-def ll(lamb, alpha, mu):
- r1 = sum(log(lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))
- + sum(alpha / d * mu * exp(-mu * (t1 - t2))
- for (n2, t2, d) in s))
- for ((n1, t1), s) in event_edges.iteritems())
- r2 = sum(sum(alpha / d * approx(mu * (nodes[n2][0] - t1))
- for n2, d in edges[n1].iteritems()
- if nodes[n2][0] > t1)
- for (n1, t1) in iter_events(events))
- r3 = lamb * sum(node[1] for node in nodes.itervalues())
- print r1, r2, r3
- return -(r1 - r2 - r3)
-
-
-def sa(x, y, z, sigma=0.5, niter=1000, fc=None):
- T = 0.1
- e = 1.1
- if fc:
- fo = fc
- else:
- fo = ll(x, y, z)
- for _ in xrange(niter):
- sys.stderr.write("sa: " + " ".join(map(str, [T, sigma, x, y, z, fo]))
- + "\n")
- sys.stderr.flush()
- yn = max(y + gauss(0, sigma * y), 0)
- zn = max(z + gauss(0, sigma * z), 0)
- fn = ll(x, yn, zn)
- if fn < fo or exp((fo - fn) / T) > random():
- y = yn
- z = zn
- sigma *= 2
- fo = fn
- else:
- sigma /= e
- if sigma < 1e-5:
- break
- T *= 0.99
- return y, z, fo
-
-
-def optimize_with_sa(x, y, z, niter=10):
-
- def f(x):
- return ll(x, y, z)
-
- # y, z = sa(x, y, z)
- y, z, fc = sa(x, y, z)
- for _ in xrange(niter):
- x, fc = gss(f, 0, 1e-3, tol=1e-10)
- y, z, fc = sa(x, y, z, fc=fc)
- print x, y, z, fc
- sys.stdout.flush()
-
-
-def optimize_with_gss(x, y, z, niter=5):
-
- def f(x):
- return ll(x, y, z)
-
- def g(y):
- return ll(x, y, z)
-
- def h(z):
- return ll(x, y, z)
-
- for _ in xrange(niter):
- y, fc = gss(g, 0, 1e50, tol=1e-10)
- z, fc = gss(h, 0, 1e50, tol=1e-10)
- x, fc = gss(f, 0, 1e-3, tol=1e-10)
- print x, y, z, fc
- sys.stdout.flush()
-
-
-def coarse_search():
- d = {}
- for line in open("values.txt"):
- v = map(float, line.strip().split())
- d[tuple(v[:3])] = v[3]
- p = Pool(5)
- lamb = [20. ** i for i in range(-10, 0)]
- alpha = [20. ** i for i in range(-15, 15)]
- mu = [20. ** i for i in range(-15, 15)]
-
- def aux(x):
- if x in d:
- return
- l, a, m = x
- print l, a, m, ll(l, a, m)
- sys.stdout.flush()
-
- p.map(aux, product(lamb, alpha, mu))
-
-
-if __name__ == "__main__":
- nodes, edges, events, event_edges = load(open("data-dist1.pickle", "rb"))
- x = 1.25e-5
- y = 1.2e16
- z = 1.5e-20
- sa(x, y, z)
-
- # with open(sys.argv[1]) as fh:
- # l = [map(float, line.strip().split()[:3]) for line in fh]
- # for e in l:
- # optimize_with_gss(*e)
-
- # print ll(x, y, z)
- # coarse_search()
diff --git a/hawkes/plot.py b/hawkes/plot.py
deleted file mode 100644
index 24d3f35..0000000
--- a/hawkes/plot.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from mpl_toolkits.mplot3d import Axes3D
-from matplotlib import cm
-import matplotlib.pyplot as plt
-
-fig = plt.figure()
-ax = fig.gca(projection='3d')
-d = {}
-for line in open("values.txt"):
- v = map(float, line.strip().split())
- try:
- if (v[0], v[2]) in d:
- d[(v[0], v[2])] = min(d[(v[0], v[2])], v[3])
- else:
- d[(v[0], v[2])] = v[3]
- except:
- continue
-
-x, y, z = [], [], []
-for k, v in d.iteritems():
- x.append(k[0] / 1000000.)
- y.append(k[1] / 1000000.)
- z.append(v / 100000.)
-
-print x
-surf = ax.plot_trisurf(x, y, z)
-plt.show()
diff --git a/hawkes/refine.py b/hawkes/refine.py
deleted file mode 100644
index 02bbe07..0000000
--- a/hawkes/refine.py
+++ /dev/null
@@ -1,70 +0,0 @@
-from cPickle import load
-from itertools import product
-from math import exp, log, sin
-import sys
-from multiprocessing import Pool
-
-
-def iter_events(events):
- for n, s in events.iteritems():
- for t in s:
- yield (n, t)
-
-
-def inprod(a, b):
- return tuple(float(x * y) for x, y in zip(a, b))
-
-
-def approx(x):
- if x > 1e-10:
- return 1 - exp(-x)
- else:
- return x
-
-
-def ll(lamb, alpha, mu):
- r1 = sum(log(lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))
- + sum(alpha / d * mu * exp(-mu * (t1 - t2))
- for (n2, t2, d) in s))
- for ((n1, t1), s) in event_edges.iteritems())
- r2 = sum(sum(alpha / d * approx(mu * (nodes[n2][0] - t1))
- for n2, d in edges[n1].iteritems()
- if nodes[n2][0] > t1)
- for (n1, t1) in iter_events(events))
- r3 = lamb * sum(node[1] for node in nodes.itervalues())
- return -(r1 - r2 - r3)
-
-
-def get_values():
- d = {}
- for line in open(sys.argv[1]):
- v = map(float, line.strip().split())
- d[tuple(v[:3])] = v[3]
- l = d.items()
- l.sort(key=lambda x: x[1])
- for line in open("refine.txt"):
- v = map(float, line.strip().split())
- d[tuple(v[:3])] = v[3]
- for a, _ in l[:100]:
- t = [1. / i for i in range(2, 4)] + [float(i) for i in range(1, 4)]
- for b in product(t, repeat=3):
- l, al, m = inprod(a, b)
- if (l, al, m) in d:
- continue
- yield (l, al, m)
-
-
-def refine():
- p = Pool(5)
-
- def aux(x):
- l, a, m = x
- print l, a, m, ll(l, a, m)
- sys.stdout.flush()
-
- p.map(aux, get_values())
-
-
-if __name__ == "__main__":
- nodes, edges, events, event_edges = load(open("data-all.pickle", "rb"))
- refine()
diff --git a/hawkes/sanity.py b/hawkes/sanity.py
deleted file mode 100644
index b6f25eb..0000000
--- a/hawkes/sanity.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from cPickle import load
-from csv import reader
-from data2 import parse
-import sys
-
-
-def parse_row(row):
- return set(e for e in map(parse, row[2:]) if e)
-
-if __name__ == "__main__":
- nodes, edges, events, event_edges = load(open("data2.pickle", "rb"))
- with open(sys.argv[1]) as fh:
- fh.readline()
- reader = reader(fh)
- d = {parse(row[1]): parse_row(row) for row in reader}
- d = {k: v for (k, v) in d.iteritems() if v}