diff options
Diffstat (limited to 'hawkes/data.py')
| -rw-r--r-- | hawkes/data.py | 20 |
1 files changed, 14 insertions, 6 deletions
diff --git a/hawkes/data.py b/hawkes/data.py
index 4d7744e..e5c33f8 100644
--- a/hawkes/data.py
+++ b/hawkes/data.py
@@ -2,6 +2,7 @@ from csv import DictReader
 import sys
 from itertools import product
 from cPickle import dump
+from math import cos
 
 MAX_TIME = 3012
 
@@ -10,13 +11,18 @@ def parse(s):
     return None if s == "NA" else int(float(s))
 
 
+def fluctuation_int(t):
+    if t is None:
+        t = MAX_TIME
+    return t + 0.43 / 0.0172 * (cos(4.36) - cos(0.0172 * t + 4.36))
+
+
 def load_nodes(filename):
     with open(filename) as fh:
         reader = DictReader(fh)
         d = {parse(row["name"]): parse(row["fatal_day"]) for row in reader}
     for n, t in d.iteritems():
-        if t is None:
-            d[n] = MAX_TIME
+        d[n] = fluctuation_int(t)
     return d
 
 
@@ -26,10 +32,12 @@ def load_edges(filename):
     with open(filename) as fh:
         reader = DictReader(fh)
         for row in reader:
-            fro, to, t, weight = map(parse, [row["from"], row["to"],
-                                             row["t1"], row["w1"]])
+            fro, to, t, dist = map(parse, [row["from"], row["to"],
+                                           row["t1"], row["dist"]])
+            if dist >= 2:
+                continue
             d = edges.get(fro, dict())
-            d[to] = weight
+            d[to] = dist
             edges[fro] = d
             s = events.get(fro, set())
             s.add(t)
@@ -62,4 +70,4 @@ if __name__ == "__main__":
     nodes = load_nodes(sys.argv[1])
     edges, events = load_edges(sys.argv[2])
     event_edges = compute_event_edges(events, edges)
-    dump((nodes, edges, events, event_edges), open("data.pickle", "wb"))
+    dump((nodes, edges, events, event_edges), open("data-dist1.pickle", "wb"))
