diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2015-09-14 23:08:02 -0400 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-09-14 23:08:02 -0400 |
| commit | ab0b1f3cefedb35327a19ec1b6afd560bfdf802d (patch) | |
| tree | b777f3e2c0ac0e712d8c5faab5107b1d236e2c3a /hawkes_experiments/data.py | |
| parent | 960676226862d2d68c7a9c04c56d4f8157803025 (diff) | |
| download | criminal_cascades-ab0b1f3cefedb35327a19ec1b6afd560bfdf802d.tar.gz | |
Import supplements and repo reorganization
Diffstat (limited to 'hawkes_experiments/data.py')
| -rw-r--r-- | hawkes_experiments/data.py | 71 |
1 files changed, 71 insertions, 0 deletions
"""Build the pickled data set from the node and edge CSV files.

This is the module ``hawkes_experiments/data.py``.  Run as a script::

    python data.py NODES_CSV EDGES_CSV

NODES_CSV must have the columns ``name`` and ``fatal_day``; EDGES_CSV must
have the columns ``from``, ``to``, ``t1`` and ``dist``.  Every cell is either
a number or the literal string ``NA``.  The result is written to
``data-all.pickle`` in the current working directory as the tuple
``(nodes, edges, events, event_edges)``.
"""
from csv import DictReader
import sys
from itertools import product
from math import cos

try:  # Python 2: C-accelerated pickler
    from cPickle import dump
except ImportError:  # Python 3: cPickle was merged into pickle
    from pickle import dump

# Time substituted for a missing ("NA") time in fluctuation_int.
# NOTE(review): presumably the last day of the observation window -- confirm.
MAX_TIME = 3012


def parse(s):
    """Convert a CSV cell to an int, mapping the string "NA" to None.

    The value goes through ``float`` first, so "12.0" parses to 12 and any
    fractional part is truncated toward zero.
    """
    return None if s == "NA" else int(float(s))


def fluctuation_int(t):
    """Return ``(t, F(t))``, with ``t`` replaced by MAX_TIME when it is None.

    ``F(t) = t + 0.43 / 0.0172 * (cos(4.36) - cos(0.0172 * t + 4.36))`` is the
    integral from 0 to ``t`` of ``1 + 0.43 * sin(0.0172 * s + 4.36)``.
    NOTE(review): 0.0172 is close to 2*pi/365, so this looks like a yearly
    seasonal term -- confirm against the model description.
    """
    if t is None:
        t = MAX_TIME
    return (t, t + 0.43 / 0.0172 * (cos(4.36) - cos(0.0172 * t + 4.36)))


def load_nodes(filename):
    """Read the node CSV and return ``{name: fluctuation_int(fatal_day)}``.

    Both columns are passed through ``parse``; a "NA" fatal day therefore
    ends up as the value computed for MAX_TIME.  If a name appears on several
    rows, the last row wins.
    """
    with open(filename) as fh:
        return {
            parse(row["name"]): fluctuation_int(parse(row["fatal_day"]))
            for row in DictReader(fh)
        }


def load_edges(filename):
    """Read the edge CSV and return the pair ``(edges, events)``.

    ``edges`` maps ``from -> {to: dist}`` (a later row for the same pair
    overwrites the earlier distance).  ``events`` maps ``from -> set of t1``
    values seen on rows with that ``from``.  All four columns are passed
    through ``parse``, so any of them may be None when the cell is "NA".
    """
    edges = {}
    events = {}
    with open(filename) as fh:
        for row in DictReader(fh):
            fro, to, t, dist = (
                parse(row[column]) for column in ("from", "to", "t1", "dist")
            )
            # Plain dicts (not defaultdicts) are kept on purpose:
            # compute_event_edges relies on KeyError for unknown nodes.
            edges.setdefault(fro, {})[to] = dist
            events.setdefault(fro, set()).add(t)
    return edges, events


def _occurs_before(t1, t2):
    """Return True when ``t1`` sorts strictly before ``t2``.

    Python 2 ordered None before every integer, and the original comparison
    ``t1 < t2`` relied on that for missing ("NA") times.  The same ordering
    is spelled out here so the result is identical on Python 3, where
    comparing None with an int raises TypeError.
    """
    if t2 is None:
        return False
    if t1 is None:
        return True
    return t1 < t2


def compute_event_edges(events, edges):
    """Link every event to the earlier events of its in-neighbours.

    Args:
        events: mapping ``node -> set of event times`` (see load_edges).
        edges: mapping ``from -> {to: dist}`` (see load_edges).

    Returns:
        A dict with one key ``(node, t)`` per event.  The value is the set of
        ``(fro, t1, dist)`` triples such that there is an edge ``fro -> node``
        with distance ``dist`` and ``fro`` has an event at ``t1`` strictly
        before ``t``.  Events with no such predecessor map to an empty set.
    """
    event_edges = {}
    for fro in events:
        for t in events[fro]:
            event_edges[(fro, t)] = set()

    for fro in edges:
        for to in edges[fro]:
            try:
                e1, e2 = events[fro], events[to]
            except KeyError:
                # One endpoint has no recorded event: nothing to link.
                continue
            dist = edges[fro][to]
            for t1, t2 in product(e1, e2):
                if _occurs_before(t1, t2):
                    event_edges[(to, t2)].add((fro, t1, dist))
    return event_edges


if __name__ == "__main__":
    nodes = load_nodes(sys.argv[1])
    edges, events = load_edges(sys.argv[2])
    event_edges = compute_event_edges(events, edges)
    # Use a context manager so the pickle file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open("data-all.pickle", "wb") as out:
        dump((nodes, edges, events, event_edges), out)
