diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2015-09-14 23:08:02 -0400 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-09-14 23:08:02 -0400 |
| commit | ab0b1f3cefedb35327a19ec1b6afd560bfdf802d (patch) | |
| tree | b777f3e2c0ac0e712d8c5faab5107b1d236e2c3a /hawkes/data.py | |
| parent | 960676226862d2d68c7a9c04c56d4f8157803025 (diff) | |
| download | criminal_cascades-ab0b1f3cefedb35327a19ec1b6afd560bfdf802d.tar.gz | |
Import supplements and repo reorganization
Diffstat (limited to 'hawkes/data.py')
| -rw-r--r-- | hawkes/data.py | 71 |
1 file changed, 0 insertions, 71 deletions
```diff
diff --git a/hawkes/data.py b/hawkes/data.py
deleted file mode 100644
index 0f6135b..0000000
--- a/hawkes/data.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from csv import DictReader
-import sys
-from itertools import product
-from cPickle import dump
-from math import cos
-
-MAX_TIME = 3012
-
-
-def parse(s):
-    return None if s == "NA" else int(float(s))
-
-
-def fluctuation_int(t):
-    if t is None:
-        t = MAX_TIME
-    return (t, t + 0.43 / 0.0172 * (cos(4.36) - cos(0.0172 * t + 4.36)))
-
-
-def load_nodes(filename):
-    with open(filename) as fh:
-        reader = DictReader(fh)
-        d = {parse(row["name"]): parse(row["fatal_day"]) for row in reader}
-    for n, t in d.iteritems():
-        d[n] = fluctuation_int(t)
-    return d
-
-
-def load_edges(filename):
-    events = {}
-    edges = {}
-    with open(filename) as fh:
-        reader = DictReader(fh)
-        for row in reader:
-            fro, to, t, dist = map(parse, [row["from"], row["to"],
-                                           row["t1"], row["dist"]])
-            d = edges.get(fro, dict())
-            d[to] = dist
-            edges[fro] = d
-            s = events.get(fro, set())
-            s.add(t)
-            events[fro] = s
-    return edges, events
-
-
-def compute_event_edges(events, edges):
-    event_edges = {}
-
-    for fro in events:
-        for t in events[fro]:
-            event_edges[(fro, t)] = set()
-
-    for fro in edges:
-        for to in edges[fro]:
-            try:
-                e1, e2 = events[fro], events[to]
-            except KeyError:
-                continue
-            for t1, t2 in product(e1, e2):
-                if t1 < t2:
-                    s = event_edges[(to, t2)]
-                    s.add((fro, t1, edges[fro][to]))
-                    event_edges[(to, t2)] = s
-    return event_edges
-
-
-if __name__ == "__main__":
-    nodes = load_nodes(sys.argv[1])
-    edges, events = load_edges(sys.argv[2])
-    event_edges = compute_event_edges(events, edges)
-    dump((nodes, edges, events, event_edges), open("data-all.pickle", "wb"))
```
