diff options
Diffstat (limited to 'hawkes/data2.py')
| -rw-r--r-- | hawkes/data2.py | 65 |
1 files changed, 65 insertions, 0 deletions
# diff --git a/hawkes/data2.py b/hawkes/data2.py
# new file mode 100644
# index 0000000..c091e7a
# --- /dev/null
# +++ b/hawkes/data2.py
# @@ -0,0 +1,65 @@
"""Convert a node CSV and an edge CSV into a single pickled data set.

Run as a script with two arguments (node CSV path, edge CSV path); the
tuple ``(nodes, edges, events, event_edges)`` is written to ``data2.pickle``
in the current working directory.
"""
from csv import DictReader
import sys
from itertools import product

try:
    from cPickle import dump  # Python 2
except ImportError:
    from pickle import dump  # Python 3 has no cPickle module

# Value stored as a node's fatal day when the CSV field is "NA".
MAX_TIME = 3012


def parse(s):
    """Return ``None`` for the string ``"NA"``, else *s* truncated to an int.

    The value goes through ``float`` first, so ``"3.0"`` parses to ``3``.
    """
    return None if s == "NA" else int(float(s))


def load_nodes(filename):
    """Read the node CSV and map each parsed ``name`` to its ``fatal_day``.

    A ``fatal_day`` of ``"NA"`` is replaced by ``MAX_TIME``. If a name
    occurs on several rows, the last row wins.
    """
    nodes = {}
    with open(filename) as fh:
        for row in DictReader(fh):
            day = parse(row["fatal_day"])
            # Substitute the default while building the dict instead of
            # patching it afterwards; this also avoids the Python 2-only
            # ``dict.iteritems``.
            nodes[parse(row["name"])] = MAX_TIME if day is None else day
    return nodes


def load_edges(filename):
    """Read the edge CSV and return ``(edges, events)``.

    ``edges`` maps ``from -> {to: dist}`` (the last row wins for a repeated
    ``from``/``to`` pair). ``events`` maps ``from -> set of t1 values``.
    All four columns are converted with :func:`parse`.

    NOTE(review): a ``t1`` of ``"NA"`` is stored in ``events`` as ``None``;
    confirm whether such rows can occur in the input.
    """
    events = {}
    edges = {}
    with open(filename) as fh:
        for row in DictReader(fh):
            fro, to, t, dist = map(
                parse, [row["from"], row["to"], row["t1"], row["dist"]]
            )
            edges.setdefault(fro, {})[to] = dist
            events.setdefault(fro, set()).add(t)
    return edges, events


def compute_event_edges(events, edges):
    """Link every event to the earlier events of its in-neighbours.

    Returns a dict keyed by ``(node, time)`` for every event in *events*.
    The value is the set of ``(fro, t1, dist)`` triples such that *edges*
    contains ``fro -> node`` with distance ``dist`` and ``fro`` has an event
    at ``t1`` strictly earlier than ``time``. Edges whose source or target
    has no entry in *events* are skipped.
    """
    event_edges = {}
    for fro in events:
        for t in events[fro]:
            event_edges[(fro, t)] = set()

    for fro in edges:
        for to in edges[fro]:
            try:
                e1, e2 = events[fro], events[to]
            except KeyError:
                # One endpoint never produced an event: nothing to link.
                continue
            dist = edges[fro][to]
            for t1, t2 in product(e1, e2):
                if t1 < t2:
                    # The set is mutated in place; no reassignment needed.
                    event_edges[(to, t2)].add((fro, t1, dist))
    return event_edges


def main(argv=None):
    """Build the data set from ``argv[1]`` (nodes) and ``argv[2]`` (edges).

    *argv* defaults to ``sys.argv``. The result is pickled to
    ``data2.pickle``.
    """
    argv = sys.argv if argv is None else argv
    nodes = load_nodes(argv[1])
    edges, events = load_edges(argv[2])
    event_edges = compute_event_edges(events, edges)
    # Context manager guarantees the pickle is flushed and closed.
    with open("data2.pickle", "wb") as fh:
        dump((nodes, edges, events, event_edges), fh)


if __name__ == "__main__":
    main()
