path: root/hawkes_experiments/cause.py
blob: 711e34a258b53de9a4f8b43628d91ed3915fcf32 (plain)
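"""Attribute each event in a spatio-temporal Hawkes process either to the
seasonal background intensity or to its most likely triggering parent, and
summarise the resulting cascade graph (Python 2 code).

Expects `data-all.pickle` in the working directory and takes the path of
the CSV read by get_fatals() as its single command-line argument.
"""
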
from cPickle import load
from math import exp, sin
from collections import Counter
from csv import reader, writer
from data import parse
import sys
import networkx as nx
import numpy as np


def get_fatals():
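    """Map event ids (1-based CSV row numbers, header skipped) to the
    parsed value of column 8, keeping only rows where that value is
    truthy, and return the mapping as (id, value) pairs for the
    membership tests in cause().  The CSV path comes from sys.argv[1].
    """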
    with open(sys.argv[1]) as fh:
        fh.readline()
        r = reader(fh)
        d = {i + 1: parse(row[7]) for (i, row) in enumerate(r)}
    d = {k: v for k, v in d.iteritems() if v}
    return d.items()


def cause(lamb, alpha, mu):
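    """Classify every event in the module-level `event_edges` mapping.

    Events with no candidate parents are counted as `droots`.  For the
    rest, the seasonal background rate is compared with the summed
    triggering rate of all candidate parents: if the background wins,
    the event is a `root`; otherwise an edge from the strongest parent
    is added to the cascade graph and the event counts as an
    `infection`.  Each class is also counted separately for fatal
    events (as identified by get_fatals()).

    Returns (droots, roots, infections, fatal_droots, fatal_roots,
    fatal_infections, G) where G is the directed cascade graph.
    """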
    G = nx.DiGraph()
    roots, droots, infections = 0, 0, 0
    fatal_droots, fatal_infections, fatal_roots = 0, 0, 0
    fatals = get_fatals()
    for ((n1, t1), s) in event_edges.iteritems():
        G.add_node((n1, t1))
        if not s:
            droots += 1
            if (n1, t1) in fatals:
                fatal_droots += 1
            continue
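        # Seasonal background intensity: base rate lamb modulated by a
        # sinusoid whose angular frequency 0.0172 corresponds to a period
        # of roughly 365 time units (presumably days).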
        background_rate = lamb * (1 + 0.43 * sin(0.0172 * t1 + 4.36))
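        # Candidate parents ranked by their triggering intensity at this
        # event: inverse-square decay in the distance d times an
        # exponential kernel mu * exp(-mu * dt) in the elapsed time.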
        parents = sorted([(n2, t2, alpha / d ** 2 * mu * exp(-mu * (t1 - t2)), d)
                          for (n2, t2, d) in s], reverse=True,
                         key=lambda x: x[2])
        parent_rate = sum(e[2] for e in parents)
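        # Disabled alternative below: attach an edge whenever the single
        # strongest parent alone beats the background rate, instead of
        # comparing the background against the summed parent rate.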
        # if parents[0][2] > background_rate:
        #     G.add_edge(tuple(parents[0][:2]), (n1, t1),
        #                weight=parents[0][3])
        if background_rate > parent_rate:
            roots += 1
            if (n1, t1) in fatals:
                fatal_roots += 1
        else:
            G.add_edge(tuple(parents[0][:2]), (n1, t1),
                       weight=parents[0][3])
            infections += 1
            if (n1, t1) in fatals:
                fatal_infections += 1
    return (droots, roots, infections, fatal_droots,
            fatal_roots, fatal_infections, G)


def analyze_graph(G):
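    """Summarise the cascade graph G: print statistics of the weakly
    connected component sizes (cascades), inter-event times and parent
    distances, and write the component-size distribution and edge list
    to components_dist.csv and edges.csv.
    """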
    csizes = [len(c) for c in nx.weakly_connected_components(G)]
    print "cascades: {0}, min: {1}, max: {2}, mean: {3}, median: {4}".format(
        len(csizes),  np.min(csizes), np.max(csizes), np.mean(csizes),
        np.median(csizes))
    counts = Counter(csizes)
    with open("components_dist.csv", "w") as fh:
        w = writer(fh)
        w.writerows(counts.most_common())
    edges = list(G.edges(data=True))
    print "edges: {0}".format(len(edges))
    times = [e[1][1] - e[0][1] for e in edges]
    distances = [e[2]["weight"] for e in edges]
    print "times, min: {0}, max: {1}, mean: {2}, median: {3}".format(
        np.min(times), np.max(times), np.mean(times), np.median(times))
    print "distances, min: {0}, max: {1}, mean: {2}, median: {3}".format(
        np.min(distances), np.max(distances), np.mean(distances),
        np.median(distances))
    with open("edges.csv", "w") as fh:
        # Use a writer name that the generator's loop variable does not shadow.
        ew = writer(fh)
        ew.writerows((e[0][0], e[0][1], e[1][0], e[1][1], e[2]["weight"])
                     for e in edges)


if __name__ == "__main__":
    with open("data-all.pickle", "rb") as fh:
        nodes, edges, events, event_edges = load(fh)
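    # Hard-coded Hawkes parameters: background rate lamb, triggering
    # weight alpha and temporal decay mu (presumably from a separate fit).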
    lamb, alpha, mu = 1.18909761267e-05, 0.00781529533133, 0.00373882477787
    print "mu: {0}, alpha: {1}, beta: {2}".format(lamb, alpha, mu)
    (droots, roots, infections, fatal_droots,
     fatal_roots, fatal_infections, G) = cause(lamb, alpha, mu)
    r = "events: {0}, droots: {1}, roots: {2}, infections: {3}, "\
        "fatal_droots: {4}, fatal_roots: {5}, "\
        "fatal_infections: {6}".format(len(event_edges), droots, roots,
                                       infections, fatal_droots, fatal_roots,
                                       fatal_infections)
    print r
    analyze_graph(G)