summaryrefslogtreecommitdiffstats
path: root/experiments/main.py
blob: 7556dac1e15778248efd47219a2ab79d45c2ee95 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import networkx as nx
import sys
from scipy.sparse import dok_matrix
import numpy as np
from collections import Counter
from math import log
from scipy.optimize import minimize, basinhopping


def weight(delta, dist, alpha):
    r = 1. / ((1. + (delta / alpha)) ** 1.01) * dist # * (0.01 / alpha)
    return r


def construct_matrix(filename):
    delta_matrix = dok_matrix((8238, 8238), dtype=np.float64)
    dist_matrix = dok_matrix((8238, 8238), dtype=np.float64)
    labels = {}
    idx = 0
    with open(filename) as fh:
        fh.readline()
        for line in fh:
            values = line.strip().split(",")
            i, j, dist, t1, t2 = map(int, values[1:])
            if i not in labels:
                labels[i] = idx
                idx += 1
            if j not in labels:
                labels[j] = idx
                idx += 1
            delta = float(t2 - t1)
            dist = 0 if dist >= 4 else 0.15 * (0.57 ** (dist - 1))
            delta_matrix[labels[i], labels[j]] = delta
            dist_matrix[labels[i], labels[j]] = dist
    return delta_matrix, dist_matrix


def construct_graph(m, threshold=0.5):
    g = nx.DiGraph(weight=0.)
    for ((i, j), w) in m.iteritems():
        g.add_node(i)
        g.add_node(j)
        if w >= threshold:
            g.add_edge(i, j, weight=w)
    return g


def construct_graph2(m, beta):
    g = nx.DiGraph(weight=0.)
    for (i, j), w in m.iteritems():
        g.add_node(i)
        g.add_node(j)
    md = m.toarray()
    am = md.argmax(axis=0)
    ma = md[am, np.arange(md.shape[1])]
    threshold = beta / (1. - beta)
    edges = ma[ma >= threshold]
    md[am, np.arange(md.shape[1])] = 0
    for j, i in enumerate(am):
        if ma[j] >= beta:
            g.add_edge(i, j)
    return g, (np.log(edges).sum() + np.log(1 - ma[ma < threshold]).sum()
               + np.log(1 - md).sum())


def log_likelihood(delta_matrix, dist_matrix, alpha=60., beta=1.):
    m = dok_matrix((8238, 8238), dtype=np.float64)
    for ((i, j), delta) in delta_matrix.iteritems():
        m[i, j] = weight(delta, dist_matrix[i, j], alpha)
    g = construct_graph(m)
    m = np.array([w for _, w in m.items()])
    n, e = g.number_of_nodes(), g.number_of_edges()
    c = nx.number_weakly_connected_components(g)
    l = (np.sum(np.log(1 - m[m < 0.5])) + np.sum(np.log(m[m >= 0.5])))
    l = l + c * log(beta)
    cnt = Counter()
    print "Nodes:{0}, Edges:{1}, Cascades:{2}, Alpha:{4}, Beta:{5}, "\
        "LL:{3}".format(n, e, c, l, alpha, beta)
    for c in nx.weakly_connected_components(g):
        cnt[len(c)] += 1
    print cnt.most_common()
    return l


def log_likelihood2((alpha, beta), delta_matrix, dist_matrix):
    m = dok_matrix((8238, 8238), dtype=np.float64)
    for ((i, j), delta) in delta_matrix.iteritems():
        m[i, j] = weight(delta, dist_matrix[i, j], alpha)
    g, l = construct_graph2(m, beta)
    n, e = g.number_of_nodes(), g.number_of_edges()
    c = nx.number_weakly_connected_components(g)
    l = (l + c * log(beta) + (123506 - c) * log(1 - beta)
         + log(0.15) * 36186 + log(0.15 * 0.57) * 41711
         + log(0.15 * 0.57 * 0.57) * 23264)
    # cnt = Counter()
    # print "Nodes:{0}, Edges:{1}, Cascades:{2}, Alpha:{4}, Beta:{5}, "\
    #     "LL:{3}".format(n, e, c, l, alpha, beta)
    # for c in nx.weakly_connected_components(g):
    #     cnt[len(c)] += 1
    # print cnt.most_common()
    #print beta, alpha, l
    return -l


def accept_test(**kwargs):
    bounds = [(1, 100), (0.01, 0.5)]
    x = kwargs["x_new"]
    l = [bounds[i][0] <= e <= bounds[i][1] for i, e in enumerate(x)]
    print x, l


if __name__ == "__main__":
    delta, dist = construct_matrix(sys.argv[1])
    basinhopping(log_likelihood2, [10, 0.1],
                 minimizer_kwargs={"args": (delta, dist),
                                   "bounds": [(1, 100), (0.01, 0.5)]},
                 accept_test=accept_test,
                 disp=True)