diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2015-03-30 15:01:56 -0400 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-03-30 15:01:56 -0400 |
| commit | b84dddecf2eab982941704a43663cf643be027d3 (patch) | |
| tree | 273afacb4e2d2e043a6bbf75f774d8752e9fc202 /experiments/old_stuff/main.py | |
| parent | 68187fcfb505e87e5853c5a1b2e1dc073278a2ba (diff) | |
| download | criminal_cascades-b84dddecf2eab982941704a43663cf643be027d3.tar.gz | |
Archive old code
Diffstat (limited to 'experiments/old_stuff/main.py')
| -rw-r--r-- | experiments/old_stuff/main.py | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/experiments/old_stuff/main.py b/experiments/old_stuff/main.py new file mode 100644 index 0000000..7556dac --- /dev/null +++ b/experiments/old_stuff/main.py @@ -0,0 +1,118 @@ +import networkx as nx +import sys +from scipy.sparse import dok_matrix +import numpy as np +from collections import Counter +from math import log +from scipy.optimize import minimize, basinhopping + + +def weight(delta, dist, alpha): + r = 1. / ((1. + (delta / alpha)) ** 1.01) * dist # * (0.01 / alpha) + return r + + +def construct_matrix(filename): + delta_matrix = dok_matrix((8238, 8238), dtype=np.float64) + dist_matrix = dok_matrix((8238, 8238), dtype=np.float64) + labels = {} + idx = 0 + with open(filename) as fh: + fh.readline() + for line in fh: + values = line.strip().split(",") + i, j, dist, t1, t2 = map(int, values[1:]) + if i not in labels: + labels[i] = idx + idx += 1 + if j not in labels: + labels[j] = idx + idx += 1 + delta = float(t2 - t1) + dist = 0 if dist >= 4 else 0.15 * (0.57 ** (dist - 1)) + delta_matrix[labels[i], labels[j]] = delta + dist_matrix[labels[i], labels[j]] = dist + return delta_matrix, dist_matrix + + +def construct_graph(m, threshold=0.5): + g = nx.DiGraph(weight=0.) + for ((i, j), w) in m.iteritems(): + g.add_node(i) + g.add_node(j) + if w >= threshold: + g.add_edge(i, j, weight=w) + return g + + +def construct_graph2(m, beta): + g = nx.DiGraph(weight=0.) + for (i, j), w in m.iteritems(): + g.add_node(i) + g.add_node(j) + md = m.toarray() + am = md.argmax(axis=0) + ma = md[am, np.arange(md.shape[1])] + threshold = beta / (1. - beta) + edges = ma[ma >= threshold] + md[am, np.arange(md.shape[1])] = 0 + for j, i in enumerate(am): + if ma[j] >= beta: + g.add_edge(i, j) + return g, (np.log(edges).sum() + np.log(1 - ma[ma < threshold]).sum() + + np.log(1 - md).sum()) + + +def log_likelihood(delta_matrix, dist_matrix, alpha=60., beta=1.): + m = dok_matrix((8238, 8238), dtype=np.float64) + for ((i, j), delta) in delta_matrix.iteritems(): + m[i, j] = weight(delta, dist_matrix[i, j], alpha) + g = construct_graph(m) + m = np.array([w for _, w in m.items()]) + n, e = g.number_of_nodes(), g.number_of_edges() + c = nx.number_weakly_connected_components(g) + l = (np.sum(np.log(1 - m[m < 0.5])) + np.sum(np.log(m[m >= 0.5]))) + l = l + c * log(beta) + cnt = Counter() + print "Nodes:{0}, Edges:{1}, Cascades:{2}, Alpha:{4}, Beta:{5}, "\ + "LL:{3}".format(n, e, c, l, alpha, beta) + for c in nx.weakly_connected_components(g): + cnt[len(c)] += 1 + print cnt.most_common() + return l + + +def log_likelihood2((alpha, beta), delta_matrix, dist_matrix): + m = dok_matrix((8238, 8238), dtype=np.float64) + for ((i, j), delta) in delta_matrix.iteritems(): + m[i, j] = weight(delta, dist_matrix[i, j], alpha) + g, l = construct_graph2(m, beta) + n, e = g.number_of_nodes(), g.number_of_edges() + c = nx.number_weakly_connected_components(g) + l = (l + c * log(beta) + (123506 - c) * log(1 - beta) + + log(0.15) * 36186 + log(0.15 * 0.57) * 41711 + + log(0.15 * 0.57 * 0.57) * 23264) + # cnt = Counter() + # print "Nodes:{0}, Edges:{1}, Cascades:{2}, Alpha:{4}, Beta:{5}, "\ + # "LL:{3}".format(n, e, c, l, alpha, beta) + # for c in nx.weakly_connected_components(g): + # cnt[len(c)] += 1 + # print cnt.most_common() + #print beta, alpha, l + return -l + + +def accept_test(**kwargs): + bounds = [(1, 100), (0.01, 0.5)] + x = kwargs["x_new"] + l = [bounds[i][0] <= e <= bounds[i][1] for i, e in enumerate(x)] + print x, l + + +if __name__ == "__main__": + delta, dist = construct_matrix(sys.argv[1]) + basinhopping(log_likelihood2, [10, 0.1], + minimizer_kwargs={"args": (delta, dist), + "bounds": [(1, 100), (0.01, 0.5)]}, + accept_test=accept_test, + disp=True) |
