summaryrefslogtreecommitdiffstats
path: root/experiments/main.py
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2015-03-30 15:01:56 -0400
committerThibaut Horel <thibaut.horel@gmail.com>2015-03-30 15:01:56 -0400
commitb84dddecf2eab982941704a43663cf643be027d3 (patch)
tree273afacb4e2d2e043a6bbf75f774d8752e9fc202 /experiments/main.py
parent68187fcfb505e87e5853c5a1b2e1dc073278a2ba (diff)
downloadcriminal_cascades-b84dddecf2eab982941704a43663cf643be027d3.tar.gz
Archive old code
Diffstat (limited to 'experiments/main.py')
-rw-r--r--experiments/main.py118
1 files changed, 0 insertions, 118 deletions
diff --git a/experiments/main.py b/experiments/main.py
deleted file mode 100644
index 7556dac..0000000
--- a/experiments/main.py
+++ /dev/null
@@ -1,118 +0,0 @@
-import networkx as nx
-import sys
-from scipy.sparse import dok_matrix
-import numpy as np
-from collections import Counter
-from math import log
-from scipy.optimize import minimize, basinhopping
-
-
-def weight(delta, dist, alpha):
- r = 1. / ((1. + (delta / alpha)) ** 1.01) * dist # * (0.01 / alpha)
- return r
-
-
-def construct_matrix(filename):
- delta_matrix = dok_matrix((8238, 8238), dtype=np.float64)
- dist_matrix = dok_matrix((8238, 8238), dtype=np.float64)
- labels = {}
- idx = 0
- with open(filename) as fh:
- fh.readline()
- for line in fh:
- values = line.strip().split(",")
- i, j, dist, t1, t2 = map(int, values[1:])
- if i not in labels:
- labels[i] = idx
- idx += 1
- if j not in labels:
- labels[j] = idx
- idx += 1
- delta = float(t2 - t1)
- dist = 0 if dist >= 4 else 0.15 * (0.57 ** (dist - 1))
- delta_matrix[labels[i], labels[j]] = delta
- dist_matrix[labels[i], labels[j]] = dist
- return delta_matrix, dist_matrix
-
-
-def construct_graph(m, threshold=0.5):
- g = nx.DiGraph(weight=0.)
- for ((i, j), w) in m.iteritems():
- g.add_node(i)
- g.add_node(j)
- if w >= threshold:
- g.add_edge(i, j, weight=w)
- return g
-
-
-def construct_graph2(m, beta):
- g = nx.DiGraph(weight=0.)
- for (i, j), w in m.iteritems():
- g.add_node(i)
- g.add_node(j)
- md = m.toarray()
- am = md.argmax(axis=0)
- ma = md[am, np.arange(md.shape[1])]
- threshold = beta / (1. - beta)
- edges = ma[ma >= threshold]
- md[am, np.arange(md.shape[1])] = 0
- for j, i in enumerate(am):
- if ma[j] >= beta:
- g.add_edge(i, j)
- return g, (np.log(edges).sum() + np.log(1 - ma[ma < threshold]).sum()
- + np.log(1 - md).sum())
-
-
-def log_likelihood(delta_matrix, dist_matrix, alpha=60., beta=1.):
- m = dok_matrix((8238, 8238), dtype=np.float64)
- for ((i, j), delta) in delta_matrix.iteritems():
- m[i, j] = weight(delta, dist_matrix[i, j], alpha)
- g = construct_graph(m)
- m = np.array([w for _, w in m.items()])
- n, e = g.number_of_nodes(), g.number_of_edges()
- c = nx.number_weakly_connected_components(g)
- l = (np.sum(np.log(1 - m[m < 0.5])) + np.sum(np.log(m[m >= 0.5])))
- l = l + c * log(beta)
- cnt = Counter()
- print "Nodes:{0}, Edges:{1}, Cascades:{2}, Alpha:{4}, Beta:{5}, "\
- "LL:{3}".format(n, e, c, l, alpha, beta)
- for c in nx.weakly_connected_components(g):
- cnt[len(c)] += 1
- print cnt.most_common()
- return l
-
-
-def log_likelihood2((alpha, beta), delta_matrix, dist_matrix):
- m = dok_matrix((8238, 8238), dtype=np.float64)
- for ((i, j), delta) in delta_matrix.iteritems():
- m[i, j] = weight(delta, dist_matrix[i, j], alpha)
- g, l = construct_graph2(m, beta)
- n, e = g.number_of_nodes(), g.number_of_edges()
- c = nx.number_weakly_connected_components(g)
- l = (l + c * log(beta) + (123506 - c) * log(1 - beta)
- + log(0.15) * 36186 + log(0.15 * 0.57) * 41711
- + log(0.15 * 0.57 * 0.57) * 23264)
- # cnt = Counter()
- # print "Nodes:{0}, Edges:{1}, Cascades:{2}, Alpha:{4}, Beta:{5}, "\
- # "LL:{3}".format(n, e, c, l, alpha, beta)
- # for c in nx.weakly_connected_components(g):
- # cnt[len(c)] += 1
- # print cnt.most_common()
- #print beta, alpha, l
- return -l
-
-
-def accept_test(**kwargs):
- bounds = [(1, 100), (0.01, 0.5)]
- x = kwargs["x_new"]
- l = [bounds[i][0] <= e <= bounds[i][1] for i, e in enumerate(x)]
- print x, l
-
-
-if __name__ == "__main__":
- delta, dist = construct_matrix(sys.argv[1])
- basinhopping(log_likelihood2, [10, 0.1],
- minimizer_kwargs={"args": (delta, dist),
- "bounds": [(1, 100), (0.01, 0.5)]},
- accept_test=accept_test,
- disp=True)