summaryrefslogtreecommitdiffstats
path: root/ic_experiments/old_stuff/main.py
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2015-09-14 23:08:02 -0400
committerThibaut Horel <thibaut.horel@gmail.com>2015-09-14 23:08:02 -0400
commitab0b1f3cefedb35327a19ec1b6afd560bfdf802d (patch)
treeb777f3e2c0ac0e712d8c5faab5107b1d236e2c3a /ic_experiments/old_stuff/main.py
parent960676226862d2d68c7a9c04c56d4f8157803025 (diff)
downloadcriminal_cascades-ab0b1f3cefedb35327a19ec1b6afd560bfdf802d.tar.gz
Import supplements and repo reorganization
Diffstat (limited to 'ic_experiments/old_stuff/main.py')
-rw-r--r--ic_experiments/old_stuff/main.py118
1 files changed, 118 insertions, 0 deletions
diff --git a/ic_experiments/old_stuff/main.py b/ic_experiments/old_stuff/main.py
new file mode 100644
index 0000000..7556dac
--- /dev/null
+++ b/ic_experiments/old_stuff/main.py
@@ -0,0 +1,118 @@
+import networkx as nx
+import sys
+from scipy.sparse import dok_matrix
+import numpy as np
+from collections import Counter
+from math import log
+from scipy.optimize import minimize, basinhopping
+
+
def weight(delta, dist, alpha):
    """Edge weight: power-law decay in the time gap, scaled by distance.

    Evaluates (1 / (1 + delta/alpha) ** 1.01) * dist, so a time gap
    ``delta`` that is large relative to the scale ``alpha`` drives the
    weight toward zero, while ``dist`` acts as a multiplicative factor.
    """
    decay = (1. + delta / alpha) ** 1.01
    return (1. / decay) * dist
+
+
def construct_matrix(filename, size=8238):
    """Parse an edge file into sparse time-gap and distance matrices.

    The file has one header line, then comma-separated rows whose
    fields (after the first, which is ignored) are: i, j, dist, t1, t2,
    all integers. Raw node ids i, j are remapped to consecutive dense
    indices in order of first appearance.

    Parameters:
        filename: path to the edge file.
        size: dimension of the (square) sparse matrices; defaults to
            the original hard-coded 8238.

    Returns:
        (delta_matrix, dist_matrix): dok_matrix pair where entry
        [i, j] holds t2 - t1 and the geometric distance weight.
    """
    delta_matrix = dok_matrix((size, size), dtype=np.float64)
    dist_matrix = dok_matrix((size, size), dtype=np.float64)
    labels = {}  # raw node id -> dense matrix index
    idx = 0
    with open(filename) as fh:
        fh.readline()  # skip header line
        for line in fh:
            values = line.strip().split(",")
            i, j, dist, t1, t2 = map(int, values[1:])
            for node in (i, j):
                if node not in labels:
                    labels[node] = idx
                    idx += 1
            delta = float(t2 - t1)
            # 4+ hops carry no weight; otherwise geometric decay
            # 0.15 * 0.57**(dist - 1). Kept in a separate variable so
            # the raw hop count is not clobbered (the original reused
            # `dist` for both meanings).
            dist_weight = 0 if dist >= 4 else 0.15 * (0.57 ** (dist - 1))
            delta_matrix[labels[i], labels[j]] = delta
            dist_matrix[labels[i], labels[j]] = dist_weight
    return delta_matrix, dist_matrix
+
+
def construct_graph(m, threshold=0.5):
    """Build a directed graph from the sparse weight matrix ``m``.

    Every row/column index touched by a stored entry becomes a node;
    an edge i -> j (carrying the weight) is added only when the entry
    is >= ``threshold``.
    """
    g = nx.DiGraph(weight=0.)
    # .items() works on both Python 2 and 3; the original .iteritems()
    # is Python-2-only and no longer exists on dok_matrix.
    for (i, j), w in m.items():
        g.add_node(i)
        g.add_node(j)
        if w >= threshold:
            g.add_edge(i, j, weight=w)
    return g
+
+
def construct_graph2(m, beta):
    """Build a best-parent graph and return it with a likelihood term.

    For each column j of ``m``, only the maximum-weight incoming edge
    (from row am[j]) can be kept. Returns (graph, ll) where ll sums
    the log-weights of kept maxima plus log(1 - w) over all rejected
    entries.
    """
    g = nx.DiGraph(weight=0.)
    # Register every endpoint as a node; edges are added below from the
    # dense per-column maxima. .items() replaces the Python-2-only
    # .iteritems(), which modern scipy/Python 3 does not provide.
    for (i, j), w in m.items():
        g.add_node(i)
        g.add_node(j)
    md = m.toarray()
    am = md.argmax(axis=0)                # best parent row per column
    ma = md[am, np.arange(md.shape[1])]   # the max weight per column
    threshold = beta / (1. - beta)
    edges = ma[ma >= threshold]
    # Zero the per-column maxima so the log(1 - md) term below
    # contributes log(1) = 0 for them.
    md[am, np.arange(md.shape[1])] = 0
    for col, parent in enumerate(am):
        # NOTE(review): edges are admitted against `beta` here, while
        # the likelihood term uses `threshold` -- looks inconsistent;
        # confirm which cutoff is intended.
        if ma[col] >= beta:
            g.add_edge(parent, col)
    return g, (np.log(edges).sum() + np.log(1 - ma[ma < threshold]).sum()
               + np.log(1 - md).sum())
+
+
def log_likelihood(delta_matrix, dist_matrix, alpha=60., beta=1.):
    """Log-likelihood of the cascade graph at parameters (alpha, beta).

    Converts every stored time gap into an edge weight via ``weight``,
    thresholds the weights at 0.5 into a graph, and scores kept edges
    with log(w), dropped ones with log(1 - w), plus one log(beta) per
    weakly connected component (cascade). Prints a summary line and the
    histogram of component sizes as a side effect.
    """
    # Mirror the input's shape instead of the original hard-coded 8238.
    m = dok_matrix(delta_matrix.shape, dtype=np.float64)
    # .items() replaces the Python-2-only .iteritems().
    for (i, j), delta in delta_matrix.items():
        m[i, j] = weight(delta, dist_matrix[i, j], alpha)
    g = construct_graph(m)
    # Separate name: the original rebound `m` from matrix to array.
    weights = np.array([w for _, w in m.items()])
    n, e = g.number_of_nodes(), g.number_of_edges()
    c = nx.number_weakly_connected_components(g)
    # 0.5 matches construct_graph's default threshold.
    ll = (np.sum(np.log(1 - weights[weights < 0.5]))
          + np.sum(np.log(weights[weights >= 0.5])))
    ll = ll + c * log(beta)
    cnt = Counter()
    print("Nodes:{0}, Edges:{1}, Cascades:{2}, Alpha:{4}, Beta:{5}, "
          "LL:{3}".format(n, e, c, ll, alpha, beta))
    for comp in nx.weakly_connected_components(g):
        cnt[len(comp)] += 1
    print(cnt.most_common())
    return ll
+
+
def log_likelihood2(params, delta_matrix, dist_matrix):
    """Negative log-likelihood of the best-parent cascade model.

    Parameters:
        params: (alpha, beta) pair, passed as a single array-like by
            scipy optimizers. (Python 3 removed tuple parameter
            unpacking, so it is unpacked manually here.)
        delta_matrix, dist_matrix: sparse matrices from
            construct_matrix.

    Returns the negated log-likelihood, suitable for minimization.
    """
    alpha, beta = params
    # Mirror the input's shape instead of the original hard-coded 8238.
    m = dok_matrix(delta_matrix.shape, dtype=np.float64)
    # .items() replaces the Python-2-only .iteritems().
    for (i, j), delta in delta_matrix.items():
        m[i, j] = weight(delta, dist_matrix[i, j], alpha)
    g, ll = construct_graph2(m, beta)
    c = nx.number_weakly_connected_components(g)
    # NOTE(review): 123506 (total columns?) and the hop-count weights
    # 36186 / 41711 / 23264 are dataset-specific constants baked in
    # from the original experiment -- confirm against the input data.
    ll = (ll + c * log(beta) + (123506 - c) * log(1 - beta)
          + log(0.15) * 36186 + log(0.15 * 0.57) * 41711
          + log(0.15 * 0.57 * 0.57) * 23264)
    return -ll  # negated because scipy minimizes
+
+
def accept_test(**kwargs):
    """basinhopping accept test: accept a step only if the proposed
    point lies inside the (alpha, beta) optimization box.

    basinhopping passes the candidate as the keyword argument
    ``x_new`` and expects a boolean back. The original version
    computed the per-coordinate checks but returned None, which is
    falsy and therefore rejected every step.
    """
    bounds = [(1, 100), (0.01, 0.5)]  # (alpha, beta) box constraints
    x = kwargs["x_new"]
    inside = [lo <= xi <= hi for xi, (lo, hi) in zip(x, bounds)]
    print(x, inside)
    return all(inside)
+
+
if __name__ == "__main__":
    # Usage: main.py <edge-file>
    # Fit (alpha, beta) by basinhopping on the negative log-likelihood,
    # starting from (10, 0.1). The bounds passed to the local minimizer
    # match the box that accept_test enforces on basinhopping steps.
    delta, dist = construct_matrix(sys.argv[1])
    basinhopping(log_likelihood2, [10, 0.1],
                 minimizer_kwargs={"args": (delta, dist),
                                   "bounds": [(1, 100), (0.01, 0.5)]},
                 accept_test=accept_test,
                 disp=True)