import networkx as nx import sys from scipy.sparse import dok_matrix import numpy as np from collections import Counter from math import log from scipy.optimize import minimize, basinhopping def weight(delta, dist, alpha): r = 1. / ((1. + (delta / alpha)) ** 1.01) * dist # * (0.01 / alpha) return r def construct_matrix(filename): delta_matrix = dok_matrix((8238, 8238), dtype=np.float64) dist_matrix = dok_matrix((8238, 8238), dtype=np.float64) labels = {} idx = 0 with open(filename) as fh: fh.readline() for line in fh: values = line.strip().split(",") i, j, dist, t1, t2 = map(int, values[1:]) if i not in labels: labels[i] = idx idx += 1 if j not in labels: labels[j] = idx idx += 1 delta = float(t2 - t1) dist = 0 if dist >= 4 else 0.15 * (0.57 ** (dist - 1)) delta_matrix[labels[i], labels[j]] = delta dist_matrix[labels[i], labels[j]] = dist return delta_matrix, dist_matrix def construct_graph(m, threshold=0.5): g = nx.DiGraph(weight=0.) for ((i, j), w) in m.iteritems(): g.add_node(i) g.add_node(j) if w >= threshold: g.add_edge(i, j, weight=w) return g def construct_graph2(m, beta): g = nx.DiGraph(weight=0.) for (i, j), w in m.iteritems(): g.add_node(i) g.add_node(j) md = m.toarray() am = md.argmax(axis=0) ma = md[am, np.arange(md.shape[1])] threshold = beta / (1. - beta) edges = ma[ma >= threshold] md[am, np.arange(md.shape[1])] = 0 for j, i in enumerate(am): if ma[j] >= beta: g.add_edge(i, j) return g, (np.log(edges).sum() + np.log(1 - ma[ma < threshold]).sum() + np.log(1 - md).sum()) def log_likelihood(delta_matrix, dist_matrix, alpha=60., beta=1.): m = dok_matrix((8238, 8238), dtype=np.float64) for ((i, j), delta) in delta_matrix.iteritems(): m[i, j] = weight(delta, dist_matrix[i, j], alpha) g = construct_graph(m) m = np.array([w for _, w in m.items()]) n, e = g.number_of_nodes(), g.number_of_edges() c = nx.number_weakly_connected_components(g) l = (np.sum(np.log(1 - m[m < 0.5])) + np.sum(np.log(m[m >= 0.5]))) l = l + c * log(beta) cnt = Counter() print "Nodes:{0}, Edges:{1}, Cascades:{2}, Alpha:{4}, Beta:{5}, "\ "LL:{3}".format(n, e, c, l, alpha, beta) for c in nx.weakly_connected_components(g): cnt[len(c)] += 1 print cnt.most_common() return l def log_likelihood2((alpha, beta), delta_matrix, dist_matrix): m = dok_matrix((8238, 8238), dtype=np.float64) for ((i, j), delta) in delta_matrix.iteritems(): m[i, j] = weight(delta, dist_matrix[i, j], alpha) g, l = construct_graph2(m, beta) n, e = g.number_of_nodes(), g.number_of_edges() c = nx.number_weakly_connected_components(g) l = (l + c * log(beta) + (123506 - c) * log(1 - beta) + log(0.15) * 36186 + log(0.15 * 0.57) * 41711 + log(0.15 * 0.57 * 0.57) * 23264) # cnt = Counter() # print "Nodes:{0}, Edges:{1}, Cascades:{2}, Alpha:{4}, Beta:{5}, "\ # "LL:{3}".format(n, e, c, l, alpha, beta) # for c in nx.weakly_connected_components(g): # cnt[len(c)] += 1 # print cnt.most_common() #print beta, alpha, l return -l def accept_test(**kwargs): bounds = [(1, 100), (0.01, 0.5)] x = kwargs["x_new"] l = [bounds[i][0] <= e <= bounds[i][1] for i, e in enumerate(x)] print x, l if __name__ == "__main__": delta, dist = construct_matrix(sys.argv[1]) basinhopping(log_likelihood2, [10, 0.1], minimizer_kwargs={"args": (delta, dist), "bounds": [(1, 100), (0.01, 0.5)]}, accept_test=accept_test, disp=True)