diff options
Diffstat (limited to 'jpa_test')
| -rw-r--r-- | jpa_test/algorithms.py | 103 | ||||
| -rw-r--r-- | jpa_test/cascade_creation.py | 162 | ||||
| -rw-r--r-- | jpa_test/convex_optimization.py | 135 | ||||
| -rw-r--r-- | jpa_test/make_plots.py | 41 | ||||
| -rw-r--r-- | jpa_test/rip_condition.py | 49 | ||||
| -rw-r--r-- | jpa_test/timeout.py | 25 |
6 files changed, 0 insertions, 515 deletions
diff --git a/jpa_test/algorithms.py b/jpa_test/algorithms.py deleted file mode 100644 index 0e240c9..0000000 --- a/jpa_test/algorithms.py +++ /dev/null @@ -1,103 +0,0 @@ -import numpy as np -import networkx as nx -import cascade_creation -from collections import Counter -from itertools import izip -import convex_optimization -import timeout - - -def greedy_prediction(G, cascades): - """ - Returns estimated graph from Greedy algorithm in "Learning Epidemic ..." - Only words for independent cascade model! - """ - G_hat = cascade_creation.InfluenceGraph(max_proba=None) - G_hat.add_nodes_from(G.nodes()) - for node in G_hat.nodes(): - print node - # Avoid cases where infection time is None or 0 - tmp = [cascade for cascade in cascades if cascade.infection_time(node) - [0]] - while tmp: - parents = Counter() - for cascade in tmp: - parents += cascade.candidate_infectors(node) - parent = parents.most_common(1)[0][0] - G_hat.add_edge(parent, node) - tmp = [cascade for cascade in tmp if ( - cascade.infection_time(parent)[0] is not None and - cascade.infection_time(parent)[0]+1 not in - cascade.infection_time(node))] - return G_hat - - -def recovery_l1obj_l2constraint(G, cascades, floor_cstt, passed_function, - *args, **kwargs): - """ - Returns estimated graph from convex program specified by passed_function - passed_function should have similar structure to ones in convex_optimation - """ - G_hat = cascade_creation.InfluenceGraph(max_proba=None) - G_hat.add_nodes_from(G.nodes()) - for node in G_hat.nodes(): - print node - try: - M, w = cascade_creation.icc_matrixvector_for_node(cascades, node) - p_node, __ = passed_function(M,w, *args, **kwargs) - G_hat = cascade_creation.add_edges_from_proba_vector(G=G_hat, - p_node=p_node, node=node, floor_cstt=floor_cstt) - except timeout.TimeoutError: - print "TimeoutError, skipping to next node" - return G_hat - - -def correctness_measure(G, G_hat, print_values=False): - """ - Measures correctness of estimated graph G_hat to ground truth G - """ - edges = set(G.edges()) - edges_hat = set(G_hat.edges()) - fp = len(edges_hat - edges) - fn = len(edges - edges_hat) - tp = len(edges | edges_hat) - tn = G.number_of_nodes() ** 2 - fp - fn - tp - - #Other metrics - precision = 1. * tp / (tp + fp) - recall = 1. * tp / (tp + fn) - f1_score = 2.* tp / (2 * tp + fp + fn) - - if print_values: - print "False Positives: {}".format(fp) - print "False Negatives: {}".format(fn) - print "True Positives: {}".format(tp) - print "True Negatives: {}".format(tn) - print "-------------------------------" - print "Precision: {}".format(precision) - print "Recall: {}".format(recall) - print "F1 score: {}".format(f1_score) - - return fp, fn, tp, tn - - -def test(): - """ - unit test - """ - G = cascade_creation.InfluenceGraph(max_proba = .8) - G.erdos_init(n = 100, p = .05) - import time - t0 = time.time() - A = cascade_creation.generate_cascades(G, .2, 10000) - if 1: - G_hat = greedy_prediction(G, A) - if 0: - G_hat = recovery_l1obj_l2constraint(G, A, - passed_function=convex_optimization.l1obj_l2penalization, - floor_cstt=.1, lbda=10) - correctness_measure(G, G_hat, print_values=True) - - -if __name__=="__main__": - test() diff --git a/jpa_test/cascade_creation.py b/jpa_test/cascade_creation.py deleted file mode 100644 index 9a26c03..0000000 --- a/jpa_test/cascade_creation.py +++ /dev/null @@ -1,162 +0,0 @@ -import networkx as nx -import numpy as np -import collections -from itertools import izip -from sklearn.preprocessing import normalize - -class InfluenceGraph(nx.Graph): - """ - networkX graph with mat and logmat attributes - """ - def __init__(self, max_proba, *args, **kwargs): - self.max_proba = max_proba - super(InfluenceGraph, self).__init__(*args, **kwargs) - - def erdos_init(self, n, p): - G = nx.erdos_renyi_graph(n, p, directed=True) - self.add_edges_from(G.edges()) - - def import_from_file(self, file_name): - """ - Takes a file from the Stanford collection of networks - """ - with open(file_name, 'r') as f: - for edge in f: - if "#" not in edge: - u, v = [int(node) for node in edge.split()] - self.add_edge(u, v) - - @property - def mat(self): - if not hasattr(self, '_mat'): - self._mat = (self.max_proba * np.random.rand(len(self), len(self)) - * np.asarray(nx.adjacency_matrix(self))) - return self._mat - - @property - def logmat(self): - if not hasattr(self, '_logmat'): - self._logmat = np.log(1 - self.mat) - return self._logmat - - -class Cascade(list): - """ - Cascade object: list with attributes - """ - def __init__(self, *args, **kwargs): - super(Cascade, self).__init__(*args, **kwargs) - - def infection_time(self, node): - """ - Returns lists of infections times for node i in cascade - """ - infected_times = [] - for t, infected_set in izip(xrange(len(self)), self): - if infected_set[node]: - infected_times.append(t) - if not infected_times: - infected_times.append(None) - return infected_times - - def candidate_infectors(self, node): - """ - Returns Counter of nodes infected just before node i was - """ - candidate_infectors = collections.Counter() - for t in self.infection_time(node): - if t: # avoid cases where t=0 or t is None - candidate_infectors.update(np.where(self[t-1])[0]) - return candidate_infectors - - -def icc_cascade(G, p_init): - """ - Returns boolean vectors for one cascade - True means node was active at that time step - p_init: proba that node in seed set - """ - susceptible = np.ones(G.number_of_nodes(), dtype=bool) - active = np.random.rand(G.number_of_nodes()) < p_init - susceptible = susceptible & np.logical_not(active) - cascade = Cascade() - while active.any(): - cascade.append(active) - active = np.exp(np.dot(G.logmat, active)) \ - < np.random.rand(G.number_of_nodes()) - active = active & susceptible - susceptible = susceptible & np.logical_not(active) - if not cascade: - print "Empty cascade, consider changing p_init or n_nodes. Retrying." - return icc_cascade(G, p_init) - return cascade - - -def generate_cascades(G, p_init, n_cascades): - """ - returns list of cascades - """"" - return [icc_cascade(G,p_init) for i in xrange(n_cascades)] - - -def icc_matrixvector_for_node(cascades, node): - """ - for the ICC model: - Returns M, w in matrix form where rows of M are i = t + k.T - Excludes all (t,k) after node infection time; w = 1_{infected} - """ - #TODO: you need to remove the variable corresponding to the node - #you are solving for!!!! - if node is None: - return np.vstack(cascades), None - else: - w = [] - M = [] - for cascade in cascades: - t_i = cascade.infection_time(node)[0] - if t_i != 0: - indicator = np.zeros(len(cascade[:t_i])) - if t_i > 0: - indicator[-1] = 1 - w.append(indicator) - M.append(np.array(cascade[:t_i])) - M = np.vstack(M) - w = np.hstack(w) - return M, w - - -def normalize_matrix(M): - """ - returns M with normalized (L_1 norm) columns - """ - return normalize(M.astype("float32"), axis=0, norm="l2") - - -def add_edges_from_proba_vector(G, p_node, node, floor_cstt): - """ - Takes proba vector, node and adds edges to G by flooring very small - probabilities - Also updates G's mat matrix - """ - floor_parent = np.nonzero(p_node*(p_node > floor_cstt)) - for parent in floor_parent[0]: - G.add_edge(parent, node) - #TODO: update G's mat matrix - return G - - -def test(): - """ - unit test - """ - G = InfluenceGraph(max_proba = .3) - G.erdos_init(n = 10, p = 1) - import time - t0 = time.time() - A = generate_cascades(G, .1, 2) - M, w = icc_matrixvector_for_node(A, 0) - t1 = time.time() - print t1 - t0 - -if __name__ == "__main__": - test() diff --git a/jpa_test/convex_optimization.py b/jpa_test/convex_optimization.py deleted file mode 100644 index 530e3f5..0000000 --- a/jpa_test/convex_optimization.py +++ /dev/null @@ -1,135 +0,0 @@ -import theano -import cascade_creation -from theano import tensor, function -import numpy as np -import timeout -import cvxopt - - -@timeout.timeout(10) -def l1obj_l2constraint(M_val, w_val): - """ - Solves: - min - sum_j theta_j - s.t theta_j <= 0 - |e^{M*theta} - (1 - w)|_2 <= 1 - """ - assert len(M_val) == len(w_val) - - if M_val.dtype == bool: - M_val = M_val.astype('float32') - - m, n = M_val.shape - c = cvxopt.matrix(-1.0, (n,1)) - - theta = tensor.row().T - z = tensor.row().T - theta_ = theta.flatten() - z_ = z.flatten() - M = theano.shared(M_val.astype(theano.config.floatX)) - w = theano.shared(w_val.astype(theano.config.floatX)) - y = (tensor.exp(M.dot(theta_)) - (1 - w)).norm(2) - 1 - y_diff = tensor.grad(y, theta_) - y_hess = z[0] * theano.gradient.hessian(y, theta_) - f_x = theano.function([theta], [y, y_diff], allow_input_downcast=True) - f_xz = theano.function([theta, z], [y, y_diff, y_hess], - allow_input_downcast=True) - - def F(x=None, z=None): - if x is None: - return 1, cvxopt.matrix(.0001, (n,1)) - elif z is None: - y, y_diff = f_x(x) - return cvxopt.matrix(float(y), (1, 1)),\ - cvxopt.matrix(y_diff.astype("float64")).T - else: - y, y_diff, y_hess = f_xz(x, z) - return cvxopt.matrix(float(y), (1, 1)), \ - cvxopt.matrix(y_diff.astype("float64")).T, \ - cvxopt.matrix(y_hess.astype("float64")) - - G = cvxopt.spdiag([1 for i in xrange(n)]) - h = cvxopt.matrix(0.0, (n,1)) - - cvxopt.solvers.options['show_progress'] = False - try: - theta = cvxopt.solvers.cpl(c,F, G, h)['x'] - except ArithmeticError: - print "ArithmeticError thrown, change initial point"+\ - " given to the solver" - - return 1 - np.exp(theta), theta - - -@timeout.timeout(10) -def l1obj_l2penalization(M_val, w_val, lbda): - """ - Solves: - min - sum_j theta_j + lbda*|e^{M*theta} - (1 - w)|_2 - s.t theta_j <= 0 - """ - assert len(M_val) == len(w_val) - - if M_val.dtype == bool: - M_val = M_val.astype('float32') - - if type(lbda) == int: - lbda = np.array(lbda) - - m, n = M_val.shape - - theta = tensor.row().T - z = tensor.row().T - theta_ = theta.flatten() - z_ = z.flatten() - M = theano.shared(M_val.astype(theano.config.floatX)) - w = theano.shared(w_val.astype(theano.config.floatX)) - lbda = theano.shared(lbda.astype(theano.config.floatX)) - y = (theta_).norm(1) + lbda * ( - tensor.exp(M.dot(theta_)) - (1 - w)).norm(2) - y_diff = tensor.grad(y, theta_) - y_hess = z[0] * theano.gradient.hessian(y, theta_) - f_x = theano.function([theta], [y, y_diff], allow_input_downcast=True) - f_xz = theano.function([theta, z], [y, y_diff, y_hess], - allow_input_downcast=True) - - def F(x=None, z=None): - if x is None: - return 0, cvxopt.matrix(.0001, (n,1)) - elif z is None: - y, y_diff = f_x(x) - return cvxopt.matrix(float(y), (1, 1)),\ - cvxopt.matrix(y_diff.astype("float64")).T - else: - y, y_diff, y_hess = f_xz(x, z) - return cvxopt.matrix(float(y), (1, 1)), \ - cvxopt.matrix(y_diff.astype("float64")).T, \ - cvxopt.matrix(y_hess.astype("float64")) - - G = cvxopt.spdiag([1 for i in xrange(n)]) - h = cvxopt.matrix(0.0, (n,1)) - - cvxopt.solvers.options['show_progress'] = False - try: - theta = cvxopt.solvers.cp(F, G, h)['x'] - except ArithmeticError: - print "ArithmeticError thrown, change initial point"+\ - " given to the solver" - - return 1 - np.exp(theta), theta - - -def test(): - """ - unit test - """ - lbda = 10 - G = cascade_creation.InfluenceGraph(max_proba=.8) - G.erdos_init(n=100, p = .1) - A = cascade_creation.generate_cascades(G, .1, 2000) - M_val, w_val = cascade_creation.icc_matrixvector_for_node(A, 0) - p_vec, theta = l1obj_l2penalization(M_val, w_val, lbda) - print p_vec - -if __name__=="__main__": - test() diff --git a/jpa_test/make_plots.py b/jpa_test/make_plots.py deleted file mode 100644 index e3c0d3d..0000000 --- a/jpa_test/make_plots.py +++ /dev/null @@ -1,41 +0,0 @@ -import matplotlib.pyplot as plt -import numpy as np -import cascade_creation -import rip_condition - - -def plot_rip_numberofnodes(max_proba, n_min, n_max, p_init, n_cascades, K_max): - """ - Plots the RIP constant for varying number of nodes (n_max included) - """ - x = np.arange(n_min, n_max+1) - y = [] - - for n_nodes in x: - print n_nodes - G = cascade_creation.InfluenceGraph(max_proba=.3) - G.erdos_init(n=n_nodes, p=.1) #TODO: handle different inits! - cascades = cascade_creation.generate_cascades(G, p_init=p_init, - n_cascades=n_cascades) - M, __ = cascade_creation.icc_matrixvector_for_node(cascades, None) - M = cascade_creation.normalize_matrix(M) - y.append(rip_condition.find_kth_rip_constants(M, 4)) # - - print y - - plt.clf() - plt.plot(x, y) - #plt.show() - - return x, y - - -def test(): - """ - unit test - """ - plot_rip_numberofnodes(max_proba=.3, n_min=30, n_max=30, - p_init=.01, n_cascades=100, K_max=4) - -if __name__=="__main__": - test() diff --git a/jpa_test/rip_condition.py b/jpa_test/rip_condition.py deleted file mode 100644 index fccf9a4..0000000 --- a/jpa_test/rip_condition.py +++ /dev/null @@ -1,49 +0,0 @@ -import numpy as np -import cascade_creation -import itertools -from scipy import sparse -from scipy.sparse import linalg - -def find_kth_rip_constants(M, k): - """ - Returns max(A_1, A_2) where: - 1 + A_1 = arg max |Mx|_2^2 s.t. |x|_2 = 1 and |x|_0 = k - 1 - A_2 = arg min |Mx|_2^2 s.t. |x|_2 = 1 and |x|_0 = kx - """ - delta = 0 - print M.shape - for col_set in itertools.combinations(xrange(M.shape[1]), k): - M_kcol = M[:,list(col_set)] - delta_upper, delta_lower = upperlower_bound_rip(M_kcol) - delta = max(delta, max(delta_upper, delta_lower)) - return delta - - -def upperlower_bound_rip(M): - """ - returns arg max/min |Mx|_2^2 s.t. |x|_2 = 1 - which is the greatest eigenvalue value of M^T*M - or the square of the greatest singular value of M - """ - M = sparse.csc_matrix(M) - s_upper = linalg.svds(M, 1, tol=.01, which ='LM', maxiter = 2000, - return_singular_vectors=False) - s_lower = linalg.svds(M, 1, tol=.01, which = 'SM', maxiter= 2000, - return_singular_vectors=False) - return s_upper ** 2 - 1, 1 -s_lower ** 2 - - -def test(): - """ - unit test - """ - G = cascade_creation.InfluenceGraph(max_proba=.3) - G.erdos_init(n=10, p =1) - cascades = cascade_creation.generate_cascades(G, p_init=.3, n_cascades=10) - M, __ = cascade_creation.icc_matrixvector_for_node(cascades, None) - M = cascade_creation.normalize_matrix(M) - print find_kth_rip_constants(M, 5) - - -if __name__=="__main__": - test() diff --git a/jpa_test/timeout.py b/jpa_test/timeout.py deleted file mode 100644 index d7381c3..0000000 --- a/jpa_test/timeout.py +++ /dev/null @@ -1,25 +0,0 @@ -from functools import wraps -import errno -import os -import signal - -class TimeoutError(Exception): - pass - -def timeout(seconds=10, error_message=os.strerror(errno.ETIME)): - def decorator(func): - def _handle_timeout(signum, frame): - raise TimeoutError(error_message) - - def wrapper(*args, **kwargs): - signal.signal(signal.SIGALRM, _handle_timeout) - signal.alarm(seconds) - try: - result = func(*args, **kwargs) - finally: - signal.alarm(0) - return result - - return wraps(func)(wrapper) - - return decorator |
