aboutsummaryrefslogtreecommitdiffstats
path: root/jpa_test
diff options
context:
space:
mode:
Diffstat (limited to 'jpa_test')
-rw-r--r--jpa_test/algorithms.py103
-rw-r--r--jpa_test/cascade_creation.py162
-rw-r--r--jpa_test/convex_optimization.py135
-rw-r--r--jpa_test/make_plots.py41
-rw-r--r--jpa_test/rip_condition.py49
-rw-r--r--jpa_test/timeout.py25
6 files changed, 0 insertions, 515 deletions
diff --git a/jpa_test/algorithms.py b/jpa_test/algorithms.py
deleted file mode 100644
index 0e240c9..0000000
--- a/jpa_test/algorithms.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import numpy as np
-import networkx as nx
-import cascade_creation
-from collections import Counter
-from itertools import izip
-import convex_optimization
-import timeout
-
-
-def greedy_prediction(G, cascades):
- """
- Returns estimated graph from Greedy algorithm in "Learning Epidemic ..."
- Only words for independent cascade model!
- """
- G_hat = cascade_creation.InfluenceGraph(max_proba=None)
- G_hat.add_nodes_from(G.nodes())
- for node in G_hat.nodes():
- print node
- # Avoid cases where infection time is None or 0
- tmp = [cascade for cascade in cascades if cascade.infection_time(node)
- [0]]
- while tmp:
- parents = Counter()
- for cascade in tmp:
- parents += cascade.candidate_infectors(node)
- parent = parents.most_common(1)[0][0]
- G_hat.add_edge(parent, node)
- tmp = [cascade for cascade in tmp if (
- cascade.infection_time(parent)[0] is not None and
- cascade.infection_time(parent)[0]+1 not in
- cascade.infection_time(node))]
- return G_hat
-
-
-def recovery_l1obj_l2constraint(G, cascades, floor_cstt, passed_function,
- *args, **kwargs):
- """
- Returns estimated graph from convex program specified by passed_function
- passed_function should have similar structure to ones in convex_optimation
- """
- G_hat = cascade_creation.InfluenceGraph(max_proba=None)
- G_hat.add_nodes_from(G.nodes())
- for node in G_hat.nodes():
- print node
- try:
- M, w = cascade_creation.icc_matrixvector_for_node(cascades, node)
- p_node, __ = passed_function(M,w, *args, **kwargs)
- G_hat = cascade_creation.add_edges_from_proba_vector(G=G_hat,
- p_node=p_node, node=node, floor_cstt=floor_cstt)
- except timeout.TimeoutError:
- print "TimeoutError, skipping to next node"
- return G_hat
-
-
-def correctness_measure(G, G_hat, print_values=False):
- """
- Measures correctness of estimated graph G_hat to ground truth G
- """
- edges = set(G.edges())
- edges_hat = set(G_hat.edges())
- fp = len(edges_hat - edges)
- fn = len(edges - edges_hat)
- tp = len(edges | edges_hat)
- tn = G.number_of_nodes() ** 2 - fp - fn - tp
-
- #Other metrics
- precision = 1. * tp / (tp + fp)
- recall = 1. * tp / (tp + fn)
- f1_score = 2.* tp / (2 * tp + fp + fn)
-
- if print_values:
- print "False Positives: {}".format(fp)
- print "False Negatives: {}".format(fn)
- print "True Positives: {}".format(tp)
- print "True Negatives: {}".format(tn)
- print "-------------------------------"
- print "Precision: {}".format(precision)
- print "Recall: {}".format(recall)
- print "F1 score: {}".format(f1_score)
-
- return fp, fn, tp, tn
-
-
-def test():
- """
- unit test
- """
- G = cascade_creation.InfluenceGraph(max_proba = .8)
- G.erdos_init(n = 100, p = .05)
- import time
- t0 = time.time()
- A = cascade_creation.generate_cascades(G, .2, 10000)
- if 1:
- G_hat = greedy_prediction(G, A)
- if 0:
- G_hat = recovery_l1obj_l2constraint(G, A,
- passed_function=convex_optimization.l1obj_l2penalization,
- floor_cstt=.1, lbda=10)
- correctness_measure(G, G_hat, print_values=True)
-
-
-if __name__=="__main__":
- test()
diff --git a/jpa_test/cascade_creation.py b/jpa_test/cascade_creation.py
deleted file mode 100644
index 9a26c03..0000000
--- a/jpa_test/cascade_creation.py
+++ /dev/null
@@ -1,162 +0,0 @@
-import networkx as nx
-import numpy as np
-import collections
-from itertools import izip
-from sklearn.preprocessing import normalize
-
-class InfluenceGraph(nx.Graph):
- """
- networkX graph with mat and logmat attributes
- """
- def __init__(self, max_proba, *args, **kwargs):
- self.max_proba = max_proba
- super(InfluenceGraph, self).__init__(*args, **kwargs)
-
- def erdos_init(self, n, p):
- G = nx.erdos_renyi_graph(n, p, directed=True)
- self.add_edges_from(G.edges())
-
- def import_from_file(self, file_name):
- """
- Takes a file from the Stanford collection of networks
- """
- with open(file_name, 'r') as f:
- for edge in f:
- if "#" not in edge:
- u, v = [int(node) for node in edge.split()]
- self.add_edge(u, v)
-
- @property
- def mat(self):
- if not hasattr(self, '_mat'):
- self._mat = (self.max_proba * np.random.rand(len(self), len(self))
- * np.asarray(nx.adjacency_matrix(self)))
- return self._mat
-
- @property
- def logmat(self):
- if not hasattr(self, '_logmat'):
- self._logmat = np.log(1 - self.mat)
- return self._logmat
-
-
-class Cascade(list):
- """
- Cascade object: list with attributes
- """
- def __init__(self, *args, **kwargs):
- super(Cascade, self).__init__(*args, **kwargs)
-
- def infection_time(self, node):
- """
- Returns lists of infections times for node i in cascade
- """
- infected_times = []
- for t, infected_set in izip(xrange(len(self)), self):
- if infected_set[node]:
- infected_times.append(t)
- if not infected_times:
- infected_times.append(None)
- return infected_times
-
- def candidate_infectors(self, node):
- """
- Returns Counter of nodes infected just before node i was
- """
- candidate_infectors = collections.Counter()
- for t in self.infection_time(node):
- if t: # avoid cases where t=0 or t is None
- candidate_infectors.update(np.where(self[t-1])[0])
- return candidate_infectors
-
-
-def icc_cascade(G, p_init):
- """
- Returns boolean vectors for one cascade
- True means node was active at that time step
- p_init: proba that node in seed set
- """
- susceptible = np.ones(G.number_of_nodes(), dtype=bool)
- active = np.random.rand(G.number_of_nodes()) < p_init
- susceptible = susceptible & np.logical_not(active)
- cascade = Cascade()
- while active.any():
- cascade.append(active)
- active = np.exp(np.dot(G.logmat, active)) \
- < np.random.rand(G.number_of_nodes())
- active = active & susceptible
- susceptible = susceptible & np.logical_not(active)
- if not cascade:
- print "Empty cascade, consider changing p_init or n_nodes. Retrying."
- return icc_cascade(G, p_init)
- return cascade
-
-
-def generate_cascades(G, p_init, n_cascades):
- """
- returns list of cascades
- """""
- return [icc_cascade(G,p_init) for i in xrange(n_cascades)]
-
-
-def icc_matrixvector_for_node(cascades, node):
- """
- for the ICC model:
- Returns M, w in matrix form where rows of M are i = t + k.T
- Excludes all (t,k) after node infection time; w = 1_{infected}
- """
- #TODO: you need to remove the variable corresponding to the node
- #you are solving for!!!!
- if node is None:
- return np.vstack(cascades), None
- else:
- w = []
- M = []
- for cascade in cascades:
- t_i = cascade.infection_time(node)[0]
- if t_i != 0:
- indicator = np.zeros(len(cascade[:t_i]))
- if t_i > 0:
- indicator[-1] = 1
- w.append(indicator)
- M.append(np.array(cascade[:t_i]))
- M = np.vstack(M)
- w = np.hstack(w)
- return M, w
-
-
-def normalize_matrix(M):
- """
- returns M with normalized (L_1 norm) columns
- """
- return normalize(M.astype("float32"), axis=0, norm="l2")
-
-
-def add_edges_from_proba_vector(G, p_node, node, floor_cstt):
- """
- Takes proba vector, node and adds edges to G by flooring very small
- probabilities
- Also updates G's mat matrix
- """
- floor_parent = np.nonzero(p_node*(p_node > floor_cstt))
- for parent in floor_parent[0]:
- G.add_edge(parent, node)
- #TODO: update G's mat matrix
- return G
-
-
-def test():
- """
- unit test
- """
- G = InfluenceGraph(max_proba = .3)
- G.erdos_init(n = 10, p = 1)
- import time
- t0 = time.time()
- A = generate_cascades(G, .1, 2)
- M, w = icc_matrixvector_for_node(A, 0)
- t1 = time.time()
- print t1 - t0
-
-if __name__ == "__main__":
- test()
diff --git a/jpa_test/convex_optimization.py b/jpa_test/convex_optimization.py
deleted file mode 100644
index 530e3f5..0000000
--- a/jpa_test/convex_optimization.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import theano
-import cascade_creation
-from theano import tensor, function
-import numpy as np
-import timeout
-import cvxopt
-
-
-@timeout.timeout(10)
-def l1obj_l2constraint(M_val, w_val):
- """
- Solves:
- min - sum_j theta_j
- s.t theta_j <= 0
- |e^{M*theta} - (1 - w)|_2 <= 1
- """
- assert len(M_val) == len(w_val)
-
- if M_val.dtype == bool:
- M_val = M_val.astype('float32')
-
- m, n = M_val.shape
- c = cvxopt.matrix(-1.0, (n,1))
-
- theta = tensor.row().T
- z = tensor.row().T
- theta_ = theta.flatten()
- z_ = z.flatten()
- M = theano.shared(M_val.astype(theano.config.floatX))
- w = theano.shared(w_val.astype(theano.config.floatX))
- y = (tensor.exp(M.dot(theta_)) - (1 - w)).norm(2) - 1
- y_diff = tensor.grad(y, theta_)
- y_hess = z[0] * theano.gradient.hessian(y, theta_)
- f_x = theano.function([theta], [y, y_diff], allow_input_downcast=True)
- f_xz = theano.function([theta, z], [y, y_diff, y_hess],
- allow_input_downcast=True)
-
- def F(x=None, z=None):
- if x is None:
- return 1, cvxopt.matrix(.0001, (n,1))
- elif z is None:
- y, y_diff = f_x(x)
- return cvxopt.matrix(float(y), (1, 1)),\
- cvxopt.matrix(y_diff.astype("float64")).T
- else:
- y, y_diff, y_hess = f_xz(x, z)
- return cvxopt.matrix(float(y), (1, 1)), \
- cvxopt.matrix(y_diff.astype("float64")).T, \
- cvxopt.matrix(y_hess.astype("float64"))
-
- G = cvxopt.spdiag([1 for i in xrange(n)])
- h = cvxopt.matrix(0.0, (n,1))
-
- cvxopt.solvers.options['show_progress'] = False
- try:
- theta = cvxopt.solvers.cpl(c,F, G, h)['x']
- except ArithmeticError:
- print "ArithmeticError thrown, change initial point"+\
- " given to the solver"
-
- return 1 - np.exp(theta), theta
-
-
-@timeout.timeout(10)
-def l1obj_l2penalization(M_val, w_val, lbda):
- """
- Solves:
- min - sum_j theta_j + lbda*|e^{M*theta} - (1 - w)|_2
- s.t theta_j <= 0
- """
- assert len(M_val) == len(w_val)
-
- if M_val.dtype == bool:
- M_val = M_val.astype('float32')
-
- if type(lbda) == int:
- lbda = np.array(lbda)
-
- m, n = M_val.shape
-
- theta = tensor.row().T
- z = tensor.row().T
- theta_ = theta.flatten()
- z_ = z.flatten()
- M = theano.shared(M_val.astype(theano.config.floatX))
- w = theano.shared(w_val.astype(theano.config.floatX))
- lbda = theano.shared(lbda.astype(theano.config.floatX))
- y = (theta_).norm(1) + lbda * (
- tensor.exp(M.dot(theta_)) - (1 - w)).norm(2)
- y_diff = tensor.grad(y, theta_)
- y_hess = z[0] * theano.gradient.hessian(y, theta_)
- f_x = theano.function([theta], [y, y_diff], allow_input_downcast=True)
- f_xz = theano.function([theta, z], [y, y_diff, y_hess],
- allow_input_downcast=True)
-
- def F(x=None, z=None):
- if x is None:
- return 0, cvxopt.matrix(.0001, (n,1))
- elif z is None:
- y, y_diff = f_x(x)
- return cvxopt.matrix(float(y), (1, 1)),\
- cvxopt.matrix(y_diff.astype("float64")).T
- else:
- y, y_diff, y_hess = f_xz(x, z)
- return cvxopt.matrix(float(y), (1, 1)), \
- cvxopt.matrix(y_diff.astype("float64")).T, \
- cvxopt.matrix(y_hess.astype("float64"))
-
- G = cvxopt.spdiag([1 for i in xrange(n)])
- h = cvxopt.matrix(0.0, (n,1))
-
- cvxopt.solvers.options['show_progress'] = False
- try:
- theta = cvxopt.solvers.cp(F, G, h)['x']
- except ArithmeticError:
- print "ArithmeticError thrown, change initial point"+\
- " given to the solver"
-
- return 1 - np.exp(theta), theta
-
-
-def test():
- """
- unit test
- """
- lbda = 10
- G = cascade_creation.InfluenceGraph(max_proba=.8)
- G.erdos_init(n=100, p = .1)
- A = cascade_creation.generate_cascades(G, .1, 2000)
- M_val, w_val = cascade_creation.icc_matrixvector_for_node(A, 0)
- p_vec, theta = l1obj_l2penalization(M_val, w_val, lbda)
- print p_vec
-
-if __name__=="__main__":
- test()
diff --git a/jpa_test/make_plots.py b/jpa_test/make_plots.py
deleted file mode 100644
index e3c0d3d..0000000
--- a/jpa_test/make_plots.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy as np
-import cascade_creation
-import rip_condition
-
-
-def plot_rip_numberofnodes(max_proba, n_min, n_max, p_init, n_cascades, K_max):
- """
- Plots the RIP constant for varying number of nodes (n_max included)
- """
- x = np.arange(n_min, n_max+1)
- y = []
-
- for n_nodes in x:
- print n_nodes
- G = cascade_creation.InfluenceGraph(max_proba=.3)
- G.erdos_init(n=n_nodes, p=.1) #TODO: handle different inits!
- cascades = cascade_creation.generate_cascades(G, p_init=p_init,
- n_cascades=n_cascades)
- M, __ = cascade_creation.icc_matrixvector_for_node(cascades, None)
- M = cascade_creation.normalize_matrix(M)
- y.append(rip_condition.find_kth_rip_constants(M, 4)) #
-
- print y
-
- plt.clf()
- plt.plot(x, y)
- #plt.show()
-
- return x, y
-
-
-def test():
- """
- unit test
- """
- plot_rip_numberofnodes(max_proba=.3, n_min=30, n_max=30,
- p_init=.01, n_cascades=100, K_max=4)
-
-if __name__=="__main__":
- test()
diff --git a/jpa_test/rip_condition.py b/jpa_test/rip_condition.py
deleted file mode 100644
index fccf9a4..0000000
--- a/jpa_test/rip_condition.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import numpy as np
-import cascade_creation
-import itertools
-from scipy import sparse
-from scipy.sparse import linalg
-
-def find_kth_rip_constants(M, k):
- """
- Returns max(A_1, A_2) where:
- 1 + A_1 = arg max |Mx|_2^2 s.t. |x|_2 = 1 and |x|_0 = k
- 1 - A_2 = arg min |Mx|_2^2 s.t. |x|_2 = 1 and |x|_0 = kx
- """
- delta = 0
- print M.shape
- for col_set in itertools.combinations(xrange(M.shape[1]), k):
- M_kcol = M[:,list(col_set)]
- delta_upper, delta_lower = upperlower_bound_rip(M_kcol)
- delta = max(delta, max(delta_upper, delta_lower))
- return delta
-
-
-def upperlower_bound_rip(M):
- """
- returns arg max/min |Mx|_2^2 s.t. |x|_2 = 1
- which is the greatest eigenvalue value of M^T*M
- or the square of the greatest singular value of M
- """
- M = sparse.csc_matrix(M)
- s_upper = linalg.svds(M, 1, tol=.01, which ='LM', maxiter = 2000,
- return_singular_vectors=False)
- s_lower = linalg.svds(M, 1, tol=.01, which = 'SM', maxiter= 2000,
- return_singular_vectors=False)
- return s_upper ** 2 - 1, 1 -s_lower ** 2
-
-
-def test():
- """
- unit test
- """
- G = cascade_creation.InfluenceGraph(max_proba=.3)
- G.erdos_init(n=10, p =1)
- cascades = cascade_creation.generate_cascades(G, p_init=.3, n_cascades=10)
- M, __ = cascade_creation.icc_matrixvector_for_node(cascades, None)
- M = cascade_creation.normalize_matrix(M)
- print find_kth_rip_constants(M, 5)
-
-
-if __name__=="__main__":
- test()
diff --git a/jpa_test/timeout.py b/jpa_test/timeout.py
deleted file mode 100644
index d7381c3..0000000
--- a/jpa_test/timeout.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from functools import wraps
-import errno
-import os
-import signal
-
-class TimeoutError(Exception):
- pass
-
-def timeout(seconds=10, error_message=os.strerror(errno.ETIME)):
- def decorator(func):
- def _handle_timeout(signum, frame):
- raise TimeoutError(error_message)
-
- def wrapper(*args, **kwargs):
- signal.signal(signal.SIGALRM, _handle_timeout)
- signal.alarm(seconds)
- try:
- result = func(*args, **kwargs)
- finally:
- signal.alarm(0)
- return result
-
- return wraps(func)(wrapper)
-
- return decorator