6 files changed, 0 insertions, 515 deletions
diff --git a/jpa_test/algorithms.py b/jpa_test/algorithms.py
deleted file mode 100644
index 0e240c9..0000000
--- a/jpa_test/algorithms.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import numpy as np
-import networkx as nx
-import cascade_creation
-from collections import Counter
-from itertools import izip
-import convex_optimization
-import timeout
-
-
-def greedy_prediction(G, cascades):
-    """
-    Returns estimated graph from Greedy algorithm in "Learning Epidemic ..."
-    Only words for independent cascade model!
-    """
-    G_hat = cascade_creation.InfluenceGraph(max_proba=None)
-    G_hat.add_nodes_from(G.nodes())
-    for node in G_hat.nodes():
-        print node
-        # Avoid cases where infection time is None or 0
-        tmp = [cascade for cascade in cascades if cascade.infection_time(node)
-                                                                           [0]]
-        while tmp:
-            parents = Counter()
-            for cascade in tmp:
-                    parents += cascade.candidate_infectors(node)
-            parent = parents.most_common(1)[0][0]
-            G_hat.add_edge(parent, node)
-            tmp = [cascade for cascade in tmp if (
-                    cascade.infection_time(parent)[0] is not None and
-                    cascade.infection_time(parent)[0]+1 not in
-                        cascade.infection_time(node))]
-    return G_hat
-
-
-def recovery_l1obj_l2constraint(G, cascades, floor_cstt, passed_function,
-                                *args, **kwargs):
-    """
-    Returns estimated graph from convex program specified by passed_function
-    passed_function should have similar structure to ones in convex_optimation
-    """
-    G_hat = cascade_creation.InfluenceGraph(max_proba=None)
-    G_hat.add_nodes_from(G.nodes())
-    for node in G_hat.nodes():
-        print node
-        try:
-            M, w = cascade_creation.icc_matrixvector_for_node(cascades, node)
-            p_node, __ = passed_function(M,w, *args, **kwargs)
-            G_hat = cascade_creation.add_edges_from_proba_vector(G=G_hat,
-                        p_node=p_node, node=node, floor_cstt=floor_cstt)
-        except timeout.TimeoutError:
-            print "TimeoutError, skipping to next node"
-    return G_hat
-
-
-def correctness_measure(G, G_hat, print_values=False):
-    """
-    Measures correctness of estimated graph G_hat to ground truth G
-    """
-    edges = set(G.edges())
-    edges_hat = set(G_hat.edges())
-    fp = len(edges_hat - edges)
-    fn = len(edges - edges_hat)
-    tp = len(edges | edges_hat)
-    tn = G.number_of_nodes() ** 2 - fp - fn - tp
-
-    #Other metrics
-    precision = 1. * tp / (tp + fp)
-    recall = 1. * tp / (tp + fn)
-    f1_score = 2.* tp / (2 * tp + fp + fn)
-
-    if print_values:
-        print "False Positives: {}".format(fp)
-        print "False Negatives: {}".format(fn)
-        print "True Positives: {}".format(tp)
-        print "True Negatives: {}".format(tn)
-        print "-------------------------------"
-        print "Precision: {}".format(precision)
-        print "Recall: {}".format(recall)
-        print "F1 score: {}".format(f1_score)
-
-    return fp, fn, tp, tn
-
-
-def test():
-    """
-    unit test
-    """
-    G = cascade_creation.InfluenceGraph(max_proba = .8)
-    G.erdos_init(n = 100, p = .05)
-    import time
-    t0 = time.time()
-    A = cascade_creation.generate_cascades(G, .2, 10000)
-    if 1:
-        G_hat = greedy_prediction(G, A)
-    if 0:
-        G_hat = recovery_l1obj_l2constraint(G, A,
-                passed_function=convex_optimization.l1obj_l2penalization,
-                floor_cstt=.1, lbda=10)
-    correctness_measure(G, G_hat, print_values=True)
-
-
-if __name__=="__main__":
-    test()
diff --git a/jpa_test/cascade_creation.py b/jpa_test/cascade_creation.py
deleted file mode 100644
index 9a26c03..0000000
--- a/jpa_test/cascade_creation.py
+++ /dev/null
@@ -1,162 +0,0 @@
-import networkx as nx
-import numpy as np
-import collections
-from itertools import izip
-from sklearn.preprocessing import normalize
-
-class InfluenceGraph(nx.Graph):
-    """
-    networkX graph with mat and logmat attributes
-    """
-    def __init__(self, max_proba, *args, **kwargs):
-        self.max_proba = max_proba
-        super(InfluenceGraph, self).__init__(*args, **kwargs)
-
-    def erdos_init(self, n, p):
-        G = nx.erdos_renyi_graph(n, p, directed=True)
-        self.add_edges_from(G.edges())
-
-    def import_from_file(self, file_name):
-        """
-        Takes a file from the Stanford collection of networks
-        """
-        with open(file_name, 'r') as f:
-            for edge in f:
-                if "#" not in edge:
-                    u, v = [int(node) for node in edge.split()]
-                    self.add_edge(u, v)
-
-    @property
-    def mat(self):
-        if not hasattr(self, '_mat'):
-            self._mat = (self.max_proba * np.random.rand(len(self), len(self))
-                         * np.asarray(nx.adjacency_matrix(self)))
-        return self._mat
-
-    @property
-    def logmat(self):
-        if not hasattr(self, '_logmat'):
-            self._logmat = np.log(1 - self.mat)
-        return self._logmat
-
-
-class Cascade(list):
-    """
-    Cascade object: list with attributes
-    """
-    def __init__(self, *args, **kwargs):
-        super(Cascade, self).__init__(*args, **kwargs)
-
-    def infection_time(self, node):
-        """
-        Returns lists of infections times for node i in cascade
-        """
-        infected_times = []
-        for t, infected_set in izip(xrange(len(self)), self):
-            if infected_set[node]:
-                infected_times.append(t)
-        if not infected_times:
-            infected_times.append(None)
-        return infected_times
-
-    def candidate_infectors(self, node):
-        """
-        Returns Counter of nodes infected just before node i was
-        """
-        candidate_infectors = collections.Counter()
-        for t in self.infection_time(node):
-            if t:  # avoid cases where t=0 or t is None
-                candidate_infectors.update(np.where(self[t-1])[0])
-        return candidate_infectors
-
-
-def icc_cascade(G, p_init):
-    """
-    Returns boolean vectors for one cascade
-    True means node was active at that time step
-    p_init: proba that node in seed set
-    """
-    susceptible = np.ones(G.number_of_nodes(), dtype=bool)
-    active = np.random.rand(G.number_of_nodes()) < p_init
-    susceptible = susceptible & np.logical_not(active)
-    cascade = Cascade()
-    while active.any():
-        cascade.append(active)
-        active = np.exp(np.dot(G.logmat, active)) \
-                    < np.random.rand(G.number_of_nodes())
-        active = active & susceptible
-        susceptible = susceptible & np.logical_not(active)
-    if not cascade:
-        print "Empty cascade, consider changing p_init or n_nodes. Retrying."
-        return icc_cascade(G, p_init)
-    return cascade
-
-
-def generate_cascades(G, p_init, n_cascades):
-    """
-    returns list of cascades
-    """""
-    return [icc_cascade(G,p_init) for i in xrange(n_cascades)]
-
-
-def icc_matrixvector_for_node(cascades, node):
-    """
-    for the ICC model:
-    Returns M, w in matrix form where rows of M are i = t + k.T
-    Excludes all (t,k) after node infection time; w = 1_{infected}
-    """
-    #TODO: you need to remove the variable corresponding to the node
-    #you are solving for!!!!
-    if node is None:
-        return np.vstack(cascades), None
-    else:
-        w = []
-        M = []
-        for cascade in cascades:
-            t_i = cascade.infection_time(node)[0]
-            if t_i != 0:
-                indicator = np.zeros(len(cascade[:t_i]))
-                if t_i > 0:
-                    indicator[-1] = 1
-                w.append(indicator)
-                M.append(np.array(cascade[:t_i]))
-        M = np.vstack(M)
-        w = np.hstack(w)
-    return M, w
-
-
-def normalize_matrix(M):
-    """
-    returns M with normalized (L_1 norm) columns
-    """
-    return normalize(M.astype("float32"), axis=0, norm="l2")
-
-
-def add_edges_from_proba_vector(G, p_node, node, floor_cstt):
-    """
-    Takes proba vector, node and adds edges to G by flooring very small
-    probabilities
-    Also updates G's mat matrix
-    """
-    floor_parent = np.nonzero(p_node*(p_node > floor_cstt))
-    for parent in floor_parent[0]:
-        G.add_edge(parent, node)
-    #TODO: update G's mat matrix
-    return G
-
-
-def test():
-    """
-    unit test
-    """
-    G = InfluenceGraph(max_proba = .3)
-    G.erdos_init(n = 10, p = 1)
-    import time
-    t0 = time.time()
-    A = generate_cascades(G, .1, 2)
-    M, w = icc_matrixvector_for_node(A, 0)
-    t1 = time.time()
-    print t1 - t0
-
-if __name__ == "__main__":
-    test()
diff --git a/jpa_test/convex_optimization.py b/jpa_test/convex_optimization.py
deleted file mode 100644
index 530e3f5..0000000
--- a/jpa_test/convex_optimization.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import theano
-import cascade_creation
-from theano import tensor, function
-import numpy as np
-import timeout
-import cvxopt
-
-
-@timeout.timeout(10)
-def l1obj_l2constraint(M_val, w_val):
-    """
-    Solves:
-        min - sum_j theta_j
-        s.t theta_j <= 0
-            |e^{M*theta} - (1 - w)|_2 <= 1
-    """
-    assert len(M_val) == len(w_val)
-
-    if M_val.dtype == bool:
-        M_val = M_val.astype('float32')
-
-    m, n = M_val.shape
-    c = cvxopt.matrix(-1.0, (n,1))
-
-    theta = tensor.row().T
-    z = tensor.row().T
-    theta_ = theta.flatten()
-    z_ = z.flatten()
-    M = theano.shared(M_val.astype(theano.config.floatX))
-    w = theano.shared(w_val.astype(theano.config.floatX))
-    y = (tensor.exp(M.dot(theta_)) - (1 - w)).norm(2) - 1
-    y_diff = tensor.grad(y, theta_)
-    y_hess = z[0] * theano.gradient.hessian(y, theta_)
-    f_x = theano.function([theta], [y, y_diff], allow_input_downcast=True)
-    f_xz = theano.function([theta, z], [y, y_diff, y_hess],
-                            allow_input_downcast=True)
-
-    def F(x=None, z=None):
-        if x is None:
-            return 1, cvxopt.matrix(.0001, (n,1))
-        elif z is None:
-            y, y_diff = f_x(x)
-            return cvxopt.matrix(float(y), (1, 1)),\
-                    cvxopt.matrix(y_diff.astype("float64")).T
-        else:
-            y, y_diff, y_hess = f_xz(x, z)
-            return cvxopt.matrix(float(y), (1, 1)), \
-                    cvxopt.matrix(y_diff.astype("float64")).T, \
-                    cvxopt.matrix(y_hess.astype("float64"))
-
-    G = cvxopt.spdiag([1 for i in xrange(n)])
-    h = cvxopt.matrix(0.0, (n,1))
-
-    cvxopt.solvers.options['show_progress'] = False
-    try:
-        theta = cvxopt.solvers.cpl(c,F, G, h)['x']
-    except ArithmeticError:
-        print "ArithmeticError thrown, change initial point"+\
-              " given to the solver"
-
-    return 1 - np.exp(theta), theta
-
-
-@timeout.timeout(10)
-def l1obj_l2penalization(M_val, w_val, lbda):
-    """
-    Solves:
-        min - sum_j theta_j + lbda*|e^{M*theta} - (1 - w)|_2
-        s.t theta_j <= 0
-    """
-    assert len(M_val) == len(w_val)
-
-    if M_val.dtype == bool:
-        M_val = M_val.astype('float32')
-
-    if type(lbda) == int:
-        lbda = np.array(lbda)
-
-    m, n = M_val.shape
-
-    theta = tensor.row().T
-    z = tensor.row().T
-    theta_ = theta.flatten()
-    z_ = z.flatten()
-    M = theano.shared(M_val.astype(theano.config.floatX))
-    w = theano.shared(w_val.astype(theano.config.floatX))
-    lbda = theano.shared(lbda.astype(theano.config.floatX))
-    y = (theta_).norm(1) + lbda * (
-        tensor.exp(M.dot(theta_)) - (1 - w)).norm(2)
-    y_diff = tensor.grad(y, theta_)
-    y_hess = z[0] * theano.gradient.hessian(y, theta_)
-    f_x = theano.function([theta], [y, y_diff], allow_input_downcast=True)
-    f_xz = theano.function([theta, z], [y, y_diff, y_hess],
-                           allow_input_downcast=True)
-
-    def F(x=None, z=None):
-        if x is None:
-            return 0, cvxopt.matrix(.0001, (n,1))
-        elif z is None:
-            y, y_diff = f_x(x)
-            return cvxopt.matrix(float(y), (1, 1)),\
-                   cvxopt.matrix(y_diff.astype("float64")).T
-        else:
-            y, y_diff, y_hess = f_xz(x, z)
-            return cvxopt.matrix(float(y), (1, 1)), \
-                    cvxopt.matrix(y_diff.astype("float64")).T, \
-                    cvxopt.matrix(y_hess.astype("float64"))
-
-    G = cvxopt.spdiag([1 for i in xrange(n)])
-    h = cvxopt.matrix(0.0, (n,1))
-
-    cvxopt.solvers.options['show_progress'] = False
-    try:
-        theta = cvxopt.solvers.cp(F, G, h)['x']
-    except ArithmeticError:
-        print "ArithmeticError thrown, change initial point"+\
-              " given to the solver"
-
-    return 1 - np.exp(theta), theta
-
-
-def test():
-    """
-    unit test
-    """
-    lbda = 10
-    G = cascade_creation.InfluenceGraph(max_proba=.8)
-    G.erdos_init(n=100, p = .1)
-    A = cascade_creation.generate_cascades(G, .1, 2000)
-    M_val, w_val = cascade_creation.icc_matrixvector_for_node(A, 0)
-    p_vec, theta = l1obj_l2penalization(M_val, w_val, lbda)
-    print p_vec
-
-if __name__=="__main__":
-    test()
diff --git a/jpa_test/make_plots.py b/jpa_test/make_plots.py
deleted file mode 100644
index e3c0d3d..0000000
--- a/jpa_test/make_plots.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy as np
-import cascade_creation
-import rip_condition
-
-
-def plot_rip_numberofnodes(max_proba, n_min, n_max, p_init, n_cascades, K_max):
-    """
-    Plots the RIP constant for varying number of nodes (n_max included)
-    """
-    x = np.arange(n_min, n_max+1)
-    y = []
-
-    for n_nodes in x:
-        print n_nodes
-        G = cascade_creation.InfluenceGraph(max_proba=.3)
-        G.erdos_init(n=n_nodes, p=.1) #TODO: handle different inits!
-        cascades = cascade_creation.generate_cascades(G, p_init=p_init,
-                                                      n_cascades=n_cascades)
-        M, __ = cascade_creation.icc_matrixvector_for_node(cascades, None)
-        M = cascade_creation.normalize_matrix(M)
-        y.append(rip_condition.find_kth_rip_constants(M, 4)) #
-
-    print y
-
-    plt.clf()
-    plt.plot(x, y)
-    #plt.show()
-
-    return x, y
-
-
-def test():
-    """
-    unit test
-    """
-    plot_rip_numberofnodes(max_proba=.3, n_min=30, n_max=30,
-                            p_init=.01, n_cascades=100, K_max=4)
-
-if __name__=="__main__":
-    test()
diff --git a/jpa_test/rip_condition.py b/jpa_test/rip_condition.py
deleted file mode 100644
index fccf9a4..0000000
--- a/jpa_test/rip_condition.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import numpy as np
-import cascade_creation
-import itertools
-from scipy import sparse
-from scipy.sparse import linalg
-
-def find_kth_rip_constants(M, k):
-    """
-    Returns max(A_1, A_2) where:
-    1 + A_1 = arg max |Mx|_2^2 s.t. |x|_2 = 1 and |x|_0 = k
-    1 - A_2 = arg min |Mx|_2^2 s.t. |x|_2 = 1 and |x|_0 = kx
-    """
-    delta = 0
-    print M.shape
-    for col_set in itertools.combinations(xrange(M.shape[1]), k):
-        M_kcol = M[:,list(col_set)]
-        delta_upper, delta_lower = upperlower_bound_rip(M_kcol)
-        delta = max(delta, max(delta_upper, delta_lower))
-    return delta
-
-
-def upperlower_bound_rip(M):
-    """
-    returns arg max/min |Mx|_2^2 s.t. |x|_2 = 1
-    which is the greatest eigenvalue value of M^T*M
-    or the square of the greatest singular value of M
-    """
-    M = sparse.csc_matrix(M)
-    s_upper = linalg.svds(M, 1, tol=.01, which ='LM', maxiter = 2000,
-                                return_singular_vectors=False)
-    s_lower = linalg.svds(M, 1, tol=.01, which = 'SM', maxiter= 2000,
-                                return_singular_vectors=False)
-    return s_upper ** 2 - 1, 1 -s_lower ** 2
-
-
-def test():
-    """
-    unit test
-    """
-    G = cascade_creation.InfluenceGraph(max_proba=.3)
-    G.erdos_init(n=10, p =1)
-    cascades = cascade_creation.generate_cascades(G, p_init=.3, n_cascades=10)
-    M, __ = cascade_creation.icc_matrixvector_for_node(cascades, None)
-    M = cascade_creation.normalize_matrix(M)
-    print find_kth_rip_constants(M, 5)
-
-
-if __name__=="__main__":
-    test()
diff --git a/jpa_test/timeout.py b/jpa_test/timeout.py
deleted file mode 100644
index d7381c3..0000000
--- a/jpa_test/timeout.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from functools import wraps
-import errno
-import os
-import signal
-
-class TimeoutError(Exception):
-    pass
-
-def timeout(seconds=10, error_message=os.strerror(errno.ETIME)):
-    def decorator(func):
-        def _handle_timeout(signum, frame):
-            raise TimeoutError(error_message)
-
-        def wrapper(*args, **kwargs):
-            signal.signal(signal.SIGALRM, _handle_timeout)
-            signal.alarm(seconds)
-            try:
-                result = func(*args, **kwargs)
-            finally:
-                signal.alarm(0)
-            return result
-
-        return wraps(func)(wrapper)
-
-    return decorator