convex_optimization first draft

author: jeanpouget-abadie <jean.pougetabadie@gmail.com> 2014-11-30 23:17:41 -0500
committer: jeanpouget-abadie <jean.pougetabadie@gmail.com> 2014-11-30 23:17:41 -0500
commit: b49e39e300f9d87310f6cce20018427a98f34486 (patch)
tree: 272262ef3f19a5170dfd74dadda37dd9c3b61fb8
parent: 2a6010634417eac9bf2ac4682ac3675dc5074518 (diff)
download: cascades-b49e39e300f9d87310f6cce20018427a98f34486.tar.gz
3 files changed, 92 insertions, 3 deletions
diff --git a/jpa_test/algorithms.py b/jpa_test/algorithms.py
index 2a32f57..cf4ce50 100644
--- a/jpa_test/algorithms.py
+++ b/jpa_test/algorithms.py
@@ -2,13 +2,14 @@ import numpy as np
 import networkx as nx
 import cascade_creation
 from collections import Counter
-
 from itertools import izip
+import convex_optimization
 
 
 def greedy_prediction(G, cascades):
     """
     Returns estimated graph from Greedy algorithm in "Learning Epidemic ..."
+    TODO: write cleaner code?
     """
     G_hat = cascade_creation.InfluenceGraph(max_proba=None)
     G_hat.add_nodes_from(G.nodes())
@@ -33,6 +34,19 @@ def greedy_prediction(G, cascades):
     return G_hat
 
 
+def sparserecovery(G, cascades):
+    """
+    Returns estimated graph from following convex program:
+    min |theta_1| + lbda | exp(M theta) -(1- w)|
+    where theta = log (1 - p); w = 1_{infected}; lbda = lagrange cstt
+    """
+    G_hat = cascade_creation.InfluenceGraph(max_proba=None)
+    G_hat.add_nodes_from(G.nodes())
+    for node in G_hat.nodes():
+        M, w = cascade_creation.icc_matrixvector_for_node(cascades, node)
+        edges_node = convex_optimization.l1regls(M,w)
+
+
 def correctness_measure(G, G_hat):
     """
     Measures correctness of estimated graph G_hat to ground truth G
@@ -54,6 +68,9 @@ def test():
     import time
     t0 = time.time()
     A = cascade_creation.generate_cascades(G, .1, 100)
+
+    sparserecovery(G, A)
+
     G_hat = greedy_prediction(G, A)
     fp, fn, gp = correctness_measure(G, G_hat)
     print "False Positive: {}".format(len(fp))
diff --git a/jpa_test/cascade_creation.py b/jpa_test/cascade_creation.py
index 80054a7..2b53963 100644
--- a/jpa_test/cascade_creation.py
+++ b/jpa_test/cascade_creation.py
@@ -55,6 +55,8 @@ class Cascade(list):
         for t, infected_set in izip(xrange(len(self)), self):
             if infected_set[node]:
                 infected_times.append(t)
+        if not infected_times:
+            infected_times.append(None)
         return infected_times
 
     def candidate_infectors(self, node):
@@ -68,6 +70,27 @@ class Cascade(list):
         return candidate_infectors
 
 
+def icc_matrixvector_for_node(cascades, node):
+    """
+    for the ICC model:
+    Returns M, w in matrix form where rows of M are i = t + k.T
+    Excludes all (t,k) after node infection time; w = 1_{infected}
+    """
+    w = []
+    M = []
+    for cascade in cascades:
+        t_i = cascade.infection_time(node)[0]
+        if t_i > 0 or t_i is None:
+            indicator = np.zeros(len(cascade))
+            if t_i > 0:
+                indicator[-1] = 1
+            w.append(indicator)
+            M.append(np.array(cascade[:t_i]))
+    M = np.vstack(M)
+    w = np.hstack(w)
+    return M, w
+
+
 def icc_cascade(G, p_init):
     """
     Returns boolean vectors for one cascade
@@ -78,7 +101,7 @@ def icc_cascade(G, p_init):
     active = np.random.rand(G.number_of_nodes()) < p_init
     susceptible = susceptible - active
     cascade = Cascade()
-    while active.any() and susceptible.any():
+    while active.any():
         cascade.append(active)
         active = np.exp(np.dot(G.logmat, active)) \
                     < np.random.rand(G.number_of_nodes())
@@ -102,7 +125,9 @@ def test():
     G.erdos_init(n = 100, p = 1)
     import time
     t0 = time.time()
-    print len(icc_cascade(G, p_init=.1))
+    A = generate_cascades(G, .1, 10)
+    M, w = icc_matrixvector_for_node(A, 0)
+    print w
     t1 = time.time()
     print t1 - t0
 
diff --git a/jpa_test/convex_optimization.py b/jpa_test/convex_optimization.py
new file mode 100644
index 0000000..ebd68e3
--- /dev/null
+++ b/jpa_test/convex_optimization.py
@@ -0,0 +1,47 @@
+import theano
+from theano import tensor, function
+import numpy as np
+import cvxopt
+
+
+
+def l1regls(M_val, w_val):
+    """
+    Solves:
+        min - sum theta
+        s.t theta <= 0
+            |e^{M*theta} - (1 - w)|_2 <= 1
+    """
+    m, n = M_val.shape
+    c = cvxopt.matrix(-1.0, (n,1))
+
+    theta = tensor.row().T
+    z = tensor.row().T
+    theta_ = theta.flatten()
+    z_ = z.flatten()
+    M = theano.shared(M_val.astype(theano.config.floatX))
+    w = theano.shared(w_val.astype(theano.config.floatX))
+    y = (tensor.exp(M.dot(theta_)) - (1 - w)).norm(2) - 1
+    y_diff = tensor.grad(y, theta_)
+    y_hess = z[0] * theano.gradient.hessian(y, theta_)
+    f_x = theano.function([theta], [y, y_diff], allow_input_downcast=True)
+    f_xz = theano.function([theta, z], [y, y_diff, y_hess])
+
+    def F(x=None, z=None):
+        if x is None:
+            return 1, cvxopt.matrix(1.0, (n,1))
+        elif z is None:
+            y, y_diff = f_x(x)
+            return cvxopt.matrix(float(y), (1, 1)), cvxopt.matrix(y_diff.astype("float64")).T
+        else:
+            y, y_diff, y_hess = f_xz(x, z)
+            return cvxopt.matrix(float(y), (1, 1)), \
+                    cvxopt.matrix(y_diff.astype("float64")).T, \
+                    cvxopt.matrix(y_hess.astype("float64"))
+
+    return cvxopt.solvers.cpl(c,F)['x']
+
+if __name__=="__main__":
+    M_val = np.random.rand(100, 20)
+    w_val = np.random.rand(100)
+    l1regls(M_val, w_val)
author	jeanpouget-abadie <jean.pougetabadie@gmail.com>	2014-11-30 23:17:41 -0500
committer	jeanpouget-abadie <jean.pougetabadie@gmail.com>	2014-11-30 23:17:41 -0500
commit	b49e39e300f9d87310f6cce20018427a98f34486 (patch)
tree	272262ef3f19a5170dfd74dadda37dd9c3b61fb8
parent	2a6010634417eac9bf2ac4682ac3675dc5074518 (diff)
download	cascades-b49e39e300f9d87310f6cce20018427a98f34486.tar.gz