-rw-r--r--  paper/sections/appendix.tex   86
-rw-r--r--  src/convex_optimization.py    73
-rw-r--r--  src/make_plots.py             77
3 files changed, 203 insertions, 33 deletions
diff --git a/paper/sections/appendix.tex b/paper/sections/appendix.tex
index 64953a3..4c6aed7 100644
--- a/paper/sections/appendix.tex
+++ b/paper/sections/appendix.tex
@@ -70,7 +70,7 @@ the proof.
$
\forall \Delta\in C(S),\;
\Delta^\top H\Delta \geq \frac{1}{2} \Delta^\top \E[H]\Delta,
- $ w.p. at least $1-e^{-n^{\delta}\log m}$ and the conclusion of
+ $ w.p.~at least $1-e^{-n^{\delta}\log m}$ and the conclusion of
Proposition~\ref{prop:fi} follows.
\end{proof}
@@ -99,37 +99,71 @@ has a quantity of information $\Omega(s\log \frac{m}{s})$ about $S$. By the
Shannon-Hartley theorem, this information is also upper-bounded by $\O(n\log
C)$. These two bounds together imply the theorem.
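Spelled out (a brief sketch, in the notation above, with constants absorbed
into the asymptotic terms), combining the two bounds gives
\[
\O(n\log C) \;\geq\; \Omega\Big(s\log \frac{m}{s}\Big)
\quad\Longrightarrow\quad
n \;=\; \Omega\Big(\frac{s\log \frac{m}{s}}{\log C}\Big).
\]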
-\subsection{Other continuous time processes binned to ours: proportional
-hazards model}
-\subsection{Irrepresentability vs. Restricted Eigenvalue Condition}
-In the words and notation of Theorem 9.1 in \cite{vandegeer:2009}:
-\begin{lemma}
-\label{lemm:irrepresentability_proof}
-Let $\phi^2_{\text{compatible}}(L,S) \defeq \min \{ \frac{s
-\|f_\beta\|^2_2}{\|\beta_S\|^2_1} \ : \ \beta \in {\cal R}(L, S) \}$, where
-$\|f_\beta\|^2_2 \defeq \{ \beta^T \Sigma \beta \}$ and ${\cal R}(L,S) \defeq
-\{\beta : \|\beta_{S^c}\|_1 \leq L \|\beta_S\|_1 \neq 0\}$. If
-$\nu_{\text{irrepresentable}(S,s)} < 1/L$, then $\phi^2_{\text{compatible}}(L,S)
-\geq (1 - L \nu_{\text{irrepresentable}(S,s)})^2 \lambda_{\min}^2$.
-\end{lemma}
+\subsection{Running Time Analysis}
-Since ${\cal R}(3, S) = {\cal C}$, $\|\beta_S\|_1 \geq \|\beta_S\|_2$, and
-$\|\beta_S\|_1 \geq \frac{1}{3} \|\beta_{S^c}\|_1$ it is easy to see that
-$\|\beta_S\|_1 \geq \frac{1}{4} \|\beta\|_2$ and therefore that: $\gamma_n \geq
-\frac{n}{4s}\phi^2_{\text{compatible}}(3,S)$
+\begin{figure}
+ \centering
+ \includegraphics[scale=.4]{figures/n_nodes_running_time.pdf}
+  \caption{Running time for estimating the parents of a \emph{single node} on
+  a Barab\'asi--Albert graph, as a function of the number of nodes in the
+  graph. The parameter $k$ (the number of nodes each new node attaches to) is
+  held constant at $30$, $p_{\text{init}}$ is set to $.15$, the edge weights
+  are drawn uniformly at random from $[.2,.7]$, and the penalization
+  parameter $\lambda$ is set to $.1$.}
+ \label{fig:running_time_n_nodes}
+\end{figure}
-Consequently, if $\epsilon > \frac{2}{3}$, then
-$\nu_{\text{irrepresentable}(S,s)} < 1/3$ and the conditions of
-Lemma~\ref{lemm:irrepresentability_proof} hold.
+\begin{figure}
+ \centering
+ \includegraphics[scale=.4]{figures/n_cascades_running_time.pdf}
+  \caption{Running time for estimating the parents of a \emph{single node} on
+  a Barab\'asi--Albert graph, as a function of the total number of observed
+  cascades. The parameter $k$ (the number of nodes each new node attaches to)
+  is held constant at $30$, $p_{\text{init}}$ is set to $.15$, the edge
+  weights are drawn uniformly at random from $[.2,.7]$, and the penalization
+  parameter $\lambda$ is set to $.1$.}
+ \label{fig:running_time_n_cascades}
+\end{figure}
+We report here a running time analysis of the different algorithms, shown in
+Figures~\ref{fig:running_time_n_nodes}
+and~\ref{fig:running_time_n_cascades}. As expected, the simple greedy
+algorithm is the fastest. Interestingly, the non-penalized convex program
+also converges faster than our penalized maximum-likelihood program.
+%\subsection{Other continuous time processes binned to ours: proportional
+%hazards model}
-\subsection{Lower bound for restricted eigenvalues (expected hessian) for
-different graphs}
+%\subsection{Irrepresentability vs. Restricted Eigenvalue Condition}
+%In the words and notation of Theorem 9.1 in \cite{vandegeer:2009}:
+%\begin{lemma}
+%\label{lemm:irrepresentability_proof}
+%Let $\phi^2_{\text{compatible}}(L,S) \defeq \min \{ \frac{s
+%\|f_\beta\|^2_2}{\|\beta_S\|^2_1} \ : \ \beta \in {\cal R}(L, S) \}$, where
+%$\|f_\beta\|^2_2 \defeq \{ \beta^T \Sigma \beta \}$ and ${\cal R}(L,S) \defeq
+%\{\beta : \|\beta_{S^c}\|_1 \leq L \|\beta_S\|_1 \neq 0\}$. If
+%$\nu_{\text{irrepresentable}(S,s)} < 1/L$, then $\phi^2_{\text{compatible}}(L,S)
+%\geq (1 - L \nu_{\text{irrepresentable}(S,s)})^2 \lambda_{\min}^2$.
+%\end{lemma}
-\subsection{Better asymptotic w.r.t expected hessian}
+%Since ${\cal R}(3, S) = {\cal C}$, $\|\beta_S\|_1 \geq \|\beta_S\|_2$, and
+%$\|\beta_S\|_1 \geq \frac{1}{3} \|\beta_{S^c}\|_1$ it is easy to see that
+%$\|\beta_S\|_1 \geq \frac{1}{4} \|\beta\|_2$ and therefore that: $\gamma_n \geq
+%\frac{n}{4s}\phi^2_{\text{compatible}}(3,S)$
-\subsection{Confidence intervals?}
+%Consequently, if $\epsilon > \frac{2}{3}$, then
+%$\nu_{\text{irrepresentable}(S,s)} < 1/3$ and the conditions of
+%Lemma~\ref{lemm:irrepresentability_proof} hold.
-\subsection{Active learning}
+
+
+%\subsection{Lower bound for restricted eigenvalues (expected hessian) for
+%different graphs}
+
+%\subsection{Better asymptotic w.r.t expected hessian}
+
+%\subsection{Confidence intervals?}
+
+%\subsection{Active learning}
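For reference, the running-time figures above describe a Barab\'asi--Albert
graph with attachment parameter $k=30$ and edge weights drawn uniformly from
$[.2,.7]$. The repository presumably builds these graphs through its own
cascade_creation utilities; the snippet below is only an illustrative sketch
of the same construction using networkx, with hypothetical parameter values.

import random
import networkx as nx

# Hypothetical parameters mirroring the figure captions above.
n_nodes = 300   # varied along the x-axis of the first running-time figure
k = 30          # number of nodes each new node attaches to

G = nx.barabasi_albert_graph(n_nodes, k, seed=0)
for u, v in G.edges():
    # Edge weights (influence probabilities) drawn uniformly from [.2, .7].
    G[u][v]["weight"] = random.uniform(0.2, 0.7)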
diff --git a/src/convex_optimization.py b/src/convex_optimization.py
index 000143f..fbfeb29 100644
--- a/src/convex_optimization.py
+++ b/src/convex_optimization.py
@@ -4,7 +4,7 @@ from theano import tensor, function
import numpy as np
import timeout
import cvxopt
-
+import scipy.sparse.linalg
@timeout.timeout(20)
def sparse_recovery(lbda, n_cascades):
@@ -124,28 +124,89 @@ def diff_and_opt(M_val, w_val, f_x, f_xz):
return 1 - np.exp(theta), theta
+def restrictedEigenvalue(M_val, w_val=None, theta_val=None, S=None,
+        gramMatrix=False):
+    """
+    Returns the smallest eigenvalue of the empirical Hessian (or of the Gram
+    matrix) of the per-node likelihood, optionally restricted to the support
+    set S.
+    M_val : measurement matrix, one row per measurement
+    w_val : infection indicator vector (only needed when gramMatrix is False)
+    theta_val : parameter vector at which the Hessian is evaluated
+    S : indices of the support set; if None, the full matrix is used
+    gramMatrix (bool) : if True, use the Gram matrix instead of the Hessian
+    """
+    m, n = M_val.shape
+
+    if gramMatrix:
+        H = 1./m * np.dot(M_val.T, M_val)
+    else:
+        # Build the Hessian of the negative log-likelihood with theano.
+        theta = tensor.vector()
+        M = tensor.matrix(name='M', dtype=theano.config.floatX)
+        w = tensor.vector(name='w', dtype=theano.config.floatX)
+
+        # Average the negative log-likelihood over the m measurements.
+        y = - 1./m*((w).dot(tensor.log(1-tensor.exp(M.dot(theta)))) +
+            (1-w).dot(tensor.dot(M, theta)))
+
+        f = theano.function([theta, M, w],
+                theano.gradient.hessian(y, theta),
+                allow_input_downcast=True)
+
+        H = f(theta_val, M_val.astype('float32'), w_val)
+
+    if S is not None and len(S) > 0:
+        # Restrict the matrix to the rows and columns indexed by S.
+        H = H[np.ix_(S, S)]
+
+    H = np.asarray(H, dtype='float64')
+    if H.shape[0] < 2:
+        # eigsh needs k < dim, so use a dense solver for 1x1 matrices.
+        return np.linalg.eigvalsh(H).min()
+    evals_small, _ = scipy.sparse.linalg.eigsh(H, 1, which="SA")
+    return evals_small[0]
+
+
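# A minimal, self-contained sanity check for the gramMatrix branch above
# (illustrative sketch only: the measurement matrix and support set below are
# made up). The smallest eigenvalue of the Gram matrix restricted to S can
# also be obtained directly with dense numpy routines.
import numpy as np

M = np.random.rand(200, 20)        # hypothetical measurement matrix
S = np.array([1, 4, 7])            # hypothetical support set
H = np.dot(M.T, M) / M.shape[0]    # empirical Gram matrix
print(np.linalg.eigvalsh(H[np.ix_(S, S)]).min())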
def test():
"""
unit test
"""
lbda = .0001
- G = cascade_creation.InfluenceGraph(max_proba=.9)
+ G = cascade_creation.InfluenceGraph(max_proba=.9, min_proba=.2)
G.erdos_init(n=20, p = .3)
A = cascade_creation.generate_cascades(G, .1, 500)
- M_val, w_val = cascade_creation.icc_matrixvector_for_node(A, 2)
- print(len(M_val))
+ M_val, w_val = cascade_creation.icc_matrixvector_for_node(A, 0)
+ print(M_val.shape)
+ print(w_val.shape)
#Type lasso
if 0:
- f_x, f_xz = type_lasso(lbda)
+ f_x, f_xz = type_lasso(lbda, 500)
p_vec, _ = diff_and_opt(M_val, w_val, f_x, f_xz)
print(G.mat[2])
#Sparse recovery
- if 1:
+ if 0:
f_x, f_xz = sparse_recovery(lbda, 500)
p_vec, _ = diff_and_opt(M_val, w_val, f_x, f_xz)
print(G.mat[2])
+    #Restricted eigenvalue
+    if 1:
+        # M_val and w_val were built for node 0 above, so evaluate the
+        # Hessian at the same node's true parameter vector.
+        node = 0
+        parents = np.nonzero(G.mat[:, node])[0]
+        theta_val = G.logmat[:, node]
+        print(restrictedEigenvalue(M_val, w_val, theta_val, S=parents))
+
if __name__=="__main__":
test()
diff --git a/src/make_plots.py b/src/make_plots.py
index 8970cdf..4cbef4a 100644
--- a/src/make_plots.py
+++ b/src/make_plots.py
@@ -3,9 +3,72 @@ import numpy as np
import cascade_creation
import convex_optimization
import algorithms
+import timeout
import rip_condition
+def theoreticalGuarantees(graph_name, n_cascades, min_proba, max_proba,
+ sparse_edges, p_init, passed_function, *args, **kwargs):
+
+ #creating original graph
+ G = cascade_creation.InfluenceGraph(max_proba = max_proba,
+ min_proba = min_proba,
+ sparse_edges = sparse_edges
+ )
+ G.import_from_file(graph_name)
+
+ #creating dummy graph
+ G_hat = cascade_creation.InfluenceGraph(max_proba=None)
+ G_hat.add_nodes_from(G.nodes())
+
+ #generating cascades
+ A = cascade_creation.generate_cascades(G, p_init=p_init,
+ n_cascades = n_cascades)
+
+    #Restricted eigenvalues and per-node guarantee terms
+    restrictedEigenvalueList = []
+    guaranteeTerms = []
+    alpha = 1
+
+    for node in G.nodes():
+        print(node)
+        try:
+            M, _ = cascade_creation.icc_matrixvector_for_node(A, node)
+            parents = np.nonzero(G.mat[:, node])[0]
+            gamma = convex_optimization.restrictedEigenvalue(M, S=parents,
+                    gramMatrix=True)
+            restrictedEigenvalueList.append(gamma)
+            print(gamma)
+            # Per-node term of the theoretical bound:
+            # sqrt(s * log(#nodes) / #measurements) / (alpha * gamma),
+            # accumulated here so each term uses its own node's support size
+            # and measurement count.
+            if gamma >= 1000:
+                guaranteeTerms.append(
+                    np.sqrt(len(parents)*np.log(G.number_of_nodes())/len(M))/(
+                    alpha*gamma))
+        except timeout.TimeoutError:
+            print("TimeoutError, skipping to next node")
+
+    print(restrictedEigenvalueList)
+    theoreticalGuarantee = sum(guaranteeTerms)
+    print(theoreticalGuarantee)
+
+
+def cascades_vs_measurements(graph_name, n_cascades, min_proba, max_proba,
+ sparse_edges=False, p_init=0.05):
+ """
+    Reports the number of measurements per cascade for the given graph, so
+    that different graph types can be compared
+ """
+ G = cascade_creation.InfluenceGraph(max_proba = max_proba,
+ min_proba = min_proba,
+ sparse_edges = sparse_edges
+ )
+ G.import_from_file(graph_name)
+ A = cascade_creation.generate_cascades(G, p_init=p_init,
+ n_cascades = n_cascades)
+
+ L = [len(cascade) for cascade in A]
+ print(L)
+ print("mean: {}\nstandard deviation: {}".format(np.mean(L), np.std(L)))
+
+
def compare_greedy_and_lagrange_cs284r():
"""
Compares the performance of the greedy algorithm on the
@@ -36,7 +99,8 @@ def compute_graph(graph_name, n_cascades, lbda, passed_function, min_proba,
min_proba=min_proba,
sparse_edges=sparse_edges)
G.import_from_file(graph_name)
- A = cascade_creation.generate_cascades(G, p_init=p_init, n_cascades=n_cascades)
+ A = cascade_creation.generate_cascades(G, p_init=p_init,
+ n_cascades=n_cascades)
if passed_function==algorithms.greedy_prediction:
G_hat = algorithms.greedy_prediction(G, A)
@@ -232,6 +296,17 @@ def plot_p_init_watts_strogatz():
if __name__=="__main__":
if 1:
+ theoreticalGuarantees(graph_name =
+ "../datasets/powerlaw_200_30_point3.txt",
+ n_cascades=100, min_proba=.2, max_proba=.7,
+ p_init=.05, sparse_edges=False,
+ passed_function=convex_optimization.sparse_recovery,
+ lbda=.001)
+
+ if 0:
+ cascades_vs_measurements("../datasets/watts_strogatz_p_init.txt",
+ n_cascades=1000, min_proba=.2, max_proba=.7)
+ if 0:
compute_graph("../datasets/watts_strogatz_300_30_point3.txt",
n_cascades=300, lbda=.013382, min_proba=.2, max_proba=.7,
passed_function=