| author | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-05-18 19:19:35 +0200 |
|---|---|---|
| committer | jeanpouget-abadie <jean.pougetabadie@gmail.com> | 2015-05-18 19:19:35 +0200 |
| commit | e41204334108b4ad40246f3d81435a40ff484837 (patch) | |
| tree | 5a391fd927fe64b41a145bbca6089d05b96b7be9 | |
| parent | 3d818b2861faf0ccdbcb2eeb6ac90a07f4e4374a (diff) | |
| download | cascades-e41204334108b4ad40246f3d81435a40ff484837.tar.gz | |
added plots to appendix
| -rw-r--r-- | paper/sections/appendix.tex | 86 |
| -rw-r--r-- | src/convex_optimization.py | 73 |
| -rw-r--r-- | src/make_plots.py | 77 |
3 files changed, 203 insertions, 33 deletions
diff --git a/paper/sections/appendix.tex b/paper/sections/appendix.tex
index 64953a3..4c6aed7 100644
--- a/paper/sections/appendix.tex
+++ b/paper/sections/appendix.tex
@@ -70,7 +70,7 @@ the proof.
  $ \forall \Delta\in C(S),\; \Delta H\Delta \geq \frac{1}{2} \Delta \E[H]\Delta,
- $ w.p. at least $1-e^{-n^{\delta}\log m}$ and the conclusion of
+ $ w.p.~at least $1-e^{-n^{\delta}\log m}$ and the conclusion of
  Proposition~\ref{prop:fi} follows.
 \end{proof}

@@ -99,37 +99,71 @@ has a quantity of information $\Omega(s\log \frac{m}{s})$ about $S$. By the
 Shannon-Hartley theorem, this information is also upper-bounded by
 $\O(n\log C)$. These two bounds together imply the theorem.

-\subsection{Other continuous time processes binned to ours: proportional
-hazards model}
-
-\subsection{Irrepresentability vs. Restricted Eigenvalue Condition}
-In the words and notation of Theorem 9.1 in \cite{vandegeer:2009}:
-\begin{lemma}
-\label{lemm:irrepresentability_proof}
-Let $\phi^2_{\text{compatible}}(L,S) \defeq \min \{ \frac{s
-\|f_\beta\|^2_2}{\|\beta_S\|^2_1} \ : \ \beta \in {\cal R}(L, S) \}$, where
-$\|f_\beta\|^2_2 \defeq \{ \beta^T \Sigma \beta \}$ and ${\cal R}(L,S) \defeq
-\{\beta : \|\beta_{S^c}\|_1 \leq L \|\beta_S\|_1 \neq 0\}$. If
-$\nu_{\text{irrepresentable}(S,s)} < 1/L$, then $\phi^2_{\text{compatible}}(L,S)
-\geq (1 - L \nu_{\text{irrepresentable}(S,s)})^2 \lambda_{\min}^2$.
-\end{lemma}
+\subsection{Running Time Analysis}

-Since ${\cal R}(3, S) = {\cal C}$, $\|\beta_S\|_1 \geq \|\beta_S\|_2$, and
-$\|\beta_S\|_1 \geq \frac{1}{3} \|\beta_{S^c}\|_1$ it is easy to see that
-$\|\beta_S\|_1 \geq \frac{1}{4} \|\beta\|_2$ and therefore that: $\gamma_n \geq
-\frac{n}{4s}\phi^2_{\text{compatible}}(3,S)$
+\begin{figure}
+  \centering
+  \includegraphics[scale=.4]{figures/n_nodes_running_time.pdf}
+  \caption{Running time for estimating the parents of a \emph{single node} on
+  a Barabasi-Albert graph, as a function of the number of nodes in the graph.
+  The parameter $k$ (number of nodes each new node is attached to) is held
+  constant at $30$. $p_{\text{init}}$ is set to $.15$, the edge weights are
+  chosen uniformly at random in $[.2,.7]$, and the penalization parameter
+  $\lambda$ is set to $.1$.}
+  \label{fig:running_time_n_nodes}
+\end{figure}

-Consequently, if $\epsilon > \frac{2}{3}$, then
-$\nu_{\text{irrepresentable}(S,s)} < 1/3$ and the conditions of
-Lemma~\ref{lemm:irrepresentability_proof} hold.
+\begin{figure}
+  \centering
+  \includegraphics[scale=.4]{figures/n_cascades_running_time.pdf}
+  \caption{Running time for estimating the parents of a \emph{single node} on
+  a Barabasi-Albert graph, as a function of the total number of observed
+  cascades. The parameter $k$ (number of nodes each new node is attached to)
+  is held constant at $30$. $p_{\text{init}}$ is set to $.15$, the edge
+  weights are chosen uniformly at random in $[.2,.7]$, and the penalization
+  parameter $\lambda$ is set to $.1$.}
+  \label{fig:running_time_n_cascades}
+\end{figure}
+We include here a running-time analysis of the different algorithms, shown in
+Figure~\ref{fig:running_time_n_nodes} and
+Figure~\ref{fig:running_time_n_cascades}. As expected, the simple greedy
+algorithm is the fastest. Interestingly, the non-penalized convex program also
+converges faster than our suggested penalized maximum-likelihood program.

+%\subsection{Other continuous time processes binned to ours: proportional
+%hazards model}

+%\subsection{Irrepresentability vs. Restricted Eigenvalue Condition}
+%In the words and notation of Theorem 9.1 in \cite{vandegeer:2009}:
+%\begin{lemma}
+%\label{lemm:irrepresentability_proof}
+%Let $\phi^2_{\text{compatible}}(L,S) \defeq \min \{ \frac{s
+%\|f_\beta\|^2_2}{\|\beta_S\|^2_1} \ : \ \beta \in {\cal R}(L, S) \}$, where
+%$\|f_\beta\|^2_2 \defeq \{ \beta^T \Sigma \beta \}$ and ${\cal R}(L,S) \defeq
+%\{\beta : \|\beta_{S^c}\|_1 \leq L \|\beta_S\|_1 \neq 0\}$. If
+%$\nu_{\text{irrepresentable}(S,s)} < 1/L$, then $\phi^2_{\text{compatible}}(L,S)
+%\geq (1 - L \nu_{\text{irrepresentable}(S,s)})^2 \lambda_{\min}^2$.
+%\end{lemma}

-\subsection{Lower bound for restricted eigenvalues (expected hessian) for
-different graphs}
+%Since ${\cal R}(3, S) = {\cal C}$, $\|\beta_S\|_1 \geq \|\beta_S\|_2$, and
+%$\|\beta_S\|_1 \geq \frac{1}{3} \|\beta_{S^c}\|_1$ it is easy to see that
+%$\|\beta_S\|_1 \geq \frac{1}{4} \|\beta\|_2$ and therefore that: $\gamma_n \geq
+%\frac{n}{4s}\phi^2_{\text{compatible}}(3,S)$

-\subsection{Better asymptotic w.r.t expected hessian}
+%Consequently, if $\epsilon > \frac{2}{3}$, then
+%$\nu_{\text{irrepresentable}(S,s)} < 1/3$ and the conditions of
+%Lemma~\ref{lemm:irrepresentability_proof} hold.

-\subsection{Confidence intervals?}
+
+
+%\subsection{Lower bound for restricted eigenvalues (expected hessian) for
+%different graphs}
+
+%\subsection{Better asymptotic w.r.t expected hessian}
+
+%\subsection{Confidence intervals?}

-\subsection{Active learning}
+%\subsection{Active learning}
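The context lines above compress the step "these two bounds together imply the theorem" into one sentence. The following is an editor's sketch of how the two information bounds chain together, written in the notation of the surrounding text; identifying "the theorem" with a lower bound on the number of observations $n$ is an assumption on our part, not a quotation from the paper:

```latex
% Recovering S requires \Omega(s \log(m/s)) bits of information about S, while
% the Shannon--Hartley bound caps the information carried by the n observations
% at \O(n \log C) (using the paper's \O macro). Chaining the two:
\Omega\!\left(s\log\tfrac{m}{s}\right)
  \;\leq\; I(S;\,\text{observations})
  \;\leq\; \O(n\log C)
\quad\Longrightarrow\quad
n \;=\; \Omega\!\left(\frac{s\log\frac{m}{s}}{\log C}\right).
```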
diff --git a/src/convex_optimization.py b/src/convex_optimization.py
index 000143f..fbfeb29 100644
--- a/src/convex_optimization.py
+++ b/src/convex_optimization.py
@@ -4,7 +4,7 @@ from theano import tensor, function
 import numpy as np
 import timeout
 import cvxopt
-
+import scipy

 @timeout.timeout(20)
 def sparse_recovery(lbda, n_cascades):
@@ -124,28 +124,89 @@ def diff_and_opt(M_val, w_val, f_x, f_xz):
     return 1 - np.exp(theta), theta


+def restrictedEigenvalue(M_val, w_val, theta_val, gramMatrix=False):
+    """
+    returns the smallest restricted eigenvalue of the matrix on the set S
+    gramMatrix (bool) : If true, use the gram matrix instead of the Hessian
+    theta_val : true vector of parameters
+    """
+    m, n = M_val.shape
+
+    if gramMatrix:
+        H = 1./m * np.dot(M_val.T, M_val)
+    else:
+        #compute the Hessian
+        #theta = tensor.row().T
+        #theta_ = theta.flatten()
+
+        #M = tensor.matrix(name='M', dtype=theano.config.floatX)
+        #w = tensor.vector(name='w', dtype=theano.config.floatX)
+
+        #y = - 1./n*( (w).dot(tensor.log(1-tensor.exp(M.dot(theta_)))) +
+        #(1-w).dot(tensor.dot(M, theta_)))
+
+        #f = theano.function([theta, M, w], [theano.gradient.hessian(y,
+        #theta_)], allow_input_downcast=True)
+
+        #print(theta_val)
+        #H = f(np.atleast_2d(theta_val).T, M_val.astype('float64'), w_val)
+
+        #print(H)
+
+        #evals_small, evecs_small = scipy.sparse.linalg.eigsh(H.astype('float64'), 1,
+        #which="LM")
+        #print(evals_small)
+
+        theta = tensor.vector()
+
+        M = tensor.matrix(name='M', dtype=theano.config.floatX)
+        w = tensor.vector(name='w', dtype=theano.config.floatX)
+
+        y = - 1./n*((w).dot(tensor.log(1-tensor.exp(M.dot(theta)))) +
+            (1-w).dot(tensor.dot(M, theta)))
+
+        f = theano.function([theta, M, w], [theano.gradient.hessian(y,
+            theta)], allow_input_downcast=True)
+
+        H = f(theta_val, M_val.astype('float32'), w_val)
+
+        print(H)
+
+    evals_small, evecs_small = scipy.sparse.linalg.eigsh(H, 1,
+        which="LM")
+    print(evals_small)
+
+
 def test():
     """
     unit test
     """
     lbda = .0001
-    G = cascade_creation.InfluenceGraph(max_proba=.9)
+    G = cascade_creation.InfluenceGraph(max_proba=.9, min_proba=.2)
     G.erdos_init(n=20, p = .3)
     A = cascade_creation.generate_cascades(G, .1, 500)
-    M_val, w_val = cascade_creation.icc_matrixvector_for_node(A, 2)
-    print(len(M_val))
+    M_val, w_val = cascade_creation.icc_matrixvector_for_node(A, 0)
+    print(M_val.shape)
+    print(w_val.shape)

     #Type lasso
     if 0:
-        f_x, f_xz = type_lasso(lbda)
+        f_x, f_xz = type_lasso(lbda, 500)
         p_vec, _ = diff_and_opt(M_val, w_val, f_x, f_xz)
         print(G.mat[2])

     #Sparse recovery
-    if 1:
+    if 0:
         f_x, f_xz = sparse_recovery(lbda, 500)
         p_vec, _ = diff_and_opt(M_val, w_val, f_x, f_xz)
         print(G.mat[2])

+    #Restricted Eigenvalue
+    if 2:
+        node = 1
+        parents = np.nonzero(G.mat[:, node])[0]
+        theta_val = G.logmat[:,node]
+        restrictedEigenvalue(M_val, w_val, theta_val)
+
 if __name__=="__main__":
     test()
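A few things worth flagging in the new `restrictedEigenvalue`: as committed it prints the eigenvalue but does not appear to return a value, `scipy.sparse.linalg.eigsh(..., which="LM")` selects the largest-magnitude eigenvalue rather than the smallest one the docstring promises, and the call site added to `src/make_plots.py` below passes `S=` and `node=` keyword arguments that this signature does not accept. Here is a minimal sketch of what the docstring seems to intend for the Gram-matrix branch, restricted to a support set `S`; the helper name and its exact arguments are hypothetical, not part of the repository:

```python
import numpy as np

def min_restricted_eigenvalue(M_val, S):
    """
    Hypothetical helper: smallest eigenvalue of the normalized Gram matrix
    restricted to the support set S, one common reading of a "restricted
    eigenvalue" lower bound.
    M_val : (n_measurements x n_nodes) measurement matrix
    S     : index array of the candidate parents of the node under study
    """
    n_measurements = M_val.shape[0]
    # same normalization as the gramMatrix branch of restrictedEigenvalue
    H = M_val.T.dot(M_val) / float(n_measurements)
    # restrict the matrix to the rows and columns indexed by S
    H_SS = H[np.ix_(S, S)]
    # eigvalsh returns eigenvalues of a symmetric matrix in ascending order
    return np.linalg.eigvalsh(H_SS)[0]
```

With a helper of this shape, the `test()` block above could do `min_restricted_eigenvalue(M_val, parents)` and get a float back; the same restriction could be applied to the Theano Hessian in the non-Gram branch before taking the smallest eigenvalue.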
diff --git a/src/make_plots.py b/src/make_plots.py
index 8970cdf..4cbef4a 100644
--- a/src/make_plots.py
+++ b/src/make_plots.py
@@ -3,9 +3,72 @@ import numpy as np
 import cascade_creation
 import convex_optimization
 import algorithms
+import timeout
 import rip_condition


+def theoreticalGuarantees(graph_name, n_cascades, min_proba, max_proba,
+        sparse_edges, p_init, passed_function, *args, **kwargs):
+
+    #creating original graph
+    G = cascade_creation.InfluenceGraph(max_proba = max_proba,
+                                        min_proba = min_proba,
+                                        sparse_edges = sparse_edges
+                                        )
+    G.import_from_file(graph_name)
+
+    #creating dummy graph
+    G_hat = cascade_creation.InfluenceGraph(max_proba=None)
+    G_hat.add_nodes_from(G.nodes())
+
+    #generating cascades
+    A = cascade_creation.generate_cascades(G, p_init=p_init,
+                                           n_cascades = n_cascades)
+
+    #Value of restrictedEigenvalueList
+    restrictedEigenvalueList = []
+
+    for node in G.nodes():
+        print(node)
+        try:
+            M, _ = cascade_creation.icc_matrixvector_for_node(A, node)
+            M_val = np.delete(M, node, axis=1)
+            parents = np.nonzero(G.mat[:,node])[0]
+            restrictedEigenvalueList.append(
+                convex_optimization.restrictedEigenvalue(M, S=parents,
+                    gramMatrix=True, node=node))
+            print(restrictedEigenvalueList[-1])
+        except timeout.TimeoutError:
+            print("TimeoutError, skipping to next node")
+
+    print(restrictedEigenvalueList)
+    alpha = 1
+    theoreticalGuarantee = sum(
+        np.sqrt(len(parents)*np.log(G.number_of_nodes())/len(M))/(
+            alpha*gamma) for gamma in restrictedEigenvalueList if gamma >=
+        1000)
+    print(theoreticalGuarantee)
+
+
+def cascades_vs_measurements(graph_name, n_cascades, min_proba, max_proba,
+        sparse_edges=False, p_init=0.05):
+    """
+    Compares the number of measurements within a cascade for different types of
+    graphs
+    """
+    G = cascade_creation.InfluenceGraph(max_proba = max_proba,
+                                        min_proba = min_proba,
+                                        sparse_edges = sparse_edges
+                                        )
+    G.import_from_file(graph_name)
+    A = cascade_creation.generate_cascades(G, p_init=p_init,
+                                           n_cascades = n_cascades)
+
+    L = [len(cascade) for cascade in A]
+    print(L)
+    print("mean: {}\nstandard deviation: {}".format(np.mean(L), np.std(L)))
+
+
 def compare_greedy_and_lagrange_cs284r():
     """
     Compares the performance of the greedy algorithm on the
@@ -36,7 +99,8 @@ def compute_graph(graph_name, n_cascades, lbda, passed_function, min_proba,
                                         min_proba=min_proba,
                                         sparse_edges=sparse_edges)
     G.import_from_file(graph_name)
-    A = cascade_creation.generate_cascades(G, p_init=p_init, n_cascades=n_cascades)
+    A = cascade_creation.generate_cascades(G, p_init=p_init,
+                                           n_cascades=n_cascades)

     if passed_function==algorithms.greedy_prediction:
         G_hat = algorithms.greedy_prediction(G, A)
@@ -232,6 +296,17 @@ def plot_p_init_watts_strogatz():

 if __name__=="__main__":
     if 1:
+        theoreticalGuarantees(graph_name =
+                "../datasets/powerlaw_200_30_point3.txt",
+                n_cascades=100, min_proba=.2, max_proba=.7,
+                p_init=.05, sparse_edges=False,
+                passed_function=convex_optimization.sparse_recovery,
+                lbda=.001)
+
+    if 0:
+        cascades_vs_measurements("../datasets/watts_strogatz_p_init.txt",
+                n_cascades=1000, min_proba=.2, max_proba=.7)
+
+    if 0:
         compute_graph("../datasets/watts_strogatz_300_30_point3.txt",
                 n_cascades=300, lbda=.013382, min_proba=.2, max_proba=.7,
                 passed_function=
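One caveat on `theoreticalGuarantees` as committed: the generator expression that builds `theoreticalGuarantee` reuses `parents` and `M` left over from the last loop iteration, so every term of the form sqrt(s * log m / n) / (alpha * gamma) is evaluated with the same sparsity `s` and measurement count `n`, and the `gamma >= 1000` filter may be a leftover debugging threshold. A hedged sketch of the per-node bookkeeping the formula appears to intend; the helper below and its arguments are illustrative, not the repository's API:

```python
import numpy as np

def summed_guarantee(per_node, n_nodes, alpha=1.0, gamma_min=1e-8):
    """
    Hypothetical helper: per_node is a list of (gamma, n_parents, n_measurements)
    triples collected inside the node loop, so each term uses its own node's
    sparsity and measurement count instead of the last iteration's values.
    """
    return sum(
        np.sqrt(s * np.log(n_nodes) / n) / (alpha * gamma)
        for gamma, s, n in per_node
        if gamma >= gamma_min  # skip nodes with (near-)zero restricted eigenvalue
    )
```

In the loop above, one would append `(gamma, len(parents), len(M))` at the point where `restrictedEigenvalueList.append(...)` currently sits, which also requires `restrictedEigenvalue` to actually return the eigenvalue.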
