diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2015-05-19 00:00:17 +0200 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-05-19 00:00:17 +0200 |
| commit | 282349d87a75e930bb4d2ac7bc389106d0519f0b (patch) | |
| tree | f82134035c45cd7e8a012316478a073d82b10ab1 /paper | |
| parent | 39a72f9f8127970470f46529ff8f1cc1a22450d9 (diff) | |
| download | cascades-282349d87a75e930bb4d2ac7bc389106d0519f0b.tar.gz | |
Beginning of the compression
Diffstat (limited to 'paper')
| -rw-r--r-- | paper/sections/appendix.tex | 23 | ||||
| -rw-r--r-- | paper/sections/model.tex | 8 | ||||
| -rw-r--r-- | paper/sections/results.tex | 48 | ||||
| -rw-r--r-- | paper/sparse.bib | 66 |
4 files changed, 37 insertions, 108 deletions
diff --git a/paper/sections/appendix.tex b/paper/sections/appendix.tex index 32e2775..22b87c2 100644 --- a/paper/sections/appendix.tex +++ b/paper/sections/appendix.tex @@ -5,11 +5,13 @@ the paper. \subsection{Proofs of Section~\ref{sec:results}} -\paragraph{Bounded gradient} The proof of Lemma~\ref{lem:ub} giving an upper -bound on the $\ell_{\infty}$ norm of the gradient of the likelihood function is -given below. +\begin{proof}[Proof of Lemma~\ref{lem:transform}] +Using the inequality $\forall x>0, \; \log x \geq 1 - \frac{1}{x}$, we have +$|\log (\frac{1}{1 - p}) - \log (\frac{1}{1-p'})| \geq \max(1 - \frac{1-p}{1-p'}, +1 - \frac{1-p'}{1-p}) \geq \max( p-p', p'-p)$. +\end{proof} -\begin{proof} +\begin{proof}[Proof of Lemma~\ref{lem:ub}] The gradient of $\mathcal{L}$ is given by: \begin{multline*} \nabla \mathcal{L}(\theta^*) = @@ -42,6 +44,16 @@ Choosing $\lambda\defeq 2\sqrt{\frac{\log m}{\alpha n^{1-\delta}}}$ concludes the proof. \end{proof} +\begin{proof}[Proof of Corollary~\ref{cor:variable_selection}] +By choosing $\delta = 0$, if $ n > \frac{9s\log m}{\alpha\gamma^2\epsilon^2}$, +then $\|\hat \theta-\theta^*\|_2 < \epsilon < \eta$ with probability +$1-\frac{1}{m}$. If $\theta_i^* = 0$ and $\hat \theta > \eta$, then $\|\hat +\theta - \theta^*\|_2 \geq |\hat \theta_i-\theta_i^*| > \eta$, which is a +contradiction. Therefore we get no false positives. If $\theta_i^* > \eta + +\epsilon$, then $|\hat{\theta}_i- \theta_i^*| < \epsilon \implies \theta_j > +\eta$ and we get all strong parents. +\end{proof} + %\subsubsection{Approximate sparsity proof} \paragraph{(RE) with high probability} We now prove Proposition~\ref{prop:fi}. The proof mostly relies on showing that the Hessian of likelihood function @@ -113,6 +125,7 @@ C)$. These two bounds together imply the theorem. \begin{figure} \centering \includegraphics[scale=.4]{figures/n_nodes_running_time.pdf} + \vspace{-1em} \caption{Running time analysis for estimating the parents of a \emph{single node} on a Barabasi-Albert graph as a function of the number of nodes in the graph. The parameter $k$ (number of nodes each new node is attached to) @@ -120,6 +133,7 @@ C)$. These two bounds together imply the theorem. the edge weights are chosen uniformly at random in $[.2,.7]$. The penalization parameter $\lambda$ is chosen equal to $.1$.} \label{fig:running_time_n_nodes} + \vspace{-1em} \end{figure} \begin{figure} @@ -130,6 +144,7 @@ C)$. These two bounds together imply the theorem. observed cascades. The parameters defining the graph were set as in Figure~\ref{fig:running_time_n_nodes}.} \label{fig:running_time_n_cascades} + \vspace{-1em} \end{figure} We include here a running time analysis of our algorithm. In diff --git a/paper/sections/model.tex b/paper/sections/model.tex index fd25c27..532ee5e 100644 --- a/paper/sections/model.tex +++ b/paper/sections/model.tex @@ -140,13 +140,9 @@ the recovery error on $\Theta_j$ is an upper bound on the error on the original $p_j$ parameters. \begin{lemma} + \label{lem:transform} $\|\hat{\theta} - \theta^* \|_2 \geq \|\hat{p} - p^*\|_2$. \end{lemma} -\begin{proof} -Using the inequality $\forall x>0, \; \log x \geq 1 - \frac{1}{x}$, we have -$|\log (\frac{1}{1 - p}) - \log (\frac{1}{1-p'})| \geq \max(1 - \frac{1-p}{1-p'}, -1 - \frac{1-p'}{1-p}) \geq \max( p-p', p'-p)$. -\end{proof} \subsubsection{The Linear Voter Model} @@ -334,7 +330,7 @@ $\mathcal{L}_i$ is equal to $-\infty$ when the parameters are outside of the domain of definition of the models, these contraints do not need to appear explicitly in the optimization program. -In the specific case of the voter model the constraint $\sum_j \Theta_{i,j} +In the specific case of the voter model, the constraint $\sum_j \Theta_{i,j} = 1$ will not necessarily be verified by the estimator obtained in \eqref{eq:pre-mle}. In some applications, the experimenter might not need this constraint to be verified, in which case the results in diff --git a/paper/sections/results.tex b/paper/sections/results.tex index 2292ce3..cab27a7 100644 --- a/paper/sections/results.tex +++ b/paper/sections/results.tex @@ -139,16 +139,6 @@ S}^*_{\eta + \epsilon} \subseteq \hat {\cal S}_\eta \subseteq {\cal S}^*$. In other words we recover all `strong' parents and no `false' parents. \end{corollary} -\begin{proof} -By choosing $\delta = 0$, if $ n > \frac{9s\log m}{\alpha\gamma^2\epsilon^2}$, -then $\|\hat \theta-\theta^*\|_2 < \epsilon < \eta$ with probability -$1-\frac{1}{m}$. If $\theta_i^* = 0$ and $\hat \theta > \eta$, then $\|\hat -\theta - \theta^*\|_2 \geq |\hat \theta_i-\theta_i^*| > \eta$, which is a -contradiction. Therefore we get no false positives. If $\theta_i^* > \eta + -\epsilon$, then $|\hat{\theta}_i- \theta_i^*| < \epsilon \implies \theta_j > -\eta$ and we get all strong parents. -\end{proof} - Assuming we know a lower bound $\alpha$ on $\Theta_{i,j}$, Corollary~\ref{cor:variable_selection} can be applied to the Network Inference problem in the following manner: pick $\epsilon = \frac{\eta}{2}$ and $\eta @@ -199,9 +189,11 @@ solving \eqref{eq:pre-mle} for $\lambda \defeq 2\sqrt{\frac{\log m}{\alpha n^{1 \end{align*} \end{theorem} -As before, edge recovery is a consequence of upper-bounding $\|\theta^* - \hat -\theta\|_2$ +As in Corollary~\ref{cor:variable_selection}, an edge recovery guarantee can be +derived from Theorem~\ref{thm:approx_sparse} in the case of approximate +sparsity. +\begin{comment} \begin{corollary} Under the same assumptions as Theorem~\ref{thm:approx_sparse}, if the number of measurements verifies: \begin{equation} @@ -212,22 +204,18 @@ As before, edge recovery is a consequence of upper-bounding $\|\theta^* - \hat then: ${\cal S}^*_{\eta + \epsilon} \subset \hat {\cal S}_\eta \subset {\cal S}^*$ w.p. at least $1-\frac{1}{m}$. \end{corollary} +\end{comment} \subsection{Restricted Eigenvalue Condition} \label{sec:re} -In this section, we discuss the main assumption of Theorem~\ref{thm:main} namely -the restricted eigenvalue condition introduced by~\cite{bickel2009simultaneous}. - -\paragraph{Interpretation} - There exists a large class of sufficient conditions under which sparse recovery is achievable in the context of regularized estimation~\cite{vandegeer:2009}. -The restricted eigenvalue condition is one of the weakest such assumption. It -can be interpreted as a restricted form of non-degeneracy. Since we apply it to -the Hessian of the log-likelihood function $\nabla^2 \mathcal{L}(\theta)$, it -essentially reduces to a form of restricted strong convexity, that -Lemma~\ref{lem:negahban} ultimately relies on. +The restricted eigenvalue condition, introduced in \cite{bickel:2009}, is one +of the weakest such assumption. It can be interpreted as a restricted form of +non-degeneracy. Since we apply it to the Hessian of the log-likelihood function +$\nabla^2 \mathcal{L}(\theta)$, it essentially reduces to a form of restricted +strong convexity, that Lemma~\ref{lem:negahban} ultimately relies on. Observe that the Hessian of $\mathcal{L}$ can be seen as a re-weighted \emph{Gram matrix} of the observations: @@ -238,16 +226,12 @@ Observe that the Hessian of $\mathcal{L}$ can be seen as a re-weighted -(1-x_i^{t+1})\frac{f''(1-f) + f'^2}{(1-f)^2}(\inprod{\theta^*}{x^t})\bigg] \end{multline*} If $f$ and $1-f$ are $c$-strictly log-convex~\cite{bagnoli2005log} for $c>0$, -then -\begin{displaymath} - \min\left(\frac{f''f-f'^2}{f^2}(x), \frac{f''(1-f) + f'^2}{(1-f)^2}(x) \right) - \geq c -\end{displaymath} -and the $(S, \gamma)$-({\bf RE}) condition on the Hessian in -Theorem~\ref{thm:main} and Theorem~\ref{thm:approx_sparse} reduces to a -condition on the \emph{Gram matrix} of the observations $X^T X = -\frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}}x^t(x^t)^T$ for $\gamma' \defeq -\gamma\cdot c$. +then $ \min\left((\log f)'', (\log (1-f))'' \right) \geq c $. This implies that +the $(S, \gamma)$-({\bf RE}) condition in Theorem~\ref{thm:main} and +Theorem~\ref{thm:approx_sparse} reduces to a condition on the \emph{Gram +matrix} of the observations $X^T +X = \frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}}x^t(x^t)^T$ for $\gamma' +\defeq \gamma\cdot c$. \paragraph{(RE) with high probability} diff --git a/paper/sparse.bib b/paper/sparse.bib index 00001f0..618a3a1 100644 --- a/paper/sparse.bib +++ b/paper/sparse.bib @@ -89,7 +89,6 @@ year = {2006}, issn = {1532-4435}, pages = {2541--2563}, numpages = {23}, - url = {http://dl.acm.org/citation.cfm?id=1248547.1248637}, acmid = {1248637}, publisher = {JMLR.org}, } @@ -105,9 +104,7 @@ year = {2006}, {ICML} 2014, Beijing, China, 21-26 June 2014}, pages = {793--801}, year = {2014}, - url = {http://jmlr.org/proceedings/papers/v32/daneshmand14.html}, timestamp = {Fri, 07 Nov 2014 20:42:30 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/icml/DaneshmandGSS14}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -121,10 +118,7 @@ year = {2006}, - 27, 2003}, pages = {137--146}, year = {2003}, - url = {http://doi.acm.org/10.1145/956750.956769}, - doi = {10.1145/956750.956769}, timestamp = {Mon, 13 Feb 2006 15:34:20 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/KempeKT03}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -138,37 +132,30 @@ year = {2006}, and Data Mining, {KDD} 2013, Chicago, IL, USA, August 11-14, 2013}, pages = {491--499}, year = {2013}, - url = {http://doi.acm.org/10.1145/2487575.2487664}, - doi = {10.1145/2487575.2487664}, timestamp = {Tue, 10 Sep 2013 10:11:57 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/AbrahaoCKP13}, bibsource = {dblp computer science bibliography, http://dblp.org} } @article{vandegeer:2009, author = "van de Geer, Sara A. and B{\"u}hlmann, Peter", -doi = "10.1214/09-EJS506", fjournal = "Electronic Journal of Statistics", journal = "Electron. J. Statist.", pages = "1360--1392", publisher = "The Institute of Mathematical Statistics and the Bernoulli Society", title = "On the conditions used to prove oracle results for the Lasso", -url = "http://dx.doi.org/10.1214/09-EJS506", volume = "3", year = "2009" } @article{vandegeer:2011, author = "van de Geer, Sara and Bühlmann, Peter and Zhou, Shuheng", -doi = "10.1214/11-EJS624", fjournal = "Electronic Journal of Statistics", journal = "Electron. J. Statist.", pages = "688--749", publisher = "The Institute of Mathematical Statistics and the Bernoulli Society", title = "The adaptive and the thresholded Lasso for potentially misspecified models (and a lower bound for the Lasso)", -url = "http://dx.doi.org/10.1214/11-EJS624", volume = "5", year = "2011" } @@ -181,8 +168,6 @@ volume = {101}, number = {476}, pages = {1418-1429}, year = {2006}, -doi = {10.1198/016214506000000735}, -URL = {http://dx.doi.org/10.1198/016214506000000735}, } @article{Jacques:2013, @@ -197,10 +182,7 @@ URL = {http://dx.doi.org/10.1198/016214506000000735}, number = {4}, pages = {2082--2102}, year = {2013}, - url = {http://dx.doi.org/10.1109/TIT.2012.2234823}, - doi = {10.1109/TIT.2012.2234823}, timestamp = {Tue, 09 Apr 2013 19:57:48 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/JacquesLBB13}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -212,10 +194,7 @@ URL = {http://dx.doi.org/10.1198/016214506000000735}, 2008, Princeton, NJ, USA, 19-21 March 2008}, pages = {16--21}, year = {2008}, - url = {http://dx.doi.org/10.1109/CISS.2008.4558487}, - doi = {10.1109/CISS.2008.4558487}, timestamp = {Wed, 15 Oct 2014 17:04:27 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/ciss/BoufounosB08}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -229,10 +208,7 @@ URL = {http://dx.doi.org/10.1198/016214506000000735}, June 13-18, 2010, Austin, Texas, USA, Proceedings}, pages = {1553--1557}, year = {2010}, - url = {http://dx.doi.org/10.1109/ISIT.2010.5513510}, - doi = {10.1109/ISIT.2010.5513510}, timestamp = {Thu, 15 Jan 2015 17:11:50 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/GuptaNR10}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -245,16 +221,12 @@ URL = {http://dx.doi.org/10.1198/016214506000000735}, number = {2}, pages = {438--461}, year = {2014}, - url = {http://dx.doi.org/10.1007/s00454-013-9561-6}, - doi = {10.1007/s00454-013-9561-6}, timestamp = {Tue, 11 Feb 2014 13:48:56 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/dcg/PlanV14}, bibsource = {dblp computer science bibliography, http://dblp.org} } @article{bickel:2009, author = "Bickel, Peter J. and Ritov, Ya’acov and Tsybakov, Alexandre B.", -doi = "10.1214/08-AOS620", fjournal = "The Annals of Statistics", journal = "Ann. Statist.", month = "08", @@ -262,7 +234,6 @@ number = "4", pages = "1705--1732", publisher = "The Institute of Mathematical Statistics", title = "Simultaneous analysis of Lasso and Dantzig selector", -url = "http://dx.doi.org/10.1214/08-AOS620", volume = "37", year = "2009" } @@ -276,9 +247,7 @@ year = "2009" volume = {11}, pages = {2241--2259}, year = {2010}, - url = {http://portal.acm.org/citation.cfm?id=1859929}, timestamp = {Wed, 15 Oct 2014 17:04:32 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/RaskuttiWY10}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -291,10 +260,7 @@ year = "2009" number = {6}, pages = {3434--3447}, year = {2013}, - url = {http://dx.doi.org/10.1109/TIT.2013.2243201}, - doi = {10.1109/TIT.2013.2243201}, timestamp = {Tue, 21 May 2013 14:15:50 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/RudelsonZ13}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -307,9 +273,7 @@ year = "2009" journal = {CoRR}, volume = {abs/1106.0365}, year = {2011}, - url = {http://arxiv.org/abs/1106.0365}, timestamp = {Mon, 05 Dec 2011 18:04:39 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1106-0365}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -322,10 +286,7 @@ year = "2009" pages = {295--304}, year = {2011}, crossref = {DBLP:conf/focs/2011}, - url = {http://dx.doi.org/10.1109/FOCS.2011.92}, - doi = {10.1109/FOCS.2011.92}, timestamp = {Tue, 16 Dec 2014 09:57:24 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/PriceW11}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -335,10 +296,8 @@ year = "2009" 2011, Palm Springs, CA, USA, October 22-25, 2011}, publisher = {{IEEE} Computer Society}, year = {2011}, - url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6108120}, isbn = {978-1-4577-1843-4}, timestamp = {Mon, 15 Dec 2014 18:48:45 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/2011}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -352,10 +311,7 @@ year = "2009" pages = {2446--2450}, year = {2012}, crossref = {DBLP:conf/isit/2012}, - url = {http://dx.doi.org/10.1109/ISIT.2012.6283954}, - doi = {10.1109/ISIT.2012.6283954}, timestamp = {Mon, 01 Oct 2012 17:34:07 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/PriceW12}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -364,10 +320,8 @@ year = "2009" Theory, {ISIT} 2012, Cambridge, MA, USA, July 1-6, 2012}, publisher = {{IEEE}}, year = {2012}, - url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6268627}, isbn = {978-1-4673-2580-6}, timestamp = {Mon, 01 Oct 2012 17:33:45 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/2012}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -382,10 +336,7 @@ year = "2009" volume = {11}, pages = {985--1042}, year = {2010}, - url = {http://doi.acm.org/10.1145/1756006.1756039}, - doi = {10.1145/1756006.1756039}, timestamp = {Thu, 22 Apr 2010 13:26:26 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/LeskovecCKFG10}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -410,7 +361,6 @@ year = "2009" Pages = {440--442}, Read = {0}, Title = {Collective dynamics of `small-world' networks}, - Url = {http://dx.doi.org/10.1038/30918}, Volume = {393}, Year = {1998}, } @@ -422,9 +372,7 @@ year = "2009" journal = {CoRR}, volume = {cond-mat/0106096}, year = {2001}, - url = {http://arxiv.org/abs/cond-mat/0106096}, timestamp = {Mon, 05 Dec 2011 18:05:15 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/cond-mat-0106096}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -437,16 +385,12 @@ year = "2009" journal = {CoRR}, volume = {abs/1105.0697}, year = {2011}, - url = {http://arxiv.org/abs/1105.0697}, timestamp = {Mon, 05 Dec 2011 18:05:23 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1105-0697}, bibsource = {dblp computer science bibliography, http://dblp.org} } @article{Nowell08, author = {Liben-Nowell, David and Kleinberg, Jon}, - biburl = {http://www.bibsonomy.org/bibtex/250b9b1ca1849fa9cb8bb92d6d9031436/mkroell}, - doi = {10.1073/pnas.0708471105}, eprint = {http://www.pnas.org/content/105/12/4633.full.pdf+html}, journal = {Proceedings of the National Academy of Sciences}, keywords = {SNA graph networks}, @@ -454,7 +398,6 @@ year = "2009" pages = {4633-4638}, timestamp = {2008-10-09T10:32:56.000+0200}, title = {{Tracing information flow on a global scale using Internet chain-letter data}}, - url = {http://www.pnas.org/content/105/12/4633.abstract}, volume = 105, year = 2008 } @@ -470,10 +413,7 @@ year = "2009" Mining, April 26-28, 2007, Minneapolis, Minnesota, {USA}}, pages = {551--556}, year = {2007}, - url = {http://dx.doi.org/10.1137/1.9781611972771.60}, - doi = {10.1137/1.9781611972771.60}, timestamp = {Wed, 12 Feb 2014 17:08:15 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/sdm/LeskovecMFGH07}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -486,10 +426,7 @@ year = "2009" {(WI} 2005), 19-22 September 2005, Compiegne, France}, pages = {207--214}, year = {2005}, - url = {http://dx.doi.org/10.1109/WI.2005.151}, - doi = {10.1109/WI.2005.151}, timestamp = {Tue, 12 Aug 2014 16:59:16 +0200}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/webi/AdarA05}, bibsource = {dblp computer science bibliography, http://dblp.org} } @@ -500,10 +437,7 @@ year = "2009" of Computing, May 21-23, 2000, Portland, OR, {USA}}, pages = {163--170}, year = {2000}, - url = {http://doi.acm.org/10.1145/335305.335325}, - doi = {10.1145/335305.335325}, timestamp = {Thu, 16 Feb 2012 12:06:08 +0100}, - biburl = {http://dblp.uni-trier.de/rec/bib/conf/stoc/Kleinberg00}, bibsource = {dblp computer science bibliography, http://dblp.org} } |
