aboutsummaryrefslogtreecommitdiffstats
path: root/paper
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2015-05-19 00:00:17 +0200
committerThibaut Horel <thibaut.horel@gmail.com>2015-05-19 00:00:17 +0200
commit282349d87a75e930bb4d2ac7bc389106d0519f0b (patch)
treef82134035c45cd7e8a012316478a073d82b10ab1 /paper
parent39a72f9f8127970470f46529ff8f1cc1a22450d9 (diff)
downloadcascades-282349d87a75e930bb4d2ac7bc389106d0519f0b.tar.gz
Beginning of the compression
Diffstat (limited to 'paper')
-rw-r--r--paper/sections/appendix.tex23
-rw-r--r--paper/sections/model.tex8
-rw-r--r--paper/sections/results.tex48
-rw-r--r--paper/sparse.bib66
4 files changed, 37 insertions, 108 deletions
diff --git a/paper/sections/appendix.tex b/paper/sections/appendix.tex
index 32e2775..22b87c2 100644
--- a/paper/sections/appendix.tex
+++ b/paper/sections/appendix.tex
@@ -5,11 +5,13 @@ the paper.
\subsection{Proofs of Section~\ref{sec:results}}
-\paragraph{Bounded gradient} The proof of Lemma~\ref{lem:ub} giving an upper
-bound on the $\ell_{\infty}$ norm of the gradient of the likelihood function is
-given below.
+\begin{proof}[Proof of Lemma~\ref{lem:transform}]
+Using the inequality $\forall x>0, \; \log x \geq 1 - \frac{1}{x}$, we have
+$|\log (\frac{1}{1 - p}) - \log (\frac{1}{1-p'})| \geq \max(1 - \frac{1-p}{1-p'},
+1 - \frac{1-p'}{1-p}) \geq \max( p-p', p'-p)$.
+\end{proof}
-\begin{proof}
+\begin{proof}[Proof of Lemma~\ref{lem:ub}]
The gradient of $\mathcal{L}$ is given by:
\begin{multline*}
\nabla \mathcal{L}(\theta^*) =
@@ -42,6 +44,16 @@ Choosing $\lambda\defeq 2\sqrt{\frac{\log m}{\alpha n^{1-\delta}}}$ concludes
the proof.
\end{proof}
+\begin{proof}[Proof of Corollary~\ref{cor:variable_selection}]
+By choosing $\delta = 0$, if $ n > \frac{9s\log m}{\alpha\gamma^2\epsilon^2}$,
+then $\|\hat \theta-\theta^*\|_2 < \epsilon < \eta$ with probability
+$1-\frac{1}{m}$. If $\theta_i^* = 0$ and $\hat \theta > \eta$, then $\|\hat
+\theta - \theta^*\|_2 \geq |\hat \theta_i-\theta_i^*| > \eta$, which is a
+contradiction. Therefore we get no false positives. If $\theta_i^* > \eta +
+\epsilon$, then $|\hat{\theta}_i- \theta_i^*| < \epsilon \implies \theta_j >
+\eta$ and we get all strong parents.
+\end{proof}
+
%\subsubsection{Approximate sparsity proof}
\paragraph{(RE) with high probability} We now prove Proposition~\ref{prop:fi}.
The proof mostly relies on showing that the Hessian of likelihood function
@@ -113,6 +125,7 @@ C)$. These two bounds together imply the theorem.
\begin{figure}
\centering
\includegraphics[scale=.4]{figures/n_nodes_running_time.pdf}
+ \vspace{-1em}
\caption{Running time analysis for estimating the parents of a \emph{single
node} on a Barabasi-Albert graph as a function of the number of nodes in
the graph. The parameter $k$ (number of nodes each new node is attached to)
@@ -120,6 +133,7 @@ C)$. These two bounds together imply the theorem.
the edge weights are chosen uniformly at random in $[.2,.7]$. The
penalization parameter $\lambda$ is chosen equal to $.1$.}
\label{fig:running_time_n_nodes}
+ \vspace{-1em}
\end{figure}
\begin{figure}
@@ -130,6 +144,7 @@ C)$. These two bounds together imply the theorem.
observed cascades. The parameters defining the graph were set as in
Figure~\ref{fig:running_time_n_nodes}.}
\label{fig:running_time_n_cascades}
+ \vspace{-1em}
\end{figure}
We include here a running time analysis of our algorithm. In
diff --git a/paper/sections/model.tex b/paper/sections/model.tex
index fd25c27..532ee5e 100644
--- a/paper/sections/model.tex
+++ b/paper/sections/model.tex
@@ -140,13 +140,9 @@ the recovery error on $\Theta_j$ is an upper bound on the error on the original
$p_j$ parameters.
\begin{lemma}
+ \label{lem:transform}
$\|\hat{\theta} - \theta^* \|_2 \geq \|\hat{p} - p^*\|_2$.
\end{lemma}
-\begin{proof}
-Using the inequality $\forall x>0, \; \log x \geq 1 - \frac{1}{x}$, we have
-$|\log (\frac{1}{1 - p}) - \log (\frac{1}{1-p'})| \geq \max(1 - \frac{1-p}{1-p'},
-1 - \frac{1-p'}{1-p}) \geq \max( p-p', p'-p)$.
-\end{proof}
\subsubsection{The Linear Voter Model}
@@ -334,7 +330,7 @@ $\mathcal{L}_i$ is equal to $-\infty$ when the parameters are outside of the
domain of definition of the models, these contraints do not need to appear
explicitly in the optimization program.
-In the specific case of the voter model the constraint $\sum_j \Theta_{i,j}
+In the specific case of the voter model, the constraint $\sum_j \Theta_{i,j}
= 1$ will not necessarily be verified by the estimator obtained in
\eqref{eq:pre-mle}. In some applications, the experimenter might not need this
constraint to be verified, in which case the results in
diff --git a/paper/sections/results.tex b/paper/sections/results.tex
index 2292ce3..cab27a7 100644
--- a/paper/sections/results.tex
+++ b/paper/sections/results.tex
@@ -139,16 +139,6 @@ S}^*_{\eta + \epsilon} \subseteq \hat {\cal S}_\eta \subseteq {\cal S}^*$. In
other words we recover all `strong' parents and no `false' parents.
\end{corollary}
-\begin{proof}
-By choosing $\delta = 0$, if $ n > \frac{9s\log m}{\alpha\gamma^2\epsilon^2}$,
-then $\|\hat \theta-\theta^*\|_2 < \epsilon < \eta$ with probability
-$1-\frac{1}{m}$. If $\theta_i^* = 0$ and $\hat \theta > \eta$, then $\|\hat
-\theta - \theta^*\|_2 \geq |\hat \theta_i-\theta_i^*| > \eta$, which is a
-contradiction. Therefore we get no false positives. If $\theta_i^* > \eta +
-\epsilon$, then $|\hat{\theta}_i- \theta_i^*| < \epsilon \implies \theta_j >
-\eta$ and we get all strong parents.
-\end{proof}
-
Assuming we know a lower bound $\alpha$ on $\Theta_{i,j}$,
Corollary~\ref{cor:variable_selection} can be applied to the Network Inference
problem in the following manner: pick $\epsilon = \frac{\eta}{2}$ and $\eta
@@ -199,9 +189,11 @@ solving \eqref{eq:pre-mle} for $\lambda \defeq 2\sqrt{\frac{\log m}{\alpha n^{1
\end{align*}
\end{theorem}
-As before, edge recovery is a consequence of upper-bounding $\|\theta^* - \hat
-\theta\|_2$
+As in Corollary~\ref{cor:variable_selection}, an edge recovery guarantee can be
+derived from Theorem~\ref{thm:approx_sparse} in the case of approximate
+sparsity.
+\begin{comment}
\begin{corollary}
Under the same assumptions as Theorem~\ref{thm:approx_sparse}, if the number
of measurements verifies: \begin{equation}
@@ -212,22 +204,18 @@ As before, edge recovery is a consequence of upper-bounding $\|\theta^* - \hat
then: ${\cal S}^*_{\eta + \epsilon} \subset \hat {\cal S}_\eta
\subset {\cal S}^*$ w.p. at least $1-\frac{1}{m}$.
\end{corollary}
+\end{comment}
\subsection{Restricted Eigenvalue Condition}
\label{sec:re}
-In this section, we discuss the main assumption of Theorem~\ref{thm:main} namely
-the restricted eigenvalue condition introduced by~\cite{bickel2009simultaneous}.
-
-\paragraph{Interpretation}
-
There exists a large class of sufficient conditions under which sparse recovery
is achievable in the context of regularized estimation~\cite{vandegeer:2009}.
-The restricted eigenvalue condition is one of the weakest such assumption. It
-can be interpreted as a restricted form of non-degeneracy. Since we apply it to
-the Hessian of the log-likelihood function $\nabla^2 \mathcal{L}(\theta)$, it
-essentially reduces to a form of restricted strong convexity, that
-Lemma~\ref{lem:negahban} ultimately relies on.
+The restricted eigenvalue condition, introduced in \cite{bickel:2009}, is one
+of the weakest such assumption. It can be interpreted as a restricted form of
+non-degeneracy. Since we apply it to the Hessian of the log-likelihood function
+$\nabla^2 \mathcal{L}(\theta)$, it essentially reduces to a form of restricted
+strong convexity, that Lemma~\ref{lem:negahban} ultimately relies on.
Observe that the Hessian of $\mathcal{L}$ can be seen as a re-weighted
\emph{Gram matrix} of the observations:
@@ -238,16 +226,12 @@ Observe that the Hessian of $\mathcal{L}$ can be seen as a re-weighted
-(1-x_i^{t+1})\frac{f''(1-f) + f'^2}{(1-f)^2}(\inprod{\theta^*}{x^t})\bigg]
\end{multline*}
If $f$ and $1-f$ are $c$-strictly log-convex~\cite{bagnoli2005log} for $c>0$,
-then
-\begin{displaymath}
- \min\left(\frac{f''f-f'^2}{f^2}(x), \frac{f''(1-f) + f'^2}{(1-f)^2}(x) \right)
- \geq c
-\end{displaymath}
-and the $(S, \gamma)$-({\bf RE}) condition on the Hessian in
-Theorem~\ref{thm:main} and Theorem~\ref{thm:approx_sparse} reduces to a
-condition on the \emph{Gram matrix} of the observations $X^T X =
-\frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}}x^t(x^t)^T$ for $\gamma' \defeq
-\gamma\cdot c$.
+then $ \min\left((\log f)'', (\log (1-f))'' \right) \geq c $. This implies that
+the $(S, \gamma)$-({\bf RE}) condition in Theorem~\ref{thm:main} and
+Theorem~\ref{thm:approx_sparse} reduces to a condition on the \emph{Gram
+matrix} of the observations $X^T
+X = \frac{1}{|\mathcal{T}|}\sum_{t\in\mathcal{T}}x^t(x^t)^T$ for $\gamma'
+\defeq \gamma\cdot c$.
\paragraph{(RE) with high probability}
diff --git a/paper/sparse.bib b/paper/sparse.bib
index 00001f0..618a3a1 100644
--- a/paper/sparse.bib
+++ b/paper/sparse.bib
@@ -89,7 +89,6 @@ year = {2006},
issn = {1532-4435},
pages = {2541--2563},
numpages = {23},
- url = {http://dl.acm.org/citation.cfm?id=1248547.1248637},
acmid = {1248637},
publisher = {JMLR.org},
}
@@ -105,9 +104,7 @@ year = {2006},
{ICML} 2014, Beijing, China, 21-26 June 2014},
pages = {793--801},
year = {2014},
- url = {http://jmlr.org/proceedings/papers/v32/daneshmand14.html},
timestamp = {Fri, 07 Nov 2014 20:42:30 +0100},
- biburl = {http://dblp.uni-trier.de/rec/bib/conf/icml/DaneshmandGSS14},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -121,10 +118,7 @@ year = {2006},
- 27, 2003},
pages = {137--146},
year = {2003},
- url = {http://doi.acm.org/10.1145/956750.956769},
- doi = {10.1145/956750.956769},
timestamp = {Mon, 13 Feb 2006 15:34:20 +0100},
- biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/KempeKT03},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -138,37 +132,30 @@ year = {2006},
and Data Mining, {KDD} 2013, Chicago, IL, USA, August 11-14, 2013},
pages = {491--499},
year = {2013},
- url = {http://doi.acm.org/10.1145/2487575.2487664},
- doi = {10.1145/2487575.2487664},
timestamp = {Tue, 10 Sep 2013 10:11:57 +0200},
- biburl = {http://dblp.uni-trier.de/rec/bib/conf/kdd/AbrahaoCKP13},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@article{vandegeer:2009,
author = "van de Geer, Sara A. and B{\"u}hlmann, Peter",
-doi = "10.1214/09-EJS506",
fjournal = "Electronic Journal of Statistics",
journal = "Electron. J. Statist.",
pages = "1360--1392",
publisher = "The Institute of Mathematical Statistics and the Bernoulli Society",
title = "On the conditions used to prove oracle results for the Lasso",
-url = "http://dx.doi.org/10.1214/09-EJS506",
volume = "3",
year = "2009"
}
@article{vandegeer:2011,
author = "van de Geer, Sara and Bühlmann, Peter and Zhou, Shuheng",
-doi = "10.1214/11-EJS624",
fjournal = "Electronic Journal of Statistics",
journal = "Electron. J. Statist.",
pages = "688--749",
publisher = "The Institute of Mathematical Statistics and the Bernoulli Society",
title = "The adaptive and the thresholded Lasso for potentially misspecified
models (and a lower bound for the Lasso)",
-url = "http://dx.doi.org/10.1214/11-EJS624",
volume = "5",
year = "2011"
}
@@ -181,8 +168,6 @@ volume = {101},
number = {476},
pages = {1418-1429},
year = {2006},
-doi = {10.1198/016214506000000735},
-URL = {http://dx.doi.org/10.1198/016214506000000735},
}
@article{Jacques:2013,
@@ -197,10 +182,7 @@ URL = {http://dx.doi.org/10.1198/016214506000000735},
number = {4},
pages = {2082--2102},
year = {2013},
- url = {http://dx.doi.org/10.1109/TIT.2012.2234823},
- doi = {10.1109/TIT.2012.2234823},
timestamp = {Tue, 09 Apr 2013 19:57:48 +0200},
- biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/JacquesLBB13},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -212,10 +194,7 @@ URL = {http://dx.doi.org/10.1198/016214506000000735},
2008, Princeton, NJ, USA, 19-21 March 2008},
pages = {16--21},
year = {2008},
- url = {http://dx.doi.org/10.1109/CISS.2008.4558487},
- doi = {10.1109/CISS.2008.4558487},
timestamp = {Wed, 15 Oct 2014 17:04:27 +0200},
- biburl = {http://dblp.uni-trier.de/rec/bib/conf/ciss/BoufounosB08},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -229,10 +208,7 @@ URL = {http://dx.doi.org/10.1198/016214506000000735},
June 13-18, 2010, Austin, Texas, USA, Proceedings},
pages = {1553--1557},
year = {2010},
- url = {http://dx.doi.org/10.1109/ISIT.2010.5513510},
- doi = {10.1109/ISIT.2010.5513510},
timestamp = {Thu, 15 Jan 2015 17:11:50 +0100},
- biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/GuptaNR10},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -245,16 +221,12 @@ URL = {http://dx.doi.org/10.1198/016214506000000735},
number = {2},
pages = {438--461},
year = {2014},
- url = {http://dx.doi.org/10.1007/s00454-013-9561-6},
- doi = {10.1007/s00454-013-9561-6},
timestamp = {Tue, 11 Feb 2014 13:48:56 +0100},
- biburl = {http://dblp.uni-trier.de/rec/bib/journals/dcg/PlanV14},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@article{bickel:2009,
author = "Bickel, Peter J. and Ritov, Ya’acov and Tsybakov, Alexandre B.",
-doi = "10.1214/08-AOS620",
fjournal = "The Annals of Statistics",
journal = "Ann. Statist.",
month = "08",
@@ -262,7 +234,6 @@ number = "4",
pages = "1705--1732",
publisher = "The Institute of Mathematical Statistics",
title = "Simultaneous analysis of Lasso and Dantzig selector",
-url = "http://dx.doi.org/10.1214/08-AOS620",
volume = "37",
year = "2009"
}
@@ -276,9 +247,7 @@ year = "2009"
volume = {11},
pages = {2241--2259},
year = {2010},
- url = {http://portal.acm.org/citation.cfm?id=1859929},
timestamp = {Wed, 15 Oct 2014 17:04:32 +0200},
- biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/RaskuttiWY10},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -291,10 +260,7 @@ year = "2009"
number = {6},
pages = {3434--3447},
year = {2013},
- url = {http://dx.doi.org/10.1109/TIT.2013.2243201},
- doi = {10.1109/TIT.2013.2243201},
timestamp = {Tue, 21 May 2013 14:15:50 +0200},
- biburl = {http://dblp.uni-trier.de/rec/bib/journals/tit/RudelsonZ13},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -307,9 +273,7 @@ year = "2009"
journal = {CoRR},
volume = {abs/1106.0365},
year = {2011},
- url = {http://arxiv.org/abs/1106.0365},
timestamp = {Mon, 05 Dec 2011 18:04:39 +0100},
- biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1106-0365},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -322,10 +286,7 @@ year = "2009"
pages = {295--304},
year = {2011},
crossref = {DBLP:conf/focs/2011},
- url = {http://dx.doi.org/10.1109/FOCS.2011.92},
- doi = {10.1109/FOCS.2011.92},
timestamp = {Tue, 16 Dec 2014 09:57:24 +0100},
- biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/PriceW11},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -335,10 +296,8 @@ year = "2009"
2011, Palm Springs, CA, USA, October 22-25, 2011},
publisher = {{IEEE} Computer Society},
year = {2011},
- url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6108120},
isbn = {978-1-4577-1843-4},
timestamp = {Mon, 15 Dec 2014 18:48:45 +0100},
- biburl = {http://dblp.uni-trier.de/rec/bib/conf/focs/2011},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -352,10 +311,7 @@ year = "2009"
pages = {2446--2450},
year = {2012},
crossref = {DBLP:conf/isit/2012},
- url = {http://dx.doi.org/10.1109/ISIT.2012.6283954},
- doi = {10.1109/ISIT.2012.6283954},
timestamp = {Mon, 01 Oct 2012 17:34:07 +0200},
- biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/PriceW12},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -364,10 +320,8 @@ year = "2009"
Theory, {ISIT} 2012, Cambridge, MA, USA, July 1-6, 2012},
publisher = {{IEEE}},
year = {2012},
- url = {http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=6268627},
isbn = {978-1-4673-2580-6},
timestamp = {Mon, 01 Oct 2012 17:33:45 +0200},
- biburl = {http://dblp.uni-trier.de/rec/bib/conf/isit/2012},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -382,10 +336,7 @@ year = "2009"
volume = {11},
pages = {985--1042},
year = {2010},
- url = {http://doi.acm.org/10.1145/1756006.1756039},
- doi = {10.1145/1756006.1756039},
timestamp = {Thu, 22 Apr 2010 13:26:26 +0200},
- biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/LeskovecCKFG10},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -410,7 +361,6 @@ year = "2009"
Pages = {440--442},
Read = {0},
Title = {Collective dynamics of `small-world' networks},
- Url = {http://dx.doi.org/10.1038/30918},
Volume = {393},
Year = {1998},
}
@@ -422,9 +372,7 @@ year = "2009"
journal = {CoRR},
volume = {cond-mat/0106096},
year = {2001},
- url = {http://arxiv.org/abs/cond-mat/0106096},
timestamp = {Mon, 05 Dec 2011 18:05:15 +0100},
- biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/cond-mat-0106096},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -437,16 +385,12 @@ year = "2009"
journal = {CoRR},
volume = {abs/1105.0697},
year = {2011},
- url = {http://arxiv.org/abs/1105.0697},
timestamp = {Mon, 05 Dec 2011 18:05:23 +0100},
- biburl = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1105-0697},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@article{Nowell08,
author = {Liben-Nowell, David and Kleinberg, Jon},
- biburl = {http://www.bibsonomy.org/bibtex/250b9b1ca1849fa9cb8bb92d6d9031436/mkroell},
- doi = {10.1073/pnas.0708471105},
eprint = {http://www.pnas.org/content/105/12/4633.full.pdf+html},
journal = {Proceedings of the National Academy of Sciences},
keywords = {SNA graph networks},
@@ -454,7 +398,6 @@ year = "2009"
pages = {4633-4638},
timestamp = {2008-10-09T10:32:56.000+0200},
title = {{Tracing information flow on a global scale using Internet chain-letter data}},
- url = {http://www.pnas.org/content/105/12/4633.abstract},
volume = 105,
year = 2008
}
@@ -470,10 +413,7 @@ year = "2009"
Mining, April 26-28, 2007, Minneapolis, Minnesota, {USA}},
pages = {551--556},
year = {2007},
- url = {http://dx.doi.org/10.1137/1.9781611972771.60},
- doi = {10.1137/1.9781611972771.60},
timestamp = {Wed, 12 Feb 2014 17:08:15 +0100},
- biburl = {http://dblp.uni-trier.de/rec/bib/conf/sdm/LeskovecMFGH07},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -486,10 +426,7 @@ year = "2009"
{(WI} 2005), 19-22 September 2005, Compiegne, France},
pages = {207--214},
year = {2005},
- url = {http://dx.doi.org/10.1109/WI.2005.151},
- doi = {10.1109/WI.2005.151},
timestamp = {Tue, 12 Aug 2014 16:59:16 +0200},
- biburl = {http://dblp.uni-trier.de/rec/bib/conf/webi/AdarA05},
bibsource = {dblp computer science bibliography, http://dblp.org}
}
@@ -500,10 +437,7 @@ year = "2009"
of Computing, May 21-23, 2000, Portland, OR, {USA}},
pages = {163--170},
year = {2000},
- url = {http://doi.acm.org/10.1145/335305.335325},
- doi = {10.1145/335305.335325},
timestamp = {Thu, 16 Feb 2012 12:06:08 +0100},
- biburl = {http://dblp.uni-trier.de/rec/bib/conf/stoc/Kleinberg00},
bibsource = {dblp computer science bibliography, http://dblp.org}
}