diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2013-06-07 12:38:45 +0200 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2013-06-07 12:38:45 +0200 |
| commit | 55473070decc94d4bf261cdd01d718305ea07073 (patch) | |
| tree | 00a8405201e13cbca6ce1459b6c7159d0f7222e5 | |
| parent | aa5094388e98fc60fb5f4829c4294711ee8267aa (diff) | |
| download | recommendation-55473070decc94d4bf261cdd01d718305ea07073.tar.gz | |
Fixing small typos (Stratis' suggestions)
| -rw-r--r-- | definitions.tex | 9 | ||||
| -rw-r--r-- | dual.tex (renamed from dual2.tex) | 46 |
2 files changed, 27 insertions, 28 deletions
diff --git a/definitions.tex b/definitions.tex index 962a438..616012e 100644 --- a/definitions.tex +++ b/definitions.tex @@ -1,11 +1,12 @@ \newcommand{\mutual}{\ensuremath{{I}}} \newcommand{\entropy}{\ensuremath{{H}}} -%\newtheorem{lemma}{Lemma} +\newtheorem{lemma}{Lemma} \newtheorem{fact}{Fact} -%\newtheorem{example}{Example} +\newtheorem{example}{Example} \newtheorem{prop}{Proposition} -%\newtheorem{theorem}{Theorem} -%\newtheorem{corollary}{Corollary} +\newtheorem{theorem}{Theorem} +\newtheorem{corollary}{Corollary} +\newcommand{\citeN}{\citet} %\newcommand*{\defeq}{\stackrel{\text{def}}{=}} \newcommand*{\defeq}{\equiv} \newcommand{\var}{\mathop{\mathrm{Var}}} @@ -2,7 +2,6 @@ \usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} \usepackage{amsmath, amsfonts, amsthm} -\newtheorem{lemma}{Lemma} \newtheorem{proposition}{Proposition} \input{definitions} @@ -65,7 +64,7 @@ dual feasibility, the KKT conditions give $\forall i\in\{1, \ldots, n\}$: \begin{displaymath} S(\lambda)\defeq I_d + \sum_{i=1}^n \lambda_i x_i\T{x_i} \quad\mathrm{and}\quad - S_k \defeq I_d + \sum_{i=1}^n x_i\T{x_i} + S_k \defeq I_d + \sum_{i=1}^k x_i\T{x_i} \end{displaymath} We have $\partial_i L(\lambda) = \T{x_i}S(\lambda)^{-1}x_i$. Since @@ -111,9 +110,9 @@ dual feasibility, the KKT conditions give $\forall i\in\{1, \ldots, n\}$: \begin{lemma}\label{lemma:proximity} We have: \begin{displaymath} - L^*_c - \alpha n^2(n-1)\leq L^*_c(\alpha) \leq L^*_c + L^*_c - \alpha n^2\leq L^*_c(\alpha) \leq L^*_c \end{displaymath} -In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$. +In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2$. \end{lemma} \begin{proof} @@ -134,7 +133,7 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$. \begin{displaymath} L^*_{c}(\alpha) \geq L(\lambda) - \alpha\T{\mathbf{1}}\mu^* \end{displaymath} - for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for $\lambda^*$ primal optimal for \eqref{eq:primal}: + for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for $\lambda$ primal optimal for $\eqref{eq:primal}$: \begin{equation}\label{eq:local-1} L^*_{c}(\alpha) \geq L^*_c - \alpha\T{\mathbf{1}}\mu^* \end{equation} @@ -143,10 +142,13 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$. \{i|\mu_i^* > 0\}$, and by $\lambda^*$ a primal optimal point for $\eqref{eq:perturbed-primal}$. From the KKT conditions we see that: \begin{displaymath} - M = \{i|\lambda_i^* = \alpha\} + M \subseteq \{i|\lambda_i^* = \alpha\} \end{displaymath} - Let us first assume that, $|M|\geq 1$, then we have that $\T{c}\lambda^* + + Let us first assume that $|M| = 0$, then $\T{\mathbf{1}}\mu^*=0$ and the lemma follows. + + We will now assume that $|M|\geq 1$. In this case $\T{c}\lambda^* = 1$, otherwise we could increase the coordinates of $\lambda^*$ in $M$, which would increase the value of the objective function and contradict the optimality of $\lambda^*$. Note also, that $|M|\leq n-1$, otherwise, since @@ -154,11 +156,11 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$. contradicts the optimality of $\lambda^*$. Let us write: \begin{displaymath} 1 = \T{c}\lambda^* = \alpha\sum_{i\in M}c_i + \sum_{i\in \bar{M}}\lambda_i^*c_i - \leq \alpha (n-1) + (n-1)\max_{i\in \bar{M}} c_i + \leq \alpha |M| + (n-|M|)\max_{i\in \bar{M}} c_i \end{displaymath} That is: \begin{equation}\label{local-2} - \max_{i\in\bar{M}} c_i \geq \frac{1 - (n-1)\alpha}{n-1}> \frac{1}{n(n-1)} + \max_{i\in\bar{M}} c_i \geq \frac{1 - |M|\alpha}{n-|M|}> \frac{1}{n} \end{equation} where the last inequality uses again that $\alpha<\frac{1}{n}$. From the KKT conditions, we see that for $i\in M$, $\nu_i^* = 0$ and: @@ -169,25 +171,21 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$. Furthermore, using the KKT conditions again, we have that: \begin{equation}\label{local-4} - \xi^* = \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i} + \xi^* \leq \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i} = \frac{1}{\max_{i\in\bar{M}} c_i} \end{equation} - where the inequality uses Lemma~\ref{lemma:derivative-bounds}. + where the last inequality uses Lemma~\ref{lemma:derivative-bounds}. Combining \eqref{local-2}, \eqref{local-3} and \eqref{local-4}, we get that: \begin{displaymath} - \sum_{i\in M}\mu_i^* \leq |M|\xi^* \leq n\xi^*\leq \frac{n}{\max_{i\in\bar{M}} c_i} \leq n^2(n-1) + \sum_{i\in M}\mu_i^* \leq |M|\xi^* \leq n\xi^*\leq \frac{n}{\max_{i\in\bar{M}} c_i} \leq n^2 \end{displaymath} - Finally let us write: + This implies that: \begin{displaymath} - \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^* + \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^*\leq n^2 \end{displaymath} - - Either $|M|=0$, in which case $\T{\mathbf{1}}\mu^* = 0$, either $|M|\geq - 1$, in which case $\T{\mathbf{1}}\mu^*\leq n^2(n-1)$ from the inequality above. - In both cases, $\T{\mathbf{1}}\mu^* \leq n^2(n-1)$, which, in addition to - \eqref{eq:local-1} proves the lemma. + which in addition to \eqref{eq:local-1} proves the lemma. \end{proof} \begin{lemma}\label{lemma:monotonicity} @@ -229,16 +227,16 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$. \begin{proof} Let $\varepsilon$ in $(0, 1]$. The routine works as follows: set $\alpha\defeq -\varepsilon(\delta + n^2(n-1))^{-1}$ and return an approximation $\tilde{L}^*_c$ of +\varepsilon(\delta + n^2)^{-1}$ and return an approximation $\tilde{L}^*_c$ of $L^*_c(\alpha)$ with an accuracy $\frac{1}{2^{n+1}}\alpha\delta b$ computed by a standard convex optimization algorithm. Note that this choice of $\alpha$ -implies $\alpha<\frac{1}{n}$ as requested. +implies $\alpha<\frac{1}{n}$ as required. \begin{enumerate} \item using Lemma~\ref{lemma:proximity}: \begin{displaymath} |\tilde{L}^*_c - L^*_c| \leq |\tilde{L}^*_c - L^*_c(\alpha)| + |L^*_c(\alpha) - L^*_c| - \leq \alpha\delta + \alpha n^2(n-1) = \varepsilon + \leq \alpha\delta + \alpha n^2 = \varepsilon \end{displaymath} \item let $c' = (c_i', c_{-i})$ with $c_i'\leq c_i-\delta$, then: @@ -252,12 +250,12 @@ the inner inequality follows from Lemma~\ref{lemma:monotonicity}. \item the accuracy of the approximation $\tilde{L}^*_c$ is: \begin{displaymath} - A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2(n-1))} + A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2)} \end{displaymath} \sloppy hence, the standard convex optimization algorithm runs in time $O(poly(n, d,\log\log A^{-1}))$. Note that: \begin{displaymath} - \log\log A^{-1} = O\bigg(\log n\; \log\log\frac{1}{\epsilon\delta b}\bigg) + \log\log A^{-1} = O\bigg(\log\log\frac{1}{\varepsilon\delta b} + \log n\bigg) \end{displaymath} which yields the wanted running time for the routine.\qedhere \end{enumerate} |
