summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2013-06-07 12:38:45 +0200
committerThibaut Horel <thibaut.horel@gmail.com>2013-06-07 12:38:45 +0200
commit55473070decc94d4bf261cdd01d718305ea07073 (patch)
tree00a8405201e13cbca6ce1459b6c7159d0f7222e5
parentaa5094388e98fc60fb5f4829c4294711ee8267aa (diff)
downloadrecommendation-55473070decc94d4bf261cdd01d718305ea07073.tar.gz
Fixing small typos (Stratis' suggestions)
-rw-r--r--definitions.tex9
-rw-r--r--dual.tex (renamed from dual2.tex)46
2 files changed, 27 insertions, 28 deletions
diff --git a/definitions.tex b/definitions.tex
index 962a438..616012e 100644
--- a/definitions.tex
+++ b/definitions.tex
@@ -1,11 +1,12 @@
\newcommand{\mutual}{\ensuremath{{I}}}
\newcommand{\entropy}{\ensuremath{{H}}}
-%\newtheorem{lemma}{Lemma}
+\newtheorem{lemma}{Lemma}
\newtheorem{fact}{Fact}
-%\newtheorem{example}{Example}
+\newtheorem{example}{Example}
\newtheorem{prop}{Proposition}
-%\newtheorem{theorem}{Theorem}
-%\newtheorem{corollary}{Corollary}
+\newtheorem{theorem}{Theorem}
+\newtheorem{corollary}{Corollary}
+\newcommand{\citeN}{\citet}
%\newcommand*{\defeq}{\stackrel{\text{def}}{=}}
\newcommand*{\defeq}{\equiv}
\newcommand{\var}{\mathop{\mathrm{Var}}}
diff --git a/dual2.tex b/dual.tex
index a7f9e61..a2d28c6 100644
--- a/dual2.tex
+++ b/dual.tex
@@ -2,7 +2,6 @@
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{amsmath, amsfonts, amsthm}
-\newtheorem{lemma}{Lemma}
\newtheorem{proposition}{Proposition}
\input{definitions}
@@ -65,7 +64,7 @@ dual feasibility, the KKT conditions give $\forall i\in\{1, \ldots, n\}$:
\begin{displaymath}
S(\lambda)\defeq I_d + \sum_{i=1}^n \lambda_i x_i\T{x_i}
\quad\mathrm{and}\quad
- S_k \defeq I_d + \sum_{i=1}^n x_i\T{x_i}
+ S_k \defeq I_d + \sum_{i=1}^k x_i\T{x_i}
\end{displaymath}
We have $\partial_i L(\lambda) = \T{x_i}S(\lambda)^{-1}x_i$. Since
@@ -111,9 +110,9 @@ dual feasibility, the KKT conditions give $\forall i\in\{1, \ldots, n\}$:
\begin{lemma}\label{lemma:proximity}
We have:
\begin{displaymath}
- L^*_c - \alpha n^2(n-1)\leq L^*_c(\alpha) \leq L^*_c
+ L^*_c - \alpha n^2\leq L^*_c(\alpha) \leq L^*_c
\end{displaymath}
-In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$.
+In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2$.
\end{lemma}
\begin{proof}
@@ -134,7 +133,7 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$.
\begin{displaymath}
L^*_{c}(\alpha) \geq L(\lambda) - \alpha\T{\mathbf{1}}\mu^*
\end{displaymath}
- for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for $\lambda^*$ primal optimal for \eqref{eq:primal}:
+ for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for $\lambda$ primal optimal for $\eqref{eq:primal}$:
\begin{equation}\label{eq:local-1}
L^*_{c}(\alpha) \geq L^*_c - \alpha\T{\mathbf{1}}\mu^*
\end{equation}
@@ -143,10 +142,13 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$.
\{i|\mu_i^* > 0\}$, and by $\lambda^*$ a primal optimal point for
$\eqref{eq:perturbed-primal}$. From the KKT conditions we see that:
\begin{displaymath}
- M = \{i|\lambda_i^* = \alpha\}
+ M \subseteq \{i|\lambda_i^* = \alpha\}
\end{displaymath}
- Let us first assume that, $|M|\geq 1$, then we have that $\T{c}\lambda^*
+
+ Let us first assume that $|M| = 0$, then $\T{\mathbf{1}}\mu^*=0$ and the lemma follows.
+
+ We will now assume that $|M|\geq 1$. In this case $\T{c}\lambda^*
= 1$, otherwise we could increase the coordinates of $\lambda^*$ in $M$,
which would increase the value of the objective function and contradict the
optimality of $\lambda^*$. Note also, that $|M|\leq n-1$, otherwise, since
@@ -154,11 +156,11 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$.
contradicts the optimality of $\lambda^*$. Let us write:
\begin{displaymath}
1 = \T{c}\lambda^* = \alpha\sum_{i\in M}c_i + \sum_{i\in \bar{M}}\lambda_i^*c_i
- \leq \alpha (n-1) + (n-1)\max_{i\in \bar{M}} c_i
+ \leq \alpha |M| + (n-|M|)\max_{i\in \bar{M}} c_i
\end{displaymath}
That is:
\begin{equation}\label{local-2}
- \max_{i\in\bar{M}} c_i \geq \frac{1 - (n-1)\alpha}{n-1}> \frac{1}{n(n-1)}
+ \max_{i\in\bar{M}} c_i \geq \frac{1 - |M|\alpha}{n-|M|}> \frac{1}{n}
\end{equation}
where the last inequality uses again that $\alpha<\frac{1}{n}$. From the
KKT conditions, we see that for $i\in M$, $\nu_i^* = 0$ and:
@@ -169,25 +171,21 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$.
Furthermore, using the KKT conditions again, we have that:
\begin{equation}\label{local-4}
- \xi^* = \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i}
+ \xi^* \leq \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i}
= \frac{1}{\max_{i\in\bar{M}} c_i}
\end{equation}
- where the inequality uses Lemma~\ref{lemma:derivative-bounds}.
+ where the last inequality uses Lemma~\ref{lemma:derivative-bounds}.
Combining \eqref{local-2}, \eqref{local-3} and \eqref{local-4}, we get that:
\begin{displaymath}
- \sum_{i\in M}\mu_i^* \leq |M|\xi^* \leq n\xi^*\leq \frac{n}{\max_{i\in\bar{M}} c_i} \leq n^2(n-1)
+ \sum_{i\in M}\mu_i^* \leq |M|\xi^* \leq n\xi^*\leq \frac{n}{\max_{i\in\bar{M}} c_i} \leq n^2
\end{displaymath}
- Finally let us write:
+ This implies that:
\begin{displaymath}
- \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^*
+ \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^*\leq n^2
\end{displaymath}
-
- Either $|M|=0$, in which case $\T{\mathbf{1}}\mu^* = 0$, either $|M|\geq
- 1$, in which case $\T{\mathbf{1}}\mu^*\leq n^2(n-1)$ from the inequality above.
- In both cases, $\T{\mathbf{1}}\mu^* \leq n^2(n-1)$, which, in addition to
- \eqref{eq:local-1} proves the lemma.
+ which in addition to \eqref{eq:local-1} proves the lemma.
\end{proof}
\begin{lemma}\label{lemma:monotonicity}
@@ -229,16 +227,16 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$.
\begin{proof}
Let $\varepsilon$ in $(0, 1]$. The routine works as follows: set $\alpha\defeq
-\varepsilon(\delta + n^2(n-1))^{-1}$ and return an approximation $\tilde{L}^*_c$ of
+\varepsilon(\delta + n^2)^{-1}$ and return an approximation $\tilde{L}^*_c$ of
$L^*_c(\alpha)$ with an accuracy $\frac{1}{2^{n+1}}\alpha\delta b$ computed by
a standard convex optimization algorithm. Note that this choice of $\alpha$
-implies $\alpha<\frac{1}{n}$ as requested.
+implies $\alpha<\frac{1}{n}$ as required.
\begin{enumerate}
\item using Lemma~\ref{lemma:proximity}:
\begin{displaymath}
|\tilde{L}^*_c - L^*_c| \leq |\tilde{L}^*_c - L^*_c(\alpha)| + |L^*_c(\alpha) - L^*_c|
- \leq \alpha\delta + \alpha n^2(n-1) = \varepsilon
+ \leq \alpha\delta + \alpha n^2 = \varepsilon
\end{displaymath}
\item let $c' = (c_i', c_{-i})$ with $c_i'\leq c_i-\delta$, then:
@@ -252,12 +250,12 @@ the inner inequality follows from Lemma~\ref{lemma:monotonicity}.
\item the accuracy of the approximation $\tilde{L}^*_c$ is:
\begin{displaymath}
- A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2(n-1))}
+ A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2)}
\end{displaymath}
\sloppy
hence, the standard convex optimization algorithm runs in time $O(poly(n, d,\log\log A^{-1}))$. Note that:
\begin{displaymath}
- \log\log A^{-1} = O\bigg(\log n\; \log\log\frac{1}{\epsilon\delta b}\bigg)
+ \log\log A^{-1} = O\bigg(\log\log\frac{1}{\varepsilon\delta b} + \log n\bigg)
\end{displaymath}
which yields the wanted running time for the routine.\qedhere
\end{enumerate}