summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2013-06-05 18:32:36 +0200
committerThibaut Horel <thibaut.horel@gmail.com>2013-06-05 18:32:36 +0200
commitaa5094388e98fc60fb5f4829c4294711ee8267aa (patch)
tree86a7a303d0807c9056c95ed4a80aeb01f79b2b2b
parent0171936dd8da5686126eb0bc20e773f855a3c522 (diff)
downloadrecommendation-aa5094388e98fc60fb5f4829c4294711ee8267aa.tar.gz
Fixing a small flaw in the proof of Lemma 2
-rw-r--r--dual2.tex213
1 files changed, 64 insertions, 149 deletions
diff --git a/dual2.tex b/dual2.tex
index ca07954..a7f9e61 100644
--- a/dual2.tex
+++ b/dual2.tex
@@ -30,13 +30,13 @@ Let $\alpha\in\mathbf{R}^+$, consider the perturbed optimization problem:
\end{equation}
and denote by $L^*_c(\alpha)$ its optimal value. Note that we have $L^*_c = L^*_c(0)$.
-We will assume that $\alpha\leq\frac{1}{n}$ so that \eqref{eq:perturbed-primal} has at
+We will assume that $\alpha<\frac{1}{n}$ so that \eqref{eq:perturbed-primal} has at
least one feasible point: $(\frac{1}{n},\ldots,\frac{1}{n})$.
Let us introduce the lagrangian of problem, \eqref{eq:perturbed-primal}:
\begin{displaymath}
\mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) \defeq L(\lambda)
- + \T{\mu}(\lambda-\mathbf{\alpha}) + \T{\nu}(\mathbf{1}-\lambda) + \xi(1-\T{c}\lambda)
+ + \T{\mu}(\lambda-\alpha\mathbf{1}) + \T{\nu}(\mathbf{1}-\lambda) + \xi(1-\T{c}\lambda)
\end{displaymath}
so that:
\begin{displaymath}
@@ -111,9 +111,9 @@ dual feasibility, the KKT conditions give $\forall i\in\{1, \ldots, n\}$:
\begin{lemma}\label{lemma:proximity}
We have:
\begin{displaymath}
- L^*_c - \alpha n\leq L^*_c(\alpha) \leq L^*_c
+ L^*_c - \alpha n^2(n-1)\leq L^*_c(\alpha) \leq L^*_c
\end{displaymath}
-In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n$.
+In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$.
\end{lemma}
\begin{proof}
@@ -127,27 +127,67 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n$.
\end{displaymath}
Note that $\mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*)
- = \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) - \T{\alpha}\mu^*$ and that
- for any $\lambda$ feasible for problem \eqref{eq:primal},
- $\mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) \geq L(\lambda)$. Hence,
+ = \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*)
+ - \alpha\T{\mathbf{1}}\mu^*$, and that for any $\lambda$ feasible for
+ problem \eqref{eq:primal}, $\mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*)
+ \geq L(\lambda)$. Hence,
\begin{displaymath}
- L^*_{c}(\alpha) \geq \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) - \T{\alpha}\mu^*
+ L^*_{c}(\alpha) \geq L(\lambda) - \alpha\T{\mathbf{1}}\mu^*
\end{displaymath}
for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for $\lambda^*$ primal optimal for \eqref{eq:primal}:
\begin{equation}\label{eq:local-1}
- L^*_{c}(\alpha) \geq L^*_c - \T{\alpha}\mu^*
+ L^*_{c}(\alpha) \geq L^*_c - \alpha\T{\mathbf{1}}\mu^*
\end{equation}
- Note that from the KKT conditions, we cannot have both $\mu_i^*$ and $\nu_i^*$ different from zero, hence:
+ Let us denote by the $M$ the support of $\mu^*$, that is $M\defeq
+ \{i|\mu_i^* > 0\}$, and by $\lambda^*$ a primal optimal point for
+ $\eqref{eq:perturbed-primal}$. From the KKT conditions we see that:
\begin{displaymath}
- \sum_{i=1}^n \mu_i^* = \sum_{i=1}^n \xi^*c_i - \partial_i L(\lambda^*) \leq \sum_{i=1}^n\xi^*c_i
+ M = \{i|\lambda_i^* = \alpha\}
\end{displaymath}
- Using the KKT conditions again, if $\lambda_i^*>\alpha$, $\xi^*c_i \leq
- \partial_i L(\lambda^*)$. In particular, using
- Lemma~\ref{lemma:derivative-bounds}, $\xi^*c_i\leq 1$. As a consequence,
- $\T{\alpha}\mu^*\leq\alpha n$. This, in addition to \eqref{eq:local-1}
- gives the left-hand side of the lemma's inequality.
+ Let us first assume that, $|M|\geq 1$, then we have that $\T{c}\lambda^*
+ = 1$, otherwise we could increase the coordinates of $\lambda^*$ in $M$,
+ which would increase the value of the objective function and contradict the
+ optimality of $\lambda^*$. Note also, that $|M|\leq n-1$, otherwise, since
+ $\alpha< \frac{1}{n}$, we would have $\T{c}\lambda^*\ < 1$, which again
+ contradicts the optimality of $\lambda^*$. Let us write:
+ \begin{displaymath}
+ 1 = \T{c}\lambda^* = \alpha\sum_{i\in M}c_i + \sum_{i\in \bar{M}}\lambda_i^*c_i
+ \leq \alpha (n-1) + (n-1)\max_{i\in \bar{M}} c_i
+ \end{displaymath}
+ That is:
+ \begin{equation}\label{local-2}
+ \max_{i\in\bar{M}} c_i \geq \frac{1 - (n-1)\alpha}{n-1}> \frac{1}{n(n-1)}
+ \end{equation}
+ where the last inequality uses again that $\alpha<\frac{1}{n}$. From the
+ KKT conditions, we see that for $i\in M$, $\nu_i^* = 0$ and:
+ \begin{equation}\label{local-3}
+ \mu_i^* = \xi^*c_i - \partial_i L(\lambda^*)\leq \xi^*c_i\leq \xi^*
+ \end{equation}
+ since $\partial_i L(\lambda^*)\geq 0$ and $c_i\leq 1$.
+
+ Furthermore, using the KKT conditions again, we have that:
+ \begin{equation}\label{local-4}
+ \xi^* = \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i}
+ = \frac{1}{\max_{i\in\bar{M}} c_i}
+ \end{equation}
+ where the inequality uses Lemma~\ref{lemma:derivative-bounds}.
+
+ Combining \eqref{local-2}, \eqref{local-3} and \eqref{local-4}, we get that:
+ \begin{displaymath}
+ \sum_{i\in M}\mu_i^* \leq |M|\xi^* \leq n\xi^*\leq \frac{n}{\max_{i\in\bar{M}} c_i} \leq n^2(n-1)
+ \end{displaymath}
+
+ Finally let us write:
+ \begin{displaymath}
+ \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^*
+ \end{displaymath}
+
+ Either $|M|=0$, in which case $\T{\mathbf{1}}\mu^* = 0$, either $|M|\geq
+ 1$, in which case $\T{\mathbf{1}}\mu^*\leq n^2(n-1)$ from the inequality above.
+ In both cases, $\T{\mathbf{1}}\mu^* \leq n^2(n-1)$, which, in addition to
+ \eqref{eq:local-1} proves the lemma.
\end{proof}
\begin{lemma}\label{lemma:monotonicity}
@@ -189,15 +229,16 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n$.
\begin{proof}
Let $\varepsilon$ in $(0, 1]$. The routine works as follows: set $\alpha\defeq
-\varepsilon(\delta + n)^{-1}$ and return an approximation $\tilde{L}^*_c$ of
+\varepsilon(\delta + n^2(n-1))^{-1}$ and return an approximation $\tilde{L}^*_c$ of
$L^*_c(\alpha)$ with an accuracy $\frac{1}{2^{n+1}}\alpha\delta b$ computed by
-a standard convex optimization algorithm
+a standard convex optimization algorithm. Note that this choice of $\alpha$
+implies $\alpha<\frac{1}{n}$ as requested.
\begin{enumerate}
\item using Lemma~\ref{lemma:proximity}:
\begin{displaymath}
|\tilde{L}^*_c - L^*_c| \leq |\tilde{L}^*_c - L^*_c(\alpha)| + |L^*_c(\alpha) - L^*_c|
- \leq \alpha\delta + \alpha n = \varepsilon
+ \leq \alpha\delta + \alpha n^2(n-1) = \varepsilon
\end{displaymath}
\item let $c' = (c_i', c_{-i})$ with $c_i'\leq c_i-\delta$, then:
@@ -206,145 +247,19 @@ a standard convex optimization algorithm
\geq L^*_c + \frac{\alpha\delta b}{2^{n+1}}
\geq \tilde{L}^*_c
\end{displaymath}
-where the first and inequality come from the accuracy of the approximation, and the inner inequality follows from Lemma~\ref{lemma:monotonicity}.
+where the first and inequality come from the accuracy of the approximation, and
+the inner inequality follows from Lemma~\ref{lemma:monotonicity}.
\item the accuracy of the approximation $\tilde{L}^*_c$ is:
\begin{displaymath}
- A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n)}
+ A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2(n-1))}
\end{displaymath}
\sloppy
hence, the standard convex optimization algorithm runs in time $O(poly(n, d,\log\log A^{-1}))$. Note that:
\begin{displaymath}
\log\log A^{-1} = O\bigg(\log n\; \log\log\frac{1}{\epsilon\delta b}\bigg)
\end{displaymath}
-which yields the wanted running time for the routing.\qedhere
+which yields the wanted running time for the routine.\qedhere
\end{enumerate}
\end{proof}
-
-\section{Previous Junk}
-\paragraph{Primal reformulation}
-We define $S(\lambda) \defeq I_d + \sum_{i=1}^n \lambda_i x_i x_i^T$. We can
-reformulate the primal:
-\begin{align}
- \mathrm{maximize}\quad & \log\det W\\
- \textrm{subject to}\quad & W = S(\lambda)\;;\;c^T\lambda \leq 1 \;;\; 0\leq\lambda\leq \mathbf{1}
-\end{align}
-
-\paragraph{Dual}
-\begin{align}
- \mathrm{minimize}\quad & \log\det\big(N^{-1}\big) + \mathrm{Tr}\, N - n + \sum_{i=1}^n x_i^T N x_i \\
- & - \Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i}\Bigg)\Big(\sum_{i=1}^n c_i - 1\Big) \\
- \textrm{subject to}\quad & N\in \mathcal{S}_n^{++}(\mathbf{R})
-\end{align}
-
-Let us define:
-\begin{equation}
-\begin{split}
- f(N, c) \defeq \log\det\big(N^{-1}\big) + \mathrm{Tr}\, N - n + \sum_{i=1}^n x_i^T N x_i \\- \Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i}\Bigg)\Big(\sum_{i=1}^n c_i - 1\Big)
-\end{split}
-\end{equation}
-
-So that the dual reads:
-\begin{align}
- \mathrm{minimize}\quad & f(N, c)\\
- \textrm{subject to}\quad & N\in \mathcal{S}_n^{++}(\mathbf{R})
-\end{align}
-
-
-\paragraph{Duality} By duality $L^*(c)$ is equal to the optimal value of the dual optimization
-problem:
-
-\begin{displaymath}
- L^*(c) = \inf_{N\in\mathcal{S}_n(\mathbf{R})} f(N, c) = f(N^*, c) = \log\det S(\lambda^*)
-\end{displaymath}
-
-\paragraph{KKT conditions}
-Let $\xi$ denote the dual variable associated with the constraint $c^T\lambda
-\leq B$. If $\xi^*$ is dual optimal and $\lambda^*$ primal optimal:
-\begin{equation}
- \mathrm{if}\; 0 < \lambda^*_i < 1,\; \frac{x_i^T
- A(\lambda^*)^{-1}x_i}{c_i} = \xi^*
-\end{equation}
-\begin{equation}
- \mathrm{if}\; \lambda^*_i = 1,\; \frac{x_i^T A(\lambda^*)^{-1}x_i}{c_i} > \xi^*
-\end{equation}
-\begin{equation}
- \mathrm{if}\; \lambda^*_i = 0,\; \frac{x_i^T A(\lambda^*)^{-1}x_i}{c_i} < \xi^*
-\end{equation}
-
-Furthermore, if $W^*\in\mathcal{M}_n(\mathbf{R})$ is primal optimal and
-$N^*\in\mathcal{S}_n(\mathbf{R})$ dual optimal:
-
-\begin{equation}
- W^* = S(\lambda^*)\;;\; N^* = (W^*)^{-1}\;;\; 0\leq\lambda^*\leq \mathbf{1}
-\end{equation}
-
-In particular $N^* = S(\lambda^*)^{-1}$ and:
-\begin{equation}
- S(\mathbf{1})^{-1} \leq N^* \leq S(0)^{-1} = I_d
-\end{equation}
-
-
-\paragraph{Sensitivity analysis, first flavor}
-
-Let $i$ such that $0 < \lambda_i < 1$ and $c' = (c_i + \delta, c_{-i})$, we can
-prove:
-\begin{displaymath}
-L^*(c') \leq L^*(c) - \xi^*\lambda_i^*\delta
-\end{displaymath}
-where $\lambda^*$ and $\xi^*$ are respectively primal and dual optimal for
-$P_c$. Using the KKT conditions and \eqref{lemma:derivative-bounds}, we can bound
-$\xi^*$ and get:
-\begin{equation}\label{eq:sensitivity}
- L^*(c) \geq L^*(c') + \delta\lambda_i^* \frac{x_i^T A(\mathbf{1})^{-1}
- x_i}{c_i}
-\end{equation}
-
-
-However, note that $\lambda_i^*$ can be arbitrarily small, so even a change of
-$\delta$ in the cost can lead to no variation of the optimal value.
-Intuitively, it does not harm if an element $i$ that has only a small fraction
-of itself in the solution sees its cost reduced.
-
-The problem seems to come from the fact that we can have arbitrarily small
-fractions of elements in the solution : this is clearly an artifact of looking
-at a continuous relaxation of the problem, because in the real life, an element
-is either in or out of the solution set.
-
-It is probably possible to prove that if we enforce the $\lambda_i$ to be
-greater than some minimal fraction $\zeta$, then we do not lose to much in
-terms of the optimal value (especially after rounding). Having $\lambda_i\geq
-\zeta$ will allow us to prove that if $i$ changes its cost, then the optimal
-value changes by some strictly positive constant depending only on $\zeta$ (and
-$\delta$ of course).
-
-\paragraph{Sensitivity analysis, second flavor}
-Let us now assume that some user $i$ reduces its cost by $\delta$ and define $c' = (c_i + \delta, c_{-i})$. Then we have:
-\begin{align}
- f(N, c')
- & = f(N, c) + \Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i}\Bigg)\Bigg(\sum_{i=1}^n c_i - 1\Bigg) - \Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i'}\Bigg)\Bigg(\sum_{i=1}^n c_i' - 1\Bigg)\\
- & = f(N, c) + \delta\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i} + \Bigg(\sum_{i=1}^n c_i - 1\Bigg) \Bigg(\Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i}\Bigg) - \Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i'}\Bigg)\Bigg)
-\end{align}
-
-\textbf{Problem:} we have no control over the third term of the last sum (the
-min difference). Otherwise, when we can ignore this term (for example when the
-user who changes his cost is not the one at which the min is reached), we get:
-
-\begin{equation}
-L^*(c') \geq L^*(c) + \delta\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i}
-\end{equation}
-
-\paragraph{Junk:}
-\begin{align*}
- \mathrm{minimize}\quad & \log\det\big(N^{-1}\big) + \mathrm{Tr}\, N - n + \sum_{i=1}^n \nu_i + \xi \\
- \textrm{subject to}\quad & \nu \geq 0\; ;\; \xi \geq 0 \; ; \; N\in \mathcal{S}_n^{++}(\mathbf{R})\; ;\\
- & \forall\; 1\leq i \leq n, \; \nu_i + \xi c_i = x_i^T N x_i
-\end{align*}
-
-\begin{align*}
- \mathrm{minimize}\quad & \log\det\big(N^{-1}\big) + \mathrm{Tr}\, N - n + \sum_{i=1}^n x_i^T N x_i - \xi\Big(\sum_{i=1}^n c_i - 1\Big) \\
- \textrm{subject to}\quad & \xi \geq 0 \; ; \; N\in \mathcal{S}_n^{++}(\mathbf{R})\; ;\\
- & \forall\;1\leq i \leq n, \; \frac{x_i^T N x_i}{c_i} \geq \xi
-\end{align*}
-
\end{document}