Fixing a small flaw in the proof of Lemma 2

author: Thibaut Horel <thibaut.horel@gmail.com> 2013-06-05 18:32:36 +0200
committer: Thibaut Horel <thibaut.horel@gmail.com> 2013-06-05 18:32:36 +0200
commit: aa5094388e98fc60fb5f4829c4294711ee8267aa (patch)
tree: 86a7a303d0807c9056c95ed4a80aeb01f79b2b2b
parent: 0171936dd8da5686126eb0bc20e773f855a3c522 (diff)
download: recommendation-aa5094388e98fc60fb5f4829c4294711ee8267aa.tar.gz
1 files changed, 64 insertions, 149 deletions
diff --git a/dual2.tex b/dual2.tex
index ca07954..a7f9e61 100644
--- a/dual2.tex
+++ b/dual2.tex
@@ -30,13 +30,13 @@ Let $\alpha\in\mathbf{R}^+$, consider the perturbed optimization problem:
 \end{equation}
 and denote by $L^*_c(\alpha)$ its optimal value. Note that we have $L^*_c = L^*_c(0)$.
 
-We will assume that $\alpha\leq\frac{1}{n}$ so that \eqref{eq:perturbed-primal} has at
+We will assume that $\alpha<\frac{1}{n}$ so that \eqref{eq:perturbed-primal} has at
 least one feasible point: $(\frac{1}{n},\ldots,\frac{1}{n})$.
 
 Let us introduce the lagrangian of problem, \eqref{eq:perturbed-primal}:
 \begin{displaymath}
     \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) \defeq L(\lambda) 
-    + \T{\mu}(\lambda-\mathbf{\alpha}) + \T{\nu}(\mathbf{1}-\lambda) + \xi(1-\T{c}\lambda)
+    + \T{\mu}(\lambda-\alpha\mathbf{1}) + \T{\nu}(\mathbf{1}-\lambda) + \xi(1-\T{c}\lambda)
 \end{displaymath}
 so that:
 \begin{displaymath}
@@ -111,9 +111,9 @@ dual feasibility, the KKT conditions give $\forall i\in\{1, \ldots, n\}$:
 \begin{lemma}\label{lemma:proximity}
 We have:
 \begin{displaymath}
-    L^*_c - \alpha n\leq L^*_c(\alpha) \leq L^*_c
+    L^*_c - \alpha n^2(n-1)\leq L^*_c(\alpha) \leq L^*_c
 \end{displaymath}
-In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n$.
+In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2(n-1)$.
 \end{lemma}
 
 \begin{proof}
@@ -127,27 +127,67 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n$.
     \end{displaymath}
 
     Note that $\mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*)
-    = \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) - \T{\alpha}\mu^*$ and that
-    for any $\lambda$ feasible for problem \eqref{eq:primal},
-    $\mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) \geq L(\lambda)$. Hence,
+    = \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*)
+    - \alpha\T{\mathbf{1}}\mu^*$, and that for any $\lambda$ feasible for
+    problem \eqref{eq:primal}, $\mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*)
+    \geq L(\lambda)$. Hence,
     \begin{displaymath}
-        L^*_{c}(\alpha) \geq \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) - \T{\alpha}\mu^*
+        L^*_{c}(\alpha) \geq L(\lambda) - \alpha\T{\mathbf{1}}\mu^*
     \end{displaymath}
     for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for $\lambda^*$ primal optimal for \eqref{eq:primal}:
     \begin{equation}\label{eq:local-1}
-        L^*_{c}(\alpha) \geq L^*_c - \T{\alpha}\mu^*
+        L^*_{c}(\alpha) \geq L^*_c - \alpha\T{\mathbf{1}}\mu^*
     \end{equation}
 
-    Note that from the KKT conditions, we cannot have both $\mu_i^*$ and $\nu_i^*$ different from zero, hence:
+    Let us denote by the $M$ the support of $\mu^*$, that is $M\defeq
+    \{i|\mu_i^* > 0\}$, and by $\lambda^*$ a primal optimal point for
+    $\eqref{eq:perturbed-primal}$.  From the KKT conditions we see that:
     \begin{displaymath}
-        \sum_{i=1}^n \mu_i^* = \sum_{i=1}^n \xi^*c_i - \partial_i L(\lambda^*) \leq \sum_{i=1}^n\xi^*c_i
+        M = \{i|\lambda_i^* = \alpha\}
     \end{displaymath}
 
-    Using the KKT conditions again, if $\lambda_i^*>\alpha$, $\xi^*c_i \leq
-    \partial_i L(\lambda^*)$. In particular, using
-    Lemma~\ref{lemma:derivative-bounds}, $\xi^*c_i\leq 1$. As a consequence,
-    $\T{\alpha}\mu^*\leq\alpha n$. This, in addition to \eqref{eq:local-1}
-    gives the left-hand side of the lemma's inequality.
+    Let us first assume that, $|M|\geq 1$, then we have that $\T{c}\lambda^*
+    = 1$, otherwise we could increase the coordinates of $\lambda^*$ in $M$,
+    which would increase the value of the objective function and contradict the
+    optimality of $\lambda^*$. Note also, that $|M|\leq n-1$, otherwise, since
+    $\alpha< \frac{1}{n}$, we would have $\T{c}\lambda^*\ < 1$, which again
+    contradicts the optimality of $\lambda^*$. Let us write:
+    \begin{displaymath}
+        1 = \T{c}\lambda^* = \alpha\sum_{i\in M}c_i + \sum_{i\in \bar{M}}\lambda_i^*c_i
+        \leq \alpha (n-1) + (n-1)\max_{i\in \bar{M}} c_i
+    \end{displaymath}
+    That is:
+    \begin{equation}\label{local-2}
+        \max_{i\in\bar{M}} c_i \geq \frac{1 - (n-1)\alpha}{n-1}> \frac{1}{n(n-1)}
+    \end{equation}
+    where the last inequality uses again that $\alpha<\frac{1}{n}$. From the
+    KKT conditions, we see that for $i\in M$, $\nu_i^* = 0$ and:
+    \begin{equation}\label{local-3}
+        \mu_i^* = \xi^*c_i - \partial_i L(\lambda^*)\leq \xi^*c_i\leq \xi^*
+    \end{equation}
+    since $\partial_i L(\lambda^*)\geq 0$ and $c_i\leq 1$.
+
+    Furthermore, using the KKT conditions again, we have that:
+    \begin{equation}\label{local-4}
+        \xi^* = \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i}
+        = \frac{1}{\max_{i\in\bar{M}} c_i}
+    \end{equation}
+    where the inequality uses Lemma~\ref{lemma:derivative-bounds}.
+
+    Combining \eqref{local-2}, \eqref{local-3} and \eqref{local-4}, we get that:
+    \begin{displaymath}
+        \sum_{i\in M}\mu_i^* \leq |M|\xi^* \leq n\xi^*\leq \frac{n}{\max_{i\in\bar{M}} c_i} \leq n^2(n-1)
+    \end{displaymath}
+    
+    Finally let us write:
+    \begin{displaymath}
+        \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^*
+    \end{displaymath}
+
+    Either $|M|=0$, in which case $\T{\mathbf{1}}\mu^* = 0$, either $|M|\geq
+    1$, in which case $\T{\mathbf{1}}\mu^*\leq n^2(n-1)$ from the inequality above.
+    In both cases, $\T{\mathbf{1}}\mu^* \leq n^2(n-1)$, which, in addition to
+    \eqref{eq:local-1} proves the lemma.
 \end{proof} 
 
 \begin{lemma}\label{lemma:monotonicity}
@@ -189,15 +229,16 @@ In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n$.
 
 \begin{proof}
 Let $\varepsilon$ in $(0, 1]$. The routine works as follows: set $\alpha\defeq
-\varepsilon(\delta + n)^{-1}$ and return an approximation $\tilde{L}^*_c$ of
+\varepsilon(\delta + n^2(n-1))^{-1}$ and return an approximation $\tilde{L}^*_c$ of
 $L^*_c(\alpha)$ with an accuracy $\frac{1}{2^{n+1}}\alpha\delta b$ computed by
-a standard convex optimization algorithm
+a standard convex optimization algorithm. Note that this choice of $\alpha$
+implies $\alpha<\frac{1}{n}$ as requested.
 
 \begin{enumerate}
     \item using Lemma~\ref{lemma:proximity}:
 \begin{displaymath}
         |\tilde{L}^*_c - L^*_c| \leq |\tilde{L}^*_c - L^*_c(\alpha)| + |L^*_c(\alpha) - L^*_c|
-                                \leq \alpha\delta + \alpha n = \varepsilon
+        \leq \alpha\delta + \alpha n^2(n-1) = \varepsilon
 \end{displaymath}
 
 \item let $c' = (c_i', c_{-i})$ with $c_i'\leq c_i-\delta$, then:
@@ -206,145 +247,19 @@ a standard convex optimization algorithm
                      \geq L^*_c + \frac{\alpha\delta b}{2^{n+1}}
     \geq \tilde{L}^*_c
 \end{displaymath}
-where the first and inequality come from the accuracy of the approximation, and the inner inequality follows from Lemma~\ref{lemma:monotonicity}.
+where the first and inequality come from the accuracy of the approximation, and
+the inner inequality follows from Lemma~\ref{lemma:monotonicity}.
 
 \item the accuracy of the approximation $\tilde{L}^*_c$ is:
 \begin{displaymath}
-    A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n)}
+    A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2(n-1))}
 \end{displaymath}
 \sloppy
 hence, the standard convex optimization algorithm runs in time $O(poly(n, d,\log\log A^{-1}))$. Note that:
 \begin{displaymath}
     \log\log A^{-1} = O\bigg(\log n\; \log\log\frac{1}{\epsilon\delta b}\bigg)
 \end{displaymath}
-which yields the wanted running time for the routing.\qedhere
+which yields the wanted running time for the routine.\qedhere
 \end{enumerate}
 \end{proof}
-
-\section{Previous Junk}
-\paragraph{Primal reformulation}
-We define $S(\lambda) \defeq I_d + \sum_{i=1}^n \lambda_i x_i x_i^T$. We can
-reformulate the primal:
-\begin{align}
-    \mathrm{maximize}\quad & \log\det W\\
-    \textrm{subject to}\quad & W = S(\lambda)\;;\;c^T\lambda \leq 1 \;;\; 0\leq\lambda\leq \mathbf{1}
-\end{align}
-
-\paragraph{Dual}
-\begin{align}
-    \mathrm{minimize}\quad & \log\det\big(N^{-1}\big) + \mathrm{Tr}\, N - n + \sum_{i=1}^n x_i^T N x_i  \\
-    & - \Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i}\Bigg)\Big(\sum_{i=1}^n c_i - 1\Big) \\
-    \textrm{subject to}\quad & N\in \mathcal{S}_n^{++}(\mathbf{R})
-\end{align}
-
-Let us define:
-\begin{equation}
-\begin{split}
-    f(N, c) \defeq \log\det\big(N^{-1}\big) + \mathrm{Tr}\, N - n + \sum_{i=1}^n x_i^T N x_i  \\- \Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i}\Bigg)\Big(\sum_{i=1}^n c_i - 1\Big)
-\end{split}
-\end{equation}
-
-So that the dual reads:
-\begin{align}
-    \mathrm{minimize}\quad & f(N, c)\\
-    \textrm{subject to}\quad & N\in \mathcal{S}_n^{++}(\mathbf{R})
-\end{align}
-
-
-\paragraph{Duality} By duality $L^*(c)$ is equal to the optimal value of the dual optimization
-problem:
-
-\begin{displaymath}
-    L^*(c) = \inf_{N\in\mathcal{S}_n(\mathbf{R})} f(N, c) = f(N^*, c) = \log\det S(\lambda^*)
-\end{displaymath}
-
-\paragraph{KKT conditions}
-Let $\xi$ denote the dual variable associated with the constraint $c^T\lambda
-\leq B$. If $\xi^*$ is dual optimal and $\lambda^*$ primal optimal:
-\begin{equation}
-    \mathrm{if}\; 0 < \lambda^*_i < 1,\; \frac{x_i^T
-    A(\lambda^*)^{-1}x_i}{c_i} = \xi^*
-\end{equation}
-\begin{equation}
-    \mathrm{if}\; \lambda^*_i = 1,\; \frac{x_i^T A(\lambda^*)^{-1}x_i}{c_i} > \xi^*
-\end{equation}
-\begin{equation}
-    \mathrm{if}\; \lambda^*_i = 0,\; \frac{x_i^T A(\lambda^*)^{-1}x_i}{c_i} < \xi^*
-\end{equation}
-
-Furthermore, if $W^*\in\mathcal{M}_n(\mathbf{R})$ is primal optimal and
-$N^*\in\mathcal{S}_n(\mathbf{R})$ dual optimal:
-
-\begin{equation}
-    W^* = S(\lambda^*)\;;\; N^* = (W^*)^{-1}\;;\; 0\leq\lambda^*\leq \mathbf{1}
-\end{equation}
-
-In particular $N^* = S(\lambda^*)^{-1}$ and:
-\begin{equation}
-    S(\mathbf{1})^{-1} \leq N^* \leq S(0)^{-1} = I_d
-\end{equation}
-
-
-\paragraph{Sensitivity analysis, first flavor}
-
-Let $i$ such that $0 < \lambda_i < 1$ and $c' = (c_i + \delta, c_{-i})$, we can
-prove:
-\begin{displaymath}
-L^*(c') \leq L^*(c) - \xi^*\lambda_i^*\delta
-\end{displaymath}
-where $\lambda^*$ and $\xi^*$ are respectively primal and dual optimal for
-$P_c$. Using the KKT conditions and \eqref{lemma:derivative-bounds}, we can bound
-$\xi^*$ and get:
-\begin{equation}\label{eq:sensitivity}
-    L^*(c) \geq L^*(c') + \delta\lambda_i^* \frac{x_i^T A(\mathbf{1})^{-1}
-    x_i}{c_i}
-\end{equation}
-
-
-However, note that $\lambda_i^*$ can be arbitrarily small, so even a change of
-$\delta$ in the cost can lead to no variation of the optimal value.
-Intuitively, it does not harm if an element $i$ that has only a small fraction
-of itself in the solution sees its cost reduced.
-
-The problem seems to come from the fact that we can have arbitrarily small
-fractions of elements in the solution : this is clearly an artifact of looking
-at a continuous relaxation of the problem, because in the real life, an element
-is either in or out of the solution set. 
-
-It is probably possible to prove that if we enforce the $\lambda_i$ to be
-greater than some minimal fraction $\zeta$, then we do not lose to much in
-terms of the optimal value (especially after rounding). Having $\lambda_i\geq
-\zeta$ will allow us to prove that if $i$ changes its cost, then the optimal
-value changes by some strictly positive constant depending only on $\zeta$ (and
-$\delta$ of course).
-
-\paragraph{Sensitivity analysis, second flavor}
-Let us now assume that some user $i$ reduces its cost by $\delta$ and define $c' = (c_i + \delta, c_{-i})$. Then we have:
-\begin{align}
-    f(N, c')
-    & = f(N, c) + \Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i}\Bigg)\Bigg(\sum_{i=1}^n c_i - 1\Bigg) - \Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i'}\Bigg)\Bigg(\sum_{i=1}^n c_i' - 1\Bigg)\\
-    & = f(N, c) + \delta\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i} + \Bigg(\sum_{i=1}^n c_i - 1\Bigg) \Bigg(\Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i}\Bigg) - \Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i'}\Bigg)\Bigg)
-\end{align}
-
-\textbf{Problem:} we have no control over the third term of the last sum (the
-min difference). Otherwise, when we can ignore this term (for example when the
-user who changes his cost is not the one at which the min is reached), we get:
-
-\begin{equation}
-L^*(c') \geq L^*(c) + \delta\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i}
-\end{equation}
-
-\paragraph{Junk:}
-\begin{align*}
-    \mathrm{minimize}\quad & \log\det\big(N^{-1}\big) + \mathrm{Tr}\, N - n + \sum_{i=1}^n \nu_i + \xi \\
-    \textrm{subject to}\quad & \nu \geq 0\; ;\; \xi \geq 0 \; ; \; N\in \mathcal{S}_n^{++}(\mathbf{R})\; ;\\
-    & \forall\; 1\leq i \leq n, \; \nu_i + \xi c_i = x_i^T N x_i
-\end{align*}
-
-\begin{align*}
-    \mathrm{minimize}\quad & \log\det\big(N^{-1}\big) + \mathrm{Tr}\, N - n + \sum_{i=1}^n x_i^T N x_i  - \xi\Big(\sum_{i=1}^n c_i - 1\Big) \\
-    \textrm{subject to}\quad & \xi \geq 0 \; ; \; N\in \mathcal{S}_n^{++}(\mathbf{R})\; ;\\
-    & \forall\;1\leq i \leq n, \; \frac{x_i^T N x_i}{c_i} \geq \xi
-\end{align*}
-
 \end{document}
author	Thibaut Horel <thibaut.horel@gmail.com>	2013-06-05 18:32:36 +0200
committer	Thibaut Horel <thibaut.horel@gmail.com>	2013-06-05 18:32:36 +0200
commit	aa5094388e98fc60fb5f4829c4294711ee8267aa (patch)
tree	86a7a303d0807c9056c95ed4a80aeb01f79b2b2b
parent	0171936dd8da5686126eb0bc20e773f855a3c522 (diff)
download	recommendation-aa5094388e98fc60fb5f4829c4294711ee8267aa.tar.gz