author      Thibaut Horel <thibaut.horel@gmail.com>  2013-06-03 20:02:51 +0200
committer   Thibaut Horel <thibaut.horel@gmail.com>  2013-06-03 20:02:51 +0200
commit      0171936dd8da5686126eb0bc20e773f855a3c522 (patch)
tree        9a9fc7b7ffa2cd1d82ecd8e4b0adc72018ae9183
parent      5fcdedb9aa29c0a776c1804d81261ae40ec48794 (diff)
download    recommendation-0171936dd8da5686126eb0bc20e773f855a3c522.tar.gz
Fix the truthfulness issue
-rw-r--r--  dual2.tex  350
1 file changed, 350 insertions, 0 deletions
diff --git a/dual2.tex b/dual2.tex
new file mode 100644
index 0000000..ca07954
--- /dev/null
+++ b/dual2.tex
@@ -0,0 +1,350 @@
+\documentclass{article}
+\usepackage[T1]{fontenc}
+\usepackage[utf8]{inputenc}
+\usepackage{amsmath, amsfonts, amsthm}
+\newtheorem{lemma}{Lemma}
+\newtheorem{proposition}{Proposition}
+\input{definitions}
+
+\begin{document}
+Let $c$ be a cost vector in $[0,1]^n$, and $x_1,\ldots,x_n$, $n$ vectors in
+$\mathbf{R}^d$ such that for all $i\in\{1,\ldots,n\}$, $b\leq \T{x_i}{x_i}\leq
+1$ for some $b\in(0,1]$. Let us consider the following convex optimization
+problem:
+\begin{equation}\tag{$P_c$}\label{eq:primal}
+ \begin{split}
+ \mathrm{maximize}\quad & L(\lambda) \defeq\log\det\left(I_d + \sum_{i=1}^n
+ \lambda_i x_i x_i^T\right)\\
+ \textrm{subject to}\quad & c^T\lambda \leq 1 \;;\; 0\leq\lambda\leq \mathbf{1}
+\end{split}
+\end{equation}
+We denote by $L^*_c$ its optimal value.
+
+Let $\alpha\in\mathbf{R}^+$ and consider the perturbed optimization problem:
+\begin{equation}\tag{$P_{c, \alpha}$}\label{eq:perturbed-primal}
+ \begin{split}
+    \mathrm{maximize}\quad & L(\lambda)\\
+ \textrm{subject to}\quad & c^T\lambda \leq 1 \;;\; \alpha\leq\lambda\leq \mathbf{1}
+\end{split}
+\end{equation}
+and denote by $L^*_c(\alpha)$ its optimal value. Note that we have $L^*_c = L^*_c(0)$.
+
+We will assume that $\alpha\leq\frac{1}{n}$ so that \eqref{eq:perturbed-primal} has at
+least one feasible point: $(\frac{1}{n},\ldots,\frac{1}{n})$.
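+Indeed, since $c\in[0,1]^n$ and $\alpha\leq\frac{1}{n}$,
+\begin{displaymath}
+  \T{c}\,\Big(\tfrac{1}{n},\ldots,\tfrac{1}{n}\Big) = \frac{1}{n}\sum_{i=1}^n c_i \leq 1
+  \quad\text{and}\quad
+  \alpha\leq\frac{1}{n}\leq 1,
+\end{displaymath}
+so this point satisfies all the constraints of \eqref{eq:perturbed-primal}.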
+
+Let us introduce the Lagrangian of problem \eqref{eq:perturbed-primal}:
+\begin{displaymath}
+ \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) \defeq L(\lambda)
+    + \T{\mu}(\lambda-\alpha\mathbf{1}) + \T{\nu}(\mathbf{1}-\lambda) + \xi(1-\T{c}\lambda)
+\end{displaymath}
+so that:
+\begin{displaymath}
+ L^*_c(\alpha) = \min_{\mu, \nu, \xi\geq 0}\max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi)
+\end{displaymath}
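+The inequality $L^*_c(\alpha)\leq\min_{\mu, \nu, \xi\geq 0}\max_\lambda
+\mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi)$ is weak duality: for any
+$\lambda$ feasible for \eqref{eq:perturbed-primal} and any $\mu, \nu, \xi\geq 0$,
+\begin{displaymath}
+  L(\lambda) \leq \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi)
+  \leq \max_{\lambda'} \mathcal{L}_{c, \alpha}(\lambda', \mu, \nu, \xi).
+\end{displaymath}
+The reverse inequality (strong duality) holds here since the objective is
+concave, the constraints are affine and \eqref{eq:perturbed-primal} is feasible.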
+Similarly, we define $\mathcal{L}_{c}\defeq\mathcal{L}_{c, 0}$, the Lagrangian of \eqref{eq:primal}.
+
+Let $\lambda^*$ be primal optimal for \eqref{eq:perturbed-primal}, and $(\mu^*,
+\nu^*, \xi^*)$ be dual optimal for the same problem. In addition to primal and
+dual feasibility, the KKT conditions give $\forall i\in\{1, \ldots, n\}$:
+\begin{gather*}
+ \partial_i L(\lambda^*) + \mu_i^* - \nu_i^* - \xi^* c_i = 0\\
+ \mu_i^*(\lambda_i^* - \alpha) = 0\\
+ \nu_i^*(1 - \lambda_i^*) = 0
+\end{gather*}
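+In particular, the stationarity condition can be rearranged as
+\begin{displaymath}
+  \xi^* c_i = \partial_i L(\lambda^*) + \mu_i^* - \nu_i^*,
+\end{displaymath}
+a form that will be used repeatedly below.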
+
+\begin{lemma}\label{lemma:derivative-bounds}
+  Let $\partial_i L(\lambda)$ denote the $i$-th partial derivative of $L$, for $i\in\{1,\ldots, n\}$. Then:
+ \begin{displaymath}
+ \forall\lambda\in[0, 1]^n,\;\frac{b}{2^n} \leq \partial_i L(\lambda) \leq 1
+ \end{displaymath}
+\end{lemma}
+
+\begin{proof}
+ Let us define:
+ \begin{displaymath}
+ S(\lambda)\defeq I_d + \sum_{i=1}^n \lambda_i x_i\T{x_i}
+ \quad\mathrm{and}\quad
+    S_k \defeq I_d + \sum_{i=1}^k x_i\T{x_i}\quad (0\leq k\leq n)
+ \end{displaymath}
+
+  We have $\partial_i L(\lambda) = \T{x_i}S(\lambda)^{-1}x_i$. Since
+  $S(\lambda)\geq I_d$, we have $S(\lambda)^{-1}\leq I_d$, hence $\partial_i
+  L(\lambda)\leq \T{x_i}x_i \leq 1$, which proves the upper bound of the lemma.
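+  For completeness, the expression for the derivative follows from the matrix
+  determinant lemma: denoting by $e_i$ the $i$-th canonical basis vector of
+  $\mathbf{R}^n$, for $t$ in a neighborhood of $0$,
+  \begin{displaymath}
+    L(\lambda + t e_i) = \log\det\big(S(\lambda) + t\, x_i\T{x_i}\big)
+    = \log\det S(\lambda) + \log\big(1 + t\,\T{x_i}S(\lambda)^{-1}x_i\big),
+  \end{displaymath}
+  and differentiating at $t = 0$ gives the claimed expression.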
+
+  For the lower bound, note that $S(\lambda) \leq S_n$ since $\lambda\leq\mathbf{1}$,
+  hence $S(\lambda)^{-1}\geq S_n^{-1}$ and $\partial_iL(\lambda)\geq \T{x_i}S_n^{-1}x_i$.
+
+ Using the Sherman-Morrison formula, for all $k\geq 1$:
+ \begin{displaymath}
+ \T{x_i}S_k^{-1} x_i = \T{x_i}S_{k-1}^{-1}x_i
+ - \frac{(\T{x_i}S_{k-1}^{-1}x_k)^2}{1+\T{x_k}S_{k-1}^{-1}x_k}
+ \end{displaymath}
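+  (Here, the Sherman-Morrison formula is applied to the rank-one update
+  $S_k = S_{k-1} + x_k\T{x_k}$:
+  \begin{displaymath}
+    S_k^{-1} = S_{k-1}^{-1}
+    - \frac{S_{k-1}^{-1}x_k\T{x_k}S_{k-1}^{-1}}{1+\T{x_k}S_{k-1}^{-1}x_k},
+  \end{displaymath}
+  which gives the identity above after multiplying by $\T{x_i}$ on the left and
+  by $x_i$ on the right.)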
+
+  By the Cauchy-Schwarz inequality (applied to the inner product induced by the positive definite matrix $S_{k-1}^{-1}$):
+ \begin{displaymath}
+ (\T{x_i}S_{k-1}^{-1}x_k)^2 \leq \T{x_i}S_{k-1}^{-1}x_i\;\T{x_k}S_{k-1}^{-1}x_k
+ \end{displaymath}
+
+ Hence:
+ \begin{displaymath}
+ \T{x_i}S_k^{-1} x_i \geq \T{x_i}S_{k-1}^{-1}x_i
+ - \T{x_i}S_{k-1}^{-1}x_i\frac{\T{x_k}S_{k-1}^{-1}x_k}{1+\T{x_k}S_{k-1}^{-1}x_k}
+ \end{displaymath}
+
+  But $\T{x_k}S_{k-1}^{-1}x_k\leq \T{x_k}x_k\leq 1$ (since $S_{k-1}\geq I_d$), and
+  $\frac{a}{1+a}\leq \frac{1}{2}$ for $0\leq a\leq 1$, so:
+ \begin{displaymath}
+ \T{x_i}S_{k}^{-1}x_i \geq \T{x_i}S_{k-1}^{-1}x_i
+ - \frac{1}{2}\T{x_i}S_{k-1}^{-1}x_i\geq \frac{\T{x_i}S_{k-1}^{-1}x_i}{2}
+ \end{displaymath}
+
+ By induction:
+ \begin{displaymath}
+ \T{x_i}S_n^{-1} x_i \geq \frac{\T{x_i}x_i}{2^n}
+ \end{displaymath}
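+  More explicitly, applying the previous inequality for $k = n, n-1, \ldots, 1$
+  and using that $S_0 = I_d$:
+  \begin{displaymath}
+    \T{x_i}S_n^{-1}x_i \geq \frac{1}{2}\T{x_i}S_{n-1}^{-1}x_i \geq \cdots
+    \geq \frac{1}{2^n}\T{x_i}S_0^{-1}x_i = \frac{\T{x_i}x_i}{2^n}.
+  \end{displaymath}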
+
+  Using that $\T{x_i}x_i\geq b$ concludes the proof of the lower bound of the
+  lemma.
+\end{proof}
+
+\begin{lemma}\label{lemma:proximity}
+We have:
+\begin{displaymath}
+ L^*_c - \alpha n\leq L^*_c(\alpha) \leq L^*_c
+\end{displaymath}
+In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n$.
+\end{lemma}
+
+\begin{proof}
+  The function $\alpha\mapsto L^*_c(\alpha)$ is non-increasing, as it is the
+  maximum of $L$ over a feasible set that shrinks as $\alpha$ increases; this
+  gives the right-hand inequality.
+
+ Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c, \alpha})$, that is:
+ \begin{displaymath}
+ L^*_{c}(\alpha) = \max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*)
+ \end{displaymath}
+
+  Note that $\mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*)
+  = \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) - \alpha\T{\mathbf{1}}\mu^*$ and that,
+  for any $\lambda$ feasible for problem \eqref{eq:primal},
+  $\mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) \geq L(\lambda)$. Hence,
+  \begin{displaymath}
+      L^*_{c}(\alpha) \geq L(\lambda) - \alpha\T{\mathbf{1}}\mu^*
+  \end{displaymath}
+  for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for $\lambda^*$ primal optimal for \eqref{eq:primal}:
+  \begin{equation}\label{eq:local-1}
+      L^*_{c}(\alpha) \geq L^*_c - \alpha\T{\mathbf{1}}\mu^*
+  \end{equation}
+
+  Note that, by complementary slackness, $\mu_i^*$ and $\nu_i^*$ cannot both be nonzero (this would force $\lambda_i^* = \alpha$ and $\lambda_i^* = 1$ simultaneously). Combined with the stationarity condition, this gives for all $i$:
+  \begin{displaymath}
+      \mu_i^* \leq \max\big(0,\; \xi^*c_i - \partial_i L(\lambda^*)\big) \leq \xi^*c_i
+  \end{displaymath}
+
+  Using the stationarity condition again, if $\lambda_i^*>\alpha$ then $\mu_i^* = 0$,
+  so $\xi^*c_i = \partial_i L(\lambda^*) - \nu_i^* \leq \partial_i L(\lambda^*)$. In particular, using
+  Lemma~\ref{lemma:derivative-bounds}, $\xi^*c_i\leq 1$. As a consequence,
+  $\alpha\T{\mathbf{1}}\mu^*\leq\alpha n$. Together with \eqref{eq:local-1}, this
+  gives the left-hand inequality of the lemma.
+\end{proof}
+
+\begin{lemma}\label{lemma:monotonicity}
+  If $c' = (c_i', c_{-i})$ with $c_i'\leq c_i - \delta$ for some $\delta > 0$, we have:
+ \begin{displaymath}
+ L^*_{c'}(\alpha) \geq L^*_c(\alpha) + \frac{\alpha\delta b}{2^n}
+ \end{displaymath}
+\end{lemma}
+
+\begin{proof}
+  Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c', \alpha})$. Noting that $\mathcal{L}_{c', \alpha}(\lambda, \mu^*, \nu^*, \xi^*) \geq
+  \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) + \lambda_i\xi^*\delta$, we get, as in the proof of Lemma~\ref{lemma:proximity}:
+ \begin{displaymath}
+ L^*_{c'}(\alpha) \geq L(\lambda) + \lambda_i\xi^*\delta
+ \end{displaymath}
+ for any $\lambda$ feasible for \eqref{eq:perturbed-primal}. In particular, for $\lambda^*$ primal optimal for \eqref{eq:perturbed-primal}:
+ \begin{displaymath}
+ L^*_{c'}(\alpha) \geq L^*_{c}(\alpha) + \alpha\xi^*\delta
+ \end{displaymath}
+ since $\lambda_i^*\geq \alpha$.
+
+  Using the KKT conditions for $(P_{c', \alpha})$, we can write:
+  \begin{displaymath}
+      \xi^* = \inf_{j:\lambda^{'*}_j>\alpha} \frac{\T{x_j}S(\lambda^{'*})^{-1}x_j}{c_j'}
+  \end{displaymath}
+  with $\lambda^{'*}$ primal optimal for $(P_{c', \alpha})$. Since $c_j'\leq 1$ for all $j$, Lemma~\ref{lemma:derivative-bounds} gives $\xi^*\geq \frac{b}{2^n}$, which concludes the proof.
+\end{proof}
+
+\begin{proposition}
+    Let $\delta\in(0,1]$. For any $\varepsilon\in(0,1]$, there exists
+    a routine which computes an approximation $\tilde{L}^*_c$ of the optimal
+    value of \eqref{eq:primal} such that:
+ \begin{enumerate}
+ \item $|\tilde{L}^*_c - L^*_c| \leq \varepsilon$
+ \item for all $c' = (c_i', c_{-i})$ with $c_i'\leq c_i-\delta$, $\tilde{L}^*_c \leq \tilde{L}^*_{c'}$
+    \item the routine's running time is $O\big(\mathrm{poly}(n, d, \log\log\frac{1}{b\varepsilon\delta})\big)$
+ \end{enumerate}
+\end{proposition}
+
+\begin{proof}
+Let $\varepsilon\in(0, 1]$. The routine works as follows: set $\alpha\defeq
+\varepsilon(\delta + n)^{-1}$ and return an approximation $\tilde{L}^*_c$ of
+$L^*_c(\alpha)$, computed to accuracy $\frac{1}{2^{n+1}}\alpha\delta b$ by
+a standard convex optimization algorithm.
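+This choice of $\alpha$ is admissible: since $\varepsilon\leq 1$,
+\begin{displaymath}
+  \alpha = \frac{\varepsilon}{\delta + n} \leq \frac{1}{n},
+\end{displaymath}
+so that \eqref{eq:perturbed-primal} is feasible.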
+
+\begin{enumerate}
+    \item using Lemma~\ref{lemma:proximity} and the fact that the approximation accuracy satisfies $\frac{1}{2^{n+1}}\alpha\delta b \leq \alpha\delta$:
+\begin{displaymath}
+ |\tilde{L}^*_c - L^*_c| \leq |\tilde{L}^*_c - L^*_c(\alpha)| + |L^*_c(\alpha) - L^*_c|
+ \leq \alpha\delta + \alpha n = \varepsilon
+\end{displaymath}
+
+\item let $c' = (c_i', c_{-i})$ with $c_i'\leq c_i-\delta$, then:
+\begin{displaymath}
+    \tilde{L}^*_{c'} \geq L^*_{c'}(\alpha) - \frac{\alpha\delta b}{2^{n+1}}
+    \geq L^*_c(\alpha) + \frac{\alpha\delta b}{2^{n+1}}
+    \geq \tilde{L}^*_c
+\end{displaymath}
+where the first and last inequalities come from the accuracy of the approximation, and the middle inequality follows from Lemma~\ref{lemma:monotonicity}.
+
+\item the accuracy of the approximation $\tilde{L}^*_c$ is:
+\begin{displaymath}
+ A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n)}
+\end{displaymath}
+\sloppy
+hence, the standard convex optimization algorithm runs in time $O(\mathrm{poly}(n, d,\log\log A^{-1}))$. Note that:
+\begin{displaymath}
+    \log\log A^{-1} = O\bigg(\log n + \log\log\frac{1}{\varepsilon\delta b}\bigg)
+\end{displaymath}
+which yields the claimed running time for the routine.\qedhere
+\end{enumerate}
+\end{proof}
+
+\section{Previous Junk}
+\paragraph{Primal reformulation}
+We define $S(\lambda) \defeq I_d + \sum_{i=1}^n \lambda_i x_i x_i^T$. We can
+reformulate the primal:
+\begin{align}
+ \mathrm{maximize}\quad & \log\det W\\
+ \textrm{subject to}\quad & W = S(\lambda)\;;\;c^T\lambda \leq 1 \;;\; 0\leq\lambda\leq \mathbf{1}
+\end{align}
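+As a rough sketch of where the dual below comes from: introducing a symmetric
+multiplier $N$ for the equality constraint $W = S(\lambda)$ and maximizing the
+Lagrangian over $W$ gives
+\begin{displaymath}
+  \sup_{W\in\mathcal{S}_d^{++}(\mathbf{R})}\big(\log\det W - \mathrm{Tr}(NW)\big) = -\log\det N - d,
+\end{displaymath}
+attained at $W = N^{-1}$; maximizing over $\lambda$ and eliminating the
+remaining scalar multipliers then leads to the objective $f(N, c)$ below.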
+
+\paragraph{Dual}
+\begin{align}
+    \mathrm{minimize}\quad & \log\det\big(N^{-1}\big) + \mathrm{Tr}\, N - d + \sum_{i=1}^n x_i^T N x_i \\
+    & - \Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i}\Bigg)\Big(\sum_{i=1}^n c_i - 1\Big) \\
+    \textrm{subject to}\quad & N\in \mathcal{S}_d^{++}(\mathbf{R})
+\end{align}
+
+Let us define:
+\begin{equation}
+\begin{split}
+    f(N, c) \defeq \log\det\big(N^{-1}\big) + \mathrm{Tr}\, N - d + \sum_{i=1}^n x_i^T N x_i \\- \Bigg(\min_{1\leq i\leq n} \frac{x_i^TNx_i}{c_i}\Bigg)\Big(\sum_{i=1}^n c_i - 1\Big)
+\end{split}
+\end{equation}
+
+The dual then reads:
+\begin{align}
+    \mathrm{minimize}\quad & f(N, c)\\
+    \textrm{subject to}\quad & N\in \mathcal{S}_d^{++}(\mathbf{R})
+\end{align}
+
+
+\paragraph{Duality} By strong duality, $L^*(c)$ is equal to the optimal value of the dual optimization
+problem:
+
+\begin{displaymath}
+    L^*(c) = \inf_{N\in\mathcal{S}_d^{++}(\mathbf{R})} f(N, c) = f(N^*, c) = \log\det S(\lambda^*)
+\end{displaymath}
+
+\paragraph{KKT conditions}
+Let $\xi$ denote the dual variable associated with the constraint $c^T\lambda
+\leq 1$. If $\xi^*$ is dual optimal and $\lambda^*$ primal optimal:
+\begin{equation}
+    \mathrm{if}\; 0 < \lambda^*_i < 1,\; \frac{x_i^T
+    S(\lambda^*)^{-1}x_i}{c_i} = \xi^*
+\end{equation}
+\begin{equation}
+    \mathrm{if}\; \lambda^*_i = 1,\; \frac{x_i^T S(\lambda^*)^{-1}x_i}{c_i} \geq \xi^*
+\end{equation}
+\begin{equation}
+    \mathrm{if}\; \lambda^*_i = 0,\; \frac{x_i^T S(\lambda^*)^{-1}x_i}{c_i} \leq \xi^*
+\end{equation}
+
+Furthermore, if $W^*\in\mathcal{M}_d(\mathbf{R})$ is primal optimal and
+$N^*\in\mathcal{S}_d^{++}(\mathbf{R})$ dual optimal:
+
+\begin{equation}
+ W^* = S(\lambda^*)\;;\; N^* = (W^*)^{-1}\;;\; 0\leq\lambda^*\leq \mathbf{1}
+\end{equation}
+
+In particular $N^* = S(\lambda^*)^{-1}$ and:
+\begin{equation}
+ S(\mathbf{1})^{-1} \leq N^* \leq S(0)^{-1} = I_d
+\end{equation}
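+These bounds follow from the ordering
+\begin{displaymath}
+  I_d = S(0) \leq S(\lambda^*) \leq S(\mathbf{1}),
+\end{displaymath}
+since matrix inversion reverses this order.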
+
+
+\paragraph{Sensitivity analysis, first flavor}
+
+Let $i$ be such that $0 < \lambda_i^* < 1$ and let $c' = (c_i + \delta, c_{-i})$. We can
+prove:
+\begin{displaymath}
+L^*(c') \leq L^*(c) - \xi^*\lambda_i^*\delta
+\end{displaymath}
+where $\lambda^*$ and $\xi^*$ are respectively primal and dual optimal for
+$P_c$. Using the KKT conditions and Lemma~\ref{lemma:derivative-bounds}, we can bound
+$\xi^*$ and get:
+\begin{equation}\label{eq:sensitivity}
+    L^*(c) \geq L^*(c') + \delta\lambda_i^* \frac{x_i^T S(\mathbf{1})^{-1}
+    x_i}{c_i}
+\end{equation}
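+One way to obtain this bound (a sketch, assuming $0 < \lambda_i^* < 1$ so that
+the first KKT condition above applies): since $S(\lambda^*) \leq S(\mathbf{1})$,
+\begin{displaymath}
+  \xi^* = \frac{x_i^T S(\lambda^*)^{-1}x_i}{c_i}
+  \geq \frac{x_i^T S(\mathbf{1})^{-1}x_i}{c_i}.
+\end{displaymath}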
+
+
+However, note that $\lambda_i^*$ can be arbitrarily small, so even a change of
+$\delta$ in the cost can lead to an arbitrarily small variation of the optimal
+value. Intuitively, there is little harm if an element $i$ that contributes only
+a small fraction to the solution sees its cost change.
+
+The problem seems to come from the fact that we can have arbitrarily small
+fractions of elements in the solution: this is clearly an artifact of looking
+at a continuous relaxation of the problem, because in reality, an element
+is either in or out of the solution set.
+
+It is probably possible to prove that if we enforce the $\lambda_i$ to be
+greater than some minimal fraction $\zeta$, then we do not lose too much in
+terms of the optimal value (especially after rounding). Having $\lambda_i\geq
+\zeta$ would allow us to prove that if $i$ changes its cost, then the optimal
+value changes by at least some strictly positive amount depending only on
+$\zeta$ (and $\delta$, of course).
+
+\paragraph{Sensitivity analysis, second flavor}
+Let us now assume that some user $i$ reduces its cost by $\delta$ and define $c' = (c_i - \delta, c_{-i})$. Then we have:
+\begin{align}
+    f(N, c')
+    & = f(N, c) + \Bigg(\min_{1\leq j\leq n} \frac{x_j^TNx_j}{c_j}\Bigg)\Bigg(\sum_{j=1}^n c_j - 1\Bigg) - \Bigg(\min_{1\leq j\leq n} \frac{x_j^TNx_j}{c_j'}\Bigg)\Bigg(\sum_{j=1}^n c_j' - 1\Bigg)\\
+    & = f(N, c) + \delta\min_{1\leq j\leq n} \frac{x_j^TNx_j}{c_j'} + \Bigg(\sum_{j=1}^n c_j - 1\Bigg) \Bigg(\Bigg(\min_{1\leq j\leq n} \frac{x_j^TNx_j}{c_j}\Bigg) - \Bigg(\min_{1\leq j\leq n} \frac{x_j^TNx_j}{c_j'}\Bigg)\Bigg)
+\end{align}
+
+\textbf{Problem:} we have no control over the third term of the last sum (the
+difference of minima). However, when this term can be ignored (for example, when
+the user who changes their cost is not the one at which the minimum is attained), we get:
+
+\begin{equation}
+L^*(c') \geq L^*(c) + \delta\min_{1\leq j\leq n} \frac{x_j^TNx_j}{c_j}
+\end{equation}
+
+\paragraph{Junk:}
+\begin{align*}
+    \mathrm{minimize}\quad & \log\det\big(N^{-1}\big) + \mathrm{Tr}\, N - d + \sum_{i=1}^n \nu_i + \xi \\
+    \textrm{subject to}\quad & \nu \geq 0\; ;\; \xi \geq 0 \; ; \; N\in \mathcal{S}_d^{++}(\mathbf{R})\; ;\\
+ & \forall\; 1\leq i \leq n, \; \nu_i + \xi c_i = x_i^T N x_i
+\end{align*}
+
+\begin{align*}
+    \mathrm{minimize}\quad & \log\det\big(N^{-1}\big) + \mathrm{Tr}\, N - d + \sum_{i=1}^n x_i^T N x_i - \xi\Big(\sum_{i=1}^n c_i - 1\Big) \\
+    \textrm{subject to}\quad & \xi \geq 0 \; ; \; N\in \mathcal{S}_d^{++}(\mathbf{R})\; ;\\
+ & \forall\;1\leq i \leq n, \; \frac{x_i^T N x_i}{c_i} \geq \xi
+\end{align*}
+
+\end{document}