From 07d48e21fb6fc62b1a85b9d80c25560529a9a0b5 Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Fri, 28 Jun 2013 00:16:44 +0200
Subject: Moving the proofs to the appendix, improving the flow

---
 appendix.tex      | 669 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 approximation.tex | 602 ++++++------------------------------------------
 definitions.tex   |   2 +-
 intro.tex         |   2 +-
 main.tex          |  19 +-
 paper.tex         |   8 +-
 problem.tex       |  47 ++--
 proofs.tex        | 122 ----------
 8 files changed, 760 insertions(+), 711 deletions(-)

diff --git a/appendix.tex b/appendix.tex
index 2c83e04..dd643d7 100644
--- a/appendix.tex
+++ b/appendix.tex
@@ -1,6 +1,558 @@
+\subsection{Proof of Proposition~\ref{prop:relaxation}}
+
+\begin{lemma}\label{lemma:relaxation-ratio}
+For all $\lambda\in[0,1]^{n}$,
+$\frac{1}{2}\,L(\lambda)\leq F(\lambda)\leq L(\lambda)$.
+\end{lemma}
+
+\begin{proof}
+ The bound $F(\lambda)\leq L(\lambda)$ follows from the concavity of the
+ $\log\det$ function.
+ To show the lower bound, we first prove that $\frac{1}{2}$ is a lower
+ bound on the ratio $\partial_i F(\lambda)/\partial_i L(\lambda)$, where
+ $\partial_i\,\cdot$ denotes the partial derivative with respect to the
+ $i$-th variable.
+
+ Let us start by computing the derivatives of $F$ and $L$ with respect to
+ the $i$-th component. Observe that
+ \begin{displaymath}
+ \partial_i P_\mathcal{N}^\lambda(S) = \left\{
+ \begin{aligned}
+ & P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})\;\textrm{if}\;
+ i\in S, \\
+ & - P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\;\textrm{if}\;
+ i\in \mathcal{N}\setminus S. \\
+ \end{aligned}\right.
+ \end{displaymath}
+ Hence,
+ \begin{displaymath}
+ \partial_i F(\lambda) =
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})V(S)
+ - \sum_{\substack{S\subseteq\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S).
+ \end{displaymath}
+ Now, using that every $S$ such that $i\in S$ can be uniquely written as
+ $S'\cup\{i\}$, we can write:
+ \begin{displaymath}
+ \partial_i F(\lambda) =
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\big(V(S\cup\{i\})
+ - V(S)\big).
+ \end{displaymath}
+ The marginal contribution of $i$ to $S$ can be written as
+\begin{align*}
+V(S\cup \{i\}) - V(S)& = \frac{1}{2}\log\det(I_d
+ + \T{X_S}X_S + x_i\T{x_i})
+ - \frac{1}{2}\log\det(I_d + \T{X_S}X_S)\\
+ & = \frac{1}{2}\log\det(I_d + x_i\T{x_i}(I_d
+ +\T{X_S}X_S)^{-1})
+ = \frac{1}{2}\log(1 + \T{x_i}A(S)^{-1}x_i),
+\end{align*}
+where $A(S) \defeq I_d+ \T{X_S}X_S$, and the last equality follows from
+Sylvester's determinant identity~\cite{sylvester}.
+Using this,
+ \begin{displaymath}
+ \partial_i F(\lambda) = \frac{1}{2}
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
+ \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big).
+ \end{displaymath}
+ The computation of the derivative of $L$ uses standard matrix
+ calculus: writing $\tilde{A}(\lambda) = I_d+\sum_{i\in
+ \mathcal{N}}\lambda_ix_i\T{x_i}$,
+ \begin{displaymath}
+ \det \tilde{A}(\lambda + h\cdot e_i) = \det\big(\tilde{A}(\lambda)
+ + hx_i\T{x_i}\big)
+ =\det \tilde{A}(\lambda)\big(1+
+ h\T{x_i}\tilde{A}(\lambda)^{-1}x_i\big).
+ \end{displaymath}
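+ (The last equality is the matrix determinant lemma, \emph{i.e.},
+ Sylvester's identity~\cite{sylvester} specialized to the rank-one update
+ $hx_i\T{x_i}$.)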
+ Hence,
+ \begin{displaymath}
+ \log\det\tilde{A}(\lambda + h\cdot e_i)= \log\det\tilde{A}(\lambda)
+ + h\T{x_i}\tilde{A}(\lambda)^{-1}x_i + o(h),
+ \end{displaymath}
+ which implies
+ \begin{displaymath}
+ \partial_i L(\lambda)
+ =\frac{1}{2} \T{x_i}\tilde{A}(\lambda)^{-1}x_i.
+ \end{displaymath}
+
+For two symmetric matrices $A$ and $B$, we write $A\succ B$ ($A\succeq B$) if
+$A-B$ is positive definite (positive semi-definite). This order allows us to
+define the notions of \emph{decreasing} and \emph{convex} matrix
+functions, similarly to their real counterparts. With this definition, matrix
+inversion is decreasing and convex over symmetric positive definite
+matrices (see Example 3.48, p.~110 in \cite{boyd2004convex}).
+In particular,
+\begin{gather*}
+ \forall S\subseteq\mathcal{N},\quad A(S)^{-1} \succeq A(S\cup\{i\})^{-1},
+\end{gather*}
+as $A(S)\preceq A(S\cup\{i\})$. Observe that
+$P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\geq P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\})$ for all $S\subseteq\mathcal{N}\setminus\{i\}$, and
+$P_{\mathcal{N}\setminus\{i\}}^\lambda(S) \geq P_\mathcal{N}^\lambda(S)$ for all $S\subseteq\mathcal{N}$.
+ Hence, splitting the sum into two halves,
+ \begin{align*}
+ \partial_i F(\lambda)
+ & \geq \frac{1}{4}
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
+ \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\
+ &\hspace{-3.5em}+\frac{1}{4}
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\})
+ \log\Big(1 + \T{x_i}A(S\cup\{i\})^{-1}x_i\Big)\\
+ &\geq \frac{1}{4}
+ \sum_{S\subseteq\mathcal{N}}
+ P_\mathcal{N}^\lambda(S)
+ \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big).
+ \end{align*}
+ Using that $A(S)\succeq I_d$, we get that $\T{x_i}A(S)^{-1}x_i \leq
+ \norm{x_i}_2^2 \leq 1$. Moreover, $\log(1+x)\geq x$ for all $0\leq x\leq 1$.
+ Hence,
+ \begin{displaymath}
+ \partial_i F(\lambda) \geq
+ \frac{1}{4}
+ \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)^{-1}\bigg)x_i.
+ \end{displaymath}
+ Finally, using that the inverse is a matrix convex function over symmetric
+ positive definite matrices:
+ \begin{displaymath}
+ \partial_i F(\lambda) \geq
+ \frac{1}{4}
+ \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)\bigg)^{-1}x_i
+ = \frac{1}{4}\T{x_i}\tilde{A}(\lambda)^{-1}x_i
+ = \frac{1}{2}
+ \partial_i L(\lambda).
+ \end{displaymath}
+
+Having bounded the ratio between the partial derivatives, we now bound the
+ratio $F(\lambda)/L(\lambda)$ from below. Consider the following cases.
+ First, if the minimum of the ratio
+ $F(\lambda)/L(\lambda)$ is attained at a point interior to the hypercube,
+ then it is a critical point, \emph{i.e.},
+ $\partial_i \big(F(\lambda)/L(\lambda)\big)=0$ for all $i\in \mathcal{N}$;
+ hence, at such a critical point:
+ \begin{equation}\label{eq:lhopital}
+ \frac{F(\lambda)}{L(\lambda)}
+ = \frac{\partial_i F(\lambda)}{\partial_i
+ L(\lambda)} \geq \frac{1}{2}.
+ \end{equation}
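+ (Indeed, at an interior critical point, $\partial_i F(\lambda)\,L(\lambda)
+ = F(\lambda)\,\partial_i L(\lambda)$ for every $i$, so the ratio of the two
+ functions equals the ratio of their partial derivatives.)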
+ Second, if the minimum is attained as $\lambda$ converges to zero (in,
+ \emph{e.g.}, the $\ell_2$ norm), then by a first-order Taylor expansion:
+ \begin{displaymath}
+ \frac{F(\lambda)}{L(\lambda)}
+ \sim_{\lambda\rightarrow 0}
+ \frac{\sum_{i\in \mathcal{N}}\lambda_i\partial_i F(0)}
+ {\sum_{i\in\mathcal{N}}\lambda_i\partial_i L(0)}
+ \geq \frac{1}{2},
+ \end{displaymath}
+ \emph{i.e.}, the ratio $\frac{F(\lambda)}{L(\lambda)}$ is necessarily
+ bounded from below by $1/2$ for small enough $\lambda$.
+ Finally, if the minimum is attained on a face of the hypercube $[0,1]^n$
+ (a face is defined as a subset of the hypercube where one of the variables
+ is fixed to 0 or 1), without loss of generality, we can assume that the
+ minimum is attained on the face where the $n$-th variable has been fixed
+ to 0 or 1. Then, either the minimum is attained at a point interior to the
+ face or on a boundary of the face. In the first sub-case, relation
+ \eqref{eq:lhopital} still characterizes the minimum for $i< n$.
+ In the second sub-case, repeating the argument inductively, we see
+ that all that is left to do is to show that the bound holds at the
+ vertices of the cube (the faces of dimension 0). The vertices are exactly
+ the binary points, at which both relaxations are equal to the value
+ function $V$. Hence, the ratio is equal to 1 at the vertices.
+\end{proof}
+
+We now prove that $F$ admits the following exchange property: given a feasible
+$\lambda\in[0,1]^n$, it is possible to trade one fractional component of
+$\lambda$ for another until one of them becomes integral, obtaining a new
+element $\bar{\lambda}$ which is both feasible and such that
+$F(\bar{\lambda})\geq F(\lambda)$. Here, by feasibility of a point $\lambda$,
+we mean that it satisfies the budget constraint $\sum_{i=1}^n \lambda_i c_i
+\leq B$. This rounding property is referred to in the literature as
+\emph{cross-convexity} (see, \emph{e.g.}, \cite{dughmi}), or
+$\varepsilon$-convexity by \citeN{pipage}.
+
+\begin{lemma}[Rounding]\label{lemma:rounding}
+ For any feasible $\lambda\in[0,1]^{n}$, there exists a feasible
+ $\bar{\lambda}\in[0,1]^{n}$ such that at most one of its components is
+ fractional and $F(\lambda)\leq F(\bar{\lambda})$.
+\end{lemma}
+
+\begin{proof}
+ We give a rounding procedure which, given a feasible $\lambda$ with at
+ least two fractional components, returns some feasible $\lambda'$ with one
+ fewer fractional component and such that $F(\lambda) \leq F(\lambda')$.
+ Applying this procedure recursively yields the lemma's result.
+ Let us consider such a feasible $\lambda$.
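+ As in the pipage rounding framework of \citeN{pipage}, the idea is to
+ perturb two fractional coordinates of $\lambda$ in a direction that leaves
+ the budget constraint unchanged, and to exploit the convexity of $F$ along
+ this direction.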
+ Let $i$ and $j$ be two fractional components of $\lambda$ and let us
+ define the following function:
+ \begin{displaymath}
+ F_\lambda(\varepsilon) = F(\lambda_\varepsilon)
+ \quad\textrm{where} \quad
+ \lambda_\varepsilon = \lambda + \varepsilon\left(e_i-\frac{c_i}{c_j}e_j\right).
+ \end{displaymath}
+ It is easy to see that if $\lambda$ is feasible, then:
+ \begin{equation}\label{eq:convex-interval}
+ \forall\varepsilon\in\Big[\max\Big(-\lambda_i,(\lambda_j-1)\frac{c_j}{c_i}\Big), \min\Big(1-\lambda_i, \lambda_j
+ \frac{c_j}{c_i}\Big)\Big],\;
+ \lambda_\varepsilon\;\;\textrm{is feasible.}
+ \end{equation}
+ Furthermore, the function $F_\lambda$ is convex; indeed:
+ \begin{align*}
+ F_\lambda(\varepsilon)
+ & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}^\lambda}\Big[
+ (\lambda_i+\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i,j\})\\
+ & + (\lambda_i+\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i\})
+ + (1-\lambda_i-\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{j\})\\
+ & + (1-\lambda_i-\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S')\Big].
+ \end{align*}
+ Thus, $F_\lambda$ is a degree-2 polynomial whose leading coefficient is:
+ \begin{displaymath}
+ \frac{c_i}{c_j}\mathbb{E}_{S'\sim
+ P_{\mathcal{N}\setminus\{i,j\}}^\lambda}\Big[
+ V(S'\cup\{i\})+V(S'\cup\{j\})
+ -V(S'\cup\{i,j\})-V(S')\Big],
+ \end{displaymath}
+ which is nonnegative by submodularity of $V$. Hence, the maximum of
+ $F_\lambda$ over the interval given in \eqref{eq:convex-interval} is
+ attained at one of its endpoints, at which either the $i$-th or the
+ $j$-th component of $\lambda_\varepsilon$ becomes integral.
+\end{proof}
+
+\subsubsection*{End of the proof of Proposition~\ref{prop:relaxation}}
+
+Let us consider a feasible point $\lambda^*\in[0,1]^{n}$ such that
+$L(\lambda^*) = L^*_c$. By applying Lemma~\ref{lemma:relaxation-ratio} and
+Lemma~\ref{lemma:rounding}, we get a feasible point $\bar{\lambda}$ with at
+most one fractional component such that
+\begin{equation}\label{eq:e1}
+ L(\lambda^*) \leq 2 F(\bar{\lambda}).
+\end{equation}
+ Let $\lambda_i$ denote the fractional component of $\bar{\lambda}$ and $S$
+ denote the set whose indicator vector is $\bar{\lambda} - \lambda_i e_i$.
+ By definition of the multi-linear extension $F$:
+ \begin{displaymath}
+ F(\bar{\lambda}) = (1-\lambda_i)V(S) +\lambda_i V(S\cup\{i\}).
+ \end{displaymath}
+ By submodularity of $V$, $V(S\cup\{i\})\leq V(S) + V(i)$. Hence,
+ \begin{displaymath}
+ F(\bar{\lambda}) \leq V(S) + V(i).
+ \end{displaymath}
+ Note that since $\bar{\lambda}$ is feasible, $S$ is also feasible and
+ $V(S)\leq OPT$. Hence,
+ \begin{equation}\label{eq:e2}
+ F(\bar{\lambda}) \leq OPT + \max_{i\in\mathcal{N}} V(i).
+ \end{equation}
+Together, \eqref{eq:e1} and \eqref{eq:e2} imply the
+proposition.\hspace*{\stretch{1}}\qed
+
+\subsection{Proof of Proposition~\ref{prop:monotonicity}}
+
+The $\log\det$ function is concave and self-concordant (see
+\cite{boyd2004convex}); in this case, the analysis of the barrier method
+in \cite{boyd2004convex} (Section 11.5.5) can be summarized in the following
+lemma:
+
+\begin{lemma}\label{lemma:barrier}
+For any $\varepsilon>0$, the barrier method computes an $\varepsilon$-accurate
+approximation of $L^*_c$ in time $O(\text{poly}(n,d,\log\log\varepsilon^{-1}))$.
+\end{lemma}
+
+For ease of presentation, throughout this subsection we normalize the costs
+by dividing them by the budget $B$, so that the budget constraints in
+\eqref{eq:primal} and \eqref{eq:perturbed-primal} read $\T{c}\lambda\leq 1$,
+the normalized costs satisfy $c_i\leq 1$ (experiments whose cost exceeds the
+budget can never be selected and are discarded), and the perturbation
+parameter satisfies $\alpha<\frac{1}{n}$.
+
+We show that the optimal value of \eqref{eq:perturbed-primal} is close to the
+optimal value of \eqref{eq:primal} (Lemma~\ref{lemma:proximity}) while being
+well-behaved with respect to changes of the cost
+(Lemma~\ref{lemma:monotonicity}). These lemmas together imply
+Proposition~\ref{prop:monotonicity}.
+
+\begin{lemma}\label{lemma:derivative-bounds}
+ Let $\partial_i L(\lambda)$ denote the $i$-th partial derivative of $L$
+ for $i\in\{1,\ldots, n\}$; then:
+ \begin{displaymath}
+ \forall\lambda\in[0, 1]^n,\;\frac{b}{2^n} \leq \partial_i L(\lambda) \leq 1.
+ \end{displaymath}
+\end{lemma}
+
+\begin{proof}
+ Let us define:
+ \begin{displaymath}
+ S(\lambda)\defeq I_d + \sum_{i=1}^n \lambda_i x_i\T{x_i}
+ \quad\mathrm{and}\quad
+ S_k \defeq I_d + \sum_{i=1}^k x_i\T{x_i}.
+ \end{displaymath}
+
+ We have $\partial_i L(\lambda) = \T{x_i}S(\lambda)^{-1}x_i$. Since
+ $S(\lambda)\succeq I_d$, $\partial_i L(\lambda)\leq \T{x_i}x_i \leq 1$,
+ which is the right-hand side of the lemma.
+
+ For the left-hand side, note that $S(\lambda) \preceq S_n$. Hence
+ $\partial_iL(\lambda)\geq \T{x_i}S_n^{-1}x_i$.
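+ For completeness, recall the Sherman--Morrison formula: for an invertible
+ matrix $A$ and a vector $u$ with $1+\T{u}A^{-1}u\neq 0$,
+ \begin{displaymath}
+ (A + u\T{u})^{-1} = A^{-1} - \frac{A^{-1}u\T{u}A^{-1}}{1+\T{u}A^{-1}u}.
+ \end{displaymath}
+ Applied to $S_k = S_{k-1} + x_k\T{x_k}$ and multiplied by $\T{x_i}$ on the
+ left and by $x_i$ on the right, it yields the identity displayed next.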
+
+ Using the Sherman-Morrison formula, for all $k\geq 1$:
+ \begin{displaymath}
+ \T{x_i}S_k^{-1} x_i = \T{x_i}S_{k-1}^{-1}x_i
+ - \frac{(\T{x_i}S_{k-1}^{-1}x_k)^2}{1+\T{x_k}S_{k-1}^{-1}x_k}.
+ \end{displaymath}
+
+ By the Cauchy-Schwarz inequality:
+ \begin{displaymath}
+ (\T{x_i}S_{k-1}^{-1}x_k)^2 \leq \T{x_i}S_{k-1}^{-1}x_i\;\T{x_k}S_{k-1}^{-1}x_k.
+ \end{displaymath}
+
+ Hence:
+ \begin{displaymath}
+ \T{x_i}S_k^{-1} x_i \geq \T{x_i}S_{k-1}^{-1}x_i
+ - \T{x_i}S_{k-1}^{-1}x_i\frac{\T{x_k}S_{k-1}^{-1}x_k}{1+\T{x_k}S_{k-1}^{-1}x_k}.
+ \end{displaymath}
+
+ But $\T{x_k}S_{k-1}^{-1}x_k\leq 1$ and $\frac{a}{1+a}\leq \frac{1}{2}$ if
+ $0\leq a\leq 1$, so:
+ \begin{displaymath}
+ \T{x_i}S_{k}^{-1}x_i \geq \T{x_i}S_{k-1}^{-1}x_i
+ - \frac{1}{2}\T{x_i}S_{k-1}^{-1}x_i\geq \frac{\T{x_i}S_{k-1}^{-1}x_i}{2}.
+ \end{displaymath}
+
+ By induction:
+ \begin{displaymath}
+ \T{x_i}S_n^{-1} x_i \geq \frac{\T{x_i}x_i}{2^n}.
+ \end{displaymath}
+
+ Using that $\T{x_i}x_i\geq b$ concludes the proof of the left-hand side
+ of the lemma's inequality.
+\end{proof}
+
+Let us introduce the Lagrangian of problem~\eqref{eq:perturbed-primal}:
+\begin{displaymath}
+ \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) \defeq L(\lambda)
+ + \T{\mu}(\lambda-\alpha\mathbf{1}) + \T{\nu}(\mathbf{1}-\lambda) + \xi(1-\T{c}\lambda),
+\end{displaymath}
+so that:
+\begin{displaymath}
+ L^*_c(\alpha) = \min_{\mu, \nu, \xi\geq 0}\max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi).
+\end{displaymath}
+Similarly, we define $\mathcal{L}_{c}\defeq\mathcal{L}_{c, 0}$, the Lagrangian
+of \eqref{eq:primal}.
+
+Let $\lambda^*$ be primal optimal for \eqref{eq:perturbed-primal}, and $(\mu^*,
+\nu^*, \xi^*)$ be dual optimal for the same problem. In addition to primal and
+dual feasibility, the KKT conditions give, for all $i\in\{1, \ldots, n\}$:
+\begin{gather*}
+ \partial_i L(\lambda^*) + \mu_i^* - \nu_i^* - \xi^* c_i = 0\\
+ \mu_i^*(\lambda_i^* - \alpha) = 0\\
+ \nu_i^*(1 - \lambda_i^*) = 0
+\end{gather*}
+
+\begin{lemma}\label{lemma:proximity}
+We have:
+\begin{displaymath}
+ L^*_c - \alpha n^2\leq L^*_c(\alpha) \leq L^*_c.
+\end{displaymath}
+In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2$.
+\end{lemma}
+
+\begin{proof}
+ $\alpha\mapsto L^*_c(\alpha)$ is a non-increasing function, as it is the
+ maximum value of $L$ over a feasible set that shrinks as $\alpha$
+ increases; this gives the rightmost inequality.
+
+ Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c, \alpha})$, that is:
+ \begin{displaymath}
+ L^*_{c}(\alpha) = \max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*).
+ \end{displaymath}
+
+ Note that $\mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*)
+ = \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*)
+ - \alpha\T{\mathbf{1}}\mu^*$, and that for any $\lambda$ feasible for
+ problem \eqref{eq:primal}, $\mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*)
+ \geq L(\lambda)$. Hence,
+ \begin{displaymath}
+ L^*_{c}(\alpha) \geq L(\lambda) - \alpha\T{\mathbf{1}}\mu^*
+ \end{displaymath}
+ for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for
+ $\lambda$ primal optimal for \eqref{eq:primal}:
+ \begin{equation}\label{eq:local-1}
+ L^*_{c}(\alpha) \geq L^*_c - \alpha\T{\mathbf{1}}\mu^*.
+ \end{equation}
+
+ Let us denote by $M$ the support of $\mu^*$, that is $M\defeq
+ \{i\,|\,\mu_i^* > 0\}$, and by $\lambda^*$ a primal optimal point for
+ \eqref{eq:perturbed-primal}.
+ From the KKT conditions, we see that:
+ \begin{displaymath}
+ M \subseteq \{i\,|\,\lambda_i^* = \alpha\}.
+ \end{displaymath}
+
+ Let us first assume that $|M| = 0$; then $\T{\mathbf{1}}\mu^*=0$ and the
+ lemma follows.
+
+ We now assume that $|M|\geq 1$. In this case $\T{c}\lambda^* = 1$;
+ otherwise we could increase the coordinates of $\lambda^*$ in $M$, which
+ would increase the value of the objective function and contradict the
+ optimality of $\lambda^*$. Note also that $|M|\leq n-1$; otherwise, since
+ $\alpha< \frac{1}{n}$, we would have $\T{c}\lambda^* < 1$, which again
+ contradicts the optimality of $\lambda^*$. Let us write:
+ \begin{displaymath}
+ 1 = \T{c}\lambda^* = \alpha\sum_{i\in M}c_i + \sum_{i\in \bar{M}}\lambda_i^*c_i
+ \leq \alpha |M| + (n-|M|)\max_{i\in \bar{M}} c_i.
+ \end{displaymath}
+ That is:
+ \begin{equation}\label{local-2}
+ \max_{i\in\bar{M}} c_i \geq \frac{1 - |M|\alpha}{n-|M|}> \frac{1}{n},
+ \end{equation}
+ where the last inequality again uses that $\alpha<\frac{1}{n}$. From the
+ KKT conditions, we see that for $i\in M$, $\nu_i^* = 0$ and:
+ \begin{equation}\label{local-3}
+ \mu_i^* = \xi^*c_i - \partial_i L(\lambda^*)\leq \xi^*c_i\leq \xi^*,
+ \end{equation}
+ since $\partial_i L(\lambda^*)\geq 0$ and $c_i\leq 1$.
+
+ Furthermore, using the KKT conditions again, we have that:
+ \begin{equation}\label{local-4}
+ \xi^* \leq \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i}
+ = \frac{1}{\max_{i\in\bar{M}} c_i},
+ \end{equation}
+ where the second inequality uses the upper bound of
+ Lemma~\ref{lemma:derivative-bounds}.
+
+ Combining \eqref{local-2}, \eqref{local-3} and \eqref{local-4}, we get that:
+ \begin{displaymath}
+ \sum_{i\in M}\mu_i^* \leq |M|\xi^* \leq n\xi^*\leq \frac{n}{\max_{i\in\bar{M}} c_i} \leq n^2.
+ \end{displaymath}
+
+ This implies that:
+ \begin{displaymath}
+ \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^*\leq n^2,
+ \end{displaymath}
+ which in addition to \eqref{eq:local-1} proves the lemma.
+\end{proof}
+
+\begin{lemma}\label{lemma:monotonicity}
+ If $c' = (c_i', c_{-i})$ with $c_i'\leq c_i - \delta$, we have:
+ \begin{displaymath}
+ L^*_{c'}(\alpha) \geq L^*_c(\alpha) + \frac{\alpha\delta b}{2^n}.
+ \end{displaymath}
+\end{lemma}
+
+\begin{proof}
+ Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c', \alpha})$. Noting that:
+ \begin{displaymath}
+ \mathcal{L}_{c', \alpha}(\lambda, \mu^*, \nu^*, \xi^*) \geq
+ \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) + \lambda_i\xi^*\delta,
+ \end{displaymath}
+ we get, similarly to Lemma~\ref{lemma:proximity}:
+ \begin{displaymath}
+ L^*_{c'}(\alpha) \geq L(\lambda) + \lambda_i\xi^*\delta
+ \end{displaymath}
+ for any $\lambda$ feasible for \eqref{eq:perturbed-primal}. In particular,
+ for $\lambda^*$ primal optimal for \eqref{eq:perturbed-primal}:
+ \begin{displaymath}
+ L^*_{c'}(\alpha) \geq L^*_{c}(\alpha) + \alpha\xi^*\delta,
+ \end{displaymath}
+ since $\lambda_i^*\geq \alpha$.
+
+ Using the KKT conditions for $(P_{c', \alpha})$, we can write:
+ \begin{displaymath}
+ \xi^* = \inf_{i:\lambda^{'*}_i>\alpha} \frac{\T{x_i}S(\lambda^{'*})^{-1}x_i}{c_i'}
+ \end{displaymath}
+ with $\lambda^{'*}$ optimal for $(P_{c', \alpha})$. Since $c_i'\leq 1$,
+ using Lemma~\ref{lemma:derivative-bounds}, we get that
+ $\xi^*\geq \frac{b}{2^n}$, which concludes the proof.
+\end{proof}
+
+\subsubsection*{End of the proof of Proposition~\ref{prop:monotonicity}}
+
+Let $\tilde{L}^*_c$ be the approximation computed by
+Algorithm~\ref{alg:monotone}.
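+Note that, with the costs normalized by the budget, Algorithm~\ref{alg:monotone}
+sets $\alpha = \varepsilon(\delta+n^2)^{-1}$, so that the two error terms
+appearing in the first item below balance exactly:
+$\alpha\delta + \alpha n^2 = \varepsilon$.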
+\begin{enumerate}
+ \item Using Lemma~\ref{lemma:proximity}:
+\begin{displaymath}
+ |\tilde{L}^*_c - L^*_c| \leq |\tilde{L}^*_c - L^*_c(\alpha)| + |L^*_c(\alpha) - L^*_c|
+ \leq \alpha\delta + \alpha n^2 = \varepsilon,
+\end{displaymath}
+which proves the $\varepsilon$-accuracy.
+
+\item For the $\delta$-decreasingness, let $c' = (c_i', c_{-i})$ with $c_i'\leq
+ c_i-\delta$; then:
+\begin{displaymath}
+ \tilde{L}^*_{c'} \geq L^*_{c'}(\alpha) - \frac{\alpha\delta b}{2^{n+1}}
+ \geq L^*_c(\alpha) + \frac{\alpha\delta b}{2^{n+1}}
+ \geq \tilde{L}^*_c,
+\end{displaymath}
+where the first and last inequalities come from the accuracy of the
+approximation, and the middle inequality follows from
+Lemma~\ref{lemma:monotonicity}.
+
+\item The accuracy required of the approximation computed in line 2 of
+Algorithm~\ref{alg:monotone} is:
+\begin{displaymath}
+ A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2)}.
+\end{displaymath}
+
+Note that:
+\begin{displaymath}
+ \log\log A^{-1} = O\bigg(\log\log\frac{1}{\varepsilon\delta b} + \log n\bigg).
+\end{displaymath}
+Using Lemma~\ref{lemma:barrier} concludes the proof of the running time.\qed
+\end{enumerate}
+
+\subsection{Proof of Theorem~\ref{thm:main}}\label{sec:proofofmainthm}
+
+We now present the proof of Theorem~\ref{thm:main}. $\delta$-truthfulness and
+individual rationality follow from $\delta$-monotonicity and threshold
+payments. $\delta$-monotonicity and budget feasibility follow from the same
+arguments as in the analysis of \citeN{chen}; for the sake of completeness,
+we restate their proof here.
+
 \begin{lemma}\label{lemma:monotone}
 Our mechanism for \EDP{} is $\delta$-monotone and budget feasible.
 \end{lemma}
+
 \begin{proof}
 Consider an agent $i$ with cost $c_i$ that is selected by the mechanism, and suppose that she reports
@@ -28,11 +580,6 @@ Our mechanism for \EDP{} is $\delta$-monotone and budget feasible.
 Suppose now that when $i$ reports $c_i$, $OPT'_{-i^*} < C V(i^*)$. Then
 $s_i(c_i,c_{-i})=1$ iff $i = i^*$. Reporting $c_{i^*}'\leq c_{i^*}$ does not change $V(i^*)$ nor $OPT'_{-i^*} \leq C V(i^*)$; thus $s_{i^*}(c_{i^*}',c_{-i^*})=1$, so the mechanism is monotone.
-%\end{proof}
-%\begin{lemma}\label{lemma:budget-feasibility}
-%The mechanism is budget feasible.
-%\end{lemma}
-%\begin{proof}
 To show budget feasibility, suppose that $OPT'_{-i^*} < C V(i^*)$. Then the
 mechanism selects $i^*$. Since the bid of $i^*$ does not affect the above condition, the threshold payment of $i^*$ is $B$ and the mechanism is budget feasible. Suppose that $OPT'_{-i^*} \geq C V(i^*)$.
@@ -57,4 +604,116 @@ Hence, the total payment is bounded by the telescopic sum:
 \end{displaymath}
 \end{proof}
+The complexity of the mechanism is given by the following lemma.
+
+\begin{lemma}[Complexity]\label{lemma:complexity}
+ For any $\varepsilon > 0$ and any $\delta>0$, the complexity of the
+ mechanism is $O\big(\text{poly}(n, d, \log\log\frac{1}{b\varepsilon\delta})\big)$.
+\end{lemma}
+
+\begin{proof}
+ The value function $V$ in \eqref{modified} can be computed in time
+ $O(\text{poly}(n, d))$ and the mechanism only involves a linear
+ number of queries to the function $V$.
+
+ By Proposition~\ref{prop:monotonicity}, line 3 of Algorithm~\ref{mechanism}
+ can be computed in time
+ $O(\text{poly}(n, d, \log\log \varepsilon^{-1}))$. Hence the allocation
+ function's complexity is as stated.
+\junk{
+ Using Singer's characterization of the threshold payments
+ \cite{singer-mechanisms}, one can verify that they can be computed in time
+ $O(\text{poly}(n, d))$.
+ }
+\end{proof}
+
+Finally, we prove the approximation ratio of the mechanism.
+
+We use the following lemma from \cite{chen}, which bounds $OPT$ in terms of
+the value of $S_G$, as computed in Algorithm~\ref{mechanism}, and $i^*$, the
+element of maximum value.
+
+\begin{lemma}[\cite{chen}]\label{lemma:greedy-bound}
+Let $S_G$ be the set computed in Algorithm~\ref{mechanism} and let
+$i^*=\argmax_{i\in\mathcal{N}} V(\{i\})$. We have:
+\begin{displaymath}
+OPT \leq \frac{e}{e-1}\big( 3 V(S_G) + 2 V(i^*)\big).
+\end{displaymath}
+\end{lemma}
+
+Using Proposition~\ref{prop:relaxation} and Lemma~\ref{lemma:greedy-bound},
+we can complete the proof of Theorem~\ref{thm:main} by showing that, for any
+$\varepsilon > 0$, if $OPT_{-i^*}'$, the optimal value of $L$ when $i^*$ is
+excluded from $\mathcal{N}$, has been computed to precision $\varepsilon$,
+then the set $S^*$ allocated by the mechanism is such that:
+\begin{equation} \label{approxbound}
+OPT
+\leq \frac{10e\!-\!3 + \sqrt{64e^2\!-\!24e\!+\!9}}{2(e\!-\!1)} V(S^*)\!
++ \! \varepsilon .
+\end{equation}
+To see this, let $L^*_{-i^*}$ be the true maximum value of $L$ subject to
+$\lambda_{i^*}=0$, $\sum_{i\in \mathcal{N}\setminus\{i^*\}}\lambda_i c_i\leq B$.
+From line 3 of Algorithm~\ref{mechanism}, we have
+$L^*_{-i^*}-\varepsilon\leq OPT_{-i^*}' \leq L^*_{-i^*}+\varepsilon$.
+
+If the condition on line 4 of the algorithm holds, then
+\begin{displaymath}
+ V(i^*) \geq \frac{1}{C}L^*_{-i^*}-\frac{\varepsilon}{C} \geq
+ \frac{1}{C}OPT_{-i^*} -\frac{\varepsilon}{C}.
+\end{displaymath}
+Indeed, $L^*_{-i^*}\geq OPT_{-i^*}$ as $L$ is a fractional relaxation of $V$.
+Also, $OPT \leq OPT_{-i^*} + V(i^*)$; hence,
+\begin{equation}\label{eq:bound1}
+ OPT\leq (1+C)V(i^*) + \varepsilon.
+\end{equation}
+
+If the condition does not hold, by observing that $L^*_{-i^*}\leq L^*_c$ and
+applying Proposition~\ref{prop:relaxation}, we get
+\begin{displaymath}
+ V(i^*)\leq \frac{1}{C}L^*_{-i^*} + \frac{\varepsilon}{C}
+ \leq \frac{1}{C} \big(2 OPT + 2 V(i^*)\big) + \frac{\varepsilon}{C}.
+\end{displaymath}
+Applying Lemma~\ref{lemma:greedy-bound},
+\begin{displaymath}
+ V(i^*) \leq \frac{1}{C}\left(\frac{2e}{e-1}\big(3 V(S_G)
+ + 2 V(i^*)\big) + 2 V(i^*)\right) + \frac{\varepsilon}{C}.
+\end{displaymath}
+Thus, if $C$ is such that $C(e-1) -6e +2 > 0$,
+\begin{align*}
+ V(i^*) \leq \frac{6e}{C(e-1)- 6e + 2} V(S_G)
+ + \frac{(e-1)\varepsilon}{C(e-1)- 6e + 2}.
+\end{align*}
+Finally, using Lemma~\ref{lemma:greedy-bound} again, we get
+\begin{equation}\label{eq:bound2}
+ OPT \leq
+ \frac{3e}{e-1}\left( 1 + \frac{4e}{C (e-1) -6e +2}\right) V(S_G)
+ + \frac{2e\varepsilon}{C(e-1)- 6e + 2}.
+\end{equation}
+To minimize the coefficients of $V(i^*)$ and $V(S_G)$ in \eqref{eq:bound1}
+and \eqref{eq:bound2} respectively, we wish to choose $C$ minimizing
+\begin{displaymath}
+ \max\left(1+C,\frac{3e}{e-1}\left( 1 + \frac{4e}{C (e-1) -6e +2}
+ \right)\right).
+\end{displaymath}
+This function has two local minima, only one of which satisfies $C(e-1) -6e
++2 > 0$; it is given by
+\begin{equation}\label{eq:constant}
+ C = \frac{8e-1 + \sqrt{64e^2-24e + 9}}{2(e-1)}.
+\end{equation}
+For this choice of $C$, $\frac{2e\varepsilon}{C(e-1)- 6e + 2}\leq \varepsilon.$
+Substituting the expression for $C$ into \eqref{eq:bound1} and \eqref{eq:bound2}
+gives the approximation ratio in \eqref{approxbound}, and concludes the proof
+of Theorem~\ref{thm:main}.\hspace*{\stretch{1}}\qed
+
+\subsection{Proof of Theorem~\ref{thm:lowerbound}}
+Suppose, for contradiction, that such a mechanism exists.
+Consider two experiments with dimension $d=2$, such that $x_1 = e_1=[1,0]$,
+$x_2=e_2=[0,1]$ and $c_1=c_2=B/2+\epsilon$. Then one of the two experiments,
+say $x_1$, must be in the set selected by the mechanism; otherwise the ratio
+is unbounded, a contradiction. If $x_1$ lowers its cost to $B/2-\epsilon$, by
+monotonicity it remains in the solution; by threshold payment, it is paid at
+least $B/2+\epsilon$. So $x_2$ is not included in the solution, by budget
+feasibility and individual rationality: hence, the selected set attains value
+$\log 2$, while the optimal value is $2\log 2$.\hspace*{\stretch{1}}\qed
diff --git a/approximation.tex b/approximation.tex
index 926ca1d..81a6d0c 100644
--- a/approximation.tex
+++ b/approximation.tex
@@ -1,16 +1,27 @@
-\EDP{} is NP-hard, but designing a mechanism for this problem will involve
-being able to find an approximation $\tilde{L}^*(c)$ of $OPT$ with the
-following three properties: first, it must be non-decreasing along
-coordinate-axis, second it must have a constant approximation ratio to $OPT$
-and finally, it must be computable in polynomial time.
+As noted above, \EDP{} is NP-hard. Designing a mechanism for this problem, as
+we will see in Section~\ref{sec:mechanism}, requires an approximation of its
+optimal value $OPT$, defined in \eqref{eq:non-strategic}. In addition to being
+computable in polynomial time and having a bounded approximation ratio to
+$OPT$, we will also require this approximation to be non-increasing in the
+following sense:
 
-This approximation will be obtained by introducing a concave optimization
+\begin{definition}
+Let $f$ be a function from $\reals^n$ to $\reals$. We say that $f$ is
+\emph{non-decreasing (resp. non-increasing) along the $i$-th coordinate} iff:
+\begin{displaymath}
+\forall x\in\reals^n,\;
+t\mapsto f(x+ te_i)\; \text{is non-decreasing (resp. non-increasing)},
+\end{displaymath}
+where $e_i$ is the $i$-th canonical basis vector of $\reals^n$.
+
+We say that $f$ is \emph{non-decreasing} (resp. \emph{non-increasing}) iff it
+is non-decreasing (resp. non-increasing) along all coordinates.
+\end{definition}
+
+Such an approximation will be obtained by introducing a concave optimization
 problem with a constant approximation ratio to \EDP{}
-(Proposition~\ref{prop:relaxation}). Using Newton's method, it is then
-possible to solve this concave optimization problem to an arbitrary precision.
-However, this approximation breaks the monotonicity of the approximation.
-Finding a monotone approximate solution to the concave problem will be the
-object of (Section~\ref{sec:monotonicity}).
+(Proposition~\ref{prop:relaxation}) and then showing how to approximately
+solve this problem in a monotone way.
 
 \subsection{A concave relaxation of \EDP}\label{sec:concave}
 
@@ -41,14 +52,14 @@ guarantees for an interesting class of optimization problems through the
 \emph{pipage rounding} framework, which has been successfully applied in
 \citeN{chen, singer-influence}.
 
-However, for the specific function $V$ defined in \eqref{modified}, the
+For the specific function $V$ defined in \eqref{modified}, the
 multi-linear extension cannot be computed (and \emph{a fortiori} maximized) in
 polynomial time.
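+Indeed, evaluating $F$ at a single point already requires summing over all
+$2^n$ subsets,
+\begin{displaymath}
+ F(\lambda) = \sum_{S\subseteq\mathcal{N}} P_\mathcal{N}^\lambda(S)\,V(S),
+\end{displaymath}
+and, for the $\log\det$ objective, no polynomially computable closed form of
+this expectation is known.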
Hence, we introduce a new function $L$:
\begin{equation}\label{eq:our-relaxation}
 \forall\,\lambda\in[0,1]^n,\quad
 L(\lambda) \defeq \log\det\left(I_d + \sum_{i\in\mathcal{N}} \lambda_i x_i\T{x_i}\right),
 \end{equation}
-Note that the relaxation $L$ that we introduced in \eqref{eq:our-relaxation},
+Note that this relaxation
 follows naturally from the \emph{multi-linear} extension by swapping the
 expectation and $V$ in \eqref{eq:multi-linear}:
 \begin{displaymath}
@@ -64,330 +75,73 @@ a relaxation. We define:
 \leq B\right\}
 \end{equation}
 
-The key property of the relaxation $L$, which is our main technical result, is
-that it has constant approximation ratios to the multi-linear extension $F$.
-
-\begin{lemma}\label{lemma:relaxation-ratio}
- % The following inequality holds:
-For all $\lambda\in[0,1]^{n},$
- %\begin{displaymath}
- $ \frac{1}{2}
- \,L(\lambda)\leq
- F(\lambda)\leq L(\lambda)$.
- %\end{displaymath}
-\end{lemma}
-\begin{proof}
- The bound $F_{\mathcal{N}}(\lambda)\leq L_{\mathcal{N}(\lambda)}$ follows by the concavity of the $\log\det$ function.
- To show the lower bound,
- we first prove that $\frac{1}{2}$ is a lower bound of the ratio $\partial_i
- F(\lambda)/\partial_i L(\lambda)$, where
- $\partial_i\, \cdot$ denotes the partial derivative with respect to the
- $i$-th variable.
-
- Let us start by computing the derivatives of $F$ and
- $L$ with respect to the $i$-th component.
- Observe that
- \begin{displaymath}
- \partial_i P_\mathcal{N}^\lambda(S) = \left\{
- \begin{aligned}
- & P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})\;\textrm{if}\;
- i\in S, \\
- & - P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\;\textrm{if}\;
- i\in \mathcal{N}\setminus S. \\
- \end{aligned}\right.
- \end{displaymath}
- Hence,
- \begin{displaymath}
- \partial_i F(\lambda) =
- \sum_{\substack{S\subseteq\mathcal{N}\\ i\in S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})V(S)
- - \sum_{\substack{S\subseteq\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S).
- \end{displaymath}
- Now, using that every $S$ such that $i\in S$ can be uniquely written as
- $S'\cup\{i\}$, we can write:
- \begin{displaymath}
- \partial_i F(\lambda) =
- \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\big(V(S\cup\{i\})
- - V(S)\big).
- \end{displaymath}
- The marginal contribution of $i$ to
- $S$ can be written as
-\begin{align*}
-V(S\cup \{i\}) - V(S)& = \frac{1}{2}\log\det(I_d
- + \T{X_S}X_S + x_i\T{x_i})
- - \frac{1}{2}\log\det(I_d + \T{X_S}X_S)\\
- & = \frac{1}{2}\log\det(I_d + x_i\T{x_i}(I_d
-+\T{X_S}X_S)^{-1})
- = \frac{1}{2}\log(1 + \T{x_i}A(S)^{-1}x_i)
-\end{align*}
-where $A(S) \defeq I_d+ \T{X_S}X_S$, and the last equality follows from the
-Sylvester's determinant identity~\cite{sylvester}.
-% $ V(S\cup\{i\}) - V(S) = \frac{1}{2}\log\left(1 + \T{x_i} A(S)^{-1}x_i\right)$.
-Using this,
- \begin{displaymath}
- \partial_i F(\lambda) = \frac{1}{2}
- \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
- \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)
- \end{displaymath}
- The computation of the derivative of $L$ uses standard matrix
- calculus: writing $\tilde{A}(\lambda) = I_d+\sum_{i\in
- \mathcal{N}}\lambda_ix_i\T{x_i}$,
- \begin{displaymath}
- \det \tilde{A}(\lambda + h\cdot e_i) = \det\big(\tilde{A}(\lambda)
- + hx_i\T{x_i}\big)
- =\det \tilde{A}(\lambda)\big(1+
- h\T{x_i}\tilde{A}(\lambda)^{-1}x_i\big).
- \end{displaymath} - Hence, - \begin{displaymath} - \log\det\tilde{A}(\lambda + h\cdot e_i)= \log\det\tilde{A}(\lambda) - + h\T{x_i}\tilde{A}(\lambda)^{-1}x_i + o(h), - \end{displaymath} - which implies - \begin{displaymath} - \partial_i L(\lambda) - =\frac{1}{2} \T{x_i}\tilde{A}(\lambda)^{-1}x_i. - \end{displaymath} - -For two symmetric matrices $A$ and $B$, we write $A\succ B$ ($A\succeq B$) if -$A-B$ is positive definite (positive semi-definite). This order allows us to -define the notion of a \emph{decreasing} as well as \emph{convex} matrix -function, similarly to their real counterparts. With this definition, matrix -inversion is decreasing and convex over symmetric positive definite -matrices (see Example 3.48 p. 110 in \cite{boyd2004convex}). -In particular, -\begin{gather*} - \forall S\subseteq\mathcal{N},\quad A(S)^{-1} \succeq A(S\cup\{i\})^{-1} -\end{gather*} -as $A(S)\preceq A(S\cup\{i\})$. Observe that - % \begin{gather*} - % \forall -$P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\geq P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\})$ for all $S\subseteq\mathcal{N}\setminus\{i\}$, and - % ,\\ - $P_{\mathcal{N}\setminus\{i\}}^\lambda(S) \geq P_\mathcal{N}^\lambda(S),$ for all $S\subseteq\mathcal{N}$. - %\end{gather*} - Hence, - \begin{align*} - \partial_i F(\lambda) - % & = \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} P_\mathcal{N}^\lambda(S) \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\ - & \geq \frac{1}{4} - \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} - P_{\mathcal{N}\setminus\{i\}}^\lambda(S) - \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\ - &\hspace{-3.5em}+\frac{1}{4} - \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} - P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\}) - \log\Big(1 + \T{x_i}A(S\cup\{i\})^{-1}x_i\Big)\\ - &\geq \frac{1}{4} - \sum_{S\subseteq\mathcal{N}} - P_\mathcal{N}^\lambda(S) - \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big). - \end{align*} - Using that $A(S)\succeq I_d$ we get that $\T{x_i}A(S)^{-1}x_i \leq - \norm{x_i}_2^2 \leq 1$. Moreover, $\log(1+x)\geq x$ for all $x\leq 1$. - Hence, - \begin{displaymath} - \partial_i F(\lambda) \geq - \frac{1}{4} - \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)^{-1}\bigg)x_i. - \end{displaymath} - Finally, using that the inverse is a matrix convex function over symmetric - positive definite matrices: - \begin{displaymath} - \partial_i F(\lambda) \geq - \frac{1}{4} - \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)\bigg)^{-1}x_i - = \frac{1}{4}\T{x_i}\tilde{A}(\lambda)^{-1}x_i - = \frac{1}{2} - \partial_i L(\lambda). - \end{displaymath} - -Having bound the ratio between the partial derivatives, we now bound the ratio $F(\lambda)/L(\lambda)$ from below. Consider the following cases. - First, if the minimum of the ratio - $F(\lambda)/L(\lambda)$ is attained at a point interior to the hypercube, then it is - a critical point, \emph{i.e.}, $\partial_i \big(F(\lambda)/L(\lambda)\big)=0$ for all $i\in \mathcal{N}$; hence, at such a critical point: - \begin{equation}\label{eq:lhopital} - \frac{F(\lambda)}{L(\lambda)} - = \frac{\partial_i F(\lambda)}{\partial_i - L(\lambda)} \geq \frac{1}{2}. 
- \end{equation} - Second, if the minimum is attained as - $\lambda$ converges to zero in, \emph{e.g.}, the $l_2$ norm, by the Taylor approximation, one can write: - \begin{displaymath} - \frac{F(\lambda)}{L(\lambda)} - \sim_{\lambda\rightarrow 0} - \frac{\sum_{i\in \mathcal{N}}\lambda_i\partial_i F(0)} - {\sum_{i\in\mathcal{N}}\lambda_i\partial_i L(0)} - \geq \frac{1}{2}, - \end{displaymath} - \emph{i.e.}, the ratio $\frac{F(\lambda)}{L(\lambda)}$ is necessarily bounded from below by 1/2 for small enough $\lambda$. - Finally, if the minimum is attained on a face of the hypercube $[0,1]^n$ (a face is - defined as a subset of the hypercube where one of the variable is fixed to - 0 or 1), without loss of generality, we can assume that the minimum is - attained on the face where the $n$-th variable has been fixed - to 0 or 1. Then, either the minimum is attained at a point interior to the - face or on a boundary of the face. In the first sub-case, relation - \eqref{eq:lhopital} still characterizes the minimum for $i< n$. - In the second sub-case, by repeating the argument again by induction, we see - that all is left to do is to show that the bound holds for the vertices of - the cube (the faces of dimension 1). The vertices are exactly the binary - points, for which we know that both relaxations are equal to the value - function $V$. Hence, the ratio is equal to 1 on the vertices. -\end{proof} - -We now prove that $F$ admits the following exchange property: let -$\lambda$ be a feasible element of $[0,1]^n$, it is possible to trade one -fractional component of $\lambda$ for another until one of them becomes -integral, obtaining a new element $\tilde{\lambda}$ which is both feasible and -for which $F(\tilde{\lambda})\geq F(\lambda)$. Here, by feasibility of a point -$\lambda$, we mean that it satisfies the budget constraint $\sum_{i=1}^n -\lambda_i c_i \leq B$. This rounding property is referred to in the literature -as \emph{cross-convexity} (see, \emph{e.g.}, \cite{dughmi}), or -$\varepsilon$-convexity by \citeN{pipage}. - -\begin{lemma}[Rounding]\label{lemma:rounding} - For any feasible $\lambda\in[0,1]^{n}$, there exists a feasible - $\bar{\lambda}\in[0,1]^{n}$ such that at most one of its components is - fractional %, that is, lies in $(0,1)$ and: - and $F_{\mathcal{N}}(\lambda)\leq F_{\mathcal{N}}(\bar{\lambda})$. -\end{lemma} -\begin{proof} - We give a rounding procedure which, given a feasible $\lambda$ with at least - two fractional components, returns some feasible $\lambda'$ with one less fractional - component such that $F(\lambda) \leq F(\lambda')$. - - Applying this procedure recursively yields the lemma's result. - Let us consider such a feasible $\lambda$. 
Let $i$ and $j$ be two
- fractional components of $\lambda$ and let us define the following
- function:
- \begin{displaymath}
- F_\lambda(\varepsilon) = F(\lambda_\varepsilon)
- \quad\textrm{where} \quad
- \lambda_\varepsilon = \lambda + \varepsilon\left(e_i-\frac{c_i}{c_j}e_j\right)
- \end{displaymath}
- It is easy to see that if $\lambda$ is feasible, then:
- \begin{equation}\label{eq:convex-interval}
- \forall\varepsilon\in\Big[\max\Big(-\lambda_i,(\lambda_j-1)\frac{c_j}{c_i}\Big), \min\Big(1-\lambda_i, \lambda_j
- \frac{c_j}{c_i}\Big)\Big],\;
- \lambda_\varepsilon\;\;\textrm{is feasible}
- \end{equation}
- Furthermore, the function $F_\lambda$ is convex; indeed:
- \begin{align*}
- F_\lambda(\varepsilon)
- & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}^\lambda(S')}\Big[
- (\lambda_i+\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i,j\})\\
- & + (\lambda_i+\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i\})
- + (1-\lambda_i-\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{j\})\\
- & + (1-\lambda_i-\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S')\Big]
- \end{align*}
- Thus, $F_\lambda$ is a degree 2 polynomial whose dominant coefficient is:
- \begin{displaymath}
- \frac{c_i}{c_j}\mathbb{E}_{S'\sim
- P_{\mathcal{N}\setminus\{i,j\}}^\lambda(S')}\Big[
- V(S'\cup\{i\})+V(S'\cup\{i\})\\
- -V(S'\cup\{i,j\})-V(S')\Big]
- \end{displaymath}
- which is positive by submodularity of $V$. Hence, the maximum of
- $F_\lambda$ over the interval given in \eqref{eq:convex-interval} is
- attained at one of its limit, at which either the $i$-th or $j$-th component of
- $\lambda_\varepsilon$ becomes integral.
-\end{proof}
-
-Using Lemma~\ref{lemma:rounding}, we can relate the multi-linear extension to
-$OPT$, and Lemma~\ref{lemma:relaxation-ratio} relates our relaxation $L$ to the
-multi-linear extension. Putting these together gives us the following result:
+The key property of our relaxation $L$ is that it has a bounded approximation
+ratio to the multi-linear extension $F$. This is one of our main technical
+contributions; it is stated and proved in Lemma~\ref{lemma:relaxation-ratio}
+in the Appendix. Moreover, the multi-linear extension $F$ has an exchange
+property (see Lemma~\ref{lemma:rounding}) which allows us to relate its value
+to $OPT$ through rounding. Together, these properties give the following
+proposition, which is also proved in the Appendix.
 
 \begin{proposition}\label{prop:relaxation}
 $L^*_c \leq 2 OPT + 2\max_{i\in\mathcal{N}}V(i)$.
 \end{proposition}
 
-\begin{proof}
-Let us consider a feasible point $\lambda^*\in[0,1]^{n}$ such that
-$L(\lambda^*) = L^*_c$. By applying Lemma~\ref{lemma:relaxation-ratio} and
-Lemma~\ref{lemma:rounding} we get a feasible point $\bar{\lambda}$ with at most
-one fractional component such that
-\begin{equation}\label{eq:e1}
- L(\lambda^*) \leq 2 F(\bar{\lambda}).
-\end{equation}
- Let $\lambda_i$ denote the fractional component of $\bar{\lambda}$ and $S$
- denote the set whose indicator vector is $\bar{\lambda} - \lambda_i e_i$.
- By definition of the multi-linear extension $F$:
- \begin{displaymath}
- F(\bar{\lambda}) = (1-\lambda_i)V(S) +\lambda_i V(S\cup\{i\}).
- \end{displaymath}
- By submodularity of $V$, $V(S\cup\{i\})\leq V(S) + V(\{i\})$. Hence,
- \begin{displaymath}
- F(\bar{\lambda}) \leq V(S) + V(i).
- \end{displaymath}
- Note that since $\bar{\lambda}$ is feasible, $S$ is also feasible and
- $V(S)\leq OPT$. Hence,
- \begin{equation}\label{eq:e2}
- F(\bar{\lambda}) \leq OPT + \max_{i\in\mathcal{N}} V(i).
- \end{equation}
- Together, \eqref{eq:e1} and \eqref{eq:e2} imply the lemma.
-\end{proof}
-
-\subsection{A monotonous estimator}\label{sec:monotonicity}
+\subsection{Solving a convex problem monotonically}\label{sec:monotonicity}
 
-The $\log\det$ function is concave and self-concordant (see
-\cite{boyd2004convex}), in this case, the analysis of the barrier method in
-in \cite{boyd2004convex} (Section 11.5.5) can be summarized in the following
-lemma:
+Note that the feasible set of Problem~\eqref{eq:primal} increases (with
+respect to inclusion) when the costs decrease. As a consequence, $c\mapsto
+L^*_c$ is non-increasing.
 
-\begin{lemma}\label{lemma:barrier}
-For any $\varepsilon>0$, the barrier method computes an $\varepsilon$-accurate
-approximation of $L^*_c$ in time $O(poly(n,d,\log\log\varepsilon^{-1})$.
-\end{lemma}
+Furthermore, since \eqref{eq:primal} is a convex optimization problem,
+standard convex optimization algorithms (Lemma~\ref{lemma:barrier} gives
+a formal statement for our specific problem) allow us to compute
+an $\varepsilon$-accurate approximation of its optimal value, as defined below.
 
-Note however that even though $L^*_c$ is non-decreasing along coordinate axis
-(if one of the cost decreases, then the feasible set of \eqref{eq:primal}
-increases), this will not necessarily be the case for an $\varepsilon$-accurate
-approximation of $L^*_c$ and Lemma~\ref{lemma:barrier} in itself is not
-sufficient to provide an approximation satisfying the
-properties requested at the beginning of Section~\ref{sec:approximation}.
+\begin{definition}
+$a$ is an $\varepsilon$-accurate approximation of $b$ iff $|a-b|\leq \varepsilon$.
+\end{definition}
 
-The estimator we will construct in this section will have a slightly weaker
-form of coordinate-wise monotonicity: \emph{$\delta$-monotonicity}.
+Note however that an $\varepsilon$-accurate approximation of a non-increasing
+function is not in general non-increasing itself. The goal of this section is
+to approximate $L^*_c$ while preserving monotonicity. The estimator we
+construct has a weaker form of monotonicity that we call
+\emph{$\delta$-monotonicity}.
 
 \begin{definition}
 Let $f$ be a function from $\reals^n$ to $\reals$, we say that $f$ is
-\emph{$\delta$-increasing} iff:
-\begin{displaymath}
+\emph{$\delta$-increasing along the $i$-th coordinate} iff:
+\begin{equation}\label{eq:dd}
 \forall x\in\reals^n,\;
 \forall \mu\geq\delta,\;
- \forall i\in\{1,\ldots,n\},\;
 f(x+\mu e_i)\geq f(x)
-\end{displaymath}
-where $e_i$ is the $i$-th basis vector of $\reals^n$. We define
-\emph{$\delta$-decreasing} functions similarly.
+\end{equation}
+where $e_i$ is the $i$-th canonical basis vector of $\reals^n$. By extension,
+$f$ is $\delta$-increasing iff it is $\delta$-increasing along all coordinates.
+
+We define \emph{$\delta$-decreasing} functions by reversing the inequality in
+\eqref{eq:dd}.
 \end{definition}
 
-For the ease of presentation, we normalize the costs by dividing them by the
-budget $B$ so that the budget constraint in \eqref{eq:primal} now reads
-$\T{c}\lambda\leq 1$.
We consider a perturbation of \eqref{eq:primal} by -introducing: +We consider a perturbation of \eqref{eq:primal} by introducing: \begin{equation}\tag{$P_{c, \alpha}$}\label{eq:perturbed-primal} L^*_c(\alpha) \defeq \max_{\lambda\in[\alpha, 1]^{n}} \left\{L(\lambda) \Big| \sum_{i=1}^{n} \lambda_i c_i - \leq 1\right\} + \leq B\right\} \end{equation} Note that we have $L^*_c = L^*_c(0)$. We will also assume that -$\alpha<\frac{1}{n}$ so that \eqref{eq:perturbed-primal} has at least one -feasible point: $(\frac{1}{n},\ldots,\frac{1}{n})$. - -The $\delta$-decreasing approximation of $L^*_c$ is obtained by computing an -approximate solution of \eqref{eq:perturbed-primal}. +$\alpha<\frac{1}{nB}$ so that \eqref{eq:perturbed-primal} has at least one +feasible point: $(\frac{1}{nB},\ldots,\frac{1}{nB})$. By computing +an approximation of $L^*_c(\alpha)$ as in Algorithm~\ref{alg:monotone}, we +obtain a $\delta$-decreasing approximation of $L^*_c$. -\begin{algorithm}[h] +\begin{algorithm}[t] \caption{}\label{alg:monotone} \begin{algorithmic}[1] - \State $\alpha \gets \varepsilon(\delta+n^2)^{-1} $ + \State $\alpha \gets \varepsilon B^{-1}(\delta+n^2)^{-1} $ \State Compute a $\frac{1}{2^{n+1}}\alpha\delta b$-accurate approximation of - $L^*_c(\alpha)$ using the barrier method + $L^*_c(\alpha)$ \end{algorithmic} \end{algorithm} @@ -399,231 +153,3 @@ approximate solution of \eqref{eq:perturbed-primal}. \end{proposition} -We show that the optimal value of \eqref{eq:perturbed-primal} is close to the -optimal value of \eqref{eq:primal} (Lemma~\ref{lemma:proximity}) while being -well-behaved with respect to changes of the cost -(Lemma~\ref{lemma:monotonicity}). These lemmas together imply -Proposition~\ref{prop:monotonicity}. - - -\begin{lemma}\label{lemma:derivative-bounds} - Let $\partial_i L(\lambda)$ denote the $i$-th derivative of $L$, for $i\in\{1,\ldots, n\}$, then: - \begin{displaymath} - \forall\lambda\in[0, 1]^n,\;\frac{b}{2^n} \leq \partial_i L(\lambda) \leq 1 - \end{displaymath} -\end{lemma} - -\begin{proof} - Let us define: - \begin{displaymath} - S(\lambda)\defeq I_d + \sum_{i=1}^n \lambda_i x_i\T{x_i} - \quad\mathrm{and}\quad - S_k \defeq I_d + \sum_{i=1}^k x_i\T{x_i} - \end{displaymath} - - We have $\partial_i L(\lambda) = \T{x_i}S(\lambda)^{-1}x_i$. Since - $S(\lambda)\geq I_d$, $\partial_i L(\lambda)\leq \T{x_i}x_i \leq 1$, which - is the right-hand side of the lemma. - - For the left-hand side, note that $S(\lambda) \leq S_n$. Hence - $\partial_iL(\lambda)\geq \T{x_i}S_n^{-1}x_i$. 
- - Using the Sherman-Morrison formula, for all $k\geq 1$: - \begin{displaymath} - \T{x_i}S_k^{-1} x_i = \T{x_i}S_{k-1}^{-1}x_i - - \frac{(\T{x_i}S_{k-1}^{-1}x_k)^2}{1+\T{x_k}S_{k-1}^{-1}x_k} - \end{displaymath} - - By the Cauchy-Schwarz inequality: - \begin{displaymath} - (\T{x_i}S_{k-1}^{-1}x_k)^2 \leq \T{x_i}S_{k-1}^{-1}x_i\;\T{x_k}S_{k-1}^{-1}x_k - \end{displaymath} - - Hence: - \begin{displaymath} - \T{x_i}S_k^{-1} x_i \geq \T{x_i}S_{k-1}^{-1}x_i - - \T{x_i}S_{k-1}^{-1}x_i\frac{\T{x_k}S_{k-1}^{-1}x_k}{1+\T{x_k}S_{k-1}^{-1}x_k} - \end{displaymath} - - But $\T{x_k}S_{k-1}^{-1}x_k\leq 1$ and $\frac{a}{1+a}\leq \frac{1}{2}$ if - $0\leq a\leq 1$, so: - \begin{displaymath} - \T{x_i}S_{k}^{-1}x_i \geq \T{x_i}S_{k-1}^{-1}x_i - - \frac{1}{2}\T{x_i}S_{k-1}^{-1}x_i\geq \frac{\T{x_i}S_{k-1}^{-1}x_i}{2} - \end{displaymath} - - By induction: - \begin{displaymath} - \T{x_i}S_n^{-1} x_i \geq \frac{\T{x_i}x_i}{2^n} - \end{displaymath} - - Using that $\T{x_i}{x_i}\geq b$ concludes the proof of the left-hand side - of the lemma's inequality. -\end{proof} - -Let us introduce the lagrangian of problem, \eqref{eq:perturbed-primal}: - -\begin{displaymath} - \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) \defeq L(\lambda) - + \T{\mu}(\lambda-\alpha\mathbf{1}) + \T{\nu}(\mathbf{1}-\lambda) + \xi(1-\T{c}\lambda) -\end{displaymath} -so that: -\begin{displaymath} - L^*_c(\alpha) = \min_{\mu, \nu, \xi\geq 0}\max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) -\end{displaymath} -Similarly, we define $\mathcal{L}_{c}\defeq\mathcal{L}_{c, 0}$ the lagrangian of \eqref{eq:primal}. - -Let $\lambda^*$ be primal optimal for \eqref{eq:perturbed-primal}, and $(\mu^*, -\nu^*, \xi^*)$ be dual optimal for the same problem. In addition to primal and -dual feasibility, the KKT conditions give $\forall i\in\{1, \ldots, n\}$: -\begin{gather*} - \partial_i L(\lambda^*) + \mu_i^* - \nu_i^* - \xi^* c_i = 0\\ - \mu_i^*(\lambda_i^* - \alpha) = 0\\ - \nu_i^*(1 - \lambda_i^*) = 0 -\end{gather*} - -\begin{lemma}\label{lemma:proximity} -We have: -\begin{displaymath} - L^*_c - \alpha n^2\leq L^*_c(\alpha) \leq L^*_c -\end{displaymath} -In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2$. -\end{lemma} - -\begin{proof} - $\alpha\mapsto L^*_c(\alpha)$ is a decreasing function as it is the - maximum value of the $L$ function over a set-decreasing domain, which gives - the rightmost inequality. - - Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c, \alpha})$, that is: - \begin{displaymath} - L^*_{c}(\alpha) = \max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) - \end{displaymath} - - Note that $\mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) - = \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) - - \alpha\T{\mathbf{1}}\mu^*$, and that for any $\lambda$ feasible for - problem \eqref{eq:primal}, $\mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) - \geq L(\lambda)$. Hence, - \begin{displaymath} - L^*_{c}(\alpha) \geq L(\lambda) - \alpha\T{\mathbf{1}}\mu^* - \end{displaymath} - for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for $\lambda$ primal optimal for $\eqref{eq:primal}$: - \begin{equation}\label{eq:local-1} - L^*_{c}(\alpha) \geq L^*_c - \alpha\T{\mathbf{1}}\mu^* - \end{equation} - - Let us denote by the $M$ the support of $\mu^*$, that is $M\defeq - \{i|\mu_i^* > 0\}$, and by $\lambda^*$ a primal optimal point for - $\eqref{eq:perturbed-primal}$. 
From the KKT conditions we see that: - \begin{displaymath} - M \subseteq \{i|\lambda_i^* = \alpha\} - \end{displaymath} - - - Let us first assume that $|M| = 0$, then $\T{\mathbf{1}}\mu^*=0$ and the lemma follows. - - We will now assume that $|M|\geq 1$. In this case $\T{c}\lambda^* - = 1$, otherwise we could increase the coordinates of $\lambda^*$ in $M$, - which would increase the value of the objective function and contradict the - optimality of $\lambda^*$. Note also, that $|M|\leq n-1$, otherwise, since - $\alpha< \frac{1}{n}$, we would have $\T{c}\lambda^*\ < 1$, which again - contradicts the optimality of $\lambda^*$. Let us write: - \begin{displaymath} - 1 = \T{c}\lambda^* = \alpha\sum_{i\in M}c_i + \sum_{i\in \bar{M}}\lambda_i^*c_i - \leq \alpha |M| + (n-|M|)\max_{i\in \bar{M}} c_i - \end{displaymath} - That is: - \begin{equation}\label{local-2} - \max_{i\in\bar{M}} c_i \geq \frac{1 - |M|\alpha}{n-|M|}> \frac{1}{n} - \end{equation} - where the last inequality uses again that $\alpha<\frac{1}{n}$. From the - KKT conditions, we see that for $i\in M$, $\nu_i^* = 0$ and: - \begin{equation}\label{local-3} - \mu_i^* = \xi^*c_i - \partial_i L(\lambda^*)\leq \xi^*c_i\leq \xi^* - \end{equation} - since $\partial_i L(\lambda^*)\geq 0$ and $c_i\leq 1$. - - Furthermore, using the KKT conditions again, we have that: - \begin{equation}\label{local-4} - \xi^* \leq \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i} - = \frac{1}{\max_{i\in\bar{M}} c_i} - \end{equation} - where the last inequality uses Lemma~\ref{lemma:derivative-bounds}. - - Combining \eqref{local-2}, \eqref{local-3} and \eqref{local-4}, we get that: - \begin{displaymath} - \sum_{i\in M}\mu_i^* \leq |M|\xi^* \leq n\xi^*\leq \frac{n}{\max_{i\in\bar{M}} c_i} \leq n^2 - \end{displaymath} - - This implies that: - \begin{displaymath} - \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^*\leq n^2 - \end{displaymath} - which in addition to \eqref{eq:local-1} proves the lemma. -\end{proof} - -\begin{lemma}\label{lemma:monotonicity} - If $c'$ = $(c_i', c_{-i})$, with $c_i'\leq c_i - \delta$, we have: - \begin{displaymath} - L^*_{c'}(\alpha) \geq L^*_c(\alpha) + \frac{\alpha\delta b}{2^n} - \end{displaymath} -\end{lemma} - -\begin{proof} - Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c', \alpha})$. Noting that: - \begin{displaymath} - \mathcal{L}_{c', \alpha}(\lambda, \mu^*, \nu^*, \xi^*) \geq - \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) + \lambda_i\xi^*\delta, - \end{displaymath} - we get similarly to Lemma~\ref{lemma:proximity}: - \begin{displaymath} - L^*_{c'}(\alpha) \geq L(\lambda) + \lambda_i\xi^*\delta - \end{displaymath} - for any $\lambda$ feasible for \eqref{eq:perturbed-primal}. In particular, for $\lambda^*$ primal optimal for \eqref{eq:perturbed-primal}: - \begin{displaymath} - L^*_{c'}(\alpha) \geq L^*_{c}(\alpha) + \alpha\xi^*\delta - \end{displaymath} - since $\lambda_i^*\geq \alpha$. - - Using the KKT conditions for $(P_{c', \alpha})$, we can write: - \begin{displaymath} - \xi^* = \inf_{i:\lambda^{'*}_i>\alpha} \frac{\T{x_i}S(\lambda^{'*})^{-1}x_i}{c_i'} - \end{displaymath} - with $\lambda^{'*}$ optimal for $(P_{c', \alpha})$. Since $c_i'\leq 1$, using Lemma~\ref{lemma:derivative-bounds}, we get that $\xi^*\geq \frac{b}{2^n}$, which concludes the proof. -\end{proof} - - -\subsubsection*{End of the proof of Proposition~\ref{prop:monotonicity}} - -Let $\tilde{L}^*_c$ be the approximation computed by -Algorithm~\ref{alg:monotone}. 
-\begin{enumerate}
-    \item Using Lemma~\ref{lemma:proximity}:
-\begin{displaymath}
-    |\tilde{L}^*_c - L^*_c| \leq |\tilde{L}^*_c - L^*_c(\alpha)| + |L^*_c(\alpha) - L^*_c|
-    \leq \alpha\delta + \alpha n^2 = \varepsilon
-\end{displaymath}
-which proves the $\varepsilon$-accuracy (the first term is bounded by the
-accuracy $A\leq\alpha\delta$ of item 3 below).
-
-\item For the $\delta$-decreasingness, let $c' = (c_i', c_{-i})$ with $c_i'\leq
-    c_i-\delta$; then:
-\begin{displaymath}
-    \tilde{L}^*_{c'} \geq L^*_{c'}(\alpha) - \frac{\alpha\delta b}{2^{n+1}}
-    \geq L^*_c(\alpha) + \frac{\alpha\delta b}{2^{n+1}}
-    \geq \tilde{L}^*_c
-\end{displaymath}
-where the first and last inequalities come from the accuracy of the
-approximation, and the middle inequality follows from
-Lemma~\ref{lemma:monotonicity}.
-
-\item The accuracy of the approximation $\tilde{L}^*_c$ is:
-\begin{displaymath}
-    A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2)}
-\end{displaymath}
-
-Note that:
-\begin{displaymath}
-    \log\log A^{-1} = O\bigg(\log\log\frac{1}{\varepsilon\delta b} + \log n\bigg)
-\end{displaymath}
-Using Lemma~\ref{lemma:barrier} concludes the proof of the running time.\qed
-\end{enumerate}
diff --git a/definitions.tex b/definitions.tex
index a05edae..d474853 100644
--- a/definitions.tex
+++ b/definitions.tex
@@ -8,7 +8,7 @@
 \newtheorem{theorem}[lemma]{Theorem}
 \newtheorem{corollary}[lemma]{Corollary}
 \theoremstyle{definition}
-\newtheorem*{definition}{Definition}
+\newtheorem{definition}[lemma]{Definition}
 \newcommand{\citeN}{\citet}
 %\newcommand*{\defeq}{\stackrel{\text{def}}{=}}
 \newcommand*{\defeq}{\equiv}
diff --git a/intro.tex b/intro.tex
index 6002b46..0712540 100644
--- a/intro.tex
+++ b/intro.tex
@@ -68,7 +68,7 @@
 From a technical perspective, we present a convex relaxation of \eqref{obj}, and
 %Our approach to mechanisms for experimental design --- by
 % optimizing the information gain in parameters like $\beta$ which are estimated through the data analysis process --- is general. We give examples of this approach beyond linear regression to a general class that includes logistic regression and learning binary functions, and show that the corresponding budgeted mechanism design problem is also expressed through a submodular optimization. Hence, prior work \cite{chen,singer-mechanisms} immediately applies, and gives randomized, universally truthful, polynomial time, constant factor approximation mechanisms for problems in this class. Getting deterministic, truthful, polynomial time mechanisms with a constant approximation factor for this class or specific problems in it, like we did for \EDP, remains an open problem.
-In what follows, we describe related work in Section~\ref{sec:related}. We briefly review experimental design and budget feasible mechanisms in Section~\ref{sec:peel} and define \SEDP\ formally. In Section~\ref{sec:main} we present our mechanism for \SEDP\ and state our main results, which are proved in Section~\ref{sec:proofs}. A generalization of our framework to machine learning tasks beyond linear regression is presented in Section~\ref{sec:ext}.
+In what follows, we describe related work in Section~\ref{sec:related}. We briefly review experimental design and budget feasible mechanisms in Section~\ref{sec:peel} and define \SEDP\ formally. In Section~\ref{sec:main} we present our mechanism for \SEDP\ and state our main results. A generalization of our framework to machine learning tasks beyond linear regression is presented in Section~\ref{sec:ext}.
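Returning to item 3 of the proof of Proposition~\ref{prop:monotonicity} concluded above: the accuracy parameter $A$ is exponentially small in $n$, but only $\log\log A^{-1}$ enters the running time. A quick numerical illustration, with arbitrarily chosen values (ours):

    # Illustration (our numbers) that only log log(1/A) matters for the runtime.
    import math
    eps, delta, b, n = 1e-2, 1e-2, 0.5, 20
    A = eps * delta * b / (2 ** (n + 1) * (delta + n ** 2))
    print(A)                          # ~6e-14, exponentially small in n
    print(math.log(math.log(1 / A)))  # ~3.4 = O(log log(1/(eps*delta*b)) + log n)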
 \junk{
diff --git a/main.tex b/main.tex
index 0e8457c..ff26c5d 100644
--- a/main.tex
+++ b/main.tex
@@ -53,9 +53,9 @@
 Instead, \citeN{singer-mechanisms} and \citeN{chen} suggest to approximate
 $OPT_{-i^*}$ by a quantity $OPT'_{-i^*}$ satisfying the following properties:
 \begin{itemize}
-    \item $OPT'_{-i^*}$ must not depend on agent $i^*$'s reported
-    cost and must be monotone: it can only increase when agents decrease
-    their costs. This is to ensure the monotonicity of the allocation function.
+    \item $OPT'_{-i^*}$ must not depend on agent $i^*$'s reported cost and must
+    be decreasing with respect to the costs. This is to ensure the monotonicity
+    of the allocation function.
     \item $OPT'_{-i^*}$ must be close to $OPT_{-i^*}$ to maintain a bounded
     approximation ratio.
     \item $OPT'_{-i^*}$ must be computable in polynomial time.
@@ -74,18 +74,23 @@
 $\delta$-decreasing, the resulting mechanism will be $\delta$-truthful as
 defined below.
 
 \begin{definition}
-Let $\mathcal{M}= (S, p)$ a mechanism, and let $s$ denote the indicator
-function of $S$ ($s_i(c) = \id_{i\in S(c)}$). We say that $\mathcal{M}$ is
+Let $\mathcal{M}= (S, p)$ be a mechanism, and let $s_i$ denote the indicator
+function of $i\in S(c)$, $s_i(c) \defeq \id_{i\in S(c)}$. We say that $\mathcal{M}$ is
 $\delta$-truthful iff:
 \begin{displaymath}
     \forall c\in\reals_+^n,\;
-\forall\mu\geq\delta,\;
+\forall\mu\;\text{with}\; |\mu|\geq\delta,\;
 \forall i\in\{1,\ldots,n\},\;
 p_i(c)-s_i(c)\cdot c_i \geq p_i(c+\mu e_i) - s_i(c+\mu e_i)\cdot c_i
 \end{displaymath}
-where $e_i$ is the $i$-th basis vector of $\reals^n$.
+where $e_i$ is the $i$-th canonical basis vector of $\reals^n$.
 \end{definition}
+Intuitively, $\delta$-truthfulness means that no agent can benefit from
+misreporting her cost by more than $\delta$. In practical applications, where
+bids are discretized, the mechanism is effectively truthful as soon as
+$\delta$ is smaller than the discretization step.
+
 $\delta$-truthfulness will follow from $\delta$-monotonicity by the following
 variant of Myerson's theorem.
diff --git a/paper.tex b/paper.tex
index 8a8775c..7b821b6 100644
--- a/paper.tex
+++ b/paper.tex
@@ -5,9 +5,9 @@
 \usepackage{amsmath,amsfonts}
 \usepackage{algorithm, algpseudocode}
 \usepackage{bbm,color,verbatim}
-\usepackage[pagebackref=true,breaklinks=true,colorlinks=true]{hyperref}
 \usepackage{amsthm}
 \input{definitions}
+\usepackage[pagebackref=true,breaklinks=true,colorlinks=true]{hyperref}
 \title{Budget Feasible Mechanisms for Experimental Design}
 \author{
 Thibaut Horel\\École Normale Supérieure\\\texttt{thibaut.horel@ens.fr}
@@ -33,10 +33,8 @@
 \input{problem}
 \section{Approximation results}\label{sec:approximation}
 \input{approximation}
-\section{Mechanism for \SEDP{}}
+\section{Mechanism for \SEDP{}}\label{sec:mechanism}
 \input{main}
-\section{Proofs}\label{sec:proofs}
-\input{proofs}
 \section{Extensions}\label{sec:ext}
 \input{general}
 %\section{Conclusion}
@@ -44,6 +42,6 @@
 \bibliographystyle{abbrvnat}
 \bibliography{notes}
 \newpage
-\section*{Appendix}
+\section{Appendix}
 \input{appendix}
 \end{document}
diff --git a/problem.tex b/problem.tex
index 6bfac6b..a5db2ac 100644
--- a/problem.tex
+++ b/problem.tex
@@ -146,22 +146,20 @@
 $ \mathcal{N}$ of experiments to be purchased, while the payment function
 returns a vector of payments $[p_i(c)]_{i\in \mathcal{N}}$. Let $s_i(c) =
 \id_{i\in S(c)}$ be the binary indicator of $i\in S(c)$.
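To make the $\delta$-truthfulness inequality from the main.tex hunk above concrete, here is a toy check on a fixed-threshold rule with threshold payments; the rule, the threshold $t$, and the numbers are ours and unrelated to the paper's mechanism. A truthful rule satisfies the inequality for every misreport $\mu$, in particular for all $|\mu|\geq\delta$:

    # Toy check (ours, unrelated to the paper's mechanism) of the
    # delta-truthfulness inequality: allocate iff the reported cost is at
    # most t, and pay the threshold t. This rule is truthful, hence
    # delta-truthful for every delta >= 0.
    def utility(c_true, c_reported, t=0.5):
        s = 1 if c_reported <= t else 0   # allocation s_i
        return t * s - s * c_true         # threshold payment minus incurred cost

    for c in [0.2, 0.5, 0.8]:
        for mu in [-0.4, -0.1, 0.1, 0.4]:  # misreports with |mu| >= delta
            assert utility(c, c) >= utility(c, c + mu)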
 As usual, we seek mechanisms that have the following properties \cite{singer-mechanisms}:
 \begin{itemize}
-\item \emph{Normalization.} Unallocated experiments receive zero payments, \emph{i.e.},
-    \begin{align}s_i(c)=0\text{ implies }p_i(c)=0.\label{normal}\end{align}
-\item\emph{Individual Rationality.} Payments for allocated experiments exceed costs, \emph{i.e.},
-    \begin{align} p_i(c)\geq c_i\cdot s_i(c).\label{ir}\end{align}
-\item \emph{No Positive Transfers.} Payments are non-negative, \emph{i.e.},
-\begin{align}p_i(c)\geq 0\label{pt}.\end{align}
-    \item \emph{Truthfulness/Incentive Compatibility.} An experiment subject has no incentive to missreport her true cost. Formally, let $c_{-i}$
-    be a vector of costs of all agents except $i$. Then, % $c_{-i}$ being the same). Let $[p_i']_{i\in \mathcal{N}} = p(c_i',
-% c_{-i})$, then the
-\begin{align} p_i(c_i,c_{-i}) - s_i(c_i,c_{-i})\cdot c_i \geq p_i(c_i',c_{-i}) - s_i(c_i',c_{-i})\cdot c_i, \label{truthful}\end{align} for every $i \in \mathcal{N}$ and every two cost vectors $(c_i,c_{-i})$ and $(c_i',c_{-i})$.
-    \item \emph{Budget Feasibility.} The sum of the payments should not exceed the
-    budget constraint, \emph{i.e.}
-    %\begin{displaymath}
-    \begin{align} \sum_{i\in\mathcal{N}} p_i(c) \leq B.\label{budget}\end{align}
-    %\end{displaymath}
+\item \emph{Normalization.} Unallocated experiments receive zero payments: $s_i(c)=0\text{ implies }p_i(c)=0.\label{normal}$
+\item\emph{Individual Rationality.} Payments for allocated experiments cover
+the corresponding costs: $p_i(c)\geq c_i\cdot s_i(c).\label{ir}$
+\item \emph{No Positive Transfers.} Payments are non-negative: $p_i(c)\geq 0\label{pt}$.
+\item \emph{Truthfulness/Incentive Compatibility.} Reporting her true cost is
+a dominant strategy for an experiment subject. Formally, let $c_{-i}$
+    be a vector of costs of all agents except $i$. Then, $p_i(c_i,c_{-i})
+    - s_i(c_i,c_{-i})\cdot c_i \geq p_i(c_i',c_{-i}) - s_i(c_i',c_{-i})\cdot c_i,
+    \label{truthful}$ for every $i \in \mathcal{N}$ and every two cost vectors $(c_i,c_{-i})$ and $(c_i',c_{-i})$.
+    \item \emph{Budget Feasibility.} The sum of the payments should not exceed
+    the budget, \emph{i.e.}, $\sum_{i\in\mathcal{N}} p_i(c) \leq
+    B.\label{budget}$
 \end{itemize}
+
 %We define the \emph{Strategic} version of \EDP{} as
 %\begin{center}
 %\textsc{StrategicExperimentalDesignProblem} (SEDP)
 %\begin{align*}
 %\end{align*}
 %%\end{subequations}
 %\end{center}
+
 Given that the full information problem \EDP{} is NP-hard, we further seek
 mechanisms that have the following two additional properties:
 \begin{itemize}
     \item \emph{Approximation Ratio.} The value of the allocated set should
     not be too far from the optimum value of the full information case
     \eqref{eq:non-strategic}. Formally, there must exist some $\alpha\geq 1$
-    such that:
-    \begin{displaymath}
-        OPT \leq \alpha V(S).
-    \end{displaymath}
+    such that $OPT \leq \alpha V(S)$.
 % The approximation ratio captures the \emph{price of truthfulness}, \emph{i.e.}, the relative value loss incurred by adding the truthfulness constraint.
 % to the value function maximization. We stress here that the approximation ratio is defined with respect \emph{to the maximal value in the full information case}.
 \item \emph{Computational Efficiency.} The allocation and payment function
@@ -614,19 +610,6 @@
 TODO?
 Explain which points are the most valuable: points aligned along directions
 in which the spread of the already existing points is small.
-\subsection{Auction}
-
-TODO Explain the optimization problem, why it has to be formulated as an auction
-problem. Explain the goals:
-\begin{itemize}
-    \item truthful
-    \item individually rational
-    \item budget feasible
-    \item has a good approximation ratio
-
-TODO Explain what is already known: it is ok when the function is submodular. When
-should we introduce the notion of submodularity?
-\end{itemize}
 \end{comment}
diff --git a/proofs.tex b/proofs.tex
index 4cefc25..e69de29 100644
--- a/proofs.tex
+++ b/proofs.tex
@@ -1,122 +0,0 @@
-\subsection{Proof of Theorem~\ref{thm:main}}\label{sec:proofofmainthm}
-
-We now present the proof of Theorem~\ref{thm:main}. $\delta$-truthfulness and
-individual rationality follow from $\delta$-monotonicity and threshold
-payments. $\delta$-monotonicity and budget feasibility follow by the same
-steps as the analysis of \citeN{chen}; for the sake of completeness, we
-restate their proof in the Appendix.
-
-The complexity of the mechanism is given by the following lemma.
-
-\begin{lemma}[Complexity]\label{lemma:complexity}
-    For any $\varepsilon > 0$ and any $\delta>0$, the complexity of the mechanism is
-    $O\big(\text{poly}(n, d, \log\log\frac{1}{b\varepsilon\delta})\big)$.
-\end{lemma}
-
-\begin{proof}
-    The value function $V$ in \eqref{modified} can be computed in time
-    $O(\text{poly}(n, d))$ and the mechanism only involves a linear
-    number of queries to the function $V$.
-
-    By Proposition~\ref{prop:monotonicity}, line 3 of Algorithm~\ref{mechanism}
-    can be computed in time
-    $O(\text{poly}(n, d, \log\log\frac{1}{b\varepsilon\delta}))$. Hence the
-    allocation function's complexity is as stated.
-    %Payments can be easily computed in time $O(\text{poly}(n, d))$ as in prior work.
-\junk{
-    Using Singer's characterization of the threshold payments
-    \cite{singer-mechanisms}, one can verify that they can be computed in time
-    $O(\text{poly}(n, d))$.
-    }
-\end{proof}
-
-Finally, we prove the approximation ratio of the mechanism.
-
-We use the following lemma from \cite{chen}, which bounds $OPT$ in terms of
-the value of $S_G$, as computed in Algorithm~\ref{mechanism}, and $i^*$, the
-element of maximum value.
-
-\begin{lemma}[\cite{chen}]\label{lemma:greedy-bound}
-Let $S_G$ be the set computed in Algorithm~\ref{mechanism} and let
-$i^*=\argmax_{i\in\mathcal{N}} V(\{i\})$. We have:
-\begin{displaymath}
-OPT \leq \frac{e}{e-1}\big( 3 V(S_G) + 2 V(i^*)\big).
-\end{displaymath}
-\end{lemma}
-
-Using Proposition~\ref{prop:relaxation} and Lemma~\ref{lemma:greedy-bound}, we
-can complete the proof of Theorem~\ref{thm:main} by showing that, for any
-$\varepsilon > 0$, if $OPT_{-i^*}'$, the optimal value of $L$ when $i^*$ is
-excluded from $\mathcal{N}$, has been computed to a precision $\varepsilon$,
-then the set $S^*$ allocated by the mechanism is such that:
-\begin{equation} \label{approxbound}
-OPT
-\leq \frac{10e\!-\!3 + \sqrt{64e^2\!-\!24e\!+\!9}}{2(e\!-\!1)} V(S^*)\!
-+ \! \varepsilon .
-\end{equation}
-To see this, let $L^*_{-i^*}$ be the true maximum value of $L$ subject to
-$\lambda_{i^*}=0$, $\sum_{i\in \mathcal{N}\setminus\{i^*\}}c_i\lambda_i\leq B$. From line
-3 of Algorithm~\ref{mechanism}, we have
-$L^*_{-i^*}-\varepsilon\leq OPT_{-i^*}' \leq L^*_{-i^*}+\varepsilon$.
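On the complexity lemma above: each value-oracle query is a single $d\times d$ log-determinant, which is where the $O(\text{poly}(n,d))$ per-query cost comes from. A minimal sketch, assuming the unmodified value function $V(S)=\frac{1}{2}\log\det(I_d+\T{X_S}X_S)$ from the problem definition (the function names are ours):

    # Minimal sketch (ours) of one value-oracle query:
    # V(S) = 1/2 log det(I_d + X_S^T X_S), a single d x d log-determinant.
    import numpy as np

    def V(X, S):
        X_S = X[sorted(S)]                      # rows of X indexed by S
        d = X.shape[1]
        _, logdet = np.linalg.slogdet(np.eye(d) + X_S.T @ X_S)
        return 0.5 * logdet

    X = np.array([[1.0, 0.0], [0.0, 1.0]])      # two orthogonal experiments, d = 2
    print(V(X, {0}), V(X, {0, 1}))              # 0.5*log 2 and log 2

The case analysis of the approximation bound resumes below.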
-
-If the condition on line 4 of the algorithm holds, then
-\begin{displaymath}
-    V(i^*) \geq \frac{1}{C}L^*_{-i^*}-\frac{\varepsilon}{C} \geq
-    \frac{1}{C}OPT_{-i^*} -\frac{\varepsilon}{C}.
-\end{displaymath}
-Indeed, $L^*_{-i^*}\geq OPT_{-i^*}$ as $L$ is a fractional relaxation of $V$.
-Also, $OPT \leq OPT_{-i^*} + V(i^*)$, hence,
-\begin{equation}\label{eq:bound1}
-    OPT\leq (1+C)V(i^*) + \varepsilon.
-\end{equation}
-
-If the condition does not hold, by observing that $L^*_{-i^*}\leq L^*_c$ and
-applying Proposition~\ref{prop:relaxation}, we get
-\begin{displaymath}
-    V(i^*)\leq \frac{1}{C}L^*_{-i^*} + \frac{\varepsilon}{C}
-    \leq \frac{1}{C} \big(2 OPT + 2 V(i^*)\big) + \frac{\varepsilon}{C}.
-\end{displaymath}
-Applying Lemma~\ref{lemma:greedy-bound},
-\begin{displaymath}
-    V(i^*) \leq \frac{1}{C}\left(\frac{2e}{e-1}\big(3 V(S_G)
-    + 2 V(i^*)\big) + 2 V(i^*)\right) + \frac{\varepsilon}{C}.
-\end{displaymath}
-Thus, if $C$ is such that $C(e-1) -6e +2 > 0$,
-\begin{align*}
-    V(i^*) \leq \frac{6e}{C(e-1)- 6e + 2} V(S_G)
-    + \frac{(e-1)\varepsilon}{C(e-1)- 6e + 2}.
-\end{align*}
-Finally, using Lemma~\ref{lemma:greedy-bound} again, we get
-\begin{equation}\label{eq:bound2}
-    OPT \leq
-    \frac{3e}{e-1}\left( 1 + \frac{4e}{C (e-1) -6e +2}\right) V(S_G)
-    + \frac{2e\varepsilon}{C(e-1)- 6e + 2}.
-\end{equation}
-To minimize the coefficients of $V(i^*)$ and $V(S_G)$ in \eqref{eq:bound1}
-and \eqref{eq:bound2} respectively, we wish to choose $C$ to minimize
-\begin{displaymath}
-    \max\left(1+C,\frac{3e}{e-1}\left( 1 + \frac{4e}{C (e-1) -6e +2}
-    \right)\right).
-\end{displaymath}
-This expression has two local minima; only one of them satisfies $C(e-1) -6e
-+2 \geq 0$, namely
-\begin{equation}\label{eq:constant}
-    C = \frac{8e-1 + \sqrt{64e^2-24e + 9}}{2(e-1)}.
-\end{equation}
-For this choice of $C$, $\frac{2e\varepsilon}{C(e-1)- 6e + 2}\leq \varepsilon.$
-Substituting this expression for $C$ into \eqref{eq:bound1} and \eqref{eq:bound2}
-gives the approximation ratio in \eqref{approxbound}, and concludes the proof
-of Theorem~\ref{thm:main}.\hspace*{\stretch{1}}\qed
-
-
-\subsection{Proof of Theorem \ref{thm:lowerbound}}
-
-Suppose, for contradiction, that such a mechanism exists. Consider two
-experiments with dimension $d=2$, such that $x_1 = e_1=[1,0]$, $x_2=e_2=[0,1]$
-and $c_1=c_2=B/2+\epsilon$. Then one of the two experiments, say $x_1$, must
-be in the set selected by the mechanism, otherwise the approximation ratio is
-unbounded, a contradiction. If $x_1$ lowers its cost to $B/2-\epsilon$, by
-monotonicity it remains in the solution; by threshold payment, it is paid at
-least $B/2+\epsilon$. So $x_2$ is not included in the solution by budget
-feasibility and individual rationality: hence, the selected set attains
-a value $\frac{1}{2}\log 2$, while the optimal value is
-$\log 2$.\hspace*{\stretch{1}}\qed
-- 
cgit v1.2.3-70-g09d2
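For reference, the constant in \eqref{eq:constant} and the resulting ratio in \eqref{approxbound} evaluate numerically as follows (a quick check, ours):

    # Numerical evaluation (ours) of the constant C in eq. (eq:constant) and of
    # the resulting approximation ratio in eq. (approxbound).
    import math
    e = math.e
    root = math.sqrt(64 * e**2 - 24 * e + 9)
    C = (8 * e - 1 + root) / (2 * (e - 1))
    ratio = (10 * e - 3 + root) / (2 * (e - 1))
    print(C, ratio)          # C ~ 11.98, ratio ~ 12.98; indeed ratio = 1 + C
    assert abs(ratio - (1 + C)) < 1e-12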