summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--appendix.tex3
-rw-r--r--approximation.tex599
-rw-r--r--definitions.tex1
-rw-r--r--dual.tex263
-rw-r--r--main.tex25
-rw-r--r--paper.tex40
-rw-r--r--proofs.tex327
8 files changed, 629 insertions, 631 deletions
diff --git a/.gitignore b/.gitignore
index 1d236e2..73b4789 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,6 @@
*.aux
*.bbl
*.blg
+*.out
+*.brf
/*.pdf
diff --git a/appendix.tex b/appendix.tex
index b1d35d6..53bc328 100644
--- a/appendix.tex
+++ b/appendix.tex
@@ -22,7 +22,8 @@ Our mechanism for \EDP{} is monotone and budget feasible.
\frac{B}{2}\frac{V(S_i\cup\{i\})-V(S_i)}{V(S_i\cup\{i\})}
\leq \frac{B}{2}\frac{V(S_i'\cup\{i\})-V(S_i')}{V(S_i'\cup\{i\})}
\end{align*}
- by the monotonicity and submodularity of $V$. Hence $i\in S_G'$. As $OPT'_{-i^*}$, is the optimal value of \eqref{relax} under relaxation $L$ when $i^*$ is excluded from $\mathcal{N}$, reducing the costs can only increase this value, so under $c'_i\leq c_i$ the greedy set is still allocated and $s_i(c_i',c_{-i}) =1$.
+ by the monotonicity and submodularity of $V$. Hence $i\in S_G'$. As
+ $OPT'_{-i^*}$, is the optimal value of \eqref{eq:primal} under relaxation $L$ when $i^*$ is excluded from $\mathcal{N}$, reducing the costs can only increase this value, so under $c'_i\leq c_i$ the greedy set is still allocated and $s_i(c_i',c_{-i}) =1$.
Suppose now that when $i$ reports $c_i$, $OPT'_{-i^*} < C V(i^*)$. Then $s_i(c_i,c_{-i})=1$ iff $i = i^*$.
Reporting $c_{i^*}'\leq c_{i^*}$ does not change $V(i^*)$ nor
$OPT'_{-i^*} \leq C V(i^*)$; thus $s_{i^*}(c_{i^*}',c_{-i^*})=1$, so the mechanism is monotone.
diff --git a/approximation.tex b/approximation.tex
index 8b13789..ced53ed 100644
--- a/approximation.tex
+++ b/approximation.tex
@@ -1 +1,600 @@
+Even though \EDP{} is NP-hard, designing a mechanism for this problem will
+involve being able to find an approximation $\tilde{L}^*(c)$ of $OPT$
+monotonous with respect to coordinate-wise changes of the cost: if $c$ and $c'$
+are two cost vectors such that $c'=(c_i', c_{-i})$ with $c_i' \leq c_i$, then
+we want $\tilde{L}(c')\geq \tilde{L}(c)$. Furthermore, we seek an approximation
+that can be computed in polynomial time.
+This approximation will be obtained by introducing a concave optimization
+problem with a constant approximation ratio to \EDP{}
+(Proposition~\ref{prop:relaxation}). Using Newton's method, it is then
+possible to solve this concave optimization problem to an arbitrary precision.
+However, this approximation breaks the monotonicity of the approximation.
+Finding a monotone approximate solution to the concave problem will be the
+object of (Section~\ref{sec:monotonicity}).
+
+\subsection{A concave relaxation of \EDP}\label{sec:concave}
+
+Let us introduce a new function $L$:
+\begin{equation}\label{eq:our-relaxation}
+\forall\,\lambda\in[0,1]^n,\quad L(\lambda) \defeq
+\log\det\left(I_d + \sum_{i\in\mathcal{N}} \lambda_i x_i\T{x_i}\right),
+\end{equation}
+
+This function is a relaxation of the value function $V$ defined in
+\eqref{modified} in the following sense: $L(\id_S) = V(S)$ for all
+$S\subseteq\mathcal{N}$, where $\id_S$ denotes the indicator vector of $S$.
+
+The optimization program \eqref{eq:non-strategic} extends naturally to such
+a relaxation. We define:
+\begin{equation}\tag{$P_c$}\label{eq:primal}
+ L^*_c \defeq \max_{\lambda\in[0, 1]^{n}}
+ \left\{L(\lambda) \Big| \sum_{i=1}^{n} \lambda_i c_i
+ \leq B\right\}
+\end{equation}
+
+\begin{proposition}\label{prop:relaxation}
+ $ L^*(c) \leq 2 OPT
+ + 2\max_{i\in\mathcal{N}}V(i)$.
+\end{proposition}
+
+The proof of this proposition follows the \emph{pipage rounding} framework of
+\citeN{pipage}.
+
+This framework uses the \emph{multi-linear} extension $F$ of the submodular
+function $V$. Let $P_\mathcal{N}^\lambda(S)$ be the probability of choosing the
+set $S$ if we select each element $i$ in $\mathcal{N}$ independently with
+probability $\lambda_i$:
+\begin{displaymath}
+ P_\mathcal{N}^\lambda(S) \defeq \prod_{i\in S} \lambda_i
+ \prod_{i\in\mathcal{N}\setminus S}( 1 - \lambda_i).
+\end{displaymath}
+Then, the \emph{multi-linear} extension $F$ is defined by:
+\begin{displaymath}
+ F(\lambda)
+ \defeq \mathbb{E}_{S\sim P_\mathcal{N}^\lambda}\big[V(S)\big]
+ = \sum_{S\subseteq\mathcal{N}} P_\mathcal{N}^\lambda(S) V(S)
+\end{displaymath}
+
+For \EDP{} the multi-linear extension can be written:
+\begin{equation}\label{eq:multi-linear-logdet}
+ F(\lambda) = \mathbb{E}_{S\sim
+ P_\mathcal{N}^\lambda}\bigg[\log\det \big(I_d + \sum_{i\in S} x_i\T{x_i}\big) \Big].
+\end{equation}
+Note that the relaxation $L$ that we introduced in \eqref{eq:our-relaxation},
+follows naturally from the \emph{multi-linear} relaxation by swapping the
+expectation and the $\log\det$ in \eqref{eq:multi-linear-logdet}:
+\begin{displaymath}
+ L(\lambda) = \log\det\left(\mathbb{E}_{S\sim
+ P_\mathcal{N}^\lambda}\bigg[I_d + \sum_{i\in S} x_i\T{x_i} \bigg]\right).
+\end{displaymath}
+
+The proof proceeds as follows:
+\begin{itemize}
+\item First, we prove that $F$ admits the following rounding property: let
+$\lambda$ be a feasible element of $[0,1]^n$, it is possible to trade one
+fractional component of $\lambda$ for another until one of them becomes
+integral, obtaining a new element $\tilde{\lambda}$ which is both feasible and
+for which $F(\tilde{\lambda})\geq F(\lambda)$. Here, by feasibility of a point
+$\lambda$, we mean that it satisfies the budget constraint $\sum_{i=1}^n
+\lambda_i c_i \leq B$. This rounding property is referred to in the literature
+as \emph{cross-convexity} (see, \emph{e.g.}, \cite{dughmi}), or
+$\varepsilon$-convexity by \citeN{pipage}. This is stated and proven in
+Lemma~\ref{lemma:rounding} and allows us to bound $F$ in terms of $OPT$.
+\item Next, we prove the central result of bounding $L$ appropriately in terms
+of the multi-linear relaxation $F$ (Lemma \ref{lemma:relaxation-ratio}).
+\item Finally, we conclude the proof of Proposition~\ref{prop:relaxation} by
+combining Lemma~\ref{lemma:rounding} and Lemma~\ref{lemma:relaxation-ratio}.
+\end{itemize}
+
+\begin{lemma}[Rounding]\label{lemma:rounding}
+ For any feasible $\lambda\in[0,1]^{n}$, there exists a feasible
+ $\bar{\lambda}\in[0,1]^{n}$ such that at most one of its components is
+ fractional %, that is, lies in $(0,1)$ and:
+ and $F_{\mathcal{N}}(\lambda)\leq F_{\mathcal{N}}(\bar{\lambda})$.
+\end{lemma}
+\begin{proof}
+ We give a rounding procedure which, given a feasible $\lambda$ with at least
+ two fractional components, returns some feasible $\lambda'$ with one less fractional
+ component such that $F(\lambda) \leq F(\lambda')$.
+
+ Applying this procedure recursively yields the lemma's result.
+ Let us consider such a feasible $\lambda$. Let $i$ and $j$ be two
+ fractional components of $\lambda$ and let us define the following
+ function:
+ \begin{displaymath}
+ F_\lambda(\varepsilon) = F(\lambda_\varepsilon)
+ \quad\textrm{where} \quad
+ \lambda_\varepsilon = \lambda + \varepsilon\left(e_i-\frac{c_i}{c_j}e_j\right)
+ \end{displaymath}
+ It is easy to see that if $\lambda$ is feasible, then:
+ \begin{equation}\label{eq:convex-interval}
+ \forall\varepsilon\in\Big[\max\Big(-\lambda_i,(\lambda_j-1)\frac{c_j}{c_i}\Big), \min\Big(1-\lambda_i, \lambda_j
+ \frac{c_j}{c_i}\Big)\Big],\;
+ \lambda_\varepsilon\;\;\textrm{is feasible}
+ \end{equation}
+ Furthermore, the function $F_\lambda$ is convex; indeed:
+ \begin{align*}
+ F_\lambda(\varepsilon)
+ & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}^\lambda(S')}\Big[
+ (\lambda_i+\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i,j\})\\
+ & + (\lambda_i+\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i\})
+ + (1-\lambda_i-\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{j\})\\
+ & + (1-\lambda_i-\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S')\Big]
+ \end{align*}
+ Thus, $F_\lambda$ is a degree 2 polynomial whose dominant coefficient is:
+ \begin{displaymath}
+ \frac{c_i}{c_j}\mathbb{E}_{S'\sim
+ P_{\mathcal{N}\setminus\{i,j\}}^\lambda(S')}\Big[
+ V(S'\cup\{i\})+V(S'\cup\{i\})\\
+ -V(S'\cup\{i,j\})-V(S')\Big]
+ \end{displaymath}
+ which is positive by submodularity of $V$. Hence, the maximum of
+ $F_\lambda$ over the interval given in \eqref{eq:convex-interval} is
+ attained at one of its limit, at which either the $i$-th or $j$-th component of
+ $\lambda_\varepsilon$ becomes integral.
+\end{proof}
+
+\begin{lemma}\label{lemma:relaxation-ratio}
+ % The following inequality holds:
+For all $\lambda\in[0,1]^{n},$
+ %\begin{displaymath}
+ $ \frac{1}{2}
+ \,L(\lambda)\leq
+ F(\lambda)\leq L(\lambda)$.
+ %\end{displaymath}
+\end{lemma}
+\begin{proof}
+ The bound $F_{\mathcal{N}}(\lambda)\leq L_{\mathcal{N}(\lambda)}$ follows by the concavity of the $\log\det$ function.
+ To show the lower bound,
+ we first prove that $\frac{1}{2}$ is a lower bound of the ratio $\partial_i
+ F(\lambda)/\partial_i L(\lambda)$, where
+ $\partial_i\, \cdot$ denotes the partial derivative with respect to the
+ $i$-th variable.
+
+ Let us start by computing the derivatives of $F$ and
+ $L$ with respect to the $i$-th component.
+ Observe that
+ \begin{displaymath}
+ \partial_i P_\mathcal{N}^\lambda(S) = \left\{
+ \begin{aligned}
+ & P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})\;\textrm{if}\;
+ i\in S, \\
+ & - P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\;\textrm{if}\;
+ i\in \mathcal{N}\setminus S. \\
+ \end{aligned}\right.
+ \end{displaymath}
+ Hence,
+ \begin{displaymath}
+ \partial_i F(\lambda) =
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})V(S)
+ - \sum_{\substack{S\subseteq\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S).
+ \end{displaymath}
+ Now, using that every $S$ such that $i\in S$ can be uniquely written as
+ $S'\cup\{i\}$, we can write:
+ \begin{displaymath}
+ \partial_i F(\lambda) =
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\big(V(S\cup\{i\})
+ - V(S)\big).
+ \end{displaymath}
+ The marginal contribution of $i$ to
+ $S$ can be written as
+\begin{align*}
+V(S\cup \{i\}) - V(S)& = \frac{1}{2}\log\det(I_d
+ + \T{X_S}X_S + x_i\T{x_i})
+ - \frac{1}{2}\log\det(I_d + \T{X_S}X_S)\\
+ & = \frac{1}{2}\log\det(I_d + x_i\T{x_i}(I_d +
+\T{X_S}X_S)^{-1})
+ = \frac{1}{2}\log(1 + \T{x_i}A(S)^{-1}x_i)
+\end{align*}
+where $A(S) \defeq I_d+ \T{X_S}X_S$, and the last equality follows from the
+Sylvester's determinant identity~\cite{sylvester}.
+% $ V(S\cup\{i\}) - V(S) = \frac{1}{2}\log\left(1 + \T{x_i} A(S)^{-1}x_i\right)$.
+Using this,
+ \begin{displaymath}
+ \partial_i F(\lambda) = \frac{1}{2}
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
+ \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)
+ \end{displaymath}
+ The computation of the derivative of $L$ uses standard matrix
+ calculus: writing $\tilde{A}(\lambda) = I_d+\sum_{i\in
+ \mathcal{N}}\lambda_ix_i\T{x_i}$,
+ \begin{displaymath}
+ \det \tilde{A}(\lambda + h\cdot e_i) = \det\big(\tilde{A}(\lambda)
+ + hx_i\T{x_i}\big)
+ =\det \tilde{A}(\lambda)\big(1+
+ h\T{x_i}\tilde{A}(\lambda)^{-1}x_i\big).
+ \end{displaymath}
+ Hence,
+ \begin{displaymath}
+ \log\det\tilde{A}(\lambda + h\cdot e_i)= \log\det\tilde{A}(\lambda)
+ + h\T{x_i}\tilde{A}(\lambda)^{-1}x_i + o(h),
+ \end{displaymath}
+ which implies
+ \begin{displaymath}
+ \partial_i L(\lambda)
+ =\frac{1}{2} \T{x_i}\tilde{A}(\lambda)^{-1}x_i.
+ \end{displaymath}
+
+For two symmetric matrices $A$ and $B$, we write $A\succ B$ ($A\succeq B$) if
+$A-B$ is positive definite (positive semi-definite). This order allows us to
+define the notion of a \emph{decreasing} as well as \emph{convex} matrix
+function, similarly to their real counterparts. With this definition, matrix
+inversion is decreasing and convex over symmetric positive definite
+matrices (see Example 3.48 p. 110 in \cite{boyd2004convex}).
+In particular,
+\begin{gather*}
+ \forall S\subseteq\mathcal{N},\quad A(S)^{-1} \succeq A(S\cup\{i\})^{-1}
+\end{gather*}
+as $A(S)\preceq A(S\cup\{i\})$. Observe that
+ % \begin{gather*}
+ % \forall
+$P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\geq P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\})$ for all $S\subseteq\mathcal{N}\setminus\{i\}$, and
+ % ,\\
+ $P_{\mathcal{N}\setminus\{i\}}^\lambda(S) \geq P_\mathcal{N}^\lambda(S),$ for all $S\subseteq\mathcal{N}$.
+ %\end{gather*}
+ Hence,
+ \begin{align*}
+ \partial_i F(\lambda)
+ % & = \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} P_\mathcal{N}^\lambda(S) \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\
+ & \geq \frac{1}{4}
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
+ \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\
+ &\hspace{-3.5em}+\frac{1}{4}
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\})
+ \log\Big(1 + \T{x_i}A(S\cup\{i\})^{-1}x_i\Big)\\
+ &\geq \frac{1}{4}
+ \sum_{S\subseteq\mathcal{N}}
+ P_\mathcal{N}^\lambda(S)
+ \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big).
+ \end{align*}
+ Using that $A(S)\succeq I_d$ we get that $\T{x_i}A(S)^{-1}x_i \leq
+ \norm{x_i}_2^2 \leq 1$. Moreover, $\log(1+x)\geq x$ for all $x\leq 1$.
+ Hence,
+ \begin{displaymath}
+ \partial_i F(\lambda) \geq
+ \frac{1}{4}
+ \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)^{-1}\bigg)x_i.
+ \end{displaymath}
+ Finally, using that the inverse is a matrix convex function over symmetric
+ positive definite matrices:
+ \begin{displaymath}
+ \partial_i F(\lambda) \geq
+ \frac{1}{4}
+ \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)\bigg)^{-1}x_i
+ = \frac{1}{4}\T{x_i}\tilde{A}(\lambda)^{-1}x_i
+ = \frac{1}{2}
+ \partial_i L(\lambda).
+ \end{displaymath}
+
+Having bound the ratio between the partial derivatives, we now bound the ratio $F(\lambda)/L(\lambda)$ from below. Consider the following cases.
+ First, if the minimum of the ratio
+ $F(\lambda)/L(\lambda)$ is attained at a point interior to the hypercube, then it is
+ a critical point, \emph{i.e.}, $\partial_i \big(F(\lambda)/L(\lambda)\big)=0$ for all $i\in \mathcal{N}$; hence, at such a critical point:
+ \begin{equation}\label{eq:lhopital}
+ \frac{F(\lambda)}{L(\lambda)}
+ = \frac{\partial_i F(\lambda)}{\partial_i
+ L(\lambda)} \geq \frac{1}{2}.
+ \end{equation}
+ Second, if the minimum is attained as
+ $\lambda$ converges to zero in, \emph{e.g.}, the $l_2$ norm, by the Taylor approximation, one can write:
+ \begin{displaymath}
+ \frac{F(\lambda)}{L(\lambda)}
+ \sim_{\lambda\rightarrow 0}
+ \frac{\sum_{i\in \mathcal{N}}\lambda_i\partial_i F(0)}
+ {\sum_{i\in\mathcal{N}}\lambda_i\partial_i L(0)}
+ \geq \frac{1}{2},
+ \end{displaymath}
+ \emph{i.e.}, the ratio $\frac{F(\lambda)}{L(\lambda)}$ is necessarily bounded from below by 1/2 for small enough $\lambda$.
+ Finally, if the minimum is attained on a face of the hypercube $[0,1]^n$ (a face is
+ defined as a subset of the hypercube where one of the variable is fixed to
+ 0 or 1), without loss of generality, we can assume that the minimum is
+ attained on the face where the $n$-th variable has been fixed
+ to 0 or 1. Then, either the minimum is attained at a point interior to the
+ face or on a boundary of the face. In the first sub-case, relation
+ \eqref{eq:lhopital} still characterizes the minimum for $i< n$.
+ In the second sub-case, by repeating the argument again by induction, we see
+ that all is left to do is to show that the bound holds for the vertices of
+ the cube (the faces of dimension 1). The vertices are exactly the binary
+ points, for which we know that both relaxations are equal to the value
+ function $V$. Hence, the ratio is equal to 1 on the vertices.
+\end{proof}
+
+To conclude the proof of Proposition~\ref{prop:relaxation}, let us consider
+a feasible point $\lambda^*\in[0,1]^{n}$ such that $L(\lambda^*) = L^*_c$. By
+applying Lemma~\ref{lemma:relaxation-ratio} and Lemma~\ref{lemma:rounding} we
+get a feasible point $\bar{\lambda}$ with at most one fractional component such
+that
+\begin{equation}\label{eq:e1}
+ L(\lambda^*) \leq 2 F(\bar{\lambda}).
+\end{equation}
+ Let $\lambda_i$ denote the fractional component of $\bar{\lambda}$ and $S$
+ denote the set whose indicator vector is $\bar{\lambda} - \lambda_i e_i$.
+ By definition of the multi-linear extension $F$:
+ \begin{displaymath}
+ F(\bar{\lambda}) = (1-\lambda_i)V(S) +\lambda_i V(S\cup\{i\}).
+ \end{displaymath}
+ By submodularity of $V$, $V(S\cup\{i\})\leq V(S) + V(\{i\})$. Hence,
+ \begin{displaymath}
+ F(\bar{\lambda}) \leq V(S) + V(i).
+ \end{displaymath}
+ Note that since $\bar{\lambda}$ is feasible, $S$ is also feasible and
+ $V(S)\leq OPT$. Hence,
+ \begin{equation}\label{eq:e2}
+ F(\bar{\lambda}) \leq OPT + \max_{i\in\mathcal{N}} V(i).
+ \end{equation}
+ Together, \eqref{eq:e1} and \eqref{eq:e2} imply the lemma. \hspace*{\stretch{1}}\qed
+
+\subsection{A monotonous Newton's estimator}\label{sec:monotonicity}
+
+\textbf{TODO} Explain that we only get approximate monotonicity, but even
+that is not immediate since the variation induced by a change of cost on
+coordinate $i$ depends on the allocation at this coordinate which can be
+arbitrarily small.
+
+For the ease of presentation, we normalize the costs by dividing them by the
+budget $B$ so that the budget constraint in \eqref{eq:primal} now reads
+$\T{c}\lambda\leq 1$.
+
+\begin{proposition}\label{prop:monotonicity}
+ Let $\delta\in(0,1]$. For any $\varepsilon\in(0,1]$, there exists
+ an algorithm which computes an approximate solution $\tilde{L}^*_c$ to
+ \eqref{eq:primal} such that:
+ \begin{enumerate}
+ \item $|\tilde{L}^*_c - L^*_c| \leq \varepsilon$
+ \item for all $c' = (c_i', c_{-i})$ with $c_i'\leq c_i-\delta$, $\tilde{L}^*_c \leq \tilde{L}^*_{c'}$
+ \item the routine's running time is $O\big(poly(n, d, \log\log\frac{1}{b\varepsilon\delta})\big)$
+ \end{enumerate}
+\end{proposition}
+
+We consider a perturbation of \eqref{eq:primal} by introducing:
+\begin{equation}\tag{$P_{c, \alpha}$}\label{eq:perturbed-primal}
+ L^*_c(\alpha) \defeq \max_{\lambda\in[\alpha, 1]^{n}}
+ \left\{L(\lambda) \Big| \sum_{i=1}^{n} \lambda_i c_i
+ \leq B\right\}
+\end{equation}
+Note that we have $L^*_c = L^*_c(0)$. We will also assume that
+$\alpha<\frac{1}{n}$ so that \eqref{eq:perturbed-primal} has at least one
+feasible point: $(\frac{1}{n},\ldots,\frac{1}{n})$.
+
+Having introduced this perturbed problem, we show that its optimal value is
+close to the optimal value of \eqref{eq:primal} (Lemma~\ref{lemma:proximity})
+while being well-behaved with respect to changes of the cost
+(Lemma~\ref{lemma:monotonicity}). These lemmas together imply
+Proposition~\ref{prop:monotonicity}.
+
+
+\begin{lemma}\label{lemma:derivative-bounds}
+ Let $\partial_i L(\lambda)$ denote the $i$-th derivative of $L$, for $i\in\{1,\ldots, n\}$, then:
+ \begin{displaymath}
+ \forall\lambda\in[0, 1]^n,\;\frac{b}{2^n} \leq \partial_i L(\lambda) \leq 1
+ \end{displaymath}
+\end{lemma}
+
+\begin{proof}
+ Let us define:
+ \begin{displaymath}
+ S(\lambda)\defeq I_d + \sum_{i=1}^n \lambda_i x_i\T{x_i}
+ \quad\mathrm{and}\quad
+ S_k \defeq I_d + \sum_{i=1}^k x_i\T{x_i}
+ \end{displaymath}
+
+ We have $\partial_i L(\lambda) = \T{x_i}S(\lambda)^{-1}x_i$. Since
+ $S(\lambda)\geq I_d$, $\partial_i L(\lambda)\leq \T{x_i}x_i \leq 1$, which
+ is the right-hand side of the lemma.
+
+ For the left-hand side, note that $S(\lambda) \leq S_n$. Hence
+ $\partial_iL(\lambda)\geq \T{x_i}S_n^{-1}x_i$.
+
+ Using the Sherman-Morrison formula, for all $k\geq 1$:
+ \begin{displaymath}
+ \T{x_i}S_k^{-1} x_i = \T{x_i}S_{k-1}^{-1}x_i
+ - \frac{(\T{x_i}S_{k-1}^{-1}x_k)^2}{1+\T{x_k}S_{k-1}^{-1}x_k}
+ \end{displaymath}
+
+ By the Cauchy-Schwarz inequality:
+ \begin{displaymath}
+ (\T{x_i}S_{k-1}^{-1}x_k)^2 \leq \T{x_i}S_{k-1}^{-1}x_i\;\T{x_k}S_{k-1}^{-1}x_k
+ \end{displaymath}
+
+ Hence:
+ \begin{displaymath}
+ \T{x_i}S_k^{-1} x_i \geq \T{x_i}S_{k-1}^{-1}x_i
+ - \T{x_i}S_{k-1}^{-1}x_i\frac{\T{x_k}S_{k-1}^{-1}x_k}{1+\T{x_k}S_{k-1}^{-1}x_k}
+ \end{displaymath}
+
+ But $\T{x_k}S_{k-1}^{-1}x_k\leq 1$ and $\frac{a}{1+a}\leq \frac{1}{2}$ if
+ $0\leq a\leq 1$, so:
+ \begin{displaymath}
+ \T{x_i}S_{k}^{-1}x_i \geq \T{x_i}S_{k-1}^{-1}x_i
+ - \frac{1}{2}\T{x_i}S_{k-1}^{-1}x_i\geq \frac{\T{x_i}S_{k-1}^{-1}x_i}{2}
+ \end{displaymath}
+
+ By induction:
+ \begin{displaymath}
+ \T{x_i}S_n^{-1} x_i \geq \frac{\T{x_i}x_i}{2^n}
+ \end{displaymath}
+
+ Using that $\T{x_i}{x_i}\geq b$ concludes the proof of the left-hand side
+ of the lemma's inequality.
+\end{proof}
+
+Let us introduce the lagrangian of problem, \eqref{eq:perturbed-primal}:
+
+\begin{displaymath}
+ \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) \defeq L(\lambda)
+ + \T{\mu}(\lambda-\alpha\mathbf{1}) + \T{\nu}(\mathbf{1}-\lambda) + \xi(1-\T{c}\lambda)
+\end{displaymath}
+so that:
+\begin{displaymath}
+ L^*_c(\alpha) = \min_{\mu, \nu, \xi\geq 0}\max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi)
+\end{displaymath}
+Similarly, we define $\mathcal{L}_{c}\defeq\mathcal{L}_{c, 0}$ the lagrangian of \eqref{eq:primal}.
+
+Let $\lambda^*$ be primal optimal for \eqref{eq:perturbed-primal}, and $(\mu^*,
+\nu^*, \xi^*)$ be dual optimal for the same problem. In addition to primal and
+dual feasibility, the KKT conditions give $\forall i\in\{1, \ldots, n\}$:
+\begin{gather*}
+ \partial_i L(\lambda^*) + \mu_i^* - \nu_i^* - \xi^* c_i = 0\\
+ \mu_i^*(\lambda_i^* - \alpha) = 0\\
+ \nu_i^*(1 - \lambda_i^*) = 0
+\end{gather*}
+
+\begin{lemma}\label{lemma:proximity}
+We have:
+\begin{displaymath}
+ L^*_c - \alpha n^2\leq L^*_c(\alpha) \leq L^*_c
+\end{displaymath}
+In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2$.
+\end{lemma}
+
+\begin{proof}
+ $\alpha\mapsto L^*_c(\alpha)$ is a decreasing function as it is the
+ maximum value of the $L$ function over a set-decreasing domain, which gives
+ the rightmost inequality.
+
+ Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c, \alpha})$, that is:
+ \begin{displaymath}
+ L^*_{c}(\alpha) = \max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*)
+ \end{displaymath}
+
+ Note that $\mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*)
+ = \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*)
+ - \alpha\T{\mathbf{1}}\mu^*$, and that for any $\lambda$ feasible for
+ problem \eqref{eq:primal}, $\mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*)
+ \geq L(\lambda)$. Hence,
+ \begin{displaymath}
+ L^*_{c}(\alpha) \geq L(\lambda) - \alpha\T{\mathbf{1}}\mu^*
+ \end{displaymath}
+ for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for $\lambda$ primal optimal for $\eqref{eq:primal}$:
+ \begin{equation}\label{eq:local-1}
+ L^*_{c}(\alpha) \geq L^*_c - \alpha\T{\mathbf{1}}\mu^*
+ \end{equation}
+
+ Let us denote by the $M$ the support of $\mu^*$, that is $M\defeq
+ \{i|\mu_i^* > 0\}$, and by $\lambda^*$ a primal optimal point for
+ $\eqref{eq:perturbed-primal}$. From the KKT conditions we see that:
+ \begin{displaymath}
+ M \subseteq \{i|\lambda_i^* = \alpha\}
+ \end{displaymath}
+
+
+ Let us first assume that $|M| = 0$, then $\T{\mathbf{1}}\mu^*=0$ and the lemma follows.
+
+ We will now assume that $|M|\geq 1$. In this case $\T{c}\lambda^*
+ = 1$, otherwise we could increase the coordinates of $\lambda^*$ in $M$,
+ which would increase the value of the objective function and contradict the
+ optimality of $\lambda^*$. Note also, that $|M|\leq n-1$, otherwise, since
+ $\alpha< \frac{1}{n}$, we would have $\T{c}\lambda^*\ < 1$, which again
+ contradicts the optimality of $\lambda^*$. Let us write:
+ \begin{displaymath}
+ 1 = \T{c}\lambda^* = \alpha\sum_{i\in M}c_i + \sum_{i\in \bar{M}}\lambda_i^*c_i
+ \leq \alpha |M| + (n-|M|)\max_{i\in \bar{M}} c_i
+ \end{displaymath}
+ That is:
+ \begin{equation}\label{local-2}
+ \max_{i\in\bar{M}} c_i \geq \frac{1 - |M|\alpha}{n-|M|}> \frac{1}{n}
+ \end{equation}
+ where the last inequality uses again that $\alpha<\frac{1}{n}$. From the
+ KKT conditions, we see that for $i\in M$, $\nu_i^* = 0$ and:
+ \begin{equation}\label{local-3}
+ \mu_i^* = \xi^*c_i - \partial_i L(\lambda^*)\leq \xi^*c_i\leq \xi^*
+ \end{equation}
+ since $\partial_i L(\lambda^*)\geq 0$ and $c_i\leq 1$.
+
+ Furthermore, using the KKT conditions again, we have that:
+ \begin{equation}\label{local-4}
+ \xi^* \leq \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i}
+ = \frac{1}{\max_{i\in\bar{M}} c_i}
+ \end{equation}
+ where the last inequality uses Lemma~\ref{lemma:derivative-bounds}.
+
+ Combining \eqref{local-2}, \eqref{local-3} and \eqref{local-4}, we get that:
+ \begin{displaymath}
+ \sum_{i\in M}\mu_i^* \leq |M|\xi^* \leq n\xi^*\leq \frac{n}{\max_{i\in\bar{M}} c_i} \leq n^2
+ \end{displaymath}
+
+ This implies that:
+ \begin{displaymath}
+ \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^*\leq n^2
+ \end{displaymath}
+ which in addition to \eqref{eq:local-1} proves the lemma.
+\end{proof}
+
+\begin{lemma}\label{lemma:monotonicity}
+ If $c'$ = $(c_i', c_{-i})$, with $c_i'\leq c_i - \delta$, we have:
+ \begin{displaymath}
+ L^*_{c'}(\alpha) \geq L^*_c(\alpha) + \frac{\alpha\delta b}{2^n}
+ \end{displaymath}
+\end{lemma}
+
+\begin{proof}
+ Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c', \alpha})$. Noting that:
+ \begin{displaymath}
+ \mathcal{L}_{c', \alpha}(\lambda, \mu^*, \nu^*, \xi^*) \geq
+ \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) + \lambda_i\xi^*\delta,
+ \end{displaymath}
+ we get similarly to Lemma~\ref{lemma:proximity}:
+ \begin{displaymath}
+ L^*_{c'}(\alpha) \geq L(\lambda) + \lambda_i\xi^*\delta
+ \end{displaymath}
+ for any $\lambda$ feasible for \eqref{eq:perturbed-primal}. In particular, for $\lambda^*$ primal optimal for \eqref{eq:perturbed-primal}:
+ \begin{displaymath}
+ L^*_{c'}(\alpha) \geq L^*_{c}(\alpha) + \alpha\xi^*\delta
+ \end{displaymath}
+ since $\lambda_i^*\geq \alpha$.
+
+ Using the KKT conditions for $(P_{c', \alpha})$, we can write:
+ \begin{displaymath}
+ \xi^* = \inf_{i:\lambda^{'*}_i>\alpha} \frac{\T{x_i}S(\lambda^{'*})^{-1}x_i}{c_i'}
+ \end{displaymath}
+ with $\lambda^{'*}$ optimal for $(P_{c', \alpha})$. Since $c_i'\leq 1$, using Lemma~\ref{lemma:derivative-bounds}, we get that $\xi^*\geq \frac{b}{2^n}$, which concludes the proof.
+\end{proof}
+
+
+\subsubsection*{End of the proof of Proposition~\ref{prop:monotonicity}}
+
+Let $\varepsilon$ in $(0, 1]$. The routine works as follows: set $\alpha\defeq
+\varepsilon(\delta + n^2)^{-1}$ and return an approximation $\tilde{L}^*_c$ of
+$L^*_c(\alpha)$ with an accuracy $\frac{1}{2^{n+1}}\alpha\delta b$ computed by
+a standard convex optimization algorithm. Note that this choice of $\alpha$
+implies $\alpha<\frac{1}{n}$ as required.
+
+\begin{enumerate}
+ \item using Lemma~\ref{lemma:proximity}:
+\begin{displaymath}
+ |\tilde{L}^*_c - L^*_c| \leq |\tilde{L}^*_c - L^*_c(\alpha)| + |L^*_c(\alpha) - L^*_c|
+ \leq \alpha\delta + \alpha n^2 = \varepsilon
+\end{displaymath}
+
+\item let $c' = (c_i', c_{-i})$ with $c_i'\leq c_i-\delta$, then:
+\begin{displaymath}
+ \tilde{L}^*_{c'} \geq L^*_{c'} - \frac{\alpha\delta b}{2^{n+1}}
+ \geq L^*_c + \frac{\alpha\delta b}{2^{n+1}}
+ \geq \tilde{L}^*_c
+\end{displaymath}
+where the first and inequality come from the accuracy of the approximation, and
+the inner inequality follows from Lemma~\ref{lemma:monotonicity}.
+
+\item the accuracy of the approximation $\tilde{L}^*_c$ is:
+\begin{displaymath}
+ A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2)}
+\end{displaymath}
+
+The function $L$ is well-known to be concave and even self-concordant (see
+\emph{e.g.}, \cite{boyd2004convex}). In this case, the analysis of Newton's
+method for self-concordant functions in \cite{boyd2004convex}, shows that
+finding the maximum of $L$ to any precision $A$ can be done in
+$O(\log\log A^{-1})$ iterations. Note that:
+\begin{displaymath}
+ \log\log A^{-1} = O\bigg(\log\log\frac{1}{\varepsilon\delta b} + \log n\bigg)
+\end{displaymath}
+Furthermore, each iteration of Newton's method can be done in time $O\big(poly(n,
+d)\big)$.\qed
+\end{enumerate}
diff --git a/definitions.tex b/definitions.tex
index 616012e..281b290 100644
--- a/definitions.tex
+++ b/definitions.tex
@@ -1,6 +1,7 @@
\newcommand{\mutual}{\ensuremath{{I}}}
\newcommand{\entropy}{\ensuremath{{H}}}
\newtheorem{lemma}{Lemma}
+\newtheorem{proposition}{Proposition}
\newtheorem{fact}{Fact}
\newtheorem{example}{Example}
\newtheorem{prop}{Proposition}
diff --git a/dual.tex b/dual.tex
deleted file mode 100644
index a2d28c6..0000000
--- a/dual.tex
+++ /dev/null
@@ -1,263 +0,0 @@
-\documentclass{article}
-\usepackage[T1]{fontenc}
-\usepackage[utf8]{inputenc}
-\usepackage{amsmath, amsfonts, amsthm}
-\newtheorem{proposition}{Proposition}
-\input{definitions}
-
-\begin{document}
-Let $c$ be a cost vector in $[0,1]^n$, and $x_1,\ldots,x_n$, $n$ vectors in
-$\mathbf{R}^d$ such that for all $i\in\{1,\ldots,n\}$, $b\leq \T{x_i}{x_i}\leq
-1$ for some $b\in(0,1]$. Let us consider the following convex optimization
-problem:
-\begin{equation}\tag{$P_c$}\label{eq:primal}
- \begin{split}
- \mathrm{maximize}\quad & L(\lambda) \defeq\log\det\left(I_d + \sum_{i=1}^n
- \lambda_i x_i x_i^T\right)\\
- \textrm{subject to}\quad & c^T\lambda \leq 1 \;;\; 0\leq\lambda\leq \mathbf{1}
-\end{split}
-\end{equation}
-We denote by $L^*_c$ its optimal value.
-
-Let $\alpha\in\mathbf{R}^+$, consider the perturbed optimization problem:
-\begin{equation}\tag{$P_{c, \alpha}$}\label{eq:perturbed-primal}
- \begin{split}
- \mathrm{maximize}\quad & L(\lambda) \defeq\log\det\left(I_d + \sum_{i=1}^n
- \lambda_i x_i x_i^T\right)\\
- \textrm{subject to}\quad & c^T\lambda \leq 1 \;;\; \alpha\leq\lambda\leq \mathbf{1}
-\end{split}
-\end{equation}
-and denote by $L^*_c(\alpha)$ its optimal value. Note that we have $L^*_c = L^*_c(0)$.
-
-We will assume that $\alpha<\frac{1}{n}$ so that \eqref{eq:perturbed-primal} has at
-least one feasible point: $(\frac{1}{n},\ldots,\frac{1}{n})$.
-
-Let us introduce the lagrangian of problem, \eqref{eq:perturbed-primal}:
-\begin{displaymath}
- \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) \defeq L(\lambda)
- + \T{\mu}(\lambda-\alpha\mathbf{1}) + \T{\nu}(\mathbf{1}-\lambda) + \xi(1-\T{c}\lambda)
-\end{displaymath}
-so that:
-\begin{displaymath}
- L^*_c(\alpha) = \min_{\mu, \nu, \xi\geq 0}\max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi)
-\end{displaymath}
-Similarly, we define $\mathcal{L}_{c}\defeq\mathcal{L}_{c, 0}$ the lagrangian of \eqref{eq:primal}.
-
-Let $\lambda^*$ be primal optimal for \eqref{eq:perturbed-primal}, and $(\mu^*,
-\nu^*, \xi^*)$ be dual optimal for the same problem. In addition to primal and
-dual feasibility, the KKT conditions give $\forall i\in\{1, \ldots, n\}$:
-\begin{gather*}
- \partial_i L(\lambda^*) + \mu_i^* - \nu_i^* - \xi^* c_i = 0\\
- \mu_i^*(\lambda_i^* - \alpha) = 0\\
- \nu_i^*(1 - \lambda_i^*) = 0
-\end{gather*}
-
-\begin{lemma}\label{lemma:derivative-bounds}
- Let $\partial_i L(\lambda)$ denote the $i$-th derivative of $L$, for $i\in\{1,\ldots, n\}$, then:
- \begin{displaymath}
- \forall\lambda\in[0, 1]^n,\;\frac{b}{2^n} \leq \partial_i L(\lambda) \leq 1
- \end{displaymath}
-\end{lemma}
-
-\begin{proof}
- Let us define:
- \begin{displaymath}
- S(\lambda)\defeq I_d + \sum_{i=1}^n \lambda_i x_i\T{x_i}
- \quad\mathrm{and}\quad
- S_k \defeq I_d + \sum_{i=1}^k x_i\T{x_i}
- \end{displaymath}
-
- We have $\partial_i L(\lambda) = \T{x_i}S(\lambda)^{-1}x_i$. Since
- $S(\lambda)\geq I_d$, $\partial_i L(\lambda)\leq \T{x_i}x_i \leq 1$, which
- is the right-hand side of the lemma.
-
- For the left-hand side, note that $S(\lambda) \leq S_n$. Hence
- $\partial_iL(\lambda)\geq \T{x_i}S_n^{-1}x_i$.
-
- Using the Sherman-Morrison formula, for all $k\geq 1$:
- \begin{displaymath}
- \T{x_i}S_k^{-1} x_i = \T{x_i}S_{k-1}^{-1}x_i
- - \frac{(\T{x_i}S_{k-1}^{-1}x_k)^2}{1+\T{x_k}S_{k-1}^{-1}x_k}
- \end{displaymath}
-
- By the Cauchy-Schwarz inequality:
- \begin{displaymath}
- (\T{x_i}S_{k-1}^{-1}x_k)^2 \leq \T{x_i}S_{k-1}^{-1}x_i\;\T{x_k}S_{k-1}^{-1}x_k
- \end{displaymath}
-
- Hence:
- \begin{displaymath}
- \T{x_i}S_k^{-1} x_i \geq \T{x_i}S_{k-1}^{-1}x_i
- - \T{x_i}S_{k-1}^{-1}x_i\frac{\T{x_k}S_{k-1}^{-1}x_k}{1+\T{x_k}S_{k-1}^{-1}x_k}
- \end{displaymath}
-
- But $\T{x_k}S_{k-1}^{-1}x_k\leq 1$ and $\frac{a}{1+a}\leq \frac{1}{2}$ if
- $0\leq a\leq 1$, so:
- \begin{displaymath}
- \T{x_i}S_{k}^{-1}x_i \geq \T{x_i}S_{k-1}^{-1}x_i
- - \frac{1}{2}\T{x_i}S_{k-1}^{-1}x_i\geq \frac{\T{x_i}S_{k-1}^{-1}x_i}{2}
- \end{displaymath}
-
- By induction:
- \begin{displaymath}
- \T{x_i}S_n^{-1} x_i \geq \frac{\T{x_i}x_i}{2^n}
- \end{displaymath}
-
- Using that $\T{x_i}{x_i}\geq b$ concludes the proof of the left-hand side
- of the lemma's inequality.
-\end{proof}
-
-\begin{lemma}\label{lemma:proximity}
-We have:
-\begin{displaymath}
- L^*_c - \alpha n^2\leq L^*_c(\alpha) \leq L^*_c
-\end{displaymath}
-In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2$.
-\end{lemma}
-
-\begin{proof}
- $\alpha\mapsto L^*_c(\alpha)$ is a decreasing function as it is the
- maximum value of the $L$ function over a set-decreasing domain, which gives
- the rightmost inequality.
-
- Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c, \alpha})$, that is:
- \begin{displaymath}
- L^*_{c}(\alpha) = \max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*)
- \end{displaymath}
-
- Note that $\mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*)
- = \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*)
- - \alpha\T{\mathbf{1}}\mu^*$, and that for any $\lambda$ feasible for
- problem \eqref{eq:primal}, $\mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*)
- \geq L(\lambda)$. Hence,
- \begin{displaymath}
- L^*_{c}(\alpha) \geq L(\lambda) - \alpha\T{\mathbf{1}}\mu^*
- \end{displaymath}
- for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for $\lambda$ primal optimal for $\eqref{eq:primal}$:
- \begin{equation}\label{eq:local-1}
- L^*_{c}(\alpha) \geq L^*_c - \alpha\T{\mathbf{1}}\mu^*
- \end{equation}
-
- Let us denote by the $M$ the support of $\mu^*$, that is $M\defeq
- \{i|\mu_i^* > 0\}$, and by $\lambda^*$ a primal optimal point for
- $\eqref{eq:perturbed-primal}$. From the KKT conditions we see that:
- \begin{displaymath}
- M \subseteq \{i|\lambda_i^* = \alpha\}
- \end{displaymath}
-
-
- Let us first assume that $|M| = 0$, then $\T{\mathbf{1}}\mu^*=0$ and the lemma follows.
-
- We will now assume that $|M|\geq 1$. In this case $\T{c}\lambda^*
- = 1$, otherwise we could increase the coordinates of $\lambda^*$ in $M$,
- which would increase the value of the objective function and contradict the
- optimality of $\lambda^*$. Note also, that $|M|\leq n-1$, otherwise, since
- $\alpha< \frac{1}{n}$, we would have $\T{c}\lambda^*\ < 1$, which again
- contradicts the optimality of $\lambda^*$. Let us write:
- \begin{displaymath}
- 1 = \T{c}\lambda^* = \alpha\sum_{i\in M}c_i + \sum_{i\in \bar{M}}\lambda_i^*c_i
- \leq \alpha |M| + (n-|M|)\max_{i\in \bar{M}} c_i
- \end{displaymath}
- That is:
- \begin{equation}\label{local-2}
- \max_{i\in\bar{M}} c_i \geq \frac{1 - |M|\alpha}{n-|M|}> \frac{1}{n}
- \end{equation}
- where the last inequality uses again that $\alpha<\frac{1}{n}$. From the
- KKT conditions, we see that for $i\in M$, $\nu_i^* = 0$ and:
- \begin{equation}\label{local-3}
- \mu_i^* = \xi^*c_i - \partial_i L(\lambda^*)\leq \xi^*c_i\leq \xi^*
- \end{equation}
- since $\partial_i L(\lambda^*)\geq 0$ and $c_i\leq 1$.
-
- Furthermore, using the KKT conditions again, we have that:
- \begin{equation}\label{local-4}
- \xi^* \leq \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i}
- = \frac{1}{\max_{i\in\bar{M}} c_i}
- \end{equation}
- where the last inequality uses Lemma~\ref{lemma:derivative-bounds}.
-
- Combining \eqref{local-2}, \eqref{local-3} and \eqref{local-4}, we get that:
- \begin{displaymath}
- \sum_{i\in M}\mu_i^* \leq |M|\xi^* \leq n\xi^*\leq \frac{n}{\max_{i\in\bar{M}} c_i} \leq n^2
- \end{displaymath}
-
- This implies that:
- \begin{displaymath}
- \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^*\leq n^2
- \end{displaymath}
- which in addition to \eqref{eq:local-1} proves the lemma.
-\end{proof}
-
-\begin{lemma}\label{lemma:monotonicity}
- If $c'$ = $(c_i', c_{-i})$, with $c_i'\leq c_i - \delta$, we have:
- \begin{displaymath}
- L^*_{c'}(\alpha) \geq L^*_c(\alpha) + \frac{\alpha\delta b}{2^n}
- \end{displaymath}
-\end{lemma}
-
-\begin{proof}
- Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c', \alpha})$. Noting that, $\mathcal{L}_{c', \alpha}(\lambda, \mu^*, \nu^*, \xi^*) \geq
- \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) + \lambda_i\xi^*\delta$, we get similarly to Lemma~\ref{lemma:proximity}:
- \begin{displaymath}
- L^*_{c'}(\alpha) \geq L(\lambda) + \lambda_i\xi^*\delta
- \end{displaymath}
- for any $\lambda$ feasible for \eqref{eq:perturbed-primal}. In particular, for $\lambda^*$ primal optimal for \eqref{eq:perturbed-primal}:
- \begin{displaymath}
- L^*_{c'}(\alpha) \geq L^*_{c}(\alpha) + \alpha\xi^*\delta
- \end{displaymath}
- since $\lambda_i^*\geq \alpha$.
-
- Using the KKT conditions for $(P_{c', \alpha})$, we can write:
- \begin{displaymath}
- \xi^* = \inf_{i:\lambda^{'*}_i>\alpha} \frac{\T{x_i}S(\lambda^{'*})^{-1}x_i}{c_i'}
- \end{displaymath}
- with $\lambda^{'*}$ optimal for $(P_{c', \alpha})$. Since $c_i'\leq 1$, using Lemma~\ref{lemma:derivative-bounds}, we get that $\xi^*\geq \frac{b}{2^n}$, which concludes the proof.
-\end{proof}
-
-\begin{proposition}
- Let $\delta\in(0,1]$. For any $\varepsilon\in(0,1]$, there exists
- a routine which computes an approximate solution $\tilde{L}^*_c$ to
- \eqref{eq:primal} such that:
- \begin{enumerate}
- \item $|\tilde{L}^*_c - L^*_c| \leq \varepsilon$
- \item for all $c' = (c_i', c_{-i})$ with $c_i'\leq c_i-\delta$, $\tilde{L}^*_c \leq \tilde{L}^*_{c'}$
- \item the routine's running time is $O\big(poly(n, d, \log\log\frac{1}{b\varepsilon\delta})\big)$
- \end{enumerate}
-\end{proposition}
-
-\begin{proof}
-Let $\varepsilon$ in $(0, 1]$. The routine works as follows: set $\alpha\defeq
-\varepsilon(\delta + n^2)^{-1}$ and return an approximation $\tilde{L}^*_c$ of
-$L^*_c(\alpha)$ with an accuracy $\frac{1}{2^{n+1}}\alpha\delta b$ computed by
-a standard convex optimization algorithm. Note that this choice of $\alpha$
-implies $\alpha<\frac{1}{n}$ as required.
-
-\begin{enumerate}
- \item using Lemma~\ref{lemma:proximity}:
-\begin{displaymath}
- |\tilde{L}^*_c - L^*_c| \leq |\tilde{L}^*_c - L^*_c(\alpha)| + |L^*_c(\alpha) - L^*_c|
- \leq \alpha\delta + \alpha n^2 = \varepsilon
-\end{displaymath}
-
-\item let $c' = (c_i', c_{-i})$ with $c_i'\leq c_i-\delta$, then:
-\begin{displaymath}
- \tilde{L}^*_{c'} \geq L^*_{c'} - \frac{\alpha\delta b}{2^{n+1}}
- \geq L^*_c + \frac{\alpha\delta b}{2^{n+1}}
- \geq \tilde{L}^*_c
-\end{displaymath}
-where the first and inequality come from the accuracy of the approximation, and
-the inner inequality follows from Lemma~\ref{lemma:monotonicity}.
-
-\item the accuracy of the approximation $\tilde{L}^*_c$ is:
-\begin{displaymath}
- A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2)}
-\end{displaymath}
-\sloppy
-hence, the standard convex optimization algorithm runs in time $O(poly(n, d,\log\log A^{-1}))$. Note that:
-\begin{displaymath}
- \log\log A^{-1} = O\bigg(\log\log\frac{1}{\varepsilon\delta b} + \log n\bigg)
-\end{displaymath}
-which yields the wanted running time for the routine.\qedhere
-\end{enumerate}
-\end{proof}
-\end{document}
diff --git a/main.tex b/main.tex
index d8635db..21260e1 100644
--- a/main.tex
+++ b/main.tex
@@ -61,16 +61,6 @@ following properties:
\item $OPT'_{-i^*}$ must be computable in polynomial time.
\end{itemize}
-Such a quantity can be found by considering relaxations of the
-optimization problem \eqref{eq:opt-reduced}. A function $L:[0, 1]^{n}\to\reals_+$ defined on the hypercube $[0, 1]^{n}$ is a \emph{fractional relaxation} of $V$ over the set
-$\mathcal{N}$ if $L(\id_S) = V(S)$ for all $S\subseteq\mathcal{N}$, where $\id_S$
-denotes the indicator vector of $S$. The optimization program
-\eqref{eq:non-strategic} extends naturally to such relaxations:
-\begin{align}
- OPT' \defeq \max_{\lambda\in[0, 1]^{n}}
- \left\{L(\lambda) \Big| \sum_{i=1}^{n} \lambda_i c_i
- \leq B\right\}\label{relax}
-\end{align}
One of the main technical contributions of \citeN{chen} and
\citeN{singer-influence} is to come up with appropriate such relaxations for
@@ -78,24 +68,13 @@ One of the main technical contributions of \citeN{chen} and
\subsection{Our Approach}
-\sloppy
-We introduce a relaxation $L$ specifically tailored to the value function of
-\SEDP:
-\begin{equation}\label{eq:our-relaxation}
-\forall\,\lambda\in[0,1]^n,\quad L(\lambda) \defeq
-\log\det\left(I_d + \sum_{i\in\mathcal{N}} \lambda_i x_i\T{x_i}\right),
-\end{equation}
-The function $L$ is well-known to be concave and even self-concordant (see
-\emph{e.g.}, \cite{boyd2004convex}). In this case, the analysis of Newton's
-method for self-concordant functions in \cite{boyd2004convex}, shows that
-finding the maximum of $L$ to any precision $\varepsilon$ can be done in
-$O(\log\log\varepsilon^{-1})$ iterations. Being the solution to a maximization
-problem, $OPT'_{-i^*}$ satisfies the required monotonicity property.
+\begin{comment}
The main challenge
will be to prove that $OPT'_{-i^*}$, for our relaxation $L$, is close to
$OPT_{-i^*}$.
We show this by establishing that $L$ is within a constant factor from the so-called multi-linear relaxation of \eqref{modified}, which in turn can be related to \eqref{modified} through pipage rounding. We establish the constant factor to the multi-linear relaxation by bounding the partial derivatives of these two functions; we obtain the latter bound by exploiting convexity properties of matrix functions over the convex cone of positive semidefinite matrices.
+\end{comment}
\begin{algorithm}[t]
\caption{Mechanism for \SEDP{}}\label{mechanism}
diff --git a/paper.tex b/paper.tex
index d9abdf8..f0a6e93 100644
--- a/paper.tex
+++ b/paper.tex
@@ -1,38 +1,36 @@
-\documentclass[prodmode,acmec]{ec-acmsmall}
-%\documentclass{article}
-\acmVolume{X}
-\acmNumber{X}
-\acmArticle{X}
-\acmYear{2013}
-\acmMonth{2}
+\documentclass[11pt]{article}
+\usepackage[vmargin=1.5in]{geometry}
\usepackage[numbers]{natbib}
\usepackage[utf8]{inputenc}
\usepackage{amsmath,amsfonts}
-%\usepackage{amsthm}
-\usepackage{algorithm}
-\usepackage{algpseudocode,bbm,color,verbatim}
+\usepackage{algorithm, algpseudocode}
+\usepackage{bbm,color,verbatim}
+\usepackage[pagebackref=true,breaklinks=true,colorlinks=true]{hyperref}
+\usepackage{amsthm}
\input{definitions}
\title{Budget Feasible Mechanisms for Experimental Design}
\author{
- Thibaut Horel \affil{École Normale Supérieure}
-Stratis Ioannidis \affil{Technicolor}
-S. Muthukrishnan \affil{Rutgers University, Microsoft Research}}
-%Remove permission block empty space
-\makeatletter
-\let\@copyrightspace\relax
-\makeatother
-\markboth{Thibaut Horel, Stratis Ioannidis and S. Muthukrishnan}{Budget Feasible Mechanisms for Experimental Design}
-\begin{document}
+ Thibaut Horel\\École normale supérieure\\\texttt{thibaut.horel@ens.fr}
+ \and
+ Stratis Ioannidis\\Technicolor\\\texttt{stratis.ioannidis@technicolor.com}
+ \and
+ S. Muthukrishnan\\Rutgers University\\\texttt{muthu@cs.rutgers.edu}
+}
+\begin{document}
+\maketitle
\begin{abstract}
\input{abstract}
\end{abstract}
-\maketitle
+\newpage
+
\section{Introduction}
\input{intro}
\section{Preliminaries}\label{sec:peel}
\input{problem}
+\section{Approximation results}
+\input{approximation}
\section{Mechanism for \SEDP{}}
\input{main}
\section{Proofs}\label{sec:proofs}
@@ -41,7 +39,7 @@ S. Muthukrishnan \affil{Rutgers University, Microsoft Research}}
\input{general}
%\section{Conclusion}
%\input{conclusion}
-\bibliographystyle{acmsmall}
+\bibliographystyle{abbrvnat}
\bibliography{notes}
\newpage
\section*{Appendix}
diff --git a/proofs.tex b/proofs.tex
index 0c06d67..6169bb5 100644
--- a/proofs.tex
+++ b/proofs.tex
@@ -31,18 +31,9 @@ The complexity of the mechanism is given by the following lemma.
}
\end{proof}
-Finally, we prove the approximation ratio of the mechanism. We use the
-following lemma which establishes that $OPT'$, the optimal value \eqref{relax}
-of the fractional relaxation $L$ under the budget constraints is not too far
-from $OPT$.
-\begin{lemma}[Approximation]\label{lemma:relaxation}
- $ OPT' \leq 2 OPT
- + 2\max_{i\in\mathcal{N}}V(i)$.
-\end{lemma}
-The proof of Lemma~\ref{lemma:relaxation} is our main technical contribution,
-and can be found in Section \ref{sec:relaxation}.
+Finally, we prove the approximation ratio of the mechanism.
-We also use the following lemma from \cite{chen} which bounds $OPT$ in terms of
+We use the following lemma from \cite{chen} which bounds $OPT$ in terms of
the value of $S_G$, as computed in Algorithm \ref{mechanism}, and $i^*$, the
element of maximum value.
@@ -54,7 +45,7 @@ OPT \leq \frac{e}{e-1}\big( 3 V(S_G) + 2 V(i^*)\big).
\end{displaymath}
\end{lemma}
-Using Lemmas~\ref{lemma:relaxation} and~\ref{lemma:greedy-bound} we can complete the proof of
+Using Proposition~\ref{prop:relaxation} and~\ref{lemma:greedy-bound} we can complete the proof of
Theorem~\ref{thm:main} by showing that, for any $\varepsilon > 0$, if
$OPT_{-i}'$, the optimal value of $L$ when $i^*$ is excluded from
$\mathcal{N}$, has been computed to a precision $\varepsilon$, then the set
@@ -83,7 +74,7 @@ hence,
\end{equation}
If the condition does not hold, by observing that $OPT'_{-i^*}\leq OPT'$ and
-applying Lemma~\ref{lemma:relaxation}, we get
+applying Proposition~\ref{prop:relaxation}, we get
\begin{displaymath}
V(i^*) \stackrel{}\leq \frac{1}{C}OPT_{-i^*}' + \frac{\varepsilon}{C}
\leq \frac{1}{C} \big(2 OPT + 2 V(i^*)\big) + \frac{\varepsilon}{C}.
@@ -120,316 +111,6 @@ Placing the expression of $C$ in \eqref{eq:bound1} and \eqref{eq:bound2}
gives the approximation ratio in \eqref{approxbound}, and concludes the proof
of Theorem~\ref{thm:main}.\hspace*{\stretch{1}}\qed
-\subsection{Proof of Lemma~\ref{lemma:relaxation}}\label{sec:relaxation}
-
-We need to prove that for our relaxation $L$ given by
-\eqref{eq:our-relaxation}, $OPT'$ is close to $OPT$ as stated in
-Lemma~\ref{lemma:relaxation}. Our analysis follows the \emph{pipage rounding}
-framework of \citeN{pipage}.
-
-This framework uses the \emph{multi-linear} extension $F$ of the submodular
-function $V$. Let $P_\mathcal{N}^\lambda(S)$ be the probability of choosing the set $S$ if we select each element $i$ in $\mathcal{N}$ independently with probability $\lambda_i$:
-\begin{displaymath}
- P_\mathcal{N}^\lambda(S) \defeq \prod_{i\in S} \lambda_i
- \prod_{i\in\mathcal{N}\setminus S}( 1 - \lambda_i).
-\end{displaymath}
-Then, the \emph{multi-linear} extension $F$ is defined by:
-\begin{displaymath}
- F(\lambda)
- \defeq \mathbb{E}_{S\sim P_\mathcal{N}^\lambda}\big[V(S)\big]
- = \sum_{S\subseteq\mathcal{N}} P_\mathcal{N}^\lambda(S) V(S)
-\end{displaymath}
-
-For \EDP{} the multi-linear extension can be written:
-\begin{equation}\label{eq:multi-linear-logdet}
- F(\lambda) = \mathbb{E}_{S\sim
- P_\mathcal{N}^\lambda}\bigg[\log\det \big(I_d + \sum_{i\in S} x_i\T{x_i}\big) \Big].
-\end{equation}
-Note that the relaxation $L$ that we introduced in \eqref{eq:our-relaxation},
-follows naturally from the \emph{multi-linear} relaxation by swapping the
-expectation and the $\log\det$ in \eqref{eq:multi-linear-logdet}:
-\begin{displaymath}
- L(\lambda) = \log\det\left(\mathbb{E}_{S\sim
- P_\mathcal{N}^\lambda}\bigg[I_d + \sum_{i\in S} x_i\T{x_i} \bigg]\right).
-\end{displaymath}
-
-The proof proceeds as follows:
-\begin{itemize}
-\item First, we prove that $F$ admits the following rounding property: let
-$\lambda$ be a feasible element of $[0,1]^n$, it is possible to trade one
-fractional component of $\lambda$ for another until one of them becomes
-integral, obtaining a new element $\tilde{\lambda}$ which is both feasible and
-for which $F(\tilde{\lambda})\geq F(\lambda)$. Here, by feasibility of a point
-$\lambda$, we mean that it satisfies the budget constraint $\sum_{i=1}^n
-\lambda_i c_i \leq B$. This rounding property is referred to in the literature
-as \emph{cross-convexity} (see, \emph{e.g.}, \cite{dughmi}), or
-$\varepsilon$-convexity by \citeN{pipage}. This is stated and proven in
-Lemma~\ref{lemma:rounding} and allows us to bound $F$ in terms of $OPT$.
-\item Next, we prove the central result of bounding $L$ appropriately in terms
-of the multi-linear relaxation $F$ (Lemma \ref{lemma:relaxation-ratio}).
-\item Finally, we conclude the proof of Lemma~\ref{lemma:relaxation} by
-combining Lemma~\ref{lemma:rounding} and Lemma~\ref{lemma:relaxation-ratio}.
-\end{itemize}
-
-\begin{comment}
-Formally, if we define:
-\begin{displaymath}
- \tilde{F}_\lambda(\varepsilon) \defeq F\big(\lambda + \varepsilon(e_i
- - e_j)\big)
-\end{displaymath}
-where $e_i$ and $e_j$ are two vectors of the standard basis of
-$\reals^{n}$, then $\tilde{F}_\lambda$ is convex. Hence its maximum over the interval:
-\begin{displaymath}
- I_\lambda = \Big[\max(-\lambda_i,\lambda_j-1), \min(1-\lambda_i, \lambda_j)\Big]
-\end{displaymath}
-is attained at one of the boundaries of $I_\lambda$ for which one of the $i$-th
-or the $j$-th component of $\lambda$ becomes integral.
-\end{comment}
-
-\begin{lemma}[Rounding]\label{lemma:rounding}
- For any feasible $\lambda\in[0,1]^{n}$, there exists a feasible
- $\bar{\lambda}\in[0,1]^{n}$ such that at most one of its components is
- fractional %, that is, lies in $(0,1)$ and:
- and $F_{\mathcal{N}}(\lambda)\leq F_{\mathcal{N}}(\bar{\lambda})$.
-\end{lemma}
-\begin{proof}
- We give a rounding procedure which, given a feasible $\lambda$ with at least
- two fractional components, returns some feasible $\lambda'$ with one less fractional
- component such that $F(\lambda) \leq F(\lambda')$.
-
- Applying this procedure recursively yields the lemma's result.
- Let us consider such a feasible $\lambda$. Let $i$ and $j$ be two
- fractional components of $\lambda$ and let us define the following
- function:
- \begin{displaymath}
- F_\lambda(\varepsilon) = F(\lambda_\varepsilon)
- \quad\textrm{where} \quad
- \lambda_\varepsilon = \lambda + \varepsilon\left(e_i-\frac{c_i}{c_j}e_j\right)
- \end{displaymath}
- It is easy to see that if $\lambda$ is feasible, then:
- \begin{equation}\label{eq:convex-interval}
- \forall\varepsilon\in\Big[\max\Big(-\lambda_i,(\lambda_j-1)\frac{c_j}{c_i}\Big), \min\Big(1-\lambda_i, \lambda_j
- \frac{c_j}{c_i}\Big)\Big],\;
- \lambda_\varepsilon\;\;\textrm{is feasible}
- \end{equation}
- Furthermore, the function $F_\lambda$ is convex; indeed:
- \begin{align*}
- F_\lambda(\varepsilon)
- & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}^\lambda(S')}\Big[
- (\lambda_i+\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i,j\})\\
- & + (\lambda_i+\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i\})
- + (1-\lambda_i-\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{j\})\\
- & + (1-\lambda_i-\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S')\Big]
- \end{align*}
- Thus, $F_\lambda$ is a degree 2 polynomial whose dominant coefficient is:
- \begin{displaymath}
- \frac{c_i}{c_j}\mathbb{E}_{S'\sim
- P_{\mathcal{N}\setminus\{i,j\}}^\lambda(S')}\Big[
- V(S'\cup\{i\})+V(S'\cup\{i\})\\
- -V(S'\cup\{i,j\})-V(S')\Big]
- \end{displaymath}
- which is positive by submodularity of $V$. Hence, the maximum of
- $F_\lambda$ over the interval given in \eqref{eq:convex-interval} is
- attained at one of its limit, at which either the $i$-th or $j$-th component of
- $\lambda_\varepsilon$ becomes integral.
-\end{proof}
-
-
-\begin{lemma}\label{lemma:relaxation-ratio}
- % The following inequality holds:
-For all $\lambda\in[0,1]^{n},$
- %\begin{displaymath}
- $ \frac{1}{2}
- \,L(\lambda)\leq
- F(\lambda)\leq L(\lambda)$.
- %\end{displaymath}
-\end{lemma}
-\begin{proof}
- The bound $F_{\mathcal{N}}(\lambda)\leq L_{\mathcal{N}(\lambda)}$ follows by the concavity of the $\log\det$ function.
- To show the lower bound,
- we first prove that $\frac{1}{2}$ is a lower bound of the ratio $\partial_i
- F(\lambda)/\partial_i L(\lambda)$, where
- $\partial_i\, \cdot$ denotes the partial derivative with respect to the
- $i$-th variable.
-
- Let us start by computing the derivatives of $F$ and
- $L$ with respect to the $i$-th component.
- Observe that
- \begin{displaymath}
- \partial_i P_\mathcal{N}^\lambda(S) = \left\{
- \begin{aligned}
- & P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})\;\textrm{if}\;
- i\in S, \\
- & - P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\;\textrm{if}\;
- i\in \mathcal{N}\setminus S. \\
- \end{aligned}\right.
- \end{displaymath}
- Hence,
- \begin{displaymath}
- \partial_i F(\lambda) =
- \sum_{\substack{S\subseteq\mathcal{N}\\ i\in S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})V(S)
- - \sum_{\substack{S\subseteq\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S).
- \end{displaymath}
- Now, using that every $S$ such that $i\in S$ can be uniquely written as
- $S'\cup\{i\}$, we can write:
- \begin{displaymath}
- \partial_i F(\lambda) =
- \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\big(V(S\cup\{i\})
- - V(S)\big).
- \end{displaymath}
- The marginal contribution of $i$ to
- $S$ can be written as
-\begin{align*}
-V(S\cup \{i\}) - V(S)& = \frac{1}{2}\log\det(I_d
- + \T{X_S}X_S + x_i\T{x_i})
- - \frac{1}{2}\log\det(I_d + \T{X_S}X_S)\\
- & = \frac{1}{2}\log\det(I_d + x_i\T{x_i}(I_d +
-\T{X_S}X_S)^{-1})
- = \frac{1}{2}\log(1 + \T{x_i}A(S)^{-1}x_i)
-\end{align*}
-where $A(S) \defeq I_d+ \T{X_S}X_S$, and the last equality follows from the
-Sylvester's determinant identity~\cite{sylvester}.
-% $ V(S\cup\{i\}) - V(S) = \frac{1}{2}\log\left(1 + \T{x_i} A(S)^{-1}x_i\right)$.
-Using this,
- \begin{displaymath}
- \partial_i F(\lambda) = \frac{1}{2}
- \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
- \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)
- \end{displaymath}
- The computation of the derivative of $L$ uses standard matrix
- calculus: writing $\tilde{A}(\lambda) = I_d+\sum_{i\in
- \mathcal{N}}\lambda_ix_i\T{x_i}$,
- \begin{displaymath}
- \det \tilde{A}(\lambda + h\cdot e_i) = \det\big(\tilde{A}(\lambda)
- + hx_i\T{x_i}\big)
- =\det \tilde{A}(\lambda)\big(1+
- h\T{x_i}\tilde{A}(\lambda)^{-1}x_i\big).
- \end{displaymath}
- Hence,
- \begin{displaymath}
- \log\det\tilde{A}(\lambda + h\cdot e_i)= \log\det\tilde{A}(\lambda)
- + h\T{x_i}\tilde{A}(\lambda)^{-1}x_i + o(h),
- \end{displaymath}
- which implies
- \begin{displaymath}
- \partial_i L(\lambda)
- =\frac{1}{2} \T{x_i}\tilde{A}(\lambda)^{-1}x_i.
- \end{displaymath}
-
-For two symmetric matrices $A$ and $B$, we write $A\succ B$ ($A\succeq B$) if
-$A-B$ is positive definite (positive semi-definite). This order allows us to
-define the notion of a \emph{decreasing} as well as \emph{convex} matrix
-function, similarly to their real counterparts. With this definition, matrix
-inversion is decreasing and convex over symmetric positive definite
-matrices (see Example 3.48 p. 110 in \cite{boyd2004convex}).
-In particular,
-\begin{gather*}
- \forall S\subseteq\mathcal{N},\quad A(S)^{-1} \succeq A(S\cup\{i\})^{-1}
-\end{gather*}
-as $A(S)\preceq A(S\cup\{i\})$. Observe that
- % \begin{gather*}
- % \forall
-$P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\geq P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\})$ for all $S\subseteq\mathcal{N}\setminus\{i\}$, and
- % ,\\
- $P_{\mathcal{N}\setminus\{i\}}^\lambda(S) \geq P_\mathcal{N}^\lambda(S),$ for all $S\subseteq\mathcal{N}$.
- %\end{gather*}
- Hence,
- \begin{align*}
- \partial_i F(\lambda)
- % & = \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} P_\mathcal{N}^\lambda(S) \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\
- & \geq \frac{1}{4}
- \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
- \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\
- &\hspace{-3.5em}+\frac{1}{4}
- \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\})
- \log\Big(1 + \T{x_i}A(S\cup\{i\})^{-1}x_i\Big)\\
- &\geq \frac{1}{4}
- \sum_{S\subseteq\mathcal{N}}
- P_\mathcal{N}^\lambda(S)
- \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big).
- \end{align*}
- Using that $A(S)\succeq I_d$ we get that $\T{x_i}A(S)^{-1}x_i \leq
- \norm{x_i}_2^2 \leq 1$. Moreover, $\log(1+x)\geq x$ for all $x\leq 1$.
- Hence,
- \begin{displaymath}
- \partial_i F(\lambda) \geq
- \frac{1}{4}
- \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)^{-1}\bigg)x_i.
- \end{displaymath}
- Finally, using that the inverse is a matrix convex function over symmetric
- positive definite matrices:
- \begin{displaymath}
- \partial_i F(\lambda) \geq
- \frac{1}{4}
- \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)\bigg)^{-1}x_i
- = \frac{1}{4}\T{x_i}\tilde{A}(\lambda)^{-1}x_i
- = \frac{1}{2}
- \partial_i L(\lambda).
- \end{displaymath}
-
-Having bound the ratio between the partial derivatives, we now bound the ratio $F(\lambda)/L(\lambda)$ from below. Consider the following cases.
- First, if the minimum of the ratio
- $F(\lambda)/L(\lambda)$ is attained at a point interior to the hypercube, then it is
- a critical point, \emph{i.e.}, $\partial_i \big(F(\lambda)/L(\lambda)\big)=0$ for all $i\in \mathcal{N}$; hence, at such a critical point:
- \begin{equation}\label{eq:lhopital}
- \frac{F(\lambda)}{L(\lambda)}
- = \frac{\partial_i F(\lambda)}{\partial_i
- L(\lambda)} \geq \frac{1}{2}.
- \end{equation}
- Second, if the minimum is attained as
- $\lambda$ converges to zero in, \emph{e.g.}, the $l_2$ norm, by the Taylor approximation, one can write:
- \begin{displaymath}
- \frac{F(\lambda)}{L(\lambda)}
- \sim_{\lambda\rightarrow 0}
- \frac{\sum_{i\in \mathcal{N}}\lambda_i\partial_i F(0)}
- {\sum_{i\in\mathcal{N}}\lambda_i\partial_i L(0)}
- \geq \frac{1}{2},
- \end{displaymath}
- \emph{i.e.}, the ratio $\frac{F(\lambda)}{L(\lambda)}$ is necessarily bounded from below by 1/2 for small enough $\lambda$.
- Finally, if the minimum is attained on a face of the hypercube $[0,1]^n$ (a face is
- defined as a subset of the hypercube where one of the variable is fixed to
- 0 or 1), without loss of generality, we can assume that the minimum is
- attained on the face where the $n$-th variable has been fixed
- to 0 or 1. Then, either the minimum is attained at a point interior to the
- face or on a boundary of the face. In the first sub-case, relation
- \eqref{eq:lhopital} still characterizes the minimum for $i< n$.
- In the second sub-case, by repeating the argument again by induction, we see
- that all is left to do is to show that the bound holds for the vertices of
- the cube (the faces of dimension 1). The vertices are exactly the binary
- points, for which we know that both relaxations are equal to the value
- function $V$. Hence, the ratio is equal to 1 on the vertices.
-\end{proof}
-
-To conclude the proof of Lemma~\ref{lemma:relaxation}, let us consider
-a feasible point $\lambda^*\in[0,1]^{n}$ such that $L(\lambda^*) = OPT'$. By
-applying Lemma~\ref{lemma:relaxation-ratio} and Lemma~\ref{lemma:rounding} we
-get a feasible point $\bar{\lambda}$ with at most one fractional component such
-that
-\begin{equation}\label{eq:e1}
- L(\lambda^*) \leq 2 F(\bar{\lambda}).
-\end{equation}
- Let $\lambda_i$ denote the fractional component of $\bar{\lambda}$ and $S$
- denote the set whose indicator vector is $\bar{\lambda} - \lambda_i e_i$.
- By definition of the multi-linear extension $F$:
- \begin{displaymath}
- F(\bar{\lambda}) = (1-\lambda_i)V(S) +\lambda_i V(S\cup\{i\}).
- \end{displaymath}
- By submodularity of $V$, $V(S\cup\{i\})\leq V(S) + V(\{i\})$. Hence,
- \begin{displaymath}
- F(\bar{\lambda}) \leq V(S) + V(i).
- \end{displaymath}
- Note that since $\bar{\lambda}$ is feasible, $S$ is also feasible and
- $V(S)\leq OPT$. Hence,
- \begin{equation}\label{eq:e2}
- F(\bar{\lambda}) \leq OPT + \max_{i\in\mathcal{N}} V(i).
- \end{equation}
- Together, \eqref{eq:e1} and \eqref{eq:e2} imply the lemma. \hspace*{\stretch{1}}\qed
\subsection{Proof of Theorem \ref{thm:lowerbound}}