From 07d48e21fb6fc62b1a85b9d80c25560529a9a0b5 Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Fri, 28 Jun 2013 00:16:44 +0200
Subject: Moving the proofs to the appendix, improving the flow

---
 appendix.tex      | 669 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 approximation.tex | 602 ++++++------------------------------------------
 definitions.tex   |   2 +-
 intro.tex         |   2 +-
 main.tex          |  19 +-
 paper.tex         |   8 +-
 problem.tex       |  47 ++--
 proofs.tex        | 122 ----------
 8 files changed, 760 insertions(+), 711 deletions(-)

diff --git a/appendix.tex b/appendix.tex
index 2c83e04..dd643d7 100644
--- a/appendix.tex
+++ b/appendix.tex
@@ -1,6 +1,558 @@
+\subsection{Proof of Proposition~\ref{prop:relaxation}}
+
+\begin{lemma}\label{lemma:relaxation-ratio}
+For all $\lambda\in[0,1]^{n}$,
+$\frac{1}{2}\,L(\lambda)\leq F(\lambda)\leq L(\lambda)$.
+\end{lemma}
+
+\begin{proof}
+ The bound $F(\lambda)\leq L(\lambda)$ follows from the concavity of the
+ $\log\det$ function.
+ To show the lower bound, we first prove that $\frac{1}{2}$ is a lower
+ bound on the ratio $\partial_i F(\lambda)/\partial_i L(\lambda)$, where
+ $\partial_i\,\cdot$ denotes the partial derivative with respect to the
+ $i$-th variable.
+
+ Let us start by computing the derivatives of $F$ and $L$ with respect to
+ the $i$-th component. Observe that
+ \begin{displaymath}
+ \partial_i P_\mathcal{N}^\lambda(S) = \left\{
+ \begin{aligned}
+ & P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})\;\textrm{if}\;
+ i\in S, \\
+ & - P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\;\textrm{if}\;
+ i\in \mathcal{N}\setminus S. \\
+ \end{aligned}\right.
+ \end{displaymath}
+ Hence,
+ \begin{displaymath}
+ \partial_i F(\lambda) =
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})V(S)
+ - \sum_{\substack{S\subseteq\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S).
+ \end{displaymath}
+ Now, using that every $S$ such that $i\in S$ can be uniquely written as
+ $S'\cup\{i\}$, we can write:
+ \begin{displaymath}
+ \partial_i F(\lambda) =
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\big(V(S\cup\{i\})
+ - V(S)\big).
+ \end{displaymath}
+ The marginal contribution of $i$ to $S$ can be written as
+\begin{align*}
+V(S\cup \{i\}) - V(S)& = \frac{1}{2}\log\det(I_d
+ + \T{X_S}X_S + x_i\T{x_i})
+ - \frac{1}{2}\log\det(I_d + \T{X_S}X_S)\\
+ & = \frac{1}{2}\log\det(I_d + x_i\T{x_i}(I_d
+ +\T{X_S}X_S)^{-1})
+ = \frac{1}{2}\log(1 + \T{x_i}A(S)^{-1}x_i),
+\end{align*}
+where $A(S) \defeq I_d+ \T{X_S}X_S$, and the last equality follows from
+Sylvester's determinant identity~\cite{sylvester}.
+Using this,
+ \begin{displaymath}
+ \partial_i F(\lambda) = \frac{1}{2}
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
+ \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big).
+ \end{displaymath}
+ The computation of the derivative of $L$ uses standard matrix
+ calculus: writing $\tilde{A}(\lambda) = I_d+\sum_{i\in
+ \mathcal{N}}\lambda_ix_i\T{x_i}$,
+ \begin{displaymath}
+ \det \tilde{A}(\lambda + h\cdot e_i) = \det\big(\tilde{A}(\lambda)
+ + hx_i\T{x_i}\big)
+ =\det \tilde{A}(\lambda)\big(1+
+ h\T{x_i}\tilde{A}(\lambda)^{-1}x_i\big).
+ \end{displaymath}
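+ (The last equality is the matrix determinant lemma, \emph{i.e.},
+ Sylvester's identity~\cite{sylvester} specialized to the rank-one update
+ $hx_i\T{x_i}$.)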
+ Hence,
+ \begin{displaymath}
+ \log\det\tilde{A}(\lambda + h\cdot e_i)= \log\det\tilde{A}(\lambda)
+ + h\T{x_i}\tilde{A}(\lambda)^{-1}x_i + o(h),
+ \end{displaymath}
+ which implies
+ \begin{displaymath}
+ \partial_i L(\lambda)
+ =\frac{1}{2} \T{x_i}\tilde{A}(\lambda)^{-1}x_i.
+ \end{displaymath}
+
+For two symmetric matrices $A$ and $B$, we write $A\succ B$ ($A\succeq B$) if
+$A-B$ is positive definite (positive semi-definite). This order allows us to
+define the notions of \emph{decreasing} and \emph{convex} matrix
+functions, similarly to their real counterparts. With this definition, matrix
+inversion is decreasing and convex over symmetric positive definite
+matrices (see Example 3.48, p.~110 in \cite{boyd2004convex}).
+In particular,
+\begin{gather*}
+ \forall S\subseteq\mathcal{N},\quad A(S)^{-1} \succeq A(S\cup\{i\})^{-1},
+\end{gather*}
+as $A(S)\preceq A(S\cup\{i\})$. Observe that
+$P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\geq P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\})$ for all $S\subseteq\mathcal{N}\setminus\{i\}$, and
+$P_{\mathcal{N}\setminus\{i\}}^\lambda(S) \geq P_\mathcal{N}^\lambda(S)$ for all $S\subseteq\mathcal{N}$.
+ Hence, splitting the sum into two halves,
+ \begin{align*}
+ \partial_i F(\lambda)
+ & \geq \frac{1}{4}
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
+ \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\
+ &\hspace{-3.5em}+\frac{1}{4}
+ \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\})
+ \log\Big(1 + \T{x_i}A(S\cup\{i\})^{-1}x_i\Big)\\
+ &\geq \frac{1}{4}
+ \sum_{S\subseteq\mathcal{N}}
+ P_\mathcal{N}^\lambda(S)
+ \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big).
+ \end{align*}
+ Using that $A(S)\succeq I_d$, we get that $\T{x_i}A(S)^{-1}x_i \leq
+ \norm{x_i}_2^2 \leq 1$. Moreover, $\log(1+x)\geq x$ for all $0\leq x\leq 1$.
+ Hence,
+ \begin{displaymath}
+ \partial_i F(\lambda) \geq
+ \frac{1}{4}
+ \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)^{-1}\bigg)x_i.
+ \end{displaymath}
+ Finally, using that the inverse is a matrix convex function over symmetric
+ positive definite matrices:
+ \begin{displaymath}
+ \partial_i F(\lambda) \geq
+ \frac{1}{4}
+ \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)\bigg)^{-1}x_i
+ = \frac{1}{4}\T{x_i}\tilde{A}(\lambda)^{-1}x_i
+ = \frac{1}{2}
+ \partial_i L(\lambda).
+ \end{displaymath}
+
+Having bounded the ratio between the partial derivatives, we now bound the
+ratio $F(\lambda)/L(\lambda)$ from below. Consider the following cases.
+ First, if the minimum of the ratio
+ $F(\lambda)/L(\lambda)$ is attained at a point interior to the hypercube,
+ then it is a critical point, \emph{i.e.},
+ $\partial_i \big(F(\lambda)/L(\lambda)\big)=0$ for all $i\in \mathcal{N}$;
+ hence, at such a critical point:
+ \begin{equation}\label{eq:lhopital}
+ \frac{F(\lambda)}{L(\lambda)}
+ = \frac{\partial_i F(\lambda)}{\partial_i
+ L(\lambda)} \geq \frac{1}{2}.
+ \end{equation}
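+ (Indeed, at an interior critical point, $\partial_i F(\lambda)\,L(\lambda)
+ = F(\lambda)\,\partial_i L(\lambda)$ for every $i$, so the ratio of the two
+ functions equals the ratio of their partial derivatives.)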
+ Second, if the minimum is attained as $\lambda$ converges to zero (in,
+ \emph{e.g.}, the $\ell_2$ norm), then by a first-order Taylor expansion:
+ \begin{displaymath}
+ \frac{F(\lambda)}{L(\lambda)}
+ \sim_{\lambda\rightarrow 0}
+ \frac{\sum_{i\in \mathcal{N}}\lambda_i\partial_i F(0)}
+ {\sum_{i\in\mathcal{N}}\lambda_i\partial_i L(0)}
+ \geq \frac{1}{2},
+ \end{displaymath}
+ \emph{i.e.}, the ratio $\frac{F(\lambda)}{L(\lambda)}$ is necessarily
+ bounded from below by $1/2$ for small enough $\lambda$.
+ Finally, if the minimum is attained on a face of the hypercube $[0,1]^n$
+ (a face is defined as a subset of the hypercube where one of the variables
+ is fixed to 0 or 1), without loss of generality, we can assume that the
+ minimum is attained on the face where the $n$-th variable has been fixed
+ to 0 or 1. Then, either the minimum is attained at a point interior to the
+ face or on a boundary of the face. In the first sub-case, relation
+ \eqref{eq:lhopital} still characterizes the minimum for $i< n$.
+ In the second sub-case, repeating the argument inductively, we see
+ that all that is left to do is to show that the bound holds at the
+ vertices of the cube (the faces of dimension 0). The vertices are exactly
+ the binary points, at which both relaxations are equal to the value
+ function $V$. Hence, the ratio is equal to 1 at the vertices.
+\end{proof}
+
+We now prove that $F$ admits the following exchange property: given a feasible
+$\lambda\in[0,1]^n$, it is possible to trade one fractional component of
+$\lambda$ for another until one of them becomes integral, obtaining a new
+element $\bar{\lambda}$ which is both feasible and such that
+$F(\bar{\lambda})\geq F(\lambda)$. Here, by feasibility of a point $\lambda$,
+we mean that it satisfies the budget constraint $\sum_{i=1}^n \lambda_i c_i
+\leq B$. This rounding property is referred to in the literature as
+\emph{cross-convexity} (see, \emph{e.g.}, \cite{dughmi}), or
+$\varepsilon$-convexity by \citeN{pipage}.
+
+\begin{lemma}[Rounding]\label{lemma:rounding}
+ For any feasible $\lambda\in[0,1]^{n}$, there exists a feasible
+ $\bar{\lambda}\in[0,1]^{n}$ such that at most one of its components is
+ fractional and $F(\lambda)\leq F(\bar{\lambda})$.
+\end{lemma}
+
+\begin{proof}
+ We give a rounding procedure which, given a feasible $\lambda$ with at
+ least two fractional components, returns some feasible $\lambda'$ with one
+ fewer fractional component and such that $F(\lambda) \leq F(\lambda')$.
+ Applying this procedure recursively yields the lemma's result.
+ Let us consider such a feasible $\lambda$.
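+ As in the pipage rounding framework of \citeN{pipage}, the idea is to
+ perturb two fractional coordinates of $\lambda$ in a direction that leaves
+ the budget constraint unchanged, and to exploit the convexity of $F$ along
+ this direction.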
+ Let $i$ and $j$ be two fractional components of $\lambda$ and let us
+ define the following function:
+ \begin{displaymath}
+ F_\lambda(\varepsilon) = F(\lambda_\varepsilon)
+ \quad\textrm{where} \quad
+ \lambda_\varepsilon = \lambda + \varepsilon\left(e_i-\frac{c_i}{c_j}e_j\right).
+ \end{displaymath}
+ It is easy to see that if $\lambda$ is feasible, then:
+ \begin{equation}\label{eq:convex-interval}
+ \forall\varepsilon\in\Big[\max\Big(-\lambda_i,(\lambda_j-1)\frac{c_j}{c_i}\Big), \min\Big(1-\lambda_i, \lambda_j
+ \frac{c_j}{c_i}\Big)\Big],\;
+ \lambda_\varepsilon\;\;\textrm{is feasible.}
+ \end{equation}
+ Furthermore, the function $F_\lambda$ is convex; indeed:
+ \begin{align*}
+ F_\lambda(\varepsilon)
+ & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}^\lambda}\Big[
+ (\lambda_i+\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i,j\})\\
+ & + (\lambda_i+\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i\})
+ + (1-\lambda_i-\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{j\})\\
+ & + (1-\lambda_i-\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S')\Big].
+ \end{align*}
+ Thus, $F_\lambda$ is a degree-2 polynomial whose leading coefficient is:
+ \begin{displaymath}
+ \frac{c_i}{c_j}\mathbb{E}_{S'\sim
+ P_{\mathcal{N}\setminus\{i,j\}}^\lambda}\Big[
+ V(S'\cup\{i\})+V(S'\cup\{j\})
+ -V(S'\cup\{i,j\})-V(S')\Big],
+ \end{displaymath}
+ which is nonnegative by submodularity of $V$. Hence, the maximum of
+ $F_\lambda$ over the interval given in \eqref{eq:convex-interval} is
+ attained at one of its endpoints, at which either the $i$-th or the
+ $j$-th component of $\lambda_\varepsilon$ becomes integral.
+\end{proof}
+
+\subsubsection*{End of the proof of Proposition~\ref{prop:relaxation}}
+
+Let us consider a feasible point $\lambda^*\in[0,1]^{n}$ such that
+$L(\lambda^*) = L^*_c$. By applying Lemma~\ref{lemma:relaxation-ratio} and
+Lemma~\ref{lemma:rounding}, we get a feasible point $\bar{\lambda}$ with at
+most one fractional component such that
+\begin{equation}\label{eq:e1}
+ L(\lambda^*) \leq 2 F(\bar{\lambda}).
+\end{equation}
+ Let $\lambda_i$ denote the fractional component of $\bar{\lambda}$ and $S$
+ denote the set whose indicator vector is $\bar{\lambda} - \lambda_i e_i$.
+ By definition of the multi-linear extension $F$:
+ \begin{displaymath}
+ F(\bar{\lambda}) = (1-\lambda_i)V(S) +\lambda_i V(S\cup\{i\}).
+ \end{displaymath}
+ By submodularity of $V$, $V(S\cup\{i\})\leq V(S) + V(i)$. Hence,
+ \begin{displaymath}
+ F(\bar{\lambda}) \leq V(S) + V(i).
+ \end{displaymath}
+ Note that since $\bar{\lambda}$ is feasible, $S$ is also feasible and
+ $V(S)\leq OPT$. Hence,
+ \begin{equation}\label{eq:e2}
+ F(\bar{\lambda}) \leq OPT + \max_{i\in\mathcal{N}} V(i).
+ \end{equation}
+Together, \eqref{eq:e1} and \eqref{eq:e2} imply the
+proposition.\hspace*{\stretch{1}}\qed
+
+\subsection{Proof of Proposition~\ref{prop:monotonicity}}
+
+The $\log\det$ function is concave and self-concordant (see
+\cite{boyd2004convex}); in this case, the analysis of the barrier method
+in \cite{boyd2004convex} (Section 11.5.5) can be summarized in the following
+lemma:
+
+\begin{lemma}\label{lemma:barrier}
+For any $\varepsilon>0$, the barrier method computes an $\varepsilon$-accurate
+approximation of $L^*_c$ in time $O(\text{poly}(n,d,\log\log\varepsilon^{-1}))$.
+\end{lemma}
+
+For ease of presentation, throughout this subsection we normalize the costs
+by dividing them by the budget $B$, so that the budget constraints in
+\eqref{eq:primal} and \eqref{eq:perturbed-primal} read $\T{c}\lambda\leq 1$,
+the normalized costs satisfy $c_i\leq 1$ (experiments whose cost exceeds the
+budget can never be selected and are discarded), and the perturbation
+parameter satisfies $\alpha<\frac{1}{n}$.
+
+We show that the optimal value of \eqref{eq:perturbed-primal} is close to the
+optimal value of \eqref{eq:primal} (Lemma~\ref{lemma:proximity}) while being
+well-behaved with respect to changes of the cost
+(Lemma~\ref{lemma:monotonicity}). These lemmas together imply
+Proposition~\ref{prop:monotonicity}.
+
+\begin{lemma}\label{lemma:derivative-bounds}
+ Let $\partial_i L(\lambda)$ denote the $i$-th partial derivative of $L$
+ for $i\in\{1,\ldots, n\}$; then:
+ \begin{displaymath}
+ \forall\lambda\in[0, 1]^n,\;\frac{b}{2^n} \leq \partial_i L(\lambda) \leq 1.
+ \end{displaymath}
+\end{lemma}
+
+\begin{proof}
+ Let us define:
+ \begin{displaymath}
+ S(\lambda)\defeq I_d + \sum_{i=1}^n \lambda_i x_i\T{x_i}
+ \quad\mathrm{and}\quad
+ S_k \defeq I_d + \sum_{i=1}^k x_i\T{x_i}.
+ \end{displaymath}
+
+ We have $\partial_i L(\lambda) = \T{x_i}S(\lambda)^{-1}x_i$. Since
+ $S(\lambda)\succeq I_d$, $\partial_i L(\lambda)\leq \T{x_i}x_i \leq 1$,
+ which is the right-hand side of the lemma.
+
+ For the left-hand side, note that $S(\lambda) \preceq S_n$. Hence
+ $\partial_iL(\lambda)\geq \T{x_i}S_n^{-1}x_i$.
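+ For completeness, recall the Sherman--Morrison formula: for an invertible
+ matrix $A$ and a vector $u$ with $1+\T{u}A^{-1}u\neq 0$,
+ \begin{displaymath}
+ (A + u\T{u})^{-1} = A^{-1} - \frac{A^{-1}u\T{u}A^{-1}}{1+\T{u}A^{-1}u}.
+ \end{displaymath}
+ Applied to $S_k = S_{k-1} + x_k\T{x_k}$ and multiplied by $\T{x_i}$ on the
+ left and by $x_i$ on the right, it yields the identity displayed next.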
+
+ Using the Sherman-Morrison formula, for all $k\geq 1$:
+ \begin{displaymath}
+ \T{x_i}S_k^{-1} x_i = \T{x_i}S_{k-1}^{-1}x_i
+ - \frac{(\T{x_i}S_{k-1}^{-1}x_k)^2}{1+\T{x_k}S_{k-1}^{-1}x_k}.
+ \end{displaymath}
+
+ By the Cauchy-Schwarz inequality:
+ \begin{displaymath}
+ (\T{x_i}S_{k-1}^{-1}x_k)^2 \leq \T{x_i}S_{k-1}^{-1}x_i\;\T{x_k}S_{k-1}^{-1}x_k.
+ \end{displaymath}
+
+ Hence:
+ \begin{displaymath}
+ \T{x_i}S_k^{-1} x_i \geq \T{x_i}S_{k-1}^{-1}x_i
+ - \T{x_i}S_{k-1}^{-1}x_i\frac{\T{x_k}S_{k-1}^{-1}x_k}{1+\T{x_k}S_{k-1}^{-1}x_k}.
+ \end{displaymath}
+
+ But $\T{x_k}S_{k-1}^{-1}x_k\leq 1$ and $\frac{a}{1+a}\leq \frac{1}{2}$ if
+ $0\leq a\leq 1$, so:
+ \begin{displaymath}
+ \T{x_i}S_{k}^{-1}x_i \geq \T{x_i}S_{k-1}^{-1}x_i
+ - \frac{1}{2}\T{x_i}S_{k-1}^{-1}x_i\geq \frac{\T{x_i}S_{k-1}^{-1}x_i}{2}.
+ \end{displaymath}
+
+ By induction:
+ \begin{displaymath}
+ \T{x_i}S_n^{-1} x_i \geq \frac{\T{x_i}x_i}{2^n}.
+ \end{displaymath}
+
+ Using that $\T{x_i}x_i\geq b$ concludes the proof of the left-hand side
+ of the lemma's inequality.
+\end{proof}
+
+Let us introduce the Lagrangian of problem~\eqref{eq:perturbed-primal}:
+\begin{displaymath}
+ \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) \defeq L(\lambda)
+ + \T{\mu}(\lambda-\alpha\mathbf{1}) + \T{\nu}(\mathbf{1}-\lambda) + \xi(1-\T{c}\lambda),
+\end{displaymath}
+so that:
+\begin{displaymath}
+ L^*_c(\alpha) = \min_{\mu, \nu, \xi\geq 0}\max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi).
+\end{displaymath}
+Similarly, we define $\mathcal{L}_{c}\defeq\mathcal{L}_{c, 0}$, the Lagrangian
+of \eqref{eq:primal}.
+
+Let $\lambda^*$ be primal optimal for \eqref{eq:perturbed-primal}, and $(\mu^*,
+\nu^*, \xi^*)$ be dual optimal for the same problem. In addition to primal and
+dual feasibility, the KKT conditions give, for all $i\in\{1, \ldots, n\}$:
+\begin{gather*}
+ \partial_i L(\lambda^*) + \mu_i^* - \nu_i^* - \xi^* c_i = 0\\
+ \mu_i^*(\lambda_i^* - \alpha) = 0\\
+ \nu_i^*(1 - \lambda_i^*) = 0
+\end{gather*}
+
+\begin{lemma}\label{lemma:proximity}
+We have:
+\begin{displaymath}
+ L^*_c - \alpha n^2\leq L^*_c(\alpha) \leq L^*_c.
+\end{displaymath}
+In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2$.
+\end{lemma}
+
+\begin{proof}
+ $\alpha\mapsto L^*_c(\alpha)$ is a non-increasing function, as it is the
+ maximum value of $L$ over a feasible set that shrinks as $\alpha$
+ increases; this gives the rightmost inequality.
+
+ Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c, \alpha})$, that is:
+ \begin{displaymath}
+ L^*_{c}(\alpha) = \max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*).
+ \end{displaymath}
+
+ Note that $\mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*)
+ = \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*)
+ - \alpha\T{\mathbf{1}}\mu^*$, and that for any $\lambda$ feasible for
+ problem \eqref{eq:primal}, $\mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*)
+ \geq L(\lambda)$. Hence,
+ \begin{displaymath}
+ L^*_{c}(\alpha) \geq L(\lambda) - \alpha\T{\mathbf{1}}\mu^*
+ \end{displaymath}
+ for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for
+ $\lambda$ primal optimal for \eqref{eq:primal}:
+ \begin{equation}\label{eq:local-1}
+ L^*_{c}(\alpha) \geq L^*_c - \alpha\T{\mathbf{1}}\mu^*.
+ \end{equation}
+
+ Let us denote by $M$ the support of $\mu^*$, that is $M\defeq
+ \{i\,|\,\mu_i^* > 0\}$, and by $\lambda^*$ a primal optimal point for
+ \eqref{eq:perturbed-primal}.
+ From the KKT conditions, we see that:
+ \begin{displaymath}
+ M \subseteq \{i\,|\,\lambda_i^* = \alpha\}.
+ \end{displaymath}
+
+ Let us first assume that $|M| = 0$; then $\T{\mathbf{1}}\mu^*=0$ and the
+ lemma follows.
+
+ We now assume that $|M|\geq 1$. In this case $\T{c}\lambda^* = 1$;
+ otherwise we could increase the coordinates of $\lambda^*$ in $M$, which
+ would increase the value of the objective function and contradict the
+ optimality of $\lambda^*$. Note also that $|M|\leq n-1$; otherwise, since
+ $\alpha< \frac{1}{n}$, we would have $\T{c}\lambda^* < 1$, which again
+ contradicts the optimality of $\lambda^*$. Let us write:
+ \begin{displaymath}
+ 1 = \T{c}\lambda^* = \alpha\sum_{i\in M}c_i + \sum_{i\in \bar{M}}\lambda_i^*c_i
+ \leq \alpha |M| + (n-|M|)\max_{i\in \bar{M}} c_i.
+ \end{displaymath}
+ That is:
+ \begin{equation}\label{local-2}
+ \max_{i\in\bar{M}} c_i \geq \frac{1 - |M|\alpha}{n-|M|}> \frac{1}{n},
+ \end{equation}
+ where the last inequality again uses that $\alpha<\frac{1}{n}$. From the
+ KKT conditions, we see that for $i\in M$, $\nu_i^* = 0$ and:
+ \begin{equation}\label{local-3}
+ \mu_i^* = \xi^*c_i - \partial_i L(\lambda^*)\leq \xi^*c_i\leq \xi^*,
+ \end{equation}
+ since $\partial_i L(\lambda^*)\geq 0$ and $c_i\leq 1$.
+
+ Furthermore, using the KKT conditions again, we have that:
+ \begin{equation}\label{local-4}
+ \xi^* \leq \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i}
+ = \frac{1}{\max_{i\in\bar{M}} c_i},
+ \end{equation}
+ where the second inequality uses the upper bound of
+ Lemma~\ref{lemma:derivative-bounds}.
+
+ Combining \eqref{local-2}, \eqref{local-3} and \eqref{local-4}, we get that:
+ \begin{displaymath}
+ \sum_{i\in M}\mu_i^* \leq |M|\xi^* \leq n\xi^*\leq \frac{n}{\max_{i\in\bar{M}} c_i} \leq n^2.
+ \end{displaymath}
+
+ This implies that:
+ \begin{displaymath}
+ \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^*\leq n^2,
+ \end{displaymath}
+ which in addition to \eqref{eq:local-1} proves the lemma.
+\end{proof}
+
+\begin{lemma}\label{lemma:monotonicity}
+ If $c' = (c_i', c_{-i})$ with $c_i'\leq c_i - \delta$, we have:
+ \begin{displaymath}
+ L^*_{c'}(\alpha) \geq L^*_c(\alpha) + \frac{\alpha\delta b}{2^n}.
+ \end{displaymath}
+\end{lemma}
+
+\begin{proof}
+ Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c', \alpha})$. Noting that:
+ \begin{displaymath}
+ \mathcal{L}_{c', \alpha}(\lambda, \mu^*, \nu^*, \xi^*) \geq
+ \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) + \lambda_i\xi^*\delta,
+ \end{displaymath}
+ we get, similarly to Lemma~\ref{lemma:proximity}:
+ \begin{displaymath}
+ L^*_{c'}(\alpha) \geq L(\lambda) + \lambda_i\xi^*\delta
+ \end{displaymath}
+ for any $\lambda$ feasible for \eqref{eq:perturbed-primal}. In particular,
+ for $\lambda^*$ primal optimal for \eqref{eq:perturbed-primal}:
+ \begin{displaymath}
+ L^*_{c'}(\alpha) \geq L^*_{c}(\alpha) + \alpha\xi^*\delta,
+ \end{displaymath}
+ since $\lambda_i^*\geq \alpha$.
+
+ Using the KKT conditions for $(P_{c', \alpha})$, we can write:
+ \begin{displaymath}
+ \xi^* = \inf_{i:\lambda^{'*}_i>\alpha} \frac{\T{x_i}S(\lambda^{'*})^{-1}x_i}{c_i'}
+ \end{displaymath}
+ with $\lambda^{'*}$ optimal for $(P_{c', \alpha})$. Since $c_i'\leq 1$,
+ using Lemma~\ref{lemma:derivative-bounds}, we get that
+ $\xi^*\geq \frac{b}{2^n}$, which concludes the proof.
+\end{proof}
+
+\subsubsection*{End of the proof of Proposition~\ref{prop:monotonicity}}
+
+Let $\tilde{L}^*_c$ be the approximation computed by
+Algorithm~\ref{alg:monotone}.
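+Note that, with the costs normalized by the budget, Algorithm~\ref{alg:monotone}
+sets $\alpha = \varepsilon(\delta+n^2)^{-1}$, so that the two error terms
+appearing in the first item below balance exactly:
+$\alpha\delta + \alpha n^2 = \varepsilon$.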
+\begin{enumerate}
+ \item Using Lemma~\ref{lemma:proximity}:
+\begin{displaymath}
+ |\tilde{L}^*_c - L^*_c| \leq |\tilde{L}^*_c - L^*_c(\alpha)| + |L^*_c(\alpha) - L^*_c|
+ \leq \alpha\delta + \alpha n^2 = \varepsilon,
+\end{displaymath}
+which proves the $\varepsilon$-accuracy.
+
+\item For the $\delta$-decreasingness, let $c' = (c_i', c_{-i})$ with $c_i'\leq
+ c_i-\delta$; then:
+\begin{displaymath}
+ \tilde{L}^*_{c'} \geq L^*_{c'}(\alpha) - \frac{\alpha\delta b}{2^{n+1}}
+ \geq L^*_c(\alpha) + \frac{\alpha\delta b}{2^{n+1}}
+ \geq \tilde{L}^*_c,
+\end{displaymath}
+where the first and last inequalities come from the accuracy of the
+approximation, and the middle inequality follows from
+Lemma~\ref{lemma:monotonicity}.
+
+\item The accuracy required of the approximation computed in line 2 of
+Algorithm~\ref{alg:monotone} is:
+\begin{displaymath}
+ A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2)}.
+\end{displaymath}
+
+Note that:
+\begin{displaymath}
+ \log\log A^{-1} = O\bigg(\log\log\frac{1}{\varepsilon\delta b} + \log n\bigg).
+\end{displaymath}
+Using Lemma~\ref{lemma:barrier} concludes the proof of the running time.\qed
+\end{enumerate}
+
+\subsection{Proof of Theorem~\ref{thm:main}}\label{sec:proofofmainthm}
+
+We now present the proof of Theorem~\ref{thm:main}. $\delta$-truthfulness and
+individual rationality follow from $\delta$-monotonicity and threshold
+payments. $\delta$-monotonicity and budget feasibility follow from the same
+arguments as in the analysis of \citeN{chen}; for the sake of completeness,
+we restate their proof here.
+
 \begin{lemma}\label{lemma:monotone}
 Our mechanism for \EDP{} is $\delta$-monotone and budget feasible.
 \end{lemma}
+
 \begin{proof}
 Consider an agent $i$ with cost $c_i$ that is selected by the mechanism, and suppose that she reports
@@ -28,11 +580,6 @@ Our mechanism for \EDP{} is $\delta$-monotone and budget feasible.
 Suppose now that when $i$ reports $c_i$, $OPT'_{-i^*} < C V(i^*)$. Then
 $s_i(c_i,c_{-i})=1$ iff $i = i^*$. Reporting $c_{i^*}'\leq c_{i^*}$ does not change $V(i^*)$ nor $OPT'_{-i^*} \leq C V(i^*)$; thus $s_{i^*}(c_{i^*}',c_{-i^*})=1$, so the mechanism is monotone.
-%\end{proof}
-%\begin{lemma}\label{lemma:budget-feasibility}
-%The mechanism is budget feasible.
-%\end{lemma}
-%\begin{proof}
 To show budget feasibility, suppose that $OPT'_{-i^*} < C V(i^*)$. Then the
 mechanism selects $i^*$. Since the bid of $i^*$ does not affect the above condition, the threshold payment of $i^*$ is $B$ and the mechanism is budget feasible. Suppose that $OPT'_{-i^*} \geq C V(i^*)$.
@@ -57,4 +604,116 @@ Hence, the total payment is bounded by the telescopic sum:
 \end{displaymath}
 \end{proof}
+The complexity of the mechanism is given by the following lemma.
+
+\begin{lemma}[Complexity]\label{lemma:complexity}
+ For any $\varepsilon > 0$ and any $\delta>0$, the complexity of the
+ mechanism is $O\big(\text{poly}(n, d, \log\log\frac{1}{b\varepsilon\delta})\big)$.
+\end{lemma}
+
+\begin{proof}
+ The value function $V$ in \eqref{modified} can be computed in time
+ $O(\text{poly}(n, d))$ and the mechanism only involves a linear
+ number of queries to the function $V$.
+
+ By Proposition~\ref{prop:monotonicity}, line 3 of Algorithm~\ref{mechanism}
+ can be computed in time
+ $O(\text{poly}(n, d, \log\log \varepsilon^{-1}))$. Hence the allocation
+ function's complexity is as stated.
+\junk{
+ Using Singer's characterization of the threshold payments
+ \cite{singer-mechanisms}, one can verify that they can be computed in time
+ $O(\text{poly}(n, d))$.
+ }
+\end{proof}
+
+Finally, we prove the approximation ratio of the mechanism.
+
+We use the following lemma from \cite{chen}, which bounds $OPT$ in terms of
+the value of $S_G$, as computed in Algorithm~\ref{mechanism}, and $i^*$, the
+element of maximum value.
+
+\begin{lemma}[\cite{chen}]\label{lemma:greedy-bound}
+Let $S_G$ be the set computed in Algorithm~\ref{mechanism} and let
+$i^*=\argmax_{i\in\mathcal{N}} V(\{i\})$. We have:
+\begin{displaymath}
+OPT \leq \frac{e}{e-1}\big( 3 V(S_G) + 2 V(i^*)\big).
+\end{displaymath}
+\end{lemma}
+
+Using Proposition~\ref{prop:relaxation} and Lemma~\ref{lemma:greedy-bound},
+we can complete the proof of Theorem~\ref{thm:main} by showing that, for any
+$\varepsilon > 0$, if $OPT_{-i^*}'$, the optimal value of $L$ when $i^*$ is
+excluded from $\mathcal{N}$, has been computed to precision $\varepsilon$,
+then the set $S^*$ allocated by the mechanism is such that:
+\begin{equation} \label{approxbound}
+OPT
+\leq \frac{10e\!-\!3 + \sqrt{64e^2\!-\!24e\!+\!9}}{2(e\!-\!1)} V(S^*)\!
++ \! \varepsilon .
+\end{equation}
+To see this, let $L^*_{-i^*}$ be the true maximum value of $L$ subject to
+$\lambda_{i^*}=0$, $\sum_{i\in \mathcal{N}\setminus\{i^*\}}\lambda_i c_i\leq B$.
+From line 3 of Algorithm~\ref{mechanism}, we have
+$L^*_{-i^*}-\varepsilon\leq OPT_{-i^*}' \leq L^*_{-i^*}+\varepsilon$.
+
+If the condition on line 4 of the algorithm holds, then
+\begin{displaymath}
+ V(i^*) \geq \frac{1}{C}L^*_{-i^*}-\frac{\varepsilon}{C} \geq
+ \frac{1}{C}OPT_{-i^*} -\frac{\varepsilon}{C}.
+\end{displaymath}
+Indeed, $L^*_{-i^*}\geq OPT_{-i^*}$ as $L$ is a fractional relaxation of $V$.
+Also, $OPT \leq OPT_{-i^*} + V(i^*)$; hence,
+\begin{equation}\label{eq:bound1}
+ OPT\leq (1+C)V(i^*) + \varepsilon.
+\end{equation}
+
+If the condition does not hold, by observing that $L^*_{-i^*}\leq L^*_c$ and
+applying Proposition~\ref{prop:relaxation}, we get
+\begin{displaymath}
+ V(i^*)\leq \frac{1}{C}L^*_{-i^*} + \frac{\varepsilon}{C}
+ \leq \frac{1}{C} \big(2 OPT + 2 V(i^*)\big) + \frac{\varepsilon}{C}.
+\end{displaymath}
+Applying Lemma~\ref{lemma:greedy-bound},
+\begin{displaymath}
+ V(i^*) \leq \frac{1}{C}\left(\frac{2e}{e-1}\big(3 V(S_G)
+ + 2 V(i^*)\big) + 2 V(i^*)\right) + \frac{\varepsilon}{C}.
+\end{displaymath}
+Thus, if $C$ is such that $C(e-1) -6e +2 > 0$,
+\begin{align*}
+ V(i^*) \leq \frac{6e}{C(e-1)- 6e + 2} V(S_G)
+ + \frac{(e-1)\varepsilon}{C(e-1)- 6e + 2}.
+\end{align*}
+Finally, using Lemma~\ref{lemma:greedy-bound} again, we get
+\begin{equation}\label{eq:bound2}
+ OPT \leq
+ \frac{3e}{e-1}\left( 1 + \frac{4e}{C (e-1) -6e +2}\right) V(S_G)
+ + \frac{2e\varepsilon}{C(e-1)- 6e + 2}.
+\end{equation}
+To minimize the coefficients of $V(i^*)$ and $V(S_G)$ in \eqref{eq:bound1}
+and \eqref{eq:bound2} respectively, we wish to choose $C$ minimizing
+\begin{displaymath}
+ \max\left(1+C,\frac{3e}{e-1}\left( 1 + \frac{4e}{C (e-1) -6e +2}
+ \right)\right).
+\end{displaymath}
+This function has two local minima, only one of which satisfies $C(e-1) -6e
++2 > 0$; it is given by
+\begin{equation}\label{eq:constant}
+ C = \frac{8e-1 + \sqrt{64e^2-24e + 9}}{2(e-1)}.
+\end{equation}
+For this choice of $C$, $\frac{2e\varepsilon}{C(e-1)- 6e + 2}\leq \varepsilon.$
+Substituting the expression for $C$ into \eqref{eq:bound1} and \eqref{eq:bound2}
+gives the approximation ratio in \eqref{approxbound}, and concludes the proof
+of Theorem~\ref{thm:main}.\hspace*{\stretch{1}}\qed
+
+\subsection{Proof of Theorem~\ref{thm:lowerbound}}
+Suppose, for contradiction, that such a mechanism exists.
+Consider two experiments with dimension $d=2$, such that $x_1 = e_1=[1,0]$,
+$x_2=e_2=[0,1]$ and $c_1=c_2=B/2+\epsilon$. Then one of the two experiments,
+say $x_1$, must be in the set selected by the mechanism; otherwise the ratio
+is unbounded, a contradiction. If $x_1$ lowers its cost to $B/2-\epsilon$, by
+monotonicity it remains in the solution; by threshold payment, it is paid at
+least $B/2+\epsilon$. So $x_2$ is not included in the solution, by budget
+feasibility and individual rationality: hence, the selected set attains value
+$\log 2$, while the optimal value is $2\log 2$.\hspace*{\stretch{1}}\qed
diff --git a/approximation.tex b/approximation.tex
index 926ca1d..81a6d0c 100644
--- a/approximation.tex
+++ b/approximation.tex
@@ -1,16 +1,27 @@
-\EDP{} is NP-hard, but designing a mechanism for this problem will involve
-being able to find an approximation $\tilde{L}^*(c)$ of $OPT$ with the
-following three properties: first, it must be non-decreasing along
-coordinate-axis, second it must have a constant approximation ratio to $OPT$
-and finally, it must be computable in polynomial time.
+As noted above, \EDP{} is NP-hard. Designing a mechanism for this problem, as
+we will see in Section~\ref{sec:mechanism}, requires an approximation of its
+optimal value $OPT$, defined in \eqref{eq:non-strategic}. In addition to being
+computable in polynomial time and having a bounded approximation ratio to
+$OPT$, we will also require this approximation to be non-increasing in the
+following sense:
 
-This approximation will be obtained by introducing a concave optimization
+\begin{definition}
+Let $f$ be a function from $\reals^n$ to $\reals$. We say that $f$ is
+\emph{non-decreasing (resp. non-increasing) along the $i$-th coordinate} iff:
+\begin{displaymath}
+\forall x\in\reals^n,\;
+t\mapsto f(x+ te_i)\; \text{is non-decreasing (resp. non-increasing)},
+\end{displaymath}
+where $e_i$ is the $i$-th canonical basis vector of $\reals^n$.
+
+We say that $f$ is \emph{non-decreasing} (resp. \emph{non-increasing}) iff it
+is non-decreasing (resp. non-increasing) along all coordinates.
+\end{definition}
+
+Such an approximation will be obtained by introducing a concave optimization
 problem with a constant approximation ratio to \EDP{}
-(Proposition~\ref{prop:relaxation}). Using Newton's method, it is then
-possible to solve this concave optimization problem to an arbitrary precision.
-However, this approximation breaks the monotonicity of the approximation.
-Finding a monotone approximate solution to the concave problem will be the
-object of (Section~\ref{sec:monotonicity}).
+(Proposition~\ref{prop:relaxation}) and then showing how to approximately
+solve this problem in a monotone way.
 
 \subsection{A concave relaxation of \EDP}\label{sec:concave}
 
@@ -41,14 +52,14 @@ guarantees for an interesting class of optimization problems through the
 \emph{pipage rounding} framework, which has been successfully applied in
 \citeN{chen, singer-influence}.
 
-However, for the specific function $V$ defined in \eqref{modified}, the
+For the specific function $V$ defined in \eqref{modified}, the
 multi-linear extension cannot be computed (and \emph{a fortiori} maximized) in
 polynomial time.
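+Indeed, evaluating $F$ at a single point already requires summing over all
+$2^n$ subsets,
+\begin{displaymath}
+ F(\lambda) = \sum_{S\subseteq\mathcal{N}} P_\mathcal{N}^\lambda(S)\,V(S),
+\end{displaymath}
+and, for the $\log\det$ objective, no polynomially computable closed form of
+this expectation is known.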
Hence, we introduce a new function $L$:
\begin{equation}\label{eq:our-relaxation}
 \forall\,\lambda\in[0,1]^n,\quad
 L(\lambda) \defeq \log\det\left(I_d + \sum_{i\in\mathcal{N}} \lambda_i x_i\T{x_i}\right),
 \end{equation}
-Note that the relaxation $L$ that we introduced in \eqref{eq:our-relaxation},
+Note that this relaxation
 follows naturally from the \emph{multi-linear} extension by swapping the
 expectation and $V$ in \eqref{eq:multi-linear}:
 \begin{displaymath}
@@ -64,330 +75,73 @@ a relaxation. We define:
 \leq B\right\}
 \end{equation}
 
-The key property of the relaxation $L$, which is our main technical result, is
-that it has constant approximation ratios to the multi-linear extension $F$.
-
-\begin{lemma}\label{lemma:relaxation-ratio}
- % The following inequality holds:
-For all $\lambda\in[0,1]^{n},$
- %\begin{displaymath}
- $ \frac{1}{2}
- \,L(\lambda)\leq
- F(\lambda)\leq L(\lambda)$.
- %\end{displaymath}
-\end{lemma}
-\begin{proof}
- The bound $F_{\mathcal{N}}(\lambda)\leq L_{\mathcal{N}(\lambda)}$ follows by the concavity of the $\log\det$ function.
- To show the lower bound,
- we first prove that $\frac{1}{2}$ is a lower bound of the ratio $\partial_i
- F(\lambda)/\partial_i L(\lambda)$, where
- $\partial_i\, \cdot$ denotes the partial derivative with respect to the
- $i$-th variable.
-
- Let us start by computing the derivatives of $F$ and
- $L$ with respect to the $i$-th component.
- Observe that
- \begin{displaymath}
- \partial_i P_\mathcal{N}^\lambda(S) = \left\{
- \begin{aligned}
- & P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})\;\textrm{if}\;
- i\in S, \\
- & - P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\;\textrm{if}\;
- i\in \mathcal{N}\setminus S. \\
- \end{aligned}\right.
- \end{displaymath}
- Hence,
- \begin{displaymath}
- \partial_i F(\lambda) =
- \sum_{\substack{S\subseteq\mathcal{N}\\ i\in S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})V(S)
- - \sum_{\substack{S\subseteq\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S).
- \end{displaymath}
- Now, using that every $S$ such that $i\in S$ can be uniquely written as
- $S'\cup\{i\}$, we can write:
- \begin{displaymath}
- \partial_i F(\lambda) =
- \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\big(V(S\cup\{i\})
- - V(S)\big).
- \end{displaymath}
- The marginal contribution of $i$ to
- $S$ can be written as
-\begin{align*}
-V(S\cup \{i\}) - V(S)& = \frac{1}{2}\log\det(I_d
- + \T{X_S}X_S + x_i\T{x_i})
- - \frac{1}{2}\log\det(I_d + \T{X_S}X_S)\\
- & = \frac{1}{2}\log\det(I_d + x_i\T{x_i}(I_d
-+\T{X_S}X_S)^{-1})
- = \frac{1}{2}\log(1 + \T{x_i}A(S)^{-1}x_i)
-\end{align*}
-where $A(S) \defeq I_d+ \T{X_S}X_S$, and the last equality follows from the
-Sylvester's determinant identity~\cite{sylvester}.
-% $ V(S\cup\{i\}) - V(S) = \frac{1}{2}\log\left(1 + \T{x_i} A(S)^{-1}x_i\right)$.
-Using this,
- \begin{displaymath}
- \partial_i F(\lambda) = \frac{1}{2}
- \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
- \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)
- \end{displaymath}
- The computation of the derivative of $L$ uses standard matrix
- calculus: writing $\tilde{A}(\lambda) = I_d+\sum_{i\in
- \mathcal{N}}\lambda_ix_i\T{x_i}$,
- \begin{displaymath}
- \det \tilde{A}(\lambda + h\cdot e_i) = \det\big(\tilde{A}(\lambda)
- + hx_i\T{x_i}\big)
- =\det \tilde{A}(\lambda)\big(1+
- h\T{x_i}\tilde{A}(\lambda)^{-1}x_i\big).
- \end{displaymath} - Hence, - \begin{displaymath} - \log\det\tilde{A}(\lambda + h\cdot e_i)= \log\det\tilde{A}(\lambda) - + h\T{x_i}\tilde{A}(\lambda)^{-1}x_i + o(h), - \end{displaymath} - which implies - \begin{displaymath} - \partial_i L(\lambda) - =\frac{1}{2} \T{x_i}\tilde{A}(\lambda)^{-1}x_i. - \end{displaymath} - -For two symmetric matrices $A$ and $B$, we write $A\succ B$ ($A\succeq B$) if -$A-B$ is positive definite (positive semi-definite). This order allows us to -define the notion of a \emph{decreasing} as well as \emph{convex} matrix -function, similarly to their real counterparts. With this definition, matrix -inversion is decreasing and convex over symmetric positive definite -matrices (see Example 3.48 p. 110 in \cite{boyd2004convex}). -In particular, -\begin{gather*} - \forall S\subseteq\mathcal{N},\quad A(S)^{-1} \succeq A(S\cup\{i\})^{-1} -\end{gather*} -as $A(S)\preceq A(S\cup\{i\})$. Observe that - % \begin{gather*} - % \forall -$P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\geq P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\})$ for all $S\subseteq\mathcal{N}\setminus\{i\}$, and - % ,\\ - $P_{\mathcal{N}\setminus\{i\}}^\lambda(S) \geq P_\mathcal{N}^\lambda(S),$ for all $S\subseteq\mathcal{N}$. - %\end{gather*} - Hence, - \begin{align*} - \partial_i F(\lambda) - % & = \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} P_\mathcal{N}^\lambda(S) \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\ - & \geq \frac{1}{4} - \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} - P_{\mathcal{N}\setminus\{i\}}^\lambda(S) - \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\ - &\hspace{-3.5em}+\frac{1}{4} - \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} - P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\}) - \log\Big(1 + \T{x_i}A(S\cup\{i\})^{-1}x_i\Big)\\ - &\geq \frac{1}{4} - \sum_{S\subseteq\mathcal{N}} - P_\mathcal{N}^\lambda(S) - \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big). - \end{align*} - Using that $A(S)\succeq I_d$ we get that $\T{x_i}A(S)^{-1}x_i \leq - \norm{x_i}_2^2 \leq 1$. Moreover, $\log(1+x)\geq x$ for all $x\leq 1$. - Hence, - \begin{displaymath} - \partial_i F(\lambda) \geq - \frac{1}{4} - \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)^{-1}\bigg)x_i. - \end{displaymath} - Finally, using that the inverse is a matrix convex function over symmetric - positive definite matrices: - \begin{displaymath} - \partial_i F(\lambda) \geq - \frac{1}{4} - \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)\bigg)^{-1}x_i - = \frac{1}{4}\T{x_i}\tilde{A}(\lambda)^{-1}x_i - = \frac{1}{2} - \partial_i L(\lambda). - \end{displaymath} - -Having bound the ratio between the partial derivatives, we now bound the ratio $F(\lambda)/L(\lambda)$ from below. Consider the following cases. - First, if the minimum of the ratio - $F(\lambda)/L(\lambda)$ is attained at a point interior to the hypercube, then it is - a critical point, \emph{i.e.}, $\partial_i \big(F(\lambda)/L(\lambda)\big)=0$ for all $i\in \mathcal{N}$; hence, at such a critical point: - \begin{equation}\label{eq:lhopital} - \frac{F(\lambda)}{L(\lambda)} - = \frac{\partial_i F(\lambda)}{\partial_i - L(\lambda)} \geq \frac{1}{2}. 
- \end{equation} - Second, if the minimum is attained as - $\lambda$ converges to zero in, \emph{e.g.}, the $l_2$ norm, by the Taylor approximation, one can write: - \begin{displaymath} - \frac{F(\lambda)}{L(\lambda)} - \sim_{\lambda\rightarrow 0} - \frac{\sum_{i\in \mathcal{N}}\lambda_i\partial_i F(0)} - {\sum_{i\in\mathcal{N}}\lambda_i\partial_i L(0)} - \geq \frac{1}{2}, - \end{displaymath} - \emph{i.e.}, the ratio $\frac{F(\lambda)}{L(\lambda)}$ is necessarily bounded from below by 1/2 for small enough $\lambda$. - Finally, if the minimum is attained on a face of the hypercube $[0,1]^n$ (a face is - defined as a subset of the hypercube where one of the variable is fixed to - 0 or 1), without loss of generality, we can assume that the minimum is - attained on the face where the $n$-th variable has been fixed - to 0 or 1. Then, either the minimum is attained at a point interior to the - face or on a boundary of the face. In the first sub-case, relation - \eqref{eq:lhopital} still characterizes the minimum for $i< n$. - In the second sub-case, by repeating the argument again by induction, we see - that all is left to do is to show that the bound holds for the vertices of - the cube (the faces of dimension 1). The vertices are exactly the binary - points, for which we know that both relaxations are equal to the value - function $V$. Hence, the ratio is equal to 1 on the vertices. -\end{proof} - -We now prove that $F$ admits the following exchange property: let -$\lambda$ be a feasible element of $[0,1]^n$, it is possible to trade one -fractional component of $\lambda$ for another until one of them becomes -integral, obtaining a new element $\tilde{\lambda}$ which is both feasible and -for which $F(\tilde{\lambda})\geq F(\lambda)$. Here, by feasibility of a point -$\lambda$, we mean that it satisfies the budget constraint $\sum_{i=1}^n -\lambda_i c_i \leq B$. This rounding property is referred to in the literature -as \emph{cross-convexity} (see, \emph{e.g.}, \cite{dughmi}), or -$\varepsilon$-convexity by \citeN{pipage}. - -\begin{lemma}[Rounding]\label{lemma:rounding} - For any feasible $\lambda\in[0,1]^{n}$, there exists a feasible - $\bar{\lambda}\in[0,1]^{n}$ such that at most one of its components is - fractional %, that is, lies in $(0,1)$ and: - and $F_{\mathcal{N}}(\lambda)\leq F_{\mathcal{N}}(\bar{\lambda})$. -\end{lemma} -\begin{proof} - We give a rounding procedure which, given a feasible $\lambda$ with at least - two fractional components, returns some feasible $\lambda'$ with one less fractional - component such that $F(\lambda) \leq F(\lambda')$. - - Applying this procedure recursively yields the lemma's result. - Let us consider such a feasible $\lambda$. 
Let $i$ and $j$ be two
- fractional components of $\lambda$ and let us define the following
- function:
- \begin{displaymath}
- F_\lambda(\varepsilon) = F(\lambda_\varepsilon)
- \quad\textrm{where} \quad
- \lambda_\varepsilon = \lambda + \varepsilon\left(e_i-\frac{c_i}{c_j}e_j\right)
- \end{displaymath}
- It is easy to see that if $\lambda$ is feasible, then:
- \begin{equation}\label{eq:convex-interval}
- \forall\varepsilon\in\Big[\max\Big(-\lambda_i,(\lambda_j-1)\frac{c_j}{c_i}\Big), \min\Big(1-\lambda_i, \lambda_j
- \frac{c_j}{c_i}\Big)\Big],\;
- \lambda_\varepsilon\;\;\textrm{is feasible}
- \end{equation}
- Furthermore, the function $F_\lambda$ is convex; indeed:
- \begin{align*}
- F_\lambda(\varepsilon)
- & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}^\lambda(S')}\Big[
- (\lambda_i+\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i,j\})\\
- & + (\lambda_i+\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i\})
- + (1-\lambda_i-\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{j\})\\
- & + (1-\lambda_i-\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S')\Big]
- \end{align*}
- Thus, $F_\lambda$ is a degree 2 polynomial whose dominant coefficient is:
- \begin{displaymath}
- \frac{c_i}{c_j}\mathbb{E}_{S'\sim
- P_{\mathcal{N}\setminus\{i,j\}}^\lambda(S')}\Big[
- V(S'\cup\{i\})+V(S'\cup\{i\})\\
- -V(S'\cup\{i,j\})-V(S')\Big]
- \end{displaymath}
- which is positive by submodularity of $V$. Hence, the maximum of
- $F_\lambda$ over the interval given in \eqref{eq:convex-interval} is
- attained at one of its limit, at which either the $i$-th or $j$-th component of
- $\lambda_\varepsilon$ becomes integral.
-\end{proof}
-
-Using Lemma~\ref{lemma:rounding}, we can relate the multi-linear extension to
-$OPT$, and Lemma~\ref{lemma:relaxation-ratio} relates our relaxation $L$ to the
-multi-linear extension. Putting these together gives us the following result:
+The key property of our relaxation $L$ is that it has a bounded approximation
+ratio to the multi-linear extension $F$. This is one of our main technical
+contributions; it is stated and proved in Lemma~\ref{lemma:relaxation-ratio}
+in the Appendix. Moreover, the multi-linear extension $F$ has an exchange
+property (see Lemma~\ref{lemma:rounding}) which allows us to relate its value
+to $OPT$ through rounding. Together, these properties give the following
+proposition, which is also proved in the Appendix.
 
 \begin{proposition}\label{prop:relaxation}
 $L^*_c \leq 2 OPT + 2\max_{i\in\mathcal{N}}V(i)$.
 \end{proposition}
 
-\begin{proof}
-Let us consider a feasible point $\lambda^*\in[0,1]^{n}$ such that
-$L(\lambda^*) = L^*_c$. By applying Lemma~\ref{lemma:relaxation-ratio} and
-Lemma~\ref{lemma:rounding} we get a feasible point $\bar{\lambda}$ with at most
-one fractional component such that
-\begin{equation}\label{eq:e1}
- L(\lambda^*) \leq 2 F(\bar{\lambda}).
-\end{equation}
- Let $\lambda_i$ denote the fractional component of $\bar{\lambda}$ and $S$
- denote the set whose indicator vector is $\bar{\lambda} - \lambda_i e_i$.
- By definition of the multi-linear extension $F$:
- \begin{displaymath}
- F(\bar{\lambda}) = (1-\lambda_i)V(S) +\lambda_i V(S\cup\{i\}).
- \end{displaymath}
- By submodularity of $V$, $V(S\cup\{i\})\leq V(S) + V(\{i\})$. Hence,
- \begin{displaymath}
- F(\bar{\lambda}) \leq V(S) + V(i).
- \end{displaymath}
- Note that since $\bar{\lambda}$ is feasible, $S$ is also feasible and
- $V(S)\leq OPT$. Hence,
- \begin{equation}\label{eq:e2}
- F(\bar{\lambda}) \leq OPT + \max_{i\in\mathcal{N}} V(i).
- \end{equation}
- Together, \eqref{eq:e1} and \eqref{eq:e2} imply the lemma.
-\end{proof}
-
-\subsection{A monotonous estimator}\label{sec:monotonicity}
+\subsection{Solving a convex problem monotonically}\label{sec:monotonicity}
 
-The $\log\det$ function is concave and self-concordant (see
-\cite{boyd2004convex}), in this case, the analysis of the barrier method in
-in \cite{boyd2004convex} (Section 11.5.5) can be summarized in the following
-lemma:
+Note that the feasible set of Problem~\eqref{eq:primal} increases (with
+respect to inclusion) when the costs decrease. As a consequence, $c\mapsto
+L^*_c$ is non-increasing.
 
-\begin{lemma}\label{lemma:barrier}
-For any $\varepsilon>0$, the barrier method computes an $\varepsilon$-accurate
-approximation of $L^*_c$ in time $O(poly(n,d,\log\log\varepsilon^{-1})$.
-\end{lemma}
+Furthermore, since \eqref{eq:primal} is a convex optimization problem,
+standard convex optimization algorithms (Lemma~\ref{lemma:barrier} gives
+a formal statement for our specific problem) allow us to compute
+an $\varepsilon$-accurate approximation of its optimal value, as defined below.
 
-Note however that even though $L^*_c$ is non-decreasing along coordinate axis
-(if one of the cost decreases, then the feasible set of \eqref{eq:primal}
-increases), this will not necessarily be the case for an $\varepsilon$-accurate
-approximation of $L^*_c$ and Lemma~\ref{lemma:barrier} in itself is not
-sufficient to provide an approximation satisfying the
-properties requested at the beginning of Section~\ref{sec:approximation}.
+\begin{definition}
+$a$ is an $\varepsilon$-accurate approximation of $b$ iff $|a-b|\leq \varepsilon$.
+\end{definition}
 
-The estimator we will construct in this section will have a slightly weaker
-form of coordinate-wise monotonicity: \emph{$\delta$-monotonicity}.
+Note however that an $\varepsilon$-accurate approximation of a non-increasing
+function is not in general non-increasing itself. The goal of this section is
+to approximate $L^*_c$ while preserving monotonicity. The estimator we
+construct has a weaker form of monotonicity that we call
+\emph{$\delta$-monotonicity}.
 
 \begin{definition}
 Let $f$ be a function from $\reals^n$ to $\reals$, we say that $f$ is
-\emph{$\delta$-increasing} iff:
-\begin{displaymath}
+\emph{$\delta$-increasing along the $i$-th coordinate} iff:
+\begin{equation}\label{eq:dd}
 \forall x\in\reals^n,\;
 \forall \mu\geq\delta,\;
- \forall i\in\{1,\ldots,n\},\;
 f(x+\mu e_i)\geq f(x)
-\end{displaymath}
-where $e_i$ is the $i$-th basis vector of $\reals^n$. We define
-\emph{$\delta$-decreasing} functions similarly.
+\end{equation}
+where $e_i$ is the $i$-th canonical basis vector of $\reals^n$. By extension,
+$f$ is $\delta$-increasing iff it is $\delta$-increasing along all coordinates.
+
+We define \emph{$\delta$-decreasing} functions by reversing the inequality in
+\eqref{eq:dd}.
 \end{definition}
 
-For the ease of presentation, we normalize the costs by dividing them by the
-budget $B$ so that the budget constraint in \eqref{eq:primal} now reads
-$\T{c}\lambda\leq 1$.
We consider a perturbation of \eqref{eq:primal} by -introducing: +We consider a perturbation of \eqref{eq:primal} by introducing: \begin{equation}\tag{$P_{c, \alpha}$}\label{eq:perturbed-primal} L^*_c(\alpha) \defeq \max_{\lambda\in[\alpha, 1]^{n}} \left\{L(\lambda) \Big| \sum_{i=1}^{n} \lambda_i c_i - \leq 1\right\} + \leq B\right\} \end{equation} Note that we have $L^*_c = L^*_c(0)$. We will also assume that -$\alpha<\frac{1}{n}$ so that \eqref{eq:perturbed-primal} has at least one -feasible point: $(\frac{1}{n},\ldots,\frac{1}{n})$. - -The $\delta$-decreasing approximation of $L^*_c$ is obtained by computing an -approximate solution of \eqref{eq:perturbed-primal}. +$\alpha<\frac{1}{nB}$ so that \eqref{eq:perturbed-primal} has at least one +feasible point: $(\frac{1}{nB},\ldots,\frac{1}{nB})$. By computing +an approximation of $L^*_c(\alpha)$ as in Algorithm~\ref{alg:monotone}, we +obtain a $\delta$-decreasing approximation of $L^*_c$. -\begin{algorithm}[h] +\begin{algorithm}[t] \caption{}\label{alg:monotone} \begin{algorithmic}[1] - \State $\alpha \gets \varepsilon(\delta+n^2)^{-1} $ + \State $\alpha \gets \varepsilon B^{-1}(\delta+n^2)^{-1} $ \State Compute a $\frac{1}{2^{n+1}}\alpha\delta b$-accurate approximation of - $L^*_c(\alpha)$ using the barrier method + $L^*_c(\alpha)$ \end{algorithmic} \end{algorithm} @@ -399,231 +153,3 @@ approximate solution of \eqref{eq:perturbed-primal}. \end{proposition} -We show that the optimal value of \eqref{eq:perturbed-primal} is close to the -optimal value of \eqref{eq:primal} (Lemma~\ref{lemma:proximity}) while being -well-behaved with respect to changes of the cost -(Lemma~\ref{lemma:monotonicity}). These lemmas together imply -Proposition~\ref{prop:monotonicity}. - - -\begin{lemma}\label{lemma:derivative-bounds} - Let $\partial_i L(\lambda)$ denote the $i$-th derivative of $L$, for $i\in\{1,\ldots, n\}$, then: - \begin{displaymath} - \forall\lambda\in[0, 1]^n,\;\frac{b}{2^n} \leq \partial_i L(\lambda) \leq 1 - \end{displaymath} -\end{lemma} - -\begin{proof} - Let us define: - \begin{displaymath} - S(\lambda)\defeq I_d + \sum_{i=1}^n \lambda_i x_i\T{x_i} - \quad\mathrm{and}\quad - S_k \defeq I_d + \sum_{i=1}^k x_i\T{x_i} - \end{displaymath} - - We have $\partial_i L(\lambda) = \T{x_i}S(\lambda)^{-1}x_i$. Since - $S(\lambda)\geq I_d$, $\partial_i L(\lambda)\leq \T{x_i}x_i \leq 1$, which - is the right-hand side of the lemma. - - For the left-hand side, note that $S(\lambda) \leq S_n$. Hence - $\partial_iL(\lambda)\geq \T{x_i}S_n^{-1}x_i$. 
- - Using the Sherman-Morrison formula, for all $k\geq 1$: - \begin{displaymath} - \T{x_i}S_k^{-1} x_i = \T{x_i}S_{k-1}^{-1}x_i - - \frac{(\T{x_i}S_{k-1}^{-1}x_k)^2}{1+\T{x_k}S_{k-1}^{-1}x_k} - \end{displaymath} - - By the Cauchy-Schwarz inequality: - \begin{displaymath} - (\T{x_i}S_{k-1}^{-1}x_k)^2 \leq \T{x_i}S_{k-1}^{-1}x_i\;\T{x_k}S_{k-1}^{-1}x_k - \end{displaymath} - - Hence: - \begin{displaymath} - \T{x_i}S_k^{-1} x_i \geq \T{x_i}S_{k-1}^{-1}x_i - - \T{x_i}S_{k-1}^{-1}x_i\frac{\T{x_k}S_{k-1}^{-1}x_k}{1+\T{x_k}S_{k-1}^{-1}x_k} - \end{displaymath} - - But $\T{x_k}S_{k-1}^{-1}x_k\leq 1$ and $\frac{a}{1+a}\leq \frac{1}{2}$ if - $0\leq a\leq 1$, so: - \begin{displaymath} - \T{x_i}S_{k}^{-1}x_i \geq \T{x_i}S_{k-1}^{-1}x_i - - \frac{1}{2}\T{x_i}S_{k-1}^{-1}x_i\geq \frac{\T{x_i}S_{k-1}^{-1}x_i}{2} - \end{displaymath} - - By induction: - \begin{displaymath} - \T{x_i}S_n^{-1} x_i \geq \frac{\T{x_i}x_i}{2^n} - \end{displaymath} - - Using that $\T{x_i}{x_i}\geq b$ concludes the proof of the left-hand side - of the lemma's inequality. -\end{proof} - -Let us introduce the lagrangian of problem, \eqref{eq:perturbed-primal}: - -\begin{displaymath} - \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) \defeq L(\lambda) - + \T{\mu}(\lambda-\alpha\mathbf{1}) + \T{\nu}(\mathbf{1}-\lambda) + \xi(1-\T{c}\lambda) -\end{displaymath} -so that: -\begin{displaymath} - L^*_c(\alpha) = \min_{\mu, \nu, \xi\geq 0}\max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) -\end{displaymath} -Similarly, we define $\mathcal{L}_{c}\defeq\mathcal{L}_{c, 0}$ the lagrangian of \eqref{eq:primal}. - -Let $\lambda^*$ be primal optimal for \eqref{eq:perturbed-primal}, and $(\mu^*, -\nu^*, \xi^*)$ be dual optimal for the same problem. In addition to primal and -dual feasibility, the KKT conditions give $\forall i\in\{1, \ldots, n\}$: -\begin{gather*} - \partial_i L(\lambda^*) + \mu_i^* - \nu_i^* - \xi^* c_i = 0\\ - \mu_i^*(\lambda_i^* - \alpha) = 0\\ - \nu_i^*(1 - \lambda_i^*) = 0 -\end{gather*} - -\begin{lemma}\label{lemma:proximity} -We have: -\begin{displaymath} - L^*_c - \alpha n^2\leq L^*_c(\alpha) \leq L^*_c -\end{displaymath} -In particular, $|L^*_c - L^*_c(\alpha)| \leq \alpha n^2$. -\end{lemma} - -\begin{proof} - $\alpha\mapsto L^*_c(\alpha)$ is a decreasing function as it is the - maximum value of the $L$ function over a set-decreasing domain, which gives - the rightmost inequality. - - Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c, \alpha})$, that is: - \begin{displaymath} - L^*_{c}(\alpha) = \max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) - \end{displaymath} - - Note that $\mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) - = \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) - - \alpha\T{\mathbf{1}}\mu^*$, and that for any $\lambda$ feasible for - problem \eqref{eq:primal}, $\mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) - \geq L(\lambda)$. Hence, - \begin{displaymath} - L^*_{c}(\alpha) \geq L(\lambda) - \alpha\T{\mathbf{1}}\mu^* - \end{displaymath} - for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for $\lambda$ primal optimal for $\eqref{eq:primal}$: - \begin{equation}\label{eq:local-1} - L^*_{c}(\alpha) \geq L^*_c - \alpha\T{\mathbf{1}}\mu^* - \end{equation} - - Let us denote by the $M$ the support of $\mu^*$, that is $M\defeq - \{i|\mu_i^* > 0\}$, and by $\lambda^*$ a primal optimal point for - $\eqref{eq:perturbed-primal}$. 
From the KKT conditions we see that: - \begin{displaymath} - M \subseteq \{i|\lambda_i^* = \alpha\} - \end{displaymath} - - - Let us first assume that $|M| = 0$, then $\T{\mathbf{1}}\mu^*=0$ and the lemma follows. - - We will now assume that $|M|\geq 1$. In this case $\T{c}\lambda^* - = 1$, otherwise we could increase the coordinates of $\lambda^*$ in $M$, - which would increase the value of the objective function and contradict the - optimality of $\lambda^*$. Note also, that $|M|\leq n-1$, otherwise, since - $\alpha< \frac{1}{n}$, we would have $\T{c}\lambda^*\ < 1$, which again - contradicts the optimality of $\lambda^*$. Let us write: - \begin{displaymath} - 1 = \T{c}\lambda^* = \alpha\sum_{i\in M}c_i + \sum_{i\in \bar{M}}\lambda_i^*c_i - \leq \alpha |M| + (n-|M|)\max_{i\in \bar{M}} c_i - \end{displaymath} - That is: - \begin{equation}\label{local-2} - \max_{i\in\bar{M}} c_i \geq \frac{1 - |M|\alpha}{n-|M|}> \frac{1}{n} - \end{equation} - where the last inequality uses again that $\alpha<\frac{1}{n}$. From the - KKT conditions, we see that for $i\in M$, $\nu_i^* = 0$ and: - \begin{equation}\label{local-3} - \mu_i^* = \xi^*c_i - \partial_i L(\lambda^*)\leq \xi^*c_i\leq \xi^* - \end{equation} - since $\partial_i L(\lambda^*)\geq 0$ and $c_i\leq 1$. - - Furthermore, using the KKT conditions again, we have that: - \begin{equation}\label{local-4} - \xi^* \leq \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i} - = \frac{1}{\max_{i\in\bar{M}} c_i} - \end{equation} - where the last inequality uses Lemma~\ref{lemma:derivative-bounds}. - - Combining \eqref{local-2}, \eqref{local-3} and \eqref{local-4}, we get that: - \begin{displaymath} - \sum_{i\in M}\mu_i^* \leq |M|\xi^* \leq n\xi^*\leq \frac{n}{\max_{i\in\bar{M}} c_i} \leq n^2 - \end{displaymath} - - This implies that: - \begin{displaymath} - \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^*\leq n^2 - \end{displaymath} - which in addition to \eqref{eq:local-1} proves the lemma. -\end{proof} - -\begin{lemma}\label{lemma:monotonicity} - If $c'$ = $(c_i', c_{-i})$, with $c_i'\leq c_i - \delta$, we have: - \begin{displaymath} - L^*_{c'}(\alpha) \geq L^*_c(\alpha) + \frac{\alpha\delta b}{2^n} - \end{displaymath} -\end{lemma} - -\begin{proof} - Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c', \alpha})$. Noting that: - \begin{displaymath} - \mathcal{L}_{c', \alpha}(\lambda, \mu^*, \nu^*, \xi^*) \geq - \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) + \lambda_i\xi^*\delta, - \end{displaymath} - we get similarly to Lemma~\ref{lemma:proximity}: - \begin{displaymath} - L^*_{c'}(\alpha) \geq L(\lambda) + \lambda_i\xi^*\delta - \end{displaymath} - for any $\lambda$ feasible for \eqref{eq:perturbed-primal}. In particular, for $\lambda^*$ primal optimal for \eqref{eq:perturbed-primal}: - \begin{displaymath} - L^*_{c'}(\alpha) \geq L^*_{c}(\alpha) + \alpha\xi^*\delta - \end{displaymath} - since $\lambda_i^*\geq \alpha$. - - Using the KKT conditions for $(P_{c', \alpha})$, we can write: - \begin{displaymath} - \xi^* = \inf_{i:\lambda^{'*}_i>\alpha} \frac{\T{x_i}S(\lambda^{'*})^{-1}x_i}{c_i'} - \end{displaymath} - with $\lambda^{'*}$ optimal for $(P_{c', \alpha})$. Since $c_i'\leq 1$, using Lemma~\ref{lemma:derivative-bounds}, we get that $\xi^*\geq \frac{b}{2^n}$, which concludes the proof. -\end{proof} - - -\subsubsection*{End of the proof of Proposition~\ref{prop:monotonicity}} - -Let $\tilde{L}^*_c$ be the approximation computed by -Algorithm~\ref{alg:monotone}. 
-\begin{enumerate}
-    \item Using Lemma~\ref{lemma:proximity}:
-\begin{displaymath}
-    |\tilde{L}^*_c - L^*_c| \leq |\tilde{L}^*_c - L^*_c(\alpha)| + |L^*_c(\alpha) - L^*_c|
-    \leq \alpha\delta + \alpha n^2 = \varepsilon
-\end{displaymath}
-which proves the $\varepsilon$-accuracy (the first term is bounded by the
-accuracy $A\leq\alpha\delta$ of item 3 below).
-
-\item For the $\delta$-decreasingness, let $c' = (c_i', c_{-i})$ with $c_i'\leq
-    c_i-\delta$; then:
-\begin{displaymath}
-    \tilde{L}^*_{c'} \geq L^*_{c'}(\alpha) - \frac{\alpha\delta b}{2^{n+1}}
-    \geq L^*_c(\alpha) + \frac{\alpha\delta b}{2^{n+1}}
-    \geq \tilde{L}^*_c
-\end{displaymath}
-where the first and last inequalities come from the accuracy of the
-approximation, and the middle inequality follows from
-Lemma~\ref{lemma:monotonicity}.
-
-\item The accuracy of the approximation $\tilde{L}^*_c$ is:
-\begin{displaymath}
-    A\defeq\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2)}
-\end{displaymath}
-
-Note that:
-\begin{displaymath}
-    \log\log A^{-1} = O\bigg(\log\log\frac{1}{\varepsilon\delta b} + \log n\bigg)
-\end{displaymath}
-Using Lemma~\ref{lemma:barrier} concludes the proof of the running time.\qed
-\end{enumerate}
diff --git a/definitions.tex b/definitions.tex
index a05edae..d474853 100644
--- a/definitions.tex
+++ b/definitions.tex
@@ -8,7 +8,7 @@
 \newtheorem{theorem}[lemma]{Theorem}
 \newtheorem{corollary}[lemma]{Corollary}
 \theoremstyle{definition}
-\newtheorem*{definition}{Definition}
+\newtheorem{definition}[lemma]{Definition}
 \newcommand{\citeN}{\citet}
 %\newcommand*{\defeq}{\stackrel{\text{def}}{=}}
 \newcommand*{\defeq}{\equiv}
diff --git a/intro.tex b/intro.tex
index 6002b46..0712540 100644
--- a/intro.tex
+++ b/intro.tex
@@ -68,7 +68,7 @@
 From a technical perspective, we present a convex relaxation of \eqref{obj}, and
 %Our approach to mechanisms for experimental design --- by
 % optimizing the information gain in parameters like $\beta$ which are estimated through the data analysis process --- is general. We give examples of this approach beyond linear regression to a general class that includes logistic regression and learning binary functions, and show that the corresponding budgeted mechanism design problem is also expressed through a submodular optimization. Hence, prior work \cite{chen,singer-mechanisms} immediately applies, and gives randomized, universally truthful, polynomial time, constant factor approximation mechanisms for problems in this class. Getting deterministic, truthful, polynomial time mechanisms with a constant approximation factor for this class or specific problems in it, like we did for \EDP, remains an open problem.
-In what follows, we describe related work in Section~\ref{sec:related}. We briefly review experimental design and budget feasible mechanisms in Section~\ref{sec:peel} and define \SEDP\ formally. In Section~\ref{sec:main} we present our mechanism for \SEDP\ and state our main results, which are proved in Section~\ref{sec:proofs}. A generalization of our framework to machine learning tasks beyond linear regression is presented in Section~\ref{sec:ext}.
+In what follows, we describe related work in Section~\ref{sec:related}. We briefly review experimental design and budget feasible mechanisms in Section~\ref{sec:peel} and define \SEDP\ formally. In Section~\ref{sec:main} we present our mechanism for \SEDP\ and state our main results. A generalization of our framework to machine learning tasks beyond linear regression is presented in Section~\ref{sec:ext}.
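Returning to item 3 of the proof of Proposition~\ref{prop:monotonicity} concluded above: the accuracy parameter $A$ is exponentially small in $n$, but only $\log\log A^{-1}$ enters the running time. A quick numerical illustration, with arbitrarily chosen values (ours):

    # Illustration (our numbers) that only log log(1/A) matters for the runtime.
    import math
    eps, delta, b, n = 1e-2, 1e-2, 0.5, 20
    A = eps * delta * b / (2 ** (n + 1) * (delta + n ** 2))
    print(A)                          # ~6e-14, exponentially small in n
    print(math.log(math.log(1 / A)))  # ~3.4 = O(log log(1/(eps*delta*b)) + log n)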
 \junk{
diff --git a/main.tex b/main.tex
index 0e8457c..ff26c5d 100644
--- a/main.tex
+++ b/main.tex
@@ -53,9 +53,9 @@
 Instead, \citeN{singer-mechanisms} and \citeN{chen} suggest to approximate
 $OPT_{-i^*}$ by a quantity $OPT'_{-i^*}$ satisfying the following properties:
 \begin{itemize}
-    \item $OPT'_{-i^*}$ must not depend on agent $i^*$'s reported
-    cost and must be monotone: it can only increase when agents decrease
-    their costs. This is to ensure the monotonicity of the allocation function.
+    \item $OPT'_{-i^*}$ must not depend on agent $i^*$'s reported cost and must
+    be decreasing with respect to the costs. This is to ensure the monotonicity
+    of the allocation function.
     \item $OPT'_{-i^*}$ must be close to $OPT_{-i^*}$ to maintain a bounded
     approximation ratio.
     \item $OPT'_{-i^*}$ must be computable in polynomial time.
@@ -74,18 +74,23 @@
 $\delta$-decreasing, the resulting mechanism will be $\delta$-truthful as
 defined below.
 
 \begin{definition}
-Let $\mathcal{M}= (S, p)$ a mechanism, and let $s$ denote the indicator
-function of $S$ ($s_i(c) = \id_{i\in S(c)}$). We say that $\mathcal{M}$ is
+Let $\mathcal{M}= (S, p)$ be a mechanism, and let $s_i$ denote the indicator
+function of $i\in S(c)$, $s_i(c) \defeq \id_{i\in S(c)}$. We say that $\mathcal{M}$ is
 $\delta$-truthful iff:
 \begin{displaymath}
     \forall c\in\reals_+^n,\;
-\forall\mu\geq\delta,\;
+\forall\mu\;\text{with}\; |\mu|\geq\delta,\;
 \forall i\in\{1,\ldots,n\},\;
 p_i(c)-s_i(c)\cdot c_i \geq p_i(c+\mu e_i) - s_i(c+\mu e_i)\cdot c_i
 \end{displaymath}
-where $e_i$ is the $i$-th basis vector of $\reals^n$.
+where $e_i$ is the $i$-th canonical basis vector of $\reals^n$.
 \end{definition}
+Intuitively, $\delta$-truthfulness means that no agent can benefit from
+misreporting her cost by more than $\delta$. In practical applications, where
+bids are discretized, the mechanism is effectively truthful as soon as
+$\delta$ is smaller than the discretization step.
+
 $\delta$-truthfulness will follow from $\delta$-monotonicity by the following
 variant of Myerson's theorem.
diff --git a/paper.tex b/paper.tex
index 8a8775c..7b821b6 100644
--- a/paper.tex
+++ b/paper.tex
@@ -5,9 +5,9 @@
 \usepackage{amsmath,amsfonts}
 \usepackage{algorithm, algpseudocode}
 \usepackage{bbm,color,verbatim}
-\usepackage[pagebackref=true,breaklinks=true,colorlinks=true]{hyperref}
 \usepackage{amsthm}
 \input{definitions}
+\usepackage[pagebackref=true,breaklinks=true,colorlinks=true]{hyperref}
 \title{Budget Feasible Mechanisms for Experimental Design}
 \author{
 Thibaut Horel\\École Normale Supérieure\\\texttt{thibaut.horel@ens.fr}
@@ -33,10 +33,8 @@
 \input{problem}
 \section{Approximation results}\label{sec:approximation}
 \input{approximation}
-\section{Mechanism for \SEDP{}}
+\section{Mechanism for \SEDP{}}\label{sec:mechanism}
 \input{main}
-\section{Proofs}\label{sec:proofs}
-\input{proofs}
 \section{Extensions}\label{sec:ext}
 \input{general}
 %\section{Conclusion}
@@ -44,6 +42,6 @@
 \bibliographystyle{abbrvnat}
 \bibliography{notes}
 \newpage
-\section*{Appendix}
+\section{Appendix}
 \input{appendix}
 \end{document}
diff --git a/problem.tex b/problem.tex
index 6bfac6b..a5db2ac 100644
--- a/problem.tex
+++ b/problem.tex
@@ -146,22 +146,20 @@
 $ \mathcal{N}$ of experiments to be purchased, while the payment function
 returns a vector of payments $[p_i(c)]_{i\in \mathcal{N}}$. Let $s_i(c) =
 \id_{i\in S(c)}$ be the binary indicator of $i\in S(c)$.
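To make the $\delta$-truthfulness inequality from the main.tex hunk above concrete, here is a toy check on a fixed-threshold rule with threshold payments; the rule, the threshold $t$, and the numbers are ours and unrelated to the paper's mechanism. A truthful rule satisfies the inequality for every misreport $\mu$, in particular for all $|\mu|\geq\delta$:

    # Toy check (ours, unrelated to the paper's mechanism) of the
    # delta-truthfulness inequality: allocate iff the reported cost is at
    # most t, and pay the threshold t. This rule is truthful, hence
    # delta-truthful for every delta >= 0.
    def utility(c_true, c_reported, t=0.5):
        s = 1 if c_reported <= t else 0   # allocation s_i
        return t * s - s * c_true         # threshold payment minus incurred cost

    for c in [0.2, 0.5, 0.8]:
        for mu in [-0.4, -0.1, 0.1, 0.4]:  # misreports with |mu| >= delta
            assert utility(c, c) >= utility(c, c + mu)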
 As usual, we seek mechanisms that have the following properties \cite{singer-mechanisms}:
 \begin{itemize}
-\item \emph{Normalization.} Unallocated experiments receive zero payments, \emph{i.e.},
-    \begin{align}s_i(c)=0\text{ implies }p_i(c)=0.\label{normal}\end{align}
-\item\emph{Individual Rationality.} Payments for allocated experiments exceed costs, \emph{i.e.},
-    \begin{align} p_i(c)\geq c_i\cdot s_i(c).\label{ir}\end{align}
-\item \emph{No Positive Transfers.} Payments are non-negative, \emph{i.e.},
-\begin{align}p_i(c)\geq 0\label{pt}.\end{align}
-    \item \emph{Truthfulness/Incentive Compatibility.} An experiment subject has no incentive to missreport her true cost. Formally, let $c_{-i}$
-    be a vector of costs of all agents except $i$. Then, % $c_{-i}$ being the same). Let $[p_i']_{i\in \mathcal{N}} = p(c_i',
-% c_{-i})$, then the
-\begin{align} p_i(c_i,c_{-i}) - s_i(c_i,c_{-i})\cdot c_i \geq p_i(c_i',c_{-i}) - s_i(c_i',c_{-i})\cdot c_i, \label{truthful}\end{align} for every $i \in \mathcal{N}$ and every two cost vectors $(c_i,c_{-i})$ and $(c_i',c_{-i})$.
-    \item \emph{Budget Feasibility.} The sum of the payments should not exceed the
-    budget constraint, \emph{i.e.}
-    %\begin{displaymath}
-    \begin{align} \sum_{i\in\mathcal{N}} p_i(c) \leq B.\label{budget}\end{align}
-    %\end{displaymath}
+\item \emph{Normalization.} Unallocated experiments receive zero payments: $s_i(c)=0\text{ implies }p_i(c)=0.\label{normal}$
+\item\emph{Individual Rationality.} Payments for allocated experiments cover
+the corresponding costs: $p_i(c)\geq c_i\cdot s_i(c).\label{ir}$
+\item \emph{No Positive Transfers.} Payments are non-negative: $p_i(c)\geq 0\label{pt}$.
+\item \emph{Truthfulness/Incentive Compatibility.} Reporting her true cost is
+a dominant strategy for an experiment subject. Formally, let $c_{-i}$
+    be a vector of costs of all agents except $i$. Then, $p_i(c_i,c_{-i})
+    - s_i(c_i,c_{-i})\cdot c_i \geq p_i(c_i',c_{-i}) - s_i(c_i',c_{-i})\cdot c_i,
+    \label{truthful}$ for every $i \in \mathcal{N}$ and every two cost vectors $(c_i,c_{-i})$ and $(c_i',c_{-i})$.
+    \item \emph{Budget Feasibility.} The sum of the payments should not exceed
+    the budget, \emph{i.e.}, $\sum_{i\in\mathcal{N}} p_i(c) \leq
+    B.\label{budget}$
 \end{itemize}
+
 %We define the \emph{Strategic} version of \EDP{} as
 %\begin{center}
 %\textsc{StrategicExperimentalDesignProblem} (SEDP)
 %\begin{align*}
 %\end{align*}
 %%\end{subequations}
 %\end{center}
+
 Given that the full information problem \EDP{} is NP-hard, we further seek
 mechanisms that have the following two additional properties:
 \begin{itemize}
     \item \emph{Approximation Ratio.} The value of the allocated set should
     not be too far from the optimum value of the full information case
     \eqref{eq:non-strategic}. Formally, there must exist some $\alpha\geq 1$
-    such that:
-    \begin{displaymath}
-        OPT \leq \alpha V(S).
-    \end{displaymath}
+    such that $OPT \leq \alpha V(S)$.
 % The approximation ratio captures the \emph{price of truthfulness}, \emph{i.e.}, the relative value loss incurred by adding the truthfulness constraint.
 % to the value function maximization. We stress here that the approximation ratio is defined with respect \emph{to the maximal value in the full information case}.
 \item \emph{Computational Efficiency.} The allocation and payment function
@@ -614,19 +610,6 @@
 TODO?
 Explain which points are the most valuable: points aligned along directions
 in which the spread of the already existing points is small.
-\subsection{Auction}
-
-TODO Explain the optimization problem, why it has to be formulated as an auction
-problem. Explain the goals:
-\begin{itemize}
-    \item truthful
-    \item individually rational
-    \item budget feasible
-    \item has a good approximation ratio
-
-TODO Explain what is already known: it is ok when the function is submodular. When
-should we introduce the notion of submodularity?
-\end{itemize}
 \end{comment}
diff --git a/proofs.tex b/proofs.tex
index 4cefc25..e69de29 100644
--- a/proofs.tex
+++ b/proofs.tex
@@ -1,122 +0,0 @@
-\subsection{Proof of Theorem~\ref{thm:main}}\label{sec:proofofmainthm}
-
-We now present the proof of Theorem~\ref{thm:main}. $\delta$-truthfulness and
-individual rationality follow from $\delta$-monotonicity and threshold
-payments. $\delta$-monotonicity and budget feasibility follow by the same
-steps as the analysis of \citeN{chen}; for the sake of completeness, we
-restate their proof in the Appendix.
-
-The complexity of the mechanism is given by the following lemma.
-
-\begin{lemma}[Complexity]\label{lemma:complexity}
-    For any $\varepsilon > 0$ and any $\delta>0$, the complexity of the mechanism is
-    $O\big(\text{poly}(n, d, \log\log\frac{1}{b\varepsilon\delta})\big)$.
-\end{lemma}
-
-\begin{proof}
-    The value function $V$ in \eqref{modified} can be computed in time
-    $O(\text{poly}(n, d))$ and the mechanism only involves a linear
-    number of queries to the function $V$.
-
-    By Proposition~\ref{prop:monotonicity}, line 3 of Algorithm~\ref{mechanism}
-    can be computed in time
-    $O(\text{poly}(n, d, \log\log\frac{1}{b\varepsilon\delta}))$. Hence the
-    allocation function's complexity is as stated.
-    %Payments can be easily computed in time $O(\text{poly}(n, d))$ as in prior work.
-\junk{
-    Using Singer's characterization of the threshold payments
-    \cite{singer-mechanisms}, one can verify that they can be computed in time
-    $O(\text{poly}(n, d))$.
-    }
-\end{proof}
-
-Finally, we prove the approximation ratio of the mechanism.
-
-We use the following lemma from \cite{chen}, which bounds $OPT$ in terms of
-the value of $S_G$, as computed in Algorithm~\ref{mechanism}, and $i^*$, the
-element of maximum value.
-
-\begin{lemma}[\cite{chen}]\label{lemma:greedy-bound}
-Let $S_G$ be the set computed in Algorithm~\ref{mechanism} and let
-$i^*=\argmax_{i\in\mathcal{N}} V(\{i\})$. We have:
-\begin{displaymath}
-OPT \leq \frac{e}{e-1}\big( 3 V(S_G) + 2 V(i^*)\big).
-\end{displaymath}
-\end{lemma}
-
-Using Proposition~\ref{prop:relaxation} and Lemma~\ref{lemma:greedy-bound}, we
-can complete the proof of Theorem~\ref{thm:main} by showing that, for any
-$\varepsilon > 0$, if $OPT_{-i^*}'$, the optimal value of $L$ when $i^*$ is
-excluded from $\mathcal{N}$, has been computed to a precision $\varepsilon$,
-then the set $S^*$ allocated by the mechanism is such that:
-\begin{equation} \label{approxbound}
-OPT
-\leq \frac{10e\!-\!3 + \sqrt{64e^2\!-\!24e\!+\!9}}{2(e\!-\!1)} V(S^*)\!
-+ \! \varepsilon .
-\end{equation}
-To see this, let $L^*_{-i^*}$ be the true maximum value of $L$ subject to
-$\lambda_{i^*}=0$, $\sum_{i\in \mathcal{N}\setminus\{i^*\}}c_i\lambda_i\leq B$. From line
-3 of Algorithm~\ref{mechanism}, we have
-$L^*_{-i^*}-\varepsilon\leq OPT_{-i^*}' \leq L^*_{-i^*}+\varepsilon$.
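On the complexity lemma above: each value-oracle query is a single $d\times d$ log-determinant, which is where the $O(\text{poly}(n,d))$ per-query cost comes from. A minimal sketch, assuming the unmodified value function $V(S)=\frac{1}{2}\log\det(I_d+\T{X_S}X_S)$ from the problem definition (the function names are ours):

    # Minimal sketch (ours) of one value-oracle query:
    # V(S) = 1/2 log det(I_d + X_S^T X_S), a single d x d log-determinant.
    import numpy as np

    def V(X, S):
        X_S = X[sorted(S)]                      # rows of X indexed by S
        d = X.shape[1]
        _, logdet = np.linalg.slogdet(np.eye(d) + X_S.T @ X_S)
        return 0.5 * logdet

    X = np.array([[1.0, 0.0], [0.0, 1.0]])      # two orthogonal experiments, d = 2
    print(V(X, {0}), V(X, {0, 1}))              # 0.5*log 2 and log 2

The case analysis of the approximation bound resumes below.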
-
-If the condition on line 4 of the algorithm holds, then
-\begin{displaymath}
-    V(i^*) \geq \frac{1}{C}L^*_{-i^*}-\frac{\varepsilon}{C} \geq
-    \frac{1}{C}OPT_{-i^*} -\frac{\varepsilon}{C}.
-\end{displaymath}
-Indeed, $L^*_{-i^*}\geq OPT_{-i^*}$ as $L$ is a fractional relaxation of $V$.
-Also, $OPT \leq OPT_{-i^*} + V(i^*)$, hence,
-\begin{equation}\label{eq:bound1}
-    OPT\leq (1+C)V(i^*) + \varepsilon.
-\end{equation}
-
-If the condition does not hold, by observing that $L^*_{-i^*}\leq L^*_c$ and
-applying Proposition~\ref{prop:relaxation}, we get
-\begin{displaymath}
-    V(i^*)\leq \frac{1}{C}L^*_{-i^*} + \frac{\varepsilon}{C}
-    \leq \frac{1}{C} \big(2 OPT + 2 V(i^*)\big) + \frac{\varepsilon}{C}.
-\end{displaymath}
-Applying Lemma~\ref{lemma:greedy-bound},
-\begin{displaymath}
-    V(i^*) \leq \frac{1}{C}\left(\frac{2e}{e-1}\big(3 V(S_G)
-    + 2 V(i^*)\big) + 2 V(i^*)\right) + \frac{\varepsilon}{C}.
-\end{displaymath}
-Thus, if $C$ is such that $C(e-1) -6e +2 > 0$,
-\begin{align*}
-    V(i^*) \leq \frac{6e}{C(e-1)- 6e + 2} V(S_G)
-    + \frac{(e-1)\varepsilon}{C(e-1)- 6e + 2}.
-\end{align*}
-Finally, using Lemma~\ref{lemma:greedy-bound} again, we get
-\begin{equation}\label{eq:bound2}
-    OPT \leq
-    \frac{3e}{e-1}\left( 1 + \frac{4e}{C (e-1) -6e +2}\right) V(S_G)
-    + \frac{2e\varepsilon}{C(e-1)- 6e + 2}.
-\end{equation}
-To minimize the coefficients of $V(i^*)$ and $V(S_G)$ in \eqref{eq:bound1}
-and \eqref{eq:bound2} respectively, we wish to choose $C$ to minimize
-\begin{displaymath}
-    \max\left(1+C,\frac{3e}{e-1}\left( 1 + \frac{4e}{C (e-1) -6e +2}
-    \right)\right).
-\end{displaymath}
-This expression has two local minima; only one of them satisfies $C(e-1) -6e
-+2 \geq 0$, namely
-\begin{equation}\label{eq:constant}
-    C = \frac{8e-1 + \sqrt{64e^2-24e + 9}}{2(e-1)}.
-\end{equation}
-For this choice of $C$, $\frac{2e\varepsilon}{C(e-1)- 6e + 2}\leq \varepsilon.$
-Substituting this expression for $C$ into \eqref{eq:bound1} and \eqref{eq:bound2}
-gives the approximation ratio in \eqref{approxbound}, and concludes the proof
-of Theorem~\ref{thm:main}.\hspace*{\stretch{1}}\qed
-
-
-\subsection{Proof of Theorem \ref{thm:lowerbound}}
-
-Suppose, for contradiction, that such a mechanism exists. Consider two
-experiments with dimension $d=2$, such that $x_1 = e_1=[1,0]$, $x_2=e_2=[0,1]$
-and $c_1=c_2=B/2+\epsilon$. Then one of the two experiments, say $x_1$, must
-be in the set selected by the mechanism, otherwise the approximation ratio is
-unbounded, a contradiction. If $x_1$ lowers its cost to $B/2-\epsilon$, by
-monotonicity it remains in the solution; by threshold payment, it is paid at
-least $B/2+\epsilon$. So $x_2$ is not included in the solution by budget
-feasibility and individual rationality: hence, the selected set attains
-a value $\frac{1}{2}\log 2$, while the optimal value is
-$\log 2$.\hspace*{\stretch{1}}\qed
-- 
cgit v1.2.3-70-g09d2
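For reference, the constant in \eqref{eq:constant} and the resulting ratio in \eqref{approxbound} evaluate numerically as follows (a quick check, ours):

    # Numerical evaluation (ours) of the constant C in eq. (eq:constant) and of
    # the resulting approximation ratio in eq. (approxbound).
    import math
    e = math.e
    root = math.sqrt(64 * e**2 - 24 * e + 9)
    C = (8 * e - 1 + root) / (2 * (e - 1))
    ratio = (10 * e - 3 + root) / (2 * (e - 1))
    print(C, ratio)          # C ~ 11.98, ratio ~ 12.98; indeed ratio = 1 + C
    assert abs(ratio - (1 + C)) < 1e-12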