Previous approaches towards designing truthful, budget feasible mechanisms for \textsc{Knapsack}~\cite{chen} and \textsc{Coverage}~\cite{singer-influence} build upon polynomial-time algorithms that compute an approximation of $OPT$, the optimal value in the full information case. Crucially, to be used in designing a truthful mechanism, such an algorithm also needs to be \emph{monotone}, in the sense that decreasing any cost $c_i$ does not decrease the resulting estimate of $OPT$. In the cases of \textsc{Knapsack} and~\textsc{Coverage}, as well as in the case of \EDP{}, this monotonicity requirement precludes the use of traditional approximation algorithms. In Section~\ref{sec:concave}, we address this issue by designing a convex relaxation of \EDP{}, and showing that its solution can be used to approximate $OPT$ (Proposition~\ref{prop:relaxation}). The objective of this relaxation is concave and self-concordant \cite{boyd2004convex}; as such, the relaxed problem can be solved to arbitrary accuracy in polynomial time. Unfortunately, the output of such a solver is not necessarily monotone. Nevertheless, in Section~\ref{sec:monotonicity}, we show that a solver of the relaxed problem can be used to construct a solver that is ``almost'' monotone (Proposition~\ref{prop:monotonicity}). In Section~\ref{sec:main}, we show that this algorithm can be used to design a $\delta$-truthful mechanism for \EDP.

\subsection{A Convex Relaxation of \EDP}
\label{sec:concave}
A classical way of relaxing combinatorial optimization problems is \emph{relaxing by expectation}, using the so-called \emph{multi-linear} extension of the objective function $V$ (see, \emph{e.g.}, \cite{calinescu2007maximizing,vondrak2008optimal,dughmi2011convex}). This extension is appealing because it yields approximation guarantees for a wide class of combinatorial problems through \emph{pipage rounding}, a technique introduced by \citeN{pipage}. In general, such relaxations preserve monotonicity which, as discussed above, is required in mechanism design.
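To make this concrete before the formal development, the following toy sketch is purely illustrative and not part of the paper: the Python helper names, the random data, and the omission of normalization constants are our own choices. It computes the expectation-based extension of the $\log\det$ value function by enumerating all subsets and compares it to the concave surrogate obtained by moving the expectation inside the $\log\det$, anticipating the definitions of $F$ and $L$ and the bound of Lemma~\ref{lemma:relaxation-ratio} below.
\begin{verbatim}
# Illustrative sketch only (not from the paper): compare the multi-linear
# extension F, computed by enumerating all subsets, with the concave
# surrogate L obtained by swapping the expectation and log det.
import numpy as np

rng = np.random.default_rng(0)
n, d = 6, 3
X = rng.normal(size=(n, d))
X /= np.linalg.norm(X, axis=1, keepdims=True)  # enforce ||x_i||_2 <= 1

def V(S):
    # Value of a set S of experiments (normalization constants ignored).
    A = np.eye(d) + X[list(S)].T @ X[list(S)]
    return np.linalg.slogdet(A)[1]

def F(lam):
    # Multi-linear extension: E[V(S)] when i is included w.p. lam_i.
    total = 0.0
    for mask in range(1 << n):
        S = [i for i in range(n) if (mask >> i) & 1]
        p = np.prod([lam[i] if (mask >> i) & 1 else 1.0 - lam[i]
                     for i in range(n)])
        total += p * V(S)
    return total

def L(lam):
    # Concave surrogate: log det(I_d + sum_i lam_i x_i x_i^T).
    return np.linalg.slogdet(np.eye(d) + X.T @ (lam[:, None] * X))[1]

lam = rng.uniform(size=n)
print(F(lam), L(lam))  # empirically, L(lam)/2 <= F(lam) <= L(lam)
\end{verbatim}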
Formally, let $P_\mathcal{N}^\lambda$ be a probability distribution over subsets of $\mathcal{N}$, parametrized by $\lambda\in [0,1]^n$, where a set $S\subseteq \mathcal{N}$ sampled from $P_\mathcal{N}^\lambda$ is constructed as follows: each $i\in \mathcal{N}$ is included in $S$ independently with probability $\lambda_i$, \emph{i.e.}, $ P_\mathcal{N}^\lambda(S) \defeq \prod_{i\in S} \lambda_i \prod_{i\in\mathcal{N}\setminus S}( 1 - \lambda_i).$ Then, the \emph{multi-linear} extension $F:[0,1]^n\to\reals$ of $V$ is defined as the expectation of $V$ under the distribution $P_\mathcal{N}^\lambda$:
\begin{equation}\label{eq:multi-linear} F(\lambda) \defeq \mathbb{E}_{S\sim P_\mathcal{N}^\lambda}\big[V(S)\big] = \mathbb{E}_{S\sim P_\mathcal{N}^\lambda}\big[ \log\det\big( I_d + \textstyle\sum_{i\in S} x_i\T{x_i}\big) \big],\quad \lambda\in[0,1]^n. \end{equation}
Function $F$ is an extension of $V$ to the domain $[0,1]^n$, as it equals $V$ on integer inputs: $F(\id_S) = V(S)$ for all $S\subseteq\mathcal{N}$, where $\id_S$ denotes the indicator vector of $S$.

In contrast to problems such as \textsc{Knapsack}, the multi-linear extension \eqref{eq:multi-linear} cannot be optimized in polynomial time for the value function $V$ we study here, given by \eqref{modified}. Hence, we introduce an extension $L:[0,1]^n\to\reals$ defined by
\begin{equation}\label{eq:our-relaxation} L(\lambda) \defeq \log\det\big(I_d + \textstyle\sum_{i\in\mathcal{N}} \lambda_i x_i\T{x_i}\big), \quad \lambda\in[0,1]^n. \end{equation}
Note that $L$ also extends $V$, and follows naturally from the multi-linear extension by swapping the expectation and the $\log \det$ in \eqref{eq:multi-linear}. Crucially, $L$ is \emph{strictly concave} on $[0,1]^n$, a fact that we exploit in the next section to maximize $L$ subject to the budget constraint in polynomial time. Our first technical lemma relates $L$ to the multi-linear extension $F$:
\begin{lemma}\label{lemma:relaxation-ratio} For all $\lambda\in[0,1]^{n},$ $ \frac{1}{2} \,L(\lambda)\leq F(\lambda)\leq L(\lambda)$. \end{lemma}
\begin{proof} The bound $F(\lambda)\leq L(\lambda)$ follows from the concavity of the $\log\det$ function and Jensen's inequality. To show the lower bound, we first prove that $\frac{1}{2}$ is a lower bound on the ratio $\partial_i F(\lambda)/\partial_i L(\lambda)$, where we use $\partial_i$ as a shorthand for $\frac{\partial}{\partial \lambda_i}$, the partial derivative with respect to the $i$-th variable. Let us start by computing the partial derivatives of $F$ and $L$ with respect to the $i$-th component.
Observe that
\begin{displaymath} \partial_i P_\mathcal{N}^\lambda(S) = \left\{ \begin{aligned} & P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})\;\textrm{if}\; i\in S, \\ & - P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\;\textrm{if}\; i\in \mathcal{N}\setminus S. \\ \end{aligned}\right. \end{displaymath}
Hence,
\begin{displaymath} \partial_i F(\lambda) = \sum_{\substack{S\subseteq\mathcal{N}\\ i\in S}} P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})V(S) - \sum_{\substack{S\subseteq\mathcal{N}\\ i\in \mathcal{N}\setminus S}} P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S). \end{displaymath}
Now, using that every $S$ such that $i\in S$ can be uniquely written as $S'\cup\{i\}$ with $i\notin S'$, we can write:
\begin{displaymath} \partial_i F(\lambda) = \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\big(V(S\cup\{i\}) - V(S)\big). \end{displaymath}
Recall from \eqref{eq:marginal_contrib} that the marginal contribution of $i$ to $S$ is given by $$V(S\cup \{i\}) - V(S) =\frac{1}{2}\log(1 + \T{x_i}A(S)^{-1}x_i), $$ where $A(S) = I_d+ \T{X_S}X_S$. Using this,
\begin{displaymath} \partial_i F(\lambda) = \frac{1}{2} \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} P_{\mathcal{N}\setminus\{i\}}^\lambda(S) \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big). \end{displaymath}
The computation of the derivative of $L$ uses standard matrix calculus: writing $\tilde{A}(\lambda) \defeq I_d+\sum_{i\in \mathcal{N}}\lambda_ix_i\T{x_i}$,
\begin{displaymath} \det \tilde{A}(\lambda + h\cdot e_i) = \det\big(\tilde{A}(\lambda) + hx_i\T{x_i}\big) =\det \tilde{A}(\lambda)\big(1+ h\T{x_i}\tilde{A}(\lambda)^{-1}x_i\big). \end{displaymath}
Hence,
\begin{displaymath} \log\det\tilde{A}(\lambda + h\cdot e_i)= \log\det\tilde{A}(\lambda) + h\T{x_i}\tilde{A}(\lambda)^{-1}x_i + o(h), \end{displaymath}
which implies
\begin{displaymath} \partial_i L(\lambda) =\frac{1}{2} \T{x_i}\tilde{A}(\lambda)^{-1}x_i. \end{displaymath}
Recall from \eqref{eq:inverse} that the monotonicity of the matrix inverse over positive definite matrices implies
\begin{gather*} \forall S\subseteq\mathcal{N},\quad A(S)^{-1} \succeq A(S\cup\{i\})^{-1} \end{gather*}
as $A(S)\preceq A(S\cup\{i\})$. Observe that since $0\leq \lambda_i\leq 1$, $P_{\mathcal{N}\setminus\{i\}}^\lambda(S) \geq P_\mathcal{N}^\lambda(S)$ and $P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\geq P_{\mathcal{N}}^\lambda(S\cup\{i\})$ for all $S\subseteq\mathcal{N}\setminus\{i\}$. Hence,
\begin{align*} \partial_i F(\lambda) & \geq \frac{1}{4} \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} P_{\mathcal{N}}^\lambda(S) \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\ &+\frac{1}{4} \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} P_{\mathcal{N}}^\lambda(S\cup\{i\}) \log\Big(1 + \T{x_i}A(S\cup\{i\})^{-1}x_i\Big)\\ &\geq \frac{1}{4} \sum_{S\subseteq\mathcal{N}} P_\mathcal{N}^\lambda(S) \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big). \end{align*}
Using that $A(S)\succeq I_d$, we get that $\T{x_i}A(S)^{-1}x_i \leq \norm{x_i}_2^2 \leq 1$. Moreover, $\log(1+x)\geq x$ for all $x\in[0,1]$. Hence,
\begin{displaymath} \partial_i F(\lambda) \geq \frac{1}{4} \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)^{-1}\bigg)x_i. \end{displaymath}
Finally, using that the matrix inverse is matrix convex over symmetric positive definite matrices (see Appendix~\ref{app:properties}) together with Jensen's inequality:
\begin{displaymath} \partial_i F(\lambda) \geq \frac{1}{4} \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)\bigg)^{-1}x_i = \frac{1}{4}\T{x_i}\tilde{A}(\lambda)^{-1}x_i = \frac{1}{2} \partial_i L(\lambda). \end{displaymath}
Having bounded the ratio of the partial derivatives, we now bound the ratio $F(\lambda)/L(\lambda)$ from below. We distinguish three cases, depending on where the infimum of this ratio over $[0,1]^n\setminus\{0\}$ is attained. First, if the infimum is attained in the limit as $\lambda$ converges to zero (in, \emph{e.g.}, the $\ell_2$ norm), then by a first-order Taylor approximation one can write:
\begin{displaymath} \frac{F(\lambda)}{L(\lambda)} \sim_{\lambda\rightarrow 0} \frac{\sum_{i\in \mathcal{N}}\lambda_i\partial_i F(0)} {\sum_{i\in\mathcal{N}}\lambda_i\partial_i L(0)} \geq \frac{1}{2}, \end{displaymath}
\emph{i.e.}, the ratio $\frac{F(\lambda)}{L(\lambda)}$ is bounded from below by $1/2$ for small enough $\lambda$. Second, suppose the infimum is attained at a vertex of the hypercube $[0,1]^n$ different from $0$. Since $F$ and $L$ both extend the value function $V$, they coincide with $V$ at the vertices, which are exactly the binary points; hence the infimum equals $1$ in this case and, in particular, is greater than $1/2$. Finally, if the infimum is attained at a point $\lambda^*$ with at least one coordinate belonging to $(0,1)$, let $i$ be one such coordinate and consider the function $G_i$:
\begin{displaymath} G_i: x \mapsto \frac{F}{L}(\lambda_1^*,\ldots,\lambda_{i-1}^*, x, \lambda_{i+1}^*, \ldots, \lambda_n^*). \end{displaymath}
This function attains a minimum at $\lambda^*_i\in(0,1)$, so its derivative vanishes at this point:
\begin{displaymath} 0 = G_i'(\lambda^*_i) = \partial_i\left(\frac{F}{L}\right)(\lambda^*). \end{displaymath}
By the quotient rule, $\partial_i(F/L)(\lambda^*)=0$ implies that
\begin{displaymath} \frac{F(\lambda^*)}{L(\lambda^*)} = \frac{\partial_i F(\lambda^*)}{\partial_i L(\lambda^*)}\geq \frac{1}{2}, \end{displaymath}
using the lower bound on the ratio of the partial derivatives. This concludes the proof of the lemma. \end{proof}

Armed with this result, we use pipage rounding to show that any feasible $\lambda$ can be rounded to an ``almost'' integral feasible point without decreasing the value of the multi-linear extension $F$. More specifically, given a set of costs $c\in \reals^n_+$, we say that a $\lambda\in [0,1]^n$ is feasible if it belongs to the set $\dom_c =\{\lambda \in [0,1]^n: \sum_{i\in \mathcal{N}} c_i\lambda_i\leq B\}$. Then, the following lemma holds:
\begin{lemma}[Rounding]\label{lemma:rounding} For any feasible $\lambda\in \dom_c$, there exists a feasible $\bar{\lambda}\in \dom_c$ such that (a) $F(\lambda)\leq F(\bar{\lambda})$, and (b) at most one of the coordinates of $\bar{\lambda}$ is fractional, that is, lies in $(0,1)$. \end{lemma}
\begin{proof} We give a rounding procedure which, given a feasible $\lambda$ with at least two fractional components, returns a feasible $\lambda'$ with at least one fewer fractional component and such that $F(\lambda) \leq F(\lambda')$. Applying this procedure recursively yields the lemma's result.
Let us consider such a feasible $\lambda$, let $i$ and $j$ be two of its fractional components, and define the following function:
\begin{displaymath} F_\lambda(\varepsilon) = F(\lambda_\varepsilon) \quad\textrm{where} \quad \lambda_\varepsilon = \lambda + \varepsilon\left(e_i-\frac{c_i}{c_j}e_j\right). \end{displaymath}
It is easy to see that if $\lambda$ is feasible, then:
\begin{equation}\label{eq:convex-interval} \forall\varepsilon\in\Big[\max\Big(-\lambda_i,(\lambda_j-1)\frac{c_j}{c_i}\Big), \min\Big(1-\lambda_i, \lambda_j \frac{c_j}{c_i}\Big)\Big],\; \lambda_\varepsilon\;\;\textrm{is feasible}. \end{equation}
Furthermore, the function $F_\lambda$ is convex. Indeed,
\begin{align*} F_\lambda(\varepsilon) & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}^\lambda}\Big[ (\lambda_i+\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i,j\})\\ & + (\lambda_i+\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i\})\\ &+ (1-\lambda_i-\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{j\})\\ & + (1-\lambda_i-\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S')\Big]. \end{align*}
Thus, $F_\lambda$ is a polynomial of degree at most $2$ in $\varepsilon$ whose leading coefficient,
\begin{displaymath} \frac{c_i}{c_j}\,\mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}^\lambda}\Big[ V(S'\cup\{i\})+V(S'\cup\{j\}) -V(S'\cup\{i,j\})-V(S')\Big], \end{displaymath}
is non-negative by submodularity of $V$. Hence, the maximum of $F_\lambda$ over the interval given in \eqref{eq:convex-interval} is attained at one of its endpoints, at which either the $i$-th or the $j$-th component of $\lambda_\varepsilon$ becomes integral. \end{proof}

Together, Lemma~\ref{lemma:relaxation-ratio} and Lemma~\ref{lemma:rounding} imply that $OPT$, the optimal value of \EDP, can be approximated by solving the following convex optimization problem:
\begin{align}\tag{$P_c$}\label{eq:primal} \text{Maximize:} \quad L(\lambda)\quad \text{subject to:} \quad\lambda \in \dom_c \end{align}
In particular, for $L_c^*\defeq \max_{\lambda\in \dom_c} L(\lambda)$, the following holds:
\begin{proposition}\label{prop:relaxation} $OPT\leq L^*_c \leq 2 OPT + 2\max_{i\in\mathcal{N}}V(i)$. \end{proposition}
\begin{proof} The lower bound on $L^*_c$ follows immediately from the fact that $L$ extends $V$ to $[0,1]^n$. For the upper bound, let us consider a feasible point $\lambda^*\in \dom_c$ such that $L(\lambda^*) = L^*_c$. By applying Lemma~\ref{lemma:relaxation-ratio} and Lemma~\ref{lemma:rounding}, we get a feasible point $\bar{\lambda}$ with at most one fractional component such that
\begin{equation}\label{eq:e1} L(\lambda^*) \leq 2 F(\bar{\lambda}). \end{equation}
If $\bar{\lambda}$ is integral, then $F(\bar{\lambda})=V(\bar{S})\leq OPT$ for $\bar{S}$ the set whose indicator vector is $\bar{\lambda}$, and the upper bound follows directly from \eqref{eq:e1}. Otherwise, let $\lambda_i$ denote the unique fractional component of $\bar{\lambda}$ and $S$ denote the set whose indicator vector is $\bar{\lambda} - \lambda_i e_i$. By definition of the multi-linear extension $F$:
\begin{displaymath} F(\bar{\lambda}) = (1-\lambda_i)V(S) +\lambda_i V(S\cup\{i\}). \end{displaymath}
By submodularity of $V$, $V(S\cup\{i\})\leq V(S) + V(\{i\})$. Hence,
\begin{displaymath} F(\bar{\lambda}) \leq V(S) + V(i). \end{displaymath}
Note that since $\bar{\lambda}$ is feasible, $S$ is also feasible and $V(S)\leq OPT$. Hence,
\begin{equation}\label{eq:e2} F(\bar{\lambda}) \leq OPT + \max_{i\in\mathcal{N}} V(i). \end{equation}
Together, \eqref{eq:e1} and \eqref{eq:e2} imply the proposition. \end{proof}

As we discuss in the next section, $L^*_c$ can be computed to arbitrary accuracy by a polynomial-time algorithm.
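Before turning to that discussion, the following sketch gives some intuition; it is ours and not the paper's implementation: the use of the off-the-shelf \texttt{cvxpy} modeling package, the solver choice, and all helper names are illustrative assumptions. It solves the relaxation \eqref{eq:primal} numerically and then applies the rounding step from the proof of Lemma~\ref{lemma:rounding} until at most one coordinate remains fractional.
\begin{verbatim}
# Illustrative sketch only: solve (P_c) with a generic convex solver and
# round the solution in the pipage style of the Rounding lemma.
import numpy as np
import cvxpy as cp

rng = np.random.default_rng(1)
n, d, B = 8, 3, 2.0
X = rng.normal(size=(n, d))
X /= np.linalg.norm(X, axis=1, keepdims=True)  # ||x_i||_2 <= 1
c = rng.uniform(0.2, 1.0, size=n)              # costs

def F(lam):
    # Multi-linear extension by enumeration (fine for small n).
    total = 0.0
    for mask in range(1 << n):
        S = [i for i in range(n) if (mask >> i) & 1]
        p = np.prod([lam[i] if (mask >> i) & 1 else 1 - lam[i]
                     for i in range(n)])
        total += p * np.linalg.slogdet(np.eye(d) + X[S].T @ X[S])[1]
    return total

# Solve the concave relaxation (P_c).
lam = cp.Variable(n)
objective = cp.Maximize(cp.log_det(X.T @ cp.diag(lam) @ X + np.eye(d)))
cp.Problem(objective, [lam >= 0, lam <= 1, c @ lam <= B]).solve(solver=cp.SCS)
lam_star = np.clip(lam.value, 0.0, 1.0)

# Pipage rounding: move along e_i - (c_i/c_j) e_j (the budget is unchanged)
# until at most one coordinate is fractional; F does not decrease at the
# better of the two endpoints because F is convex along this direction.
def pipage_round(lam, tol=1e-6):
    lam = lam.copy()
    while True:
        frac = [i for i in range(n) if tol < lam[i] < 1 - tol]
        if len(frac) <= 1:
            return lam
        i, j = frac[0], frac[1]
        lo = max(-lam[i], (lam[j] - 1) * c[j] / c[i])
        hi = min(1 - lam[i], lam[j] * c[j] / c[i])
        best = None
        for eps in (lo, hi):
            cand = lam.copy()
            cand[i] += eps
            cand[j] -= eps * c[i] / c[j]
            if best is None or F(cand) > F(best):
                best = cand
        lam = best

lam_bar = pipage_round(lam_star)
print(F(lam_star), F(lam_bar))   # the rounded point is at least as good
\end{verbatim}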
However, this polynomial-time estimate of $L^*_c$ is not necessarily monotone in the costs; we address this by converting it into an estimator that is ``almost'' monotone.

\subsection{Polynomial-Time, Almost-Monotone Approximation}\label{sec:monotonicity}
The $\log\det$ objective function of \eqref{eq:primal} is strictly concave and \emph{self-concordant}~\cite{boyd2004convex}. The maximization of a concave, self-concordant function subject to a set of linear constraints can be performed through the \emph{barrier method} (see, \emph{e.g.}, Section 11.5.5 of \cite{boyd2004convex} for general self-concordant optimization, as well as \cite{vandenberghe1998determinant} for a detailed treatment of the $\log\det$ objective). The performance of the barrier method in our case is summarized by the following lemma:
\begin{lemma}[\citeN{boyd2004convex}]\label{lemma:barrier} For any $\varepsilon>0$, the barrier method computes an approximation $\hat{L}^*_c$ that is $\varepsilon$-accurate, \emph{i.e.}, that satisfies $|\hat L^*_c- L^*_c|\leq \varepsilon$, in time $O\left(poly(n,d,\log\log\varepsilon^{-1})\right)$. The same guarantees apply when maximizing $L$ subject to an arbitrary set of $O(n)$ linear constraints. \end{lemma}
Clearly, the optimal value $L^*_c$ of \eqref{eq:primal} is monotone in $c$: for any two $c,c'\in \reals_+^n$ such that $c\leq c'$ coordinate-wise, $\dom_{c'}\subseteq \dom_c$ and thus $L^*_c\geq L^*_{c'}$. Hence, the map $c\mapsto L^*_c$ is non-increasing. Unfortunately, the same is not true for the output $\hat{L}_c^*$ of the barrier method: there is no guarantee that the $\varepsilon$-accurate approximation $\hat{L}^*_c$ exhibits any kind of monotonicity. Nevertheless, we prove that it is possible to use the barrier method to construct an approximation of $L^*_{c}$ that is ``almost'' monotone. More specifically, given $\delta>0$, we say that $f:\reals^n\to\reals$ is \emph{$\delta$-decreasing} if $ f(x) \geq f(x+\mu e_i)$ for all $i\in \mathcal{N}$, $x\in\reals^n$, and $\mu\geq\delta$, where $e_i$ is the $i$-th canonical basis vector of $\reals^n$. In other words, $f$ is $\delta$-decreasing if increasing any coordinate of its input by $\delta$ or more cannot increase its value. Our next technical result (Proposition~\ref{prop:monotonicity}) constructs a polynomial-time, $\delta$-decreasing approximation of $L^*_c$ of arbitrary accuracy; we achieve this by restricting the optimization to a subset of $\dom_c$ on which the optimal value remains sensitive to changes in the costs.
Formally, for $\alpha\geq 0$ let $$\textstyle\dom_{c,\alpha} \defeq \{\lambda \in [\alpha,1]^n: \sum_{i\in \mathcal{N}}c_i\lambda_i \leq B\}\subseteq \dom_c . $$ Note that $\dom_c=\dom_{c,0}.$ Consider the following perturbed problem:
\begin{align}\tag{$P_{c,\alpha}$}\label{eq:perturbed-primal} \begin{split} \text{Maximize:} &\qquad L(\lambda)\\ \text{subject to:} & \qquad\lambda \in \dom_{c,\alpha} \end{split} \end{align}
Restricting the feasible set to $\dom_{c,\alpha}$ ensures that the sensitivity of the optimal value to each cost is bounded away from zero. As a consequence, a sufficiently accurate approximation of the optimal value of \eqref{eq:perturbed-primal} computed by the barrier method is $\delta$-decreasing with respect to the costs. On the other hand, taking $\alpha$ small enough ensures that this approximation is still an $\varepsilon$-accurate approximation of $L_c^*$.

We stress that executing the barrier method on the restricted set $\dom_{c,\alpha}$ is necessary: its output when executed over the entire domain $\dom_c$ need not be $\delta$-decreasing, even when the approximation accuracy is high. This is because a cost can become saturated when the optimal $\lambda\in \dom_c$ lies on the boundary: if $\lambda_i=0$, further increasing $c_i$ has no effect on the optimal value. Forcing the optimization to take place ``off'' the boundary ensures that this does not occur, while taking $\alpha$ small ensures that the perturbation costs little in terms of approximation accuracy.
\begin{proposition}\label{prop:monotonicity} For any $\delta\in(0,1]$ and $\varepsilon\in(0,1]$, using the barrier method to solve \eqref{eq:perturbed-primal} for $\alpha\defeq\varepsilon (\delta/B+n^2)^{-1}$ with accuracy $\frac{1}{2^{n+1}B}\alpha\delta b$ yields a $\delta$-decreasing, $\varepsilon$-accurate approximation of $L^*_c$. The running time of the algorithm is $O\big(poly(n, d, \log\log\frac{B}{b\varepsilon\delta})\big)$. \end{proposition}
A formal description of the algorithm used in Proposition~\ref{prop:monotonicity} is given in Algorithm~\ref{alg:monotone}. To prove the proposition, we show that the optimal value of \eqref{eq:perturbed-primal} is close to the optimal value of \eqref{eq:primal} (Lemma~\ref{lemma:proximity}) while being well-behaved with respect to changes in the costs (Lemma~\ref{lemma:monotonicity}). Together, these lemmas imply Proposition~\ref{prop:monotonicity}.
\begin{algorithm}[t] \caption{Almost-monotone approximation of $L^*_c$}\label{alg:monotone} \begin{algorithmic}[1] \Require{ $B\in \reals_+$, $c\in[0,B]^n$, $\delta\in (0,1]$, $\varepsilon\in (0,1]$ } \State $\alpha \gets \varepsilon (\delta/B+n^2)^{-1}$ \State Use the barrier method to solve \eqref{eq:perturbed-primal} with accuracy $\varepsilon'=\frac{1}{2^{n+1}B}\alpha\delta b$; denote the output by $\hat{L}^*_{c,\alpha}$ \State \textbf{return} $\hat{L}^*_{c,\alpha}$ \end{algorithmic} \end{algorithm}
Note that the choice of $\alpha$ in Algorithm~\ref{alg:monotone} implies that $\alpha<\frac{1}{n}$. This in turn implies that the feasible set $\dom_{c, \alpha}$ of \eqref{eq:perturbed-primal} is non-empty: it contains the strictly feasible point $\lambda=(\frac{1}{n},\ldots,\frac{1}{n})$.
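To illustrate Algorithm~\ref{alg:monotone}, the sketch below is ours and only indicative: the \texttt{cvxpy} modeling layer, the generic conic solver standing in for the barrier method of Lemma~\ref{lemma:barrier}, and all helper names are assumptions. It sets $\alpha$ and the target accuracy $\varepsilon'$ as in the algorithm and solves \eqref{eq:perturbed-primal} over $\dom_{c,\alpha}$.
\begin{verbatim}
# Illustrative sketch of Algorithm 1 (not the paper's code): solve the
# perturbed problem (P_{c,alpha}) over {alpha <= lam <= 1, c.lam <= B}.
import numpy as np
import cvxpy as cp

def almost_monotone_estimate(X, c, B, b, delta, eps):
    """Estimate of L*_c intended to be eps-accurate and delta-decreasing in c."""
    n, d = X.shape
    alpha = eps / (delta / B + n ** 2)                   # as in Algorithm 1
    target_acc = alpha * delta * b / (2 ** (n + 1) * B)  # accuracy eps'
    lam = cp.Variable(n)
    objective = cp.Maximize(cp.log_det(X.T @ cp.diag(lam) @ X + np.eye(d)))
    problem = cp.Problem(objective, [lam >= alpha, lam <= 1, c @ lam <= B])
    # A faithful implementation would run the barrier method to accuracy
    # target_acc (Lemma 3); here a generic conic solver with its default
    # tolerance stands in for it.
    problem.solve(solver=cp.SCS)
    return problem.value, alpha, target_acc

# Toy usage.
rng = np.random.default_rng(2)
n, d, B = 5, 3, 2.0
X = rng.normal(size=(n, d))
X /= np.linalg.norm(X, axis=1, keepdims=True)
X *= rng.uniform(0.6, 1.0, size=(n, 1))      # norms in (0, 1]
b = float(np.min(np.sum(X ** 2, axis=1)))    # lower bound on the ||x_i||^2
c = rng.uniform(0.2, 1.0, size=n)
print(almost_monotone_estimate(X, c, B, b, delta=0.1, eps=0.5))
\end{verbatim}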
\begin{lemma}\label{lemma:derivative-bounds} For $i\in\{1,\ldots, n\}$, let $\partial_i L(\lambda)$ denote the $i$-th partial derivative of $L$. Then:
\begin{displaymath} \forall\lambda\in[0, 1]^n,\;\frac{b}{2^n} \leq \partial_i L(\lambda) \leq 1 \end{displaymath}
\end{lemma}
\begin{proof} Recall that we defined:
\begin{displaymath} \tilde{A}(\lambda)\defeq I_d + \sum_{i=1}^n \lambda_i x_i\T{x_i} \quad\mathrm{and}\quad A(S) \defeq I_d + \sum_{i\in S} x_i\T{x_i} \end{displaymath}
Let us also define $A_k\defeq A(\{1,\ldots,k\})$. We have $\partial_i L(\lambda) = \T{x_i}\tilde{A}(\lambda)^{-1}x_i$. Since $\tilde{A}(\lambda)\succeq I_d$, $\partial_i L(\lambda)\leq \T{x_i}x_i \leq 1$, which gives the upper bound of the lemma. For the lower bound, note that $\tilde{A}(\lambda) \preceq A_n$. Hence, $\partial_iL(\lambda)\geq \T{x_i}A_n^{-1}x_i$. Using the Sherman--Morrison formula \cite{sm}, for all $k\geq 1$:
\begin{displaymath} \T{x_i}A_k^{-1} x_i = \T{x_i}A_{k-1}^{-1}x_i - \frac{(\T{x_i}A_{k-1}^{-1}x_k)^2}{1+\T{x_k}A_{k-1}^{-1}x_k} \end{displaymath}
By the Cauchy--Schwarz inequality:
\begin{displaymath} (\T{x_i}A_{k-1}^{-1}x_k)^2 \leq \T{x_i}A_{k-1}^{-1}x_i\;\T{x_k}A_{k-1}^{-1}x_k \end{displaymath}
Hence:
\begin{displaymath} \T{x_i}A_k^{-1} x_i \geq \T{x_i}A_{k-1}^{-1}x_i - \T{x_i}A_{k-1}^{-1}x_i\frac{\T{x_k}A_{k-1}^{-1}x_k}{1+\T{x_k}A_{k-1}^{-1}x_k} \end{displaymath}
But $\T{x_k}A_{k-1}^{-1}x_k\leq 1$ and $\frac{a}{1+a}\leq \frac{1}{2}$ for $0\leq a\leq 1$, so:
\begin{displaymath} \T{x_i}A_{k}^{-1}x_i \geq \T{x_i}A_{k-1}^{-1}x_i - \frac{1}{2}\T{x_i}A_{k-1}^{-1}x_i\geq \frac{\T{x_i}A_{k-1}^{-1}x_i}{2} \end{displaymath}
By induction:
\begin{displaymath} \T{x_i}A_n^{-1} x_i \geq \frac{\T{x_i}x_i}{2^n} \end{displaymath}
Using that $\T{x_i}{x_i}\geq b$ concludes the proof of the lower bound. \end{proof}

Let us introduce the Lagrangian of problem \eqref{eq:perturbed-primal}:
\begin{displaymath} \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) \defeq L(\lambda) + \T{\mu}(\lambda-\alpha\mathbf{1}) + \T{\nu}(\mathbf{1}-\lambda) + \xi(B-\T{c}\lambda) \end{displaymath}
so that:
\begin{displaymath} L^*_{c,\alpha} = \min_{\mu, \nu, \xi\geq 0}\max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu, \nu, \xi) \end{displaymath}
Similarly, we define $\mathcal{L}_{c}\defeq\mathcal{L}_{c, 0}$, the Lagrangian of \eqref{eq:primal}. Let $\lambda^*$ be primal optimal for \eqref{eq:perturbed-primal}, and $(\mu^*, \nu^*, \xi^*)$ be dual optimal for the same problem. In addition to primal and dual feasibility, the Karush--Kuhn--Tucker (KKT) conditions \cite{boyd2004convex} give, for all $i\in\{1, \ldots, n\}$:
\begin{gather*} \partial_i L(\lambda^*) + \mu_i^* - \nu_i^* - \xi^* c_i = 0\\ \mu_i^*(\lambda_i^* - \alpha) = 0\\ \nu_i^*(1 - \lambda_i^*) = 0 \end{gather*}
\begin{lemma}\label{lemma:proximity} We have:
\begin{displaymath} L^*_c - \alpha n^2\leq L^*_{c,\alpha} \leq L^*_c \end{displaymath}
In particular, $|L^*_c - L^*_{c,\alpha}| \leq \alpha n^2$. \end{lemma}
\begin{proof} The map $\alpha\mapsto L^*_{c,\alpha}$ is non-increasing, as it is the maximum of $L$ over a feasible set that shrinks as $\alpha$ increases; this gives the rightmost inequality.
Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c, \alpha})$, that is:
\begin{displaymath} L^*_{c,\alpha} = \max_\lambda \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) \end{displaymath}
Note that $\mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) = \mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) - \alpha\T{\mathbf{1}}\mu^*$, and that for any $\lambda$ feasible for problem \eqref{eq:primal}, $\mathcal{L}_{c}(\lambda, \mu^*, \nu^*, \xi^*) \geq L(\lambda)$. Hence,
\begin{displaymath} L^*_{c,\alpha} \geq L(\lambda) - \alpha\T{\mathbf{1}}\mu^* \end{displaymath}
for any $\lambda$ feasible for \eqref{eq:primal}. In particular, for $\lambda$ primal optimal for \eqref{eq:primal}:
\begin{equation}\label{eq:local-1} L^*_{c,\alpha} \geq L^*_c - \alpha\T{\mathbf{1}}\mu^* \end{equation}
Let us denote by $M\defeq \{i|\mu_i^* > 0\}$ the support of $\mu^*$, by $\bar{M}\defeq\mathcal{N}\setminus M$ its complement, and by $\lambda^*$ a primal optimal point for \eqref{eq:perturbed-primal}. From the KKT conditions we see that:
\begin{displaymath} M \subseteq \{i|\lambda_i^* = \alpha\} \end{displaymath}
If $|M| = 0$, then $\T{\mathbf{1}}\mu^*=0$ and the lemma follows, so we now assume that $|M|\geq 1$. In this case $\T{c}\lambda^* = B$: otherwise we could increase the coordinates of $\lambda^*$ in $M$, which would increase the value of the objective function and contradict the optimality of $\lambda^*$. Note also that $|M|\leq n-1$: otherwise, since $\alpha< \frac{1}{n}$, we would have $\T{c}\lambda^* < B$, which again contradicts the optimality of $\lambda^*$. Let us write:
\begin{displaymath} B = \T{c}\lambda^* = \alpha\sum_{i\in M}c_i + \sum_{i\in \bar{M}}\lambda_i^*c_i \leq \alpha |M|B + (n-|M|)\max_{i\in \bar{M}} c_i \end{displaymath}
That is:
\begin{equation}\label{local-2} \max_{i\in\bar{M}} c_i \geq \frac{B - B|M|\alpha}{n-|M|}> \frac{B}{n} \end{equation}
where the last inequality again uses $\alpha<\frac{1}{n}$. From the KKT conditions, we see that for $i\in M$, $\nu_i^* = 0$ and:
\begin{equation}\label{local-3} \mu_i^* = \xi^*c_i - \partial_i L(\lambda^*)\leq \xi^*c_i\leq \xi^*B \end{equation}
since $\partial_i L(\lambda^*)\geq 0$ and $c_i\leq B$. Furthermore, using the KKT conditions again, we have that:
\begin{equation}\label{local-4} \xi^* \leq \inf_{i\in \bar{M}}\frac{\partial_i L(\lambda^*)}{c_i}\leq \inf_{i\in \bar{M}} \frac{1}{c_i} = \frac{1}{\max_{i\in\bar{M}} c_i} \end{equation}
where the second inequality uses the upper bound of Lemma~\ref{lemma:derivative-bounds}. Combining \eqref{local-2}, \eqref{local-3} and \eqref{local-4}, we get that:
\begin{displaymath} \sum_{i\in M}\mu_i^* \leq |M|\xi^*B \leq n\xi^*B\leq \frac{nB}{\max_{i\in\bar{M}} c_i} \leq n^2 \end{displaymath}
This implies that:
\begin{displaymath} \T{\mathbf{1}}\mu^* = \sum_{i=1}^n \mu^*_i = \sum_{i\in M}\mu_i^*\leq n^2 \end{displaymath}
which, together with \eqref{eq:local-1}, proves the lemma. \end{proof}
\begin{lemma}\label{lemma:monotonicity} If $c' = (c_i', c_{-i})$ with $c_i'\leq c_i - \delta$, we have:
\begin{displaymath} L^*_{c',\alpha} \geq L^*_{c,\alpha} + \frac{\alpha\delta b}{2^nB} \end{displaymath}
\end{lemma}
\begin{proof} Let $\mu^*, \nu^*, \xi^*$ be dual optimal for $(P_{c', \alpha})$.
Noting that:
\begin{displaymath} \mathcal{L}_{c', \alpha}(\lambda, \mu^*, \nu^*, \xi^*) \geq \mathcal{L}_{c, \alpha}(\lambda, \mu^*, \nu^*, \xi^*) + \lambda_i\xi^*\delta, \end{displaymath}
we get, similarly to Lemma~\ref{lemma:proximity}:
\begin{displaymath} L^*_{c',\alpha} \geq L(\lambda) + \lambda_i\xi^*\delta \end{displaymath}
for any $\lambda$ feasible for \eqref{eq:perturbed-primal}. In particular, for $\lambda^*$ primal optimal for \eqref{eq:perturbed-primal}:
\begin{displaymath} L^*_{c',\alpha} \geq L^*_{c,\alpha} + \alpha\xi^*\delta \end{displaymath}
since $\lambda_i^*\geq \alpha$. Using the KKT conditions for $(P_{c', \alpha})$, we can write:
\begin{displaymath} \xi^* = \inf_{i:\lambda^{'*}_i>\alpha} \frac{\T{x_i}\tilde{A}(\lambda^{'*})^{-1}x_i}{c_i'} \end{displaymath}
with $\lambda^{'*}$ primal optimal for $(P_{c', \alpha})$. Since $c_i'\leq B$, using the lower bound of Lemma~\ref{lemma:derivative-bounds}, we get that $\xi^*\geq \frac{b}{2^nB}$, which concludes the proof. \end{proof}

We are now ready to conclude the proof of Proposition~\ref{prop:monotonicity}.
\begin{proof} Let $\hat{L}^*_{c,\alpha}$ be the approximation computed by Algorithm~\ref{alg:monotone}.
\begin{enumerate}
\item Using Lemma~\ref{lemma:proximity} and the fact that the accuracy $\varepsilon'=\frac{\alpha\delta b}{2^{n+1}B}$ satisfies $\varepsilon'\leq \frac{\alpha\delta}{B}$ (as $b\leq 1$):
\begin{displaymath} |\hat{L}^*_{c,\alpha} - L^*_c| \leq |\hat{L}^*_{c,\alpha} - L^*_{c,\alpha}| + |L^*_{c,\alpha} - L^*_c| \leq \frac{\alpha\delta}{B} + \alpha n^2 = \varepsilon \end{displaymath}
which proves the $\varepsilon$-accuracy.
\item For the $\delta$-decreasing property, let $c' = (c_i', c_{-i})$ with $c_i'\leq c_i-\delta$. Then:
\begin{displaymath} \hat{L}^*_{c',\alpha} \geq L^*_{c',\alpha} - \frac{\alpha\delta b}{2^{n+1}B} \geq L^*_{c,\alpha} + \frac{\alpha\delta b}{2^{n+1}B} \geq \hat{L}^*_{c,\alpha} \end{displaymath}
where the first and last inequalities follow from the accuracy of the approximation, and the middle inequality follows from Lemma~\ref{lemma:monotonicity}.
\item The accuracy to which \eqref{eq:perturbed-primal} is solved is:
\begin{displaymath} \varepsilon' =\frac{\varepsilon\delta b}{2^{n+1}(\delta + n^2B)} \end{displaymath}
Note that:
\begin{displaymath} \log\log (\varepsilon')^{-1} = O\bigg(\log\log\frac{B}{\varepsilon\delta b} + \log n\bigg) \end{displaymath}
Applying Lemma~\ref{lemma:barrier} concludes the proof of the running time.\qedhere
\end{enumerate}
\end{proof}
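To illustrate the monotonicity mechanism just established, the following sketch is ours and purely indicative (toy data, \texttt{cvxpy} with a generic conic solver in place of the barrier method, illustrative names): it solves \eqref{eq:perturbed-primal} for a cost vector $c$ and for $c'=(c_i-\delta,c_{-i})$ and checks that, consistently with Lemma~\ref{lemma:monotonicity}, the optimal value does not decrease when a cost is lowered.
\begin{verbatim}
# Illustrative check (not from the paper) of the behaviour behind
# Proposition 2: lowering one cost by delta should not decrease the
# optimal value of the perturbed problem (P_{c,alpha}).
import numpy as np
import cvxpy as cp

rng = np.random.default_rng(3)
n, d, B, delta, eps = 5, 3, 2.0, 0.5, 0.5
X = rng.normal(size=(n, d))
X /= np.linalg.norm(X, axis=1, keepdims=True)   # ||x_i||_2 <= 1
c = rng.uniform(0.6, 1.0, size=n)               # budget constraint binds
alpha = eps / (delta / B + n ** 2)              # as in Algorithm 1

def perturbed_optimum(cost):
    # Optimal value of (P_{cost,alpha}).
    lam = cp.Variable(n)
    prob = cp.Problem(
        cp.Maximize(cp.log_det(X.T @ cp.diag(lam) @ X + np.eye(d))),
        [lam >= alpha, lam <= 1, cost @ lam <= B])
    prob.solve(solver=cp.SCS)
    return prob.value

c_prime = c.copy()
c_prime[0] -= delta                              # decrease one cost by delta
val, val_prime = perturbed_optimum(c), perturbed_optimum(c_prime)
print(val, val_prime, val_prime >= val - 1e-6)   # expected: True
\end{verbatim}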