-rw-r--r--   appendix.tex   | 66
-rw-r--r--   conclusion.tex |  4
-rw-r--r--   main.tex       | 10
-rw-r--r--   paper.tex      |  4
-rw-r--r--   problem.tex    | 10
5 files changed, 54 insertions(+), 40 deletions(-)
diff --git a/appendix.tex b/appendix.tex
index f9564c3..9175e34 100644
--- a/appendix.tex
+++ b/appendix.tex
@@ -1,3 +1,30 @@
+\section{Properties of the Value Function $V$} \label{app:properties}
+For the sake of concreteness, we prove below the positivity, monotonicity, and submodularity of $V(S) = \frac{1}{2}\log\det(I_d+\T{X_S}X_S)$ from first principles. We note, however, that these properties hold more generally for the information gain under a wider class of models than the linear model with Gaussian noise and prior that we study here; we discuss this in more detail in Appendix~\ref{sec:ext}.
+
+For two symmetric matrices $A$ and $B$, we write $A\succ B$ ($A\succeq B$) if
+$A-B$ is positive definite (positive semi-definite). This order allows us to
+define the notions of a \emph{decreasing} and of a \emph{convex} matrix
+function, similarly to their real counterparts. With this definition, matrix
+inversion is decreasing and convex over symmetric positive definite
+matrices (see Example 3.48, p.~110, in \cite{boyd2004convex}).
+
+The positivity of $V(S)$ follows from the fact that $\T{X_S}X_S$ is positive semi-definite and, as such, $I_d+\T{X_S}X_S\succeq I_d$, so all its eigenvalues are greater than or equal to one, and they are all equal to one if and only if $S=\emptyset$. The marginal contribution of item $i\in\mathcal{N}$ to a set
+$S\subseteq \mathcal{N}$ can be written as
+\begin{align}
+V(S\cup \{i\}) - V(S)& = \frac{1}{2}\log\det(I_d
+ + \T{X_S}X_S + x_i\T{x_i})
+ - \frac{1}{2}\log\det(I_d + \T{X_S}X_S)\nonumber\\
+ & = \frac{1}{2}\log\det(I_d + x_i\T{x_i}(I_d +
+\T{X_S}X_S)^{-1})
+ = \frac{1}{2}\log(1 + \T{x_i}A(S)^{-1}x_i)\label{eq:marginal_contrib}
+\end{align}
+where $A(S) \defeq I_d+ \T{X_S}X_S$, and the last equality follows from
+Sylvester's determinant identity~\cite{sylvester}. Monotonicity therefore follows from the fact that $A(S)^{-1}$ is positive semi-definite. Finally, since the inverse is decreasing over positive definite matrices, we have
+\begin{gather}
+ \forall S\subseteq\mathcal{N},\quad A(S)^{-1} \succeq A(S\cup\{i\})^{-1}, \label{eq:inverse}
+\end{gather}
+and submodularity also follows. \qed
+
 \section{Proofs of Statements in Section~\ref{sec:concave}}

 \subsection{Proof of Lemma~\ref{lemma:relaxation-ratio}}\label{proofofrelaxation-ratio}
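The identity \eqref{eq:marginal_contrib} and the decreasing marginals implied by \eqref{eq:inverse} are easy to sanity-check numerically. Below is a minimal sketch, not part of the patch: the random instance and the names V and X are assumptions made for illustration, with $V(S)=\frac{1}{2}\log\det(I_d+\T{X_S}X_S)$ as above.

import numpy as np

rng = np.random.default_rng(0)
d, n = 4, 8
X = rng.normal(size=(n, d))  # row i holds the feature vector x_i^T

def V(S):
    # V(S) = 1/2 * log det(I_d + X_S^T X_S)
    X_S = X[sorted(S)]
    return 0.5 * np.linalg.slogdet(np.eye(d) + X_S.T @ X_S)[1]

S, i = {0, 2, 5}, 6
A_S = np.eye(d) + X[sorted(S)].T @ X[sorted(S)]            # A(S)
lhs = V(S | {i}) - V(S)                                    # marginal contribution
rhs = 0.5 * np.log1p(X[i] @ np.linalg.solve(A_S, X[i]))    # Sylvester form
assert np.isclose(lhs, rhs)

# submodularity: the marginal of i can only shrink on a larger set
T = S | {1, 3}
assert V(T | {i}) - V(T) <= lhs + 1e-12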
@@ -37,18 +64,9 @@
 P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\big(V(S\cup\{i\}) - V(S)\big).
 \end{displaymath}
-The marginal contribution of $i$ to
-$S$ can be written as
-\begin{align*}
-V(S\cup \{i\}) - V(S)& = \frac{1}{2}\log\det(I_d
- + \T{X_S}X_S + x_i\T{x_i})
- - \frac{1}{2}\log\det(I_d + \T{X_S}X_S)\\
- & = \frac{1}{2}\log\det(I_d + x_i\T{x_i}(I_d +
-\T{X_S}X_S)^{-1})
- = \frac{1}{2}\log(1 + \T{x_i}A(S)^{-1}x_i)
-\end{align*}
-where $A(S) \defeq I_d+ \T{X_S}X_S$, and the last equality follows from
-Sylvester's determinant identity~\cite{sylvester}.
+Recall from \eqref{eq:marginal_contrib} that the marginal contribution of $i$ to $S$ is given by
+$$V(S\cup \{i\}) - V(S) =\frac{1}{2}\log(1 + \T{x_i}A(S)^{-1}x_i),$$
+where $A(S) = I_d+ \T{X_S}X_S$.
 % $ V(S\cup\{i\}) - V(S) = \frac{1}{2}\log\left(1 + \T{x_i} A(S)^{-1}x_i\right)$.
 Using this,
 \begin{displaymath}
@@ -58,7 +76,7 @@ Using this,
 \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)
 \end{displaymath}
 The computation of the derivative of $L$ uses standard matrix
-calculus: writing $\tilde{A}(\lambda) = I_d+\sum_{i\in
+calculus: writing $\tilde{A}(\lambda) \defeq I_d+\sum_{i\in
 \mathcal{N}}\lambda_ix_i\T{x_i}$,
 \begin{displaymath}
 \det \tilde{A}(\lambda + h\cdot e_i) = \det\big(\tilde{A}(\lambda)
@@ -76,14 +94,7 @@ Using this,
 \partial_i L(\lambda) =\frac{1}{2} \T{x_i}\tilde{A}(\lambda)^{-1}x_i.
 \end{displaymath}
-
-For two symmetric matrices $A$ and $B$, we write $A\succ B$ ($A\succeq B$) if
-$A-B$ is positive definite (positive semi-definite). This order allows us to
-define the notion of a \emph{decreasing} as well as \emph{convex} matrix
-function, similarly to their real counterparts. With this definition, matrix
-inversion is decreasing and convex over symmetric positive definite
-matrices (see Example 3.48 p. 110 in \cite{boyd2004convex}).
-In particular,
+Recall from \eqref{eq:inverse} that, since the matrix inverse is decreasing over positive definite matrices,
 \begin{gather*}
 \forall S\subseteq\mathcal{N},\quad A(S)^{-1} \succeq A(S\cup\{i\})^{-1}
 \end{gather*}
@@ -96,8 +107,8 @@ for all $S\subseteq\mathcal{N}\setminus\{i\}$. Hence,
 & \geq \frac{1}{4}
 \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
 P_{\mathcal{N}}^\lambda(S)
-\log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\
-&\hspace{-3.5em}+\frac{1}{4}
+\log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)
++\frac{1}{4}
 \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
 P_{\mathcal{N}}^\lambda(S\cup\{i\})
 \log\Big(1 + \T{x_i}A(S\cup\{i\})^{-1}x_i\Big)\\
@@ -115,7 +126,7 @@ Hence,
 \T{x_i}\bigg(\sum_{S\subseteq\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)^{-1}\bigg)x_i.
 \end{displaymath}
 Finally, using that the inverse is a matrix convex function over symmetric
-positive definite matrices:
+positive definite matrices (see Appendix~\ref{app:properties}):
 \begin{displaymath}
 \partial_i F(\lambda)
 \geq \frac{1}{4}
@@ -126,7 +137,7 @@ positive definite matrices:
 \end{displaymath}
 Having bounded the ratio between the partial derivatives, we now bound the ratio
-$F(\lambda)/L(\lambda)$ from below. Consider the following cases.
+$F(\lambda)/L(\lambda)$ from below. Consider the following three cases.
 First, if the minimum is attained as $\lambda$ converges to zero in,
 \emph{e.g.}, the $l_2$ norm, by the Taylor approximation, one can write:
@@ -643,7 +654,6 @@ The complexity of the mechanism is given by the following lemma.
 \end{proof}
 Finally, we prove the approximation ratio of the mechanism.
-
 We use the following lemma from \cite{chen} which bounds $OPT$ in terms of
 the value of $S_G$, as computed in Algorithm \ref{mechanism}, and $i^*$, the
 element of maximum value.
@@ -666,8 +676,8 @@
 OPT \leq \frac{10e\!-\!3 + \sqrt{64e^2\!-\!24e\!+\!9}}{2(e\!-\!1)} V(S^*)\! +
 \! \varepsilon .
 \end{equation}
-To see this, let $L^*_{c_{-i^*}}$ be the true maximum value of $L$ subject to
-$\lambda_{c_{-i^*}}=0$, $\sum_{i\in \mathcal{N}\setminus{i^*}}c_i\leq B$. From line
+To see this, let $L^*_{c_{-i^*}}$ be the maximum value of $L$ subject to
+$\lambda_{i^*}=0$, $\sum_{i\in \mathcal{N}\setminus\{i^*\}}c_i\leq B$. From line
 \ref{relaxexec} of Algorithm~\ref{mechanism}, we have
 $L^*_{c_{-i^*}}-\varepsilon\leq OPT_{-i^*}' \leq L^*_{c_{-i^*}}+\varepsilon$.
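The closed-form partial derivative $\partial_i L(\lambda) =\frac{1}{2} \T{x_i}\tilde{A}(\lambda)^{-1}x_i$ used in this proof can likewise be checked against central finite differences. A minimal sketch under an assumed random instance; the names L, X, and lam are ours, not the paper's.

import numpy as np

rng = np.random.default_rng(1)
d, n = 4, 8
X = rng.normal(size=(n, d))  # row i holds x_i^T

def L(lam):
    # L(lambda) = 1/2 * log det(I_d + sum_i lambda_i x_i x_i^T)
    A = np.eye(d) + (X.T * lam) @ X
    return 0.5 * np.linalg.slogdet(A)[1]

lam = rng.uniform(0.1, 1.0, size=n)
A = np.eye(d) + (X.T * lam) @ X
# closed form: dL/dlambda_i = 1/2 * x_i^T A~(lambda)^{-1} x_i
grad = 0.5 * np.einsum('id,id->i', X, np.linalg.solve(A, X.T).T)

h = 1e-6
for i in range(n):
    e = np.zeros(n)
    e[i] = h
    fd = (L(lam + e) - L(lam - e)) / (2 * h)  # central difference
    assert np.isclose(grad[i], fd, atol=1e-5)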
diff --git a/conclusion.tex b/conclusion.tex
index e69de29..695dfb7 100644
--- a/conclusion.tex
+++ b/conclusion.tex
@@ -0,0 +1,4 @@
+To be written. Will contain
+(a) list of extensions with a forward pointer to the Appendix
+(b) some concluding remark that we initiated the area, the optimality criteria are not a priori clear, etc.
+
diff --git a/main.tex b/main.tex
--- a/main.tex
+++ b/main.tex
@@ -57,7 +57,7 @@ To obtain this result, we use the following modified version of Myerson's theore
 %variant of Myerson's theorem.
 \begin{lemma}\label{thm:myerson-variant}
-\sloppy A normalized mechanism $\mathcal{M} = (S,p)$ for a single parameter auction is
+A normalized mechanism $\mathcal{M} = (S,p)$ for a single-parameter auction is
 $\delta$-truthful if:
 (a) $S$ is $\delta$-monotone, \emph{i.e.},
 for any agent $i$ and $c_i' \leq c_i-\delta$, for any
@@ -65,11 +65,8 @@ fixed costs $c_{-i}$ of agents in $\mathcal{N}\setminus\{i\}$,
 $i\in S(c_i, c_{-i})$ implies $i\in S(c_i', c_{-i})$, and (b) agents are paid
 \emph{threshold payments}, \emph{i.e.}, for all $i\in S(c)$,
 $p_i(c)=\inf\{c_i': i\in S(c_i', c_{-i})\}$.
 \end{lemma}
-\fussy
-
-We can now state our main result, which is proved in Appendix~\ref{sec:proofofmainthm}:
+Lemma~\ref{thm:myerson-variant} allows us to incorporate our relaxation into the above framework, yielding the following theorem:
 \begin{theorem}\label{thm:main}
-  \sloppy
 For any $\delta>0$, and any $\epsilon>0$, there exists a $\delta$-truthful,
 individually rational and budget feasible mechanism for \EDP{} that runs in
 time $O\big(poly(n, d, \log\log\frac{B}{b\varepsilon\delta})\big)$
@@ -82,6 +79,7 @@ We can now state our main result, which is proved in Appendix~\ref{sec:proofofma
 \simeq 12.98V(S^*) + \varepsilon.$ %
 \end{align*}
 \end{theorem}
+The proof of the theorem, as well as our proposed mechanism, can be found in Appendix~\ref{sec:proofofmainthm}.
 In addition, we establish the following simple lower bound, proved in Appendix~\ref{proofoflowerbound}.
 \begin{theorem}\label{thm:lowerbound}
@@ -89,3 +87,5 @@
 There is no $2$-approximate, truthful, budget feasible, individually rational
 mechanism for EDP.
 \end{theorem}
+
+
diff --git a/paper.tex b/paper.tex
--- a/paper.tex
+++ b/paper.tex
@@ -35,8 +35,8 @@
 \input{approximation}
 \section{Mechanism for \SEDP{}}\label{sec:mechanism}
 \input{main}
-%\section{Conclusion}
-%\input{conclusion}
+\section{Conclusion}
+\input{conclusion}
 \bibliographystyle{abbrvnat}
 \bibliography{notes}
 \appendix
diff --git a/problem.tex b/problem.tex
index 851f691..89ffa9d 100644
--- a/problem.tex
+++ b/problem.tex
@@ -27,7 +27,7 @@ Then, \E\ estimates $\beta$ through \emph{maximum a posteriori estimation}: \emp
 \end{align}
 where the last equality is obtained by setting $\nabla_{\beta}\prob(\beta\mid y_S)$ to zero and solving the resulting linear system; in \eqref{ridge}, $X_S\defeq[x_i]_{i\in S}\in \reals^{|S|\times d}$ is the matrix of experiment features and $y_S\defeq[y_i]_{i\in S}\in\reals^{|S|}$ are the observed measurements.
-This optimization, commonly known as \emph{ridge regression}, includes an additional quadratic penalty term compared to the standard least squares estimation.
+This optimization, commonly known as \emph{ridge regression}, includes an additional quadratic penalty term $\T{\beta}R\beta$ compared to standard least squares estimation.
 % under \eqref{model}, the maximum likelihood estimator of $\beta$ is the \emph{least squares} estimator: for $X_S=[x_i]_{i\in S}\in \reals^{|S|\times d}$ the matrix of experiment features and
 %$y_S=[y_i]_{i\in S}\in\reals^{|S|}$ the observed measurements,
 %\begin{align} \hat{\beta} &=\max_{\beta\in\reals^d}\prob(y_S;\beta) =\argmin_{\beta\in\reals^d } \sum_{i\in S}(\T{\beta}x_i-y_i)^2 \nonumber\\
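For the penalty $\T{\beta}R\beta$ added in this hunk, the ridge estimate \eqref{ridge} has the standard closed form $\hat{\beta}=(\T{X_S}X_S+R)^{-1}\T{X_S}y_S$. A minimal numerical sketch, assuming the illustrative choice $R=I_d$ and synthetic data (neither is prescribed by the diff):

import numpy as np

rng = np.random.default_rng(2)
d, n = 3, 20
X_S = rng.normal(size=(n, d))                # experiment features [x_i]_{i in S}
beta = rng.normal(size=d)                    # ground-truth parameter
y_S = X_S @ beta + 0.1 * rng.normal(size=n)  # noisy measurements
R = np.eye(d)                                # penalty matrix (illustrative choice)

# ridge regression: argmin_beta sum_i (beta^T x_i - y_i)^2 + beta^T R beta
beta_hat = np.linalg.solve(X_S.T @ X_S + R, X_S.T @ y_S)

# first-order optimality: the objective's gradient vanishes at beta_hat
assert np.allclose((X_S.T @ X_S + R) @ beta_hat, X_S.T @ y_S)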
@@ -109,11 +109,11 @@ the optimal value achievable in the full-information case.
 reduces to EDP with dimension $d=1$ by mapping the weight of each item, say, $w_i$, to an experiment with $x_i^2=w_i$.% Moreover, \eqref{modified} is submodular, monotone and satisfies $V(\emptyset) = 0$.
-Note that \eqref{modified} is a submodular set function, \emph{i.e.},
-$V(S)+V(T)\geq V(S\cup T)+V(S\cap T)$ for all $S,T\subseteq \mathcal{N}$. It is
+The value function \eqref{modified} has the following properties, which are proved in Appendix~\ref{app:properties}. First, it is non-negative, \emph{i.e.}, $V(S)\geq 0$
+for all $S\subseteq\mathcal{N}$. Second, it is
 also monotone, \emph{i.e.}, $V(S)\leq V(T)$ for all $S\subset T$, with
-$V(\emptyset)=0$. Finally, it is non-negative, \emph{i.e.}, $V(S)\geq 0$
-for all $S\subseteq\mathcal{N}$, since the matrix $\T{X_S}X_S$ is positive semi-definite for all $S\subseteq \mathcal{N}$.
+$V(\emptyset)=0$. Finally, it is submodular, \emph{i.e.},
+$V(S)+V(T)\geq V(S\cup T)+V(S\cap T)$ for all $S,T\subseteq \mathcal{N}$.
 %Finally, we define the strategic version of \EDP{} (denoted by \SEDP) by adding the additional constraints of truthfulness, individual rationality, and normal payments, as described in the previous section.
 The above three properties imply that a greedy algorithm yields a constant approximation ratio to \EDP. In particular, consider the greedy algorithm in which, for
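The sentence above is cut off in this view, so the paper's exact greedy variant is not visible here. As a hedged sketch only, a standard cost-benefit greedy for a monotone submodular $V$ under a budget looks as follows; the instance, the costs, and the budget B are our assumptions, not the paper's Algorithm.

import numpy as np

rng = np.random.default_rng(3)
d, n = 3, 10
X = rng.normal(size=(n, d))
costs = rng.uniform(0.5, 2.0, size=n)

def V(S):
    # V(S) = 1/2 * log det(I_d + X_S^T X_S)
    X_S = X[sorted(S)]
    return 0.5 * np.linalg.slogdet(np.eye(d) + X_S.T @ X_S)[1]

def greedy(B):
    # Repeatedly add the affordable item with the largest marginal value per
    # unit cost; a sketch of the standard scheme, not necessarily the exact
    # variant the (truncated) text goes on to describe.
    S, spent = set(), 0.0
    while True:
        best, best_ratio = None, 0.0
        for i in set(range(n)) - S:
            if spent + costs[i] > B:
                continue
            ratio = (V(S | {i}) - V(S)) / costs[i]
            if ratio > best_ratio:
                best, best_ratio = i, ratio
        if best is None:
            return S
        S.add(best)
        spent += costs[best]

S_G = greedy(B=4.0)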
