Diffstat (limited to 'proof.tex')
-rw-r--r--  proof.tex  109
1 file changed, 63 insertions, 46 deletions
diff --git a/proof.tex b/proof.tex
index 25061bd..9288ff2 100644
--- a/proof.tex
+++ b/proof.tex
@@ -8,6 +8,7 @@
\newtheorem{example}{Example}
\newtheorem{prop}{Proposition}
\newtheorem{theorem}{Theorem}
+\newcommand*{\defeq}{\stackrel{\text{def}}{=}}
\newcommand{\var}{\mathop{\mathrm{Var}}}
\newcommand{\condexp}[2]{\mathop{\mathbb{E}}\left[#1|#2\right]}
\newcommand{\expt}[1]{\mathop{\mathbb{E}}\left[#1\right]}
@@ -33,18 +34,34 @@
$(\varepsilon_i)_{i\in \mathcal{N}}$ are mutually independent.
\item prior knowledge of $\beta$: $\beta\sim\mathcal{N}(0,\kappa I_d)$
\item $\mu = \frac{\kappa}{\sigma^2}$ is the regularization factor.
+    \item after the variables $y_i$ are disclosed, the experimenter
+      performs \emph{maximum a posteriori estimation}, which under the
+      Gaussian prior is equivalent to solving the ridge regression
+      minimisation problem (a numerical sketch follows below):
+      \begin{displaymath}
+        \hat{\beta} = \argmin_\beta \sum_i (y_i - \beta^*x_i)^2
+          + \frac{1}{\mu}\norm{\beta}_2^2
+      \end{displaymath}
\end{itemize}
\end{itemize}
\subsection{Economics}
\begin{itemize}
- \item Value function:
+  \item Value function: following experimental design theory, the value of
+    data is the decrease in uncertainty about the model. A common measure
+    of uncertainty is the entropy. Thus, the value of a set $S$ of users is:
+ \begin{displaymath}
+ V(S) = \mathbb{H}(\beta) - \mathbb{H}(\beta|Y_S)
+ \end{displaymath}
+ where $Y_S = \{y_i,\,i\in S\}$ is the set of observations.
+
+    In our case (under the Gaussian prior):
\begin{align*}
\forall S\subset\mathcal{N},\; V(S)
& = \frac{1}{2}\log\det\left(I_d
+ \mu\sum_{i\in S} x_ix_i^*\right)\\
- & = \frac{1}{2}\log\det X(S)
+ & \defeq \frac{1}{2}\log\det A(S)
\end{align*}
\item each user $i$ has a cost $c_i$
\item the auctioneer has a budget constraint $B$
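
As a concrete illustration of the setup above, here is a minimal numerical
sketch of the value function and the MAP/ridge estimate. The toy data, the
numpy dependency, and the helper names value/map_estimate are assumptions
for illustration only; the rows of X are normalised so that $\norm{x_i}\leq
1$, which the bound $\mu x_i^*A(S)^{-1}x_i\leq\mu$ used later in the proof
implicitly requires.

    import numpy as np

    rng = np.random.default_rng(0)
    n, d, mu = 8, 3, 2.0                      # mu = kappa / sigma^2
    X = rng.normal(size=(n, d))               # row i is x_i^*
    X /= np.linalg.norm(X, axis=1, keepdims=True)   # enforce ||x_i|| <= 1

    def value(S, X, mu):
        # V(S) = 1/2 log det(I_d + mu * sum_{i in S} x_i x_i^*)
        Xs = X[list(S)]
        return 0.5 * np.linalg.slogdet(np.eye(X.shape[1]) + mu * Xs.T @ Xs)[1]

    def map_estimate(S, X, y, mu):
        # ridge/MAP estimate from the disclosed observations y_i, i in S
        Xs, ys = X[list(S)], y[list(S)]
        return np.linalg.solve(Xs.T @ Xs + np.eye(X.shape[1]) / mu, Xs.T @ ys)

    beta = rng.normal(size=d)
    y = X @ beta + rng.normal(size=n)         # sigma = 1, so mu = kappa
    print(value({0, 1, 2}, X, mu), map_estimate({0, 1, 2}, X, y, mu))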
@@ -82,10 +99,10 @@ distribution over subsets of $\mathcal{N}$.
Let $\lambda\in[0,1]^n$ and define:
\begin{displaymath}
- P_\mathcal{N}(S,\lambda) = \prod_{i\in S}\lambda_i
+ P_\mathcal{N}^\lambda(S) = \prod_{i\in S}\lambda_i
\prod_{i\in\mathcal{N}\setminus S}(1-\lambda_i)
\end{displaymath}
-$P_{\mathcal{N}}(S,\lambda)$ is the probability of picking the set $S$ if we select
+$P_\mathcal{N}^\lambda(S)$ is the probability of picking the set $S$ if we select
a subset of $\mathcal{N}$ at random by deciding independently, for each point
$i$, to include it in the set with probability $\lambda_i$ (and to exclude it with
probability $1-\lambda_i$).
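
Sampling from $P_\mathcal{N}^\lambda$ amounts to $n$ independent coin
flips. A short sketch, continuing the hypothetical toy instance above:

    def sample_S(lam, rng):
        # draw S ~ P_N^lambda: include each i independently w.p. lam[i]
        return {i for i in range(len(lam)) if rng.random() < lam[i]}

    def prob(S, lam):
        # P_N^lambda(S) = prod_{i in S} lam[i] * prod_{i not in S} (1 - lam[i])
        return np.prod([lam[i] if i in S else 1.0 - lam[i]
                        for i in range(len(lam))])

    lam = rng.uniform(size=n)
    S = sample_S(lam, rng)
    print(S, prob(S, lam))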
@@ -95,20 +112,19 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\item the \emph{multi-linear extension} of $V$:
\begin{align*}
F_\mathcal{N}(\lambda)
- & = \mathbb{E}_{S\sim P_\mathcal{N}(S,\lambda)}\big[\log\det X(S)\big]\\
- & = \sum_{S\subset\mathcal{N}} P_\mathcal{N}(S,\lambda) V(S)\\
- & = \sum_{S\subset\mathcal{N}} P_{\mathcal{N}}(S,\lambda) \log\det X(S)\\
+ & = \mathbb{E}_{S\sim P_\mathcal{N}^\lambda}\big[\log\det A(S)\big]\\
+    & = 2\sum_{S\subset\mathcal{N}} P_\mathcal{N}^\lambda(S) V(S)\\
+ & = \sum_{S\subset\mathcal{N}} P_\mathcal{N}^\lambda(S) \log\det A(S)\\
\end{align*}
\item the \emph{concave relaxation} of $V$:
\begin{align*}
L_{\mathcal{N}}(\lambda)
- & = \log\det \mathbb{E}_{S\sim P_\mathcal{N}(S,\lambda)}\big[X(S)\big]\\
+ & = \log\det \mathbb{E}_{S\sim P_\mathcal{N}^\lambda}\big[A(S)\big]\\
& = \log\det\left(\sum_{S\subset\mathcal{N}}
- P_\mathcal{N}(S,\lambda)X(S)\right)\\
- & = \log\det \tilde{X}(\lambda)\\
- & = \log\det\left(I_d
- + \mu\sum_{i\in\mathcal{N}}
- \lambda_ix_ix_i^*\right)
+ P_\mathcal{N}^\lambda(S)A(S)\right)\\
+ & = \log\det\left(I_d + \mu\sum_{i\in\mathcal{N}}
+ \lambda_ix_ix_i^*\right)\\
+ & \defeq \log\det \tilde{A}(\lambda)
\end{align*}
\end{itemize}
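
The two relaxations are easy to compare numerically on a small instance:
$F_\mathcal{N}$ requires a sum over all $2^n$ subsets (or Monte Carlo
sampling via sample_S above), while $L_\mathcal{N}$ has a closed form. A
sketch assuming the toy instance and helpers above (exact enumeration, so
illustrative only):

    from itertools import chain, combinations

    def logdet_A(S):
        # log det A(S) = log det(I_d + mu * sum_{i in S} x_i x_i^*) = 2 V(S)
        Xs = X[list(S)]
        return np.linalg.slogdet(np.eye(d) + mu * Xs.T @ Xs)[1]

    def F(lam):
        # multi-linear extension: sum_S P^lambda(S) log det A(S)
        subsets = chain.from_iterable(combinations(range(n), r)
                                      for r in range(n + 1))
        return sum(prob(set(S), lam) * logdet_A(S) for S in subsets)

    def L(lam):
        # concave relaxation: log det(I_d + mu * sum_i lam_i x_i x_i^*)
        return np.linalg.slogdet(np.eye(d) + mu * (X.T * lam) @ X)[1]

    assert F(lam) <= L(lam) + 1e-9    # the lemma proved below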
@@ -118,9 +134,9 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\end{lemma}
\begin{proof}
- This follows almost immediately from the concavity of the $\log\det$
- function over symmetric positive semi-definite matrices. More precisely, if
- $A$ and $B$ are two symmetric positive semi-definite matrices, then:
+ This follows from the concavity of the $\log\det$ function over symmetric
+ positive semi-definite matrices. More precisely, if $A$ and $B$ are two
+ symmetric positive semi-definite matrices, then:
\begin{multline*}
\forall\alpha\in [0, 1],\; \log\det\big(\alpha A + (1-\alpha) B\big)\\
\geq \alpha\log\det A + (1-\alpha)\log\det B
@@ -141,7 +157,7 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
two fractional components, returns a feasible $\lambda'$ with one fewer
fractional component, such that:
\begin{displaymath}
- F(\lambda) \leq F(\lambda')
+ F_\mathcal{N}(\lambda) \leq F_\mathcal{N}(\lambda')
\end{displaymath}
Applying this procedure recursively yields the lemma's result.
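
Concretely, the procedure can be sketched as a pipage-style step: on two
fractional coordinates $i,j$, move along $(+\varepsilon,
-\varepsilon\,c_i/c_j)$, which leaves the budget $\sum_k \lambda_k c_k$
unchanged, until one coordinate becomes integral; by the convexity of
$F_\lambda$ shown below, the better of the two endpoints does not decrease
$F_\mathcal{N}$. A sketch assuming the helpers above (round_step and the
cost vector c are hypothetical names):

    def round_step(lam, c, i, j):
        # one rounding step: returns lam' with one fewer fractional
        # coordinate, the same budget, and F(lam') >= F(lam)
        r = c[i] / c[j]
        candidates = []
        for eps in (min(1.0 - lam[i], lam[j] / r),       # lam[i]->1 or lam[j]->0
                    -min(lam[i], (1.0 - lam[j]) / r)):   # lam[i]->0 or lam[j]->1
            cand = lam.copy()
            cand[i] += eps
            cand[j] -= eps * r
            candidates.append(cand)
        return max(candidates, key=F)

    c = rng.uniform(1.0, 2.0, size=n)   # hypothetical per-user costs
    assert F(round_step(lam, c, 0, 1)) >= F(lam) - 1e-9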
@@ -164,7 +180,7 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
Furthermore, the function $F_\lambda$ is convex; indeed:
\begin{align*}
F_\lambda(\varepsilon)
- & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}(S',\lambda)}\Big[
+        & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}^\lambda}\Big[
(\lambda_i+\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i,j\})\\
& + (\lambda_i+\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i\})\\
& + (1-\lambda_i-\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{j\})\\
@@ -172,7 +188,8 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\end{align*}
Thus, $F_\lambda$ is a degree-2 polynomial in $\varepsilon$ whose leading coefficient is:
\begin{multline*}
- \frac{c_i}{c_j}\mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}(S',\lambda)}\Big[
+        \frac{c_i}{c_j}\mathbb{E}_{S'\sim
+          P_{\mathcal{N}\setminus\{i,j\}}^\lambda}\Big[
V(S'\cup\{i\})+V(S'\cup\{j\})\\
-V(S'\cup\{i,j\})-V(S')\Big]
\end{multline*}
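
The nonnegativity of this leading coefficient, hence the convexity of
$F_\lambda$, comes from the submodularity of $V$: $V(S\cup\{i\}) +
V(S\cup\{j\}) \geq V(S\cup\{i,j\}) + V(S)$. A quick numerical spot check,
assuming the helpers above:

    # submodularity of log det A (equivalently of V) on a random instance
    S, i, j = {0, 1}, 3, 4
    assert (logdet_A(S | {i}) + logdet_A(S | {j})
            >= logdet_A(S | {i, j}) + logdet_A(S) - 1e-9)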
@@ -221,12 +238,12 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
the $i$-th component.
For $F$, it suffices to look at the derivative of
- $P_\mathcal{N}(S,\lambda)$:
+ $P_\mathcal{N}^\lambda(S)$:
\begin{displaymath}
- \partial_i P_\mathcal{N}(S,\lambda) = \left\{
+ \partial_i P_\mathcal{N}^\lambda(S) = \left\{
\begin{aligned}
- & P_{\mathcal{N}\setminus\{i\}}(S\setminus\{i\},\lambda)\;\textrm{if}\; i\in S \\
- & - P_{\mathcal{N}\setminus\{i\}}(S,\lambda)\;\textrm{if}\;
+ & P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})\;\textrm{if}\; i\in S \\
+ & - P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\;\textrm{if}\;
i\in \mathcal{N}\setminus S \\
\end{aligned}\right.
\end{displaymath}
@@ -235,9 +252,9 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\begin{multline*}
\partial_i F_\mathcal{N} =
\sum_{\substack{S\subset\mathcal{N}\\ i\in S}}
- P_{\mathcal{N}\setminus\{i\}}(S\setminus\{i\},\lambda)V(S)\\
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})V(S)\\
- \sum_{\substack{S\subset\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}(S,\lambda)V(S)\\
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S)\\
\end{multline*}
Now, using that every $S$ such that $i\in S$ can be uniquely written as
@@ -245,9 +262,9 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\begin{multline*}
\partial_i F_\mathcal{N} =
\sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}(S,\lambda)V(S\cup\{i\})\\
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S\cup\{i\})\\
- \sum_{\substack{S\subset\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}(S,\lambda)V(S)\\
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S)\\
\end{multline*}
Finally, by using the expression for the marginal contribution of $i$ to
@@ -255,47 +272,47 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\begin{displaymath}
\partial_i F_\mathcal{N}(\lambda) =
\sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}(S,\lambda)
- \log\Big(1 + \mu x_i^*X(S)^{-1}x_i\Big)
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
+ \log\Big(1 + \mu x_i^*A(S)^{-1}x_i\Big)
\end{displaymath}
The computation of the derivative of $L_\mathcal{N}$ uses standard matrix
calculus and gives:
\begin{displaymath}
\partial_i L_\mathcal{N}(\lambda)
- = \mu x_i^* \tilde{X}(\lambda)^{-1}x_i
+ = \mu x_i^* \tilde{A}(\lambda)^{-1}x_i
\end{displaymath}
Using the following inequalities:
\begin{gather*}
- P_{\mathcal{N}\setminus\{i\}}(S,\lambda) \geq
- P_\mathcal{N}(S,\lambda)\\
- X(S)^{-1} \geq X(S\cup\{i\})^{-1}\\
- P_\mathcal{N}(S\cup\{i\},\lambda)\geq P_\mathcal{N}(S,\lambda)
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S) \geq
+ P_\mathcal{N}^\lambda(S)\\
+ A(S)^{-1} \geq A(S\cup\{i\})^{-1}\\
+ P_\mathcal{N}^\lambda(S\cup\{i\})\geq P_\mathcal{N}^\lambda(S)
\end{gather*}
we get:
\begin{align*}
\partial_i F_\mathcal{N}(\lambda)
& \geq \sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_\mathcal{N}(S,\lambda)
- \log\Big(1 + \mu x_i^*X(S)^{-1}x_i\Big)\\
+ P_\mathcal{N}^\lambda(S)
+ \log\Big(1 + \mu x_i^*A(S)^{-1}x_i\Big)\\
& \geq \frac{1}{2}
\sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_\mathcal{N}(S,\lambda)
- \log\Big(1 + \mu x_i^*X(S)^{-1}x_i\Big)\\
+ P_\mathcal{N}^\lambda(S)
+ \log\Big(1 + \mu x_i^*A(S)^{-1}x_i\Big)\\
&\hspace{-3.5em}+\frac{1}{2}
\sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_\mathcal{N}(S\cup\{i\},\lambda)
- \log\Big(1 + \mu x_i^*X(S\cup\{i\})^{-1}x_i\Big)\\
+ P_\mathcal{N}^\lambda(S\cup\{i\})
+ \log\Big(1 + \mu x_i^*A(S\cup\{i\})^{-1}x_i\Big)\\
&\geq \frac{1}{2}
\sum_{S\subset\mathcal{N}}
- P_\mathcal{N}(S,\lambda)
- \log\Big(1 + \mu x_i^*X(S)^{-1}x_i\Big)\\
+ P_\mathcal{N}^\lambda(S)
+ \log\Big(1 + \mu x_i^*A(S)^{-1}x_i\Big)\\
\end{align*}
- Using that $X(S)\geq I_d$ we get that:
+ Using that $A(S)\geq I_d$ we get that:
\begin{displaymath}
- \mu x_i^*X(S)^{-1}x_i \leq \mu
+ \mu x_i^*A(S)^{-1}x_i \leq \mu
\end{displaymath}
Moreover:
@@ -308,7 +325,7 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\begin{displaymath}
\partial_i F_\mathcal{N}(\lambda) \geq
\frac{\log\big(1+\mu\big)}{2}
- x_i^*\bigg(\sum_{S\subset\mathcal{N}}P_\mathcal{N}(S,\lambda)X(S)^{-1}\bigg)x_i
+ x_i^*\bigg(\sum_{S\subset\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)^{-1}\bigg)x_i
\end{displaymath}
Finally, using that the inverse is a matrix convex function over symmetric
@@ -316,7 +333,7 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\begin{align*}
\partial_i F_\mathcal{N}(\lambda) &\geq
\frac{\log\big(1+\mu\big)}{2}
- x_i^*\bigg(\sum_{S\subset\mathcal{N}}P_\mathcal{N}(S,\lambda)X(S)\bigg)^{-1}x_i\\
+ x_i^*\bigg(\sum_{S\subset\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)\bigg)^{-1}x_i\\
& \geq \frac{\log\big(1+\mu\big)}{2\mu}
\partial_i L_\mathcal{N}(\lambda)
\end{align*}
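
Both partial derivatives, and the final gradient inequality, can be checked
numerically on the toy instance (assuming the helpers above; the exact
enumeration again limits this to small $n$):

    def dF(lam, i):
        # partial_i F = sum over S not containing i of
        # P_{N\{i}}^lambda(S) * (log det A(S u {i}) - log det A(S))
        rest = [k for k in range(n) if k != i]
        subsets = chain.from_iterable(combinations(rest, r)
                                      for r in range(len(rest) + 1))
        total = 0.0
        for S in subsets:
            p = np.prod([lam[k] if k in S else 1.0 - lam[k] for k in rest])
            total += p * (logdet_A(set(S) | {i}) - logdet_A(S))
        return total

    def dL(lam, i):
        # partial_i L = mu * x_i^* Atilde(lambda)^{-1} x_i
        Atilde = np.eye(d) + mu * (X.T * lam) @ X
        return mu * X[i] @ np.linalg.solve(Atilde, X[i])

    ratio = np.log(1.0 + mu) / (2.0 * mu)
    assert all(dF(lam, i) >= ratio * dL(lam, i) - 1e-9 for i in range(n))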