Diffstat (limited to 'proof.tex')
-rw-r--r--  proof.tex  109
1 file changed, 63 insertions, 46 deletions
diff --git a/proof.tex b/proof.tex
index 25061bd..9288ff2 100644
--- a/proof.tex
+++ b/proof.tex
@@ -8,6 +8,7 @@
\newtheorem{example}{Example}
\newtheorem{prop}{Proposition}
\newtheorem{theorem}{Theorem}
+\newcommand*{\defeq}{\stackrel{\text{def}}{=}}
\newcommand{\var}{\mathop{\mathrm{Var}}}
\newcommand{\condexp}[2]{\mathop{\mathbb{E}}\left[#1|#2\right]}
\newcommand{\expt}[1]{\mathop{\mathbb{E}}\left[#1\right]}
@@ -33,18 +34,34 @@
$(\varepsilon_i)_{i\in \mathcal{N}}$ are mutually independent.
\item prior knowledge of $\beta$: $\beta\sim\mathcal{N}(0,\kappa I_d)$
\item $\mu = \frac{\kappa}{\sigma^2}$ is the regularization factor.
+    \item after the variables $y_i$ are disclosed, the experimenter
+      performs \emph{maximum a posteriori estimation}, which under the
+      Gaussian prior is equivalent to solving the ridge regression
+      minimisation problem (a numerical sketch follows below):
+      \begin{displaymath}
+        \hat{\beta} = \argmin_\beta \sum_i (y_i - \beta^*x_i)^2
+          + \frac{1}{\mu}\norm{\beta}_2^2
+      \end{displaymath}
\end{itemize}
\end{itemize}
\subsection{Economics}
\begin{itemize}
- \item Value function:
+  \item Value function: following experimental design theory, the value of
+    data is the decrease in uncertainty about the model. A common measure
+    of uncertainty is the entropy. Thus, the value of a set $S$ of users is:
+ \begin{displaymath}
+ V(S) = \mathbb{H}(\beta) - \mathbb{H}(\beta|Y_S)
+ \end{displaymath}
+ where $Y_S = \{y_i,\,i\in S\}$ is the set of observations.
+
+    In our case (under the Gaussian prior):
\begin{align*}
\forall S\subset\mathcal{N},\; V(S)
& = \frac{1}{2}\log\det\left(I_d
+ \mu\sum_{i\in S} x_ix_i^*\right)\\
- & = \frac{1}{2}\log\det X(S)
+ & \defeq \frac{1}{2}\log\det A(S)
\end{align*}
\item each user $i$ has a cost $c_i$
\item the auctioneer has a budget constraint $B$
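
As a concrete illustration of the setup above, here is a minimal numerical
sketch of the value function and the MAP/ridge estimate. The toy data, the
numpy dependency, and the helper names value/map_estimate are assumptions
for illustration only; the rows of X are normalised so that $\norm{x_i}\leq
1$, which the bound $\mu x_i^*A(S)^{-1}x_i\leq\mu$ used later in the proof
implicitly requires.

    import numpy as np

    rng = np.random.default_rng(0)
    n, d, mu = 8, 3, 2.0                      # mu = kappa / sigma^2
    X = rng.normal(size=(n, d))               # row i is x_i^*
    X /= np.linalg.norm(X, axis=1, keepdims=True)   # enforce ||x_i|| <= 1

    def value(S, X, mu):
        # V(S) = 1/2 log det(I_d + mu * sum_{i in S} x_i x_i^*)
        Xs = X[list(S)]
        return 0.5 * np.linalg.slogdet(np.eye(X.shape[1]) + mu * Xs.T @ Xs)[1]

    def map_estimate(S, X, y, mu):
        # ridge/MAP estimate from the disclosed observations y_i, i in S
        Xs, ys = X[list(S)], y[list(S)]
        return np.linalg.solve(Xs.T @ Xs + np.eye(X.shape[1]) / mu, Xs.T @ ys)

    beta = rng.normal(size=d)
    y = X @ beta + rng.normal(size=n)         # sigma = 1, so mu = kappa
    print(value({0, 1, 2}, X, mu), map_estimate({0, 1, 2}, X, y, mu))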
@@ -82,10 +99,10 @@ distribution over subsets of $\mathcal{N}$.
Let $\lambda\in[0,1]^n$ and define:
\begin{displaymath}
- P_\mathcal{N}(S,\lambda) = \prod_{i\in S}\lambda_i
+ P_\mathcal{N}^\lambda(S) = \prod_{i\in S}\lambda_i
\prod_{i\in\mathcal{N}\setminus S}(1-\lambda_i)
\end{displaymath}
-$P_{\mathcal{N}}(S,\lambda)$ is the probability of picking the set $S$ if we select
+$P_\mathcal{N}^\lambda(S)$ is the probability of picking the set $S$ if we select
a subset of $\mathcal{N}$ at random by deciding independently, for each point
$i$, to include it in the set with probability $\lambda_i$ (and to exclude it with
probability $1-\lambda_i$).
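
Sampling from $P_\mathcal{N}^\lambda$ amounts to $n$ independent coin
flips. A short sketch, continuing the hypothetical toy instance above:

    def sample_S(lam, rng):
        # draw S ~ P_N^lambda: include each i independently w.p. lam[i]
        return {i for i in range(len(lam)) if rng.random() < lam[i]}

    def prob(S, lam):
        # P_N^lambda(S) = prod_{i in S} lam[i] * prod_{i not in S} (1 - lam[i])
        return np.prod([lam[i] if i in S else 1.0 - lam[i]
                        for i in range(len(lam))])

    lam = rng.uniform(size=n)
    S = sample_S(lam, rng)
    print(S, prob(S, lam))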
@@ -95,20 +112,19 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\item the \emph{multi-linear extension} of $V$:
\begin{align*}
F_\mathcal{N}(\lambda)
- & = \mathbb{E}_{S\sim P_\mathcal{N}(S,\lambda)}\big[\log\det X(S)\big]\\
- & = \sum_{S\subset\mathcal{N}} P_\mathcal{N}(S,\lambda) V(S)\\
- & = \sum_{S\subset\mathcal{N}} P_{\mathcal{N}}(S,\lambda) \log\det X(S)\\
+ & = \mathbb{E}_{S\sim P_\mathcal{N}^\lambda}\big[\log\det A(S)\big]\\
+    & = 2\sum_{S\subset\mathcal{N}} P_\mathcal{N}^\lambda(S) V(S)\\
+ & = \sum_{S\subset\mathcal{N}} P_\mathcal{N}^\lambda(S) \log\det A(S)\\
\end{align*}
\item the \emph{concave relaxation} of $V$:
\begin{align*}
L_{\mathcal{N}}(\lambda)
- & = \log\det \mathbb{E}_{S\sim P_\mathcal{N}(S,\lambda)}\big[X(S)\big]\\
+ & = \log\det \mathbb{E}_{S\sim P_\mathcal{N}^\lambda}\big[A(S)\big]\\
& = \log\det\left(\sum_{S\subset\mathcal{N}}
- P_\mathcal{N}(S,\lambda)X(S)\right)\\
- & = \log\det \tilde{X}(\lambda)\\
- & = \log\det\left(I_d
- + \mu\sum_{i\in\mathcal{N}}
- \lambda_ix_ix_i^*\right)
+ P_\mathcal{N}^\lambda(S)A(S)\right)\\
+ & = \log\det\left(I_d + \mu\sum_{i\in\mathcal{N}}
+ \lambda_ix_ix_i^*\right)\\
+ & \defeq \log\det \tilde{A}(\lambda)
\end{align*}
\end{itemize}
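
The two relaxations are easy to compare numerically on a small instance:
$F_\mathcal{N}$ requires a sum over all $2^n$ subsets (or Monte Carlo
sampling via sample_S above), while $L_\mathcal{N}$ has a closed form. A
sketch assuming the toy instance and helpers above (exact enumeration, so
illustrative only):

    from itertools import chain, combinations

    def logdet_A(S):
        # log det A(S) = log det(I_d + mu * sum_{i in S} x_i x_i^*) = 2 V(S)
        Xs = X[list(S)]
        return np.linalg.slogdet(np.eye(d) + mu * Xs.T @ Xs)[1]

    def F(lam):
        # multi-linear extension: sum_S P^lambda(S) log det A(S)
        subsets = chain.from_iterable(combinations(range(n), r)
                                      for r in range(n + 1))
        return sum(prob(set(S), lam) * logdet_A(S) for S in subsets)

    def L(lam):
        # concave relaxation: log det(I_d + mu * sum_i lam_i x_i x_i^*)
        return np.linalg.slogdet(np.eye(d) + mu * (X.T * lam) @ X)[1]

    assert F(lam) <= L(lam) + 1e-9    # the lemma proved below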
@@ -118,9 +134,9 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\end{lemma}
\begin{proof}
- This follows almost immediately from the concavity of the $\log\det$
- function over symmetric positive semi-definite matrices. More precisely, if
- $A$ and $B$ are two symmetric positive semi-definite matrices, then:
+ This follows from the concavity of the $\log\det$ function over symmetric
+ positive semi-definite matrices. More precisely, if $A$ and $B$ are two
+ symmetric positive semi-definite matrices, then:
\begin{multline*}
\forall\alpha\in [0, 1],\; \log\det\big(\alpha A + (1-\alpha) B\big)\\
\geq \alpha\log\det A + (1-\alpha)\log\det B
@@ -141,7 +157,7 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
two fractional components, returns a feasible $\lambda'$ with one fewer
fractional component, such that:
\begin{displaymath}
- F(\lambda) \leq F(\lambda')
+ F_\mathcal{N}(\lambda) \leq F_\mathcal{N}(\lambda')
\end{displaymath}
Applying this procedure recursively yields the lemma's result.
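
Concretely, the procedure can be sketched as a pipage-style step: on two
fractional coordinates $i,j$, move along $(+\varepsilon,
-\varepsilon\,c_i/c_j)$, which leaves the budget $\sum_k \lambda_k c_k$
unchanged, until one coordinate becomes integral; by the convexity of
$F_\lambda$ shown below, the better of the two endpoints does not decrease
$F_\mathcal{N}$. A sketch assuming the helpers above (round_step and the
cost vector c are hypothetical names):

    def round_step(lam, c, i, j):
        # one rounding step: returns lam' with one fewer fractional
        # coordinate, the same budget, and F(lam') >= F(lam)
        r = c[i] / c[j]
        candidates = []
        for eps in (min(1.0 - lam[i], lam[j] / r),       # lam[i]->1 or lam[j]->0
                    -min(lam[i], (1.0 - lam[j]) / r)):   # lam[i]->0 or lam[j]->1
            cand = lam.copy()
            cand[i] += eps
            cand[j] -= eps * r
            candidates.append(cand)
        return max(candidates, key=F)

    c = rng.uniform(1.0, 2.0, size=n)   # hypothetical per-user costs
    assert F(round_step(lam, c, 0, 1)) >= F(lam) - 1e-9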
@@ -164,7 +180,7 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
Furthermore, the function $F_\lambda$ is convex; indeed:
\begin{align*}
F_\lambda(\varepsilon)
- & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}(S',\lambda)}\Big[
+        & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}^\lambda}\Big[
(\lambda_i+\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i,j\})\\
& + (\lambda_i+\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i\})\\
& + (1-\lambda_i-\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{j\})\\
@@ -172,7 +188,8 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\end{align*}
Thus, $F_\lambda$ is a degree-2 polynomial in $\varepsilon$ whose leading coefficient is:
\begin{multline*}
- \frac{c_i}{c_j}\mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}(S',\lambda)}\Big[
+        \frac{c_i}{c_j}\mathbb{E}_{S'\sim
+          P_{\mathcal{N}\setminus\{i,j\}}^\lambda}\Big[
V(S'\cup\{i\})+V(S'\cup\{j\})\\
-V(S'\cup\{i,j\})-V(S')\Big]
\end{multline*}
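
The nonnegativity of this leading coefficient, hence the convexity of
$F_\lambda$, comes from the submodularity of $V$: $V(S\cup\{i\}) +
V(S\cup\{j\}) \geq V(S\cup\{i,j\}) + V(S)$. A quick numerical spot check,
assuming the helpers above:

    # submodularity of log det A (equivalently of V) on a random instance
    S, i, j = {0, 1}, 3, 4
    assert (logdet_A(S | {i}) + logdet_A(S | {j})
            >= logdet_A(S | {i, j}) + logdet_A(S) - 1e-9)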
@@ -221,12 +238,12 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
the $i$-th component.
For $F$, it suffices to look at the derivative of
- $P_\mathcal{N}(S,\lambda)$:
+ $P_\mathcal{N}^\lambda(S)$:
\begin{displaymath}
- \partial_i P_\mathcal{N}(S,\lambda) = \left\{
+ \partial_i P_\mathcal{N}^\lambda(S) = \left\{
\begin{aligned}
- & P_{\mathcal{N}\setminus\{i\}}(S\setminus\{i\},\lambda)\;\textrm{if}\; i\in S \\
- & - P_{\mathcal{N}\setminus\{i\}}(S,\lambda)\;\textrm{if}\;
+ & P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})\;\textrm{if}\; i\in S \\
+ & - P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\;\textrm{if}\;
i\in \mathcal{N}\setminus S \\
\end{aligned}\right.
\end{displaymath}
@@ -235,9 +252,9 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\begin{multline*}
\partial_i F_\mathcal{N} =
\sum_{\substack{S\subset\mathcal{N}\\ i\in S}}
- P_{\mathcal{N}\setminus\{i\}}(S\setminus\{i\},\lambda)V(S)\\
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})V(S)\\
- \sum_{\substack{S\subset\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}(S,\lambda)V(S)\\
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S)\\
\end{multline*}
Now, using that every $S$ such that $i\in S$ can be uniquely written as
@@ -245,9 +262,9 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\begin{multline*}
\partial_i F_\mathcal{N} =
\sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}(S,\lambda)V(S\cup\{i\})\\
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S\cup\{i\})\\
- \sum_{\substack{S\subset\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}(S,\lambda)V(S)\\
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S)\\
\end{multline*}
Finally, by using the expression for the marginal contribution of $i$ to
@@ -255,47 +272,47 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\begin{displaymath}
\partial_i F_\mathcal{N}(\lambda) =
\sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}(S,\lambda)
- \log\Big(1 + \mu x_i^*X(S)^{-1}x_i\Big)
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
+ \log\Big(1 + \mu x_i^*A(S)^{-1}x_i\Big)
\end{displaymath}
The computation of the derivative of $L_\mathcal{N}$ uses standard matrix
calculus and gives:
\begin{displaymath}
\partial_i L_\mathcal{N}(\lambda)
- = \mu x_i^* \tilde{X}(\lambda)^{-1}x_i
+ = \mu x_i^* \tilde{A}(\lambda)^{-1}x_i
\end{displaymath}
Using the following inequalities:
\begin{gather*}
- P_{\mathcal{N}\setminus\{i\}}(S,\lambda) \geq
- P_\mathcal{N}(S,\lambda)\\
- X(S)^{-1} \geq X(S\cup\{i\})^{-1}\\
- P_\mathcal{N}(S\cup\{i\},\lambda)\geq P_\mathcal{N}(S,\lambda)
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S) \geq
+ P_\mathcal{N}^\lambda(S)\\
+ A(S)^{-1} \geq A(S\cup\{i\})^{-1}\\
+ P_\mathcal{N}^\lambda(S\cup\{i\})\geq P_\mathcal{N}^\lambda(S)
\end{gather*}
we get:
\begin{align*}
\partial_i F_\mathcal{N}(\lambda)
& \geq \sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_\mathcal{N}(S,\lambda)
- \log\Big(1 + \mu x_i^*X(S)^{-1}x_i\Big)\\
+ P_\mathcal{N}^\lambda(S)
+ \log\Big(1 + \mu x_i^*A(S)^{-1}x_i\Big)\\
& \geq \frac{1}{2}
\sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_\mathcal{N}(S,\lambda)
- \log\Big(1 + \mu x_i^*X(S)^{-1}x_i\Big)\\
+ P_\mathcal{N}^\lambda(S)
+ \log\Big(1 + \mu x_i^*A(S)^{-1}x_i\Big)\\
&\hspace{-3.5em}+\frac{1}{2}
\sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_\mathcal{N}(S\cup\{i\},\lambda)
- \log\Big(1 + \mu x_i^*X(S\cup\{i\})^{-1}x_i\Big)\\
+ P_\mathcal{N}^\lambda(S\cup\{i\})
+ \log\Big(1 + \mu x_i^*A(S\cup\{i\})^{-1}x_i\Big)\\
&\geq \frac{1}{2}
\sum_{S\subset\mathcal{N}}
- P_\mathcal{N}(S,\lambda)
- \log\Big(1 + \mu x_i^*X(S)^{-1}x_i\Big)\\
+ P_\mathcal{N}^\lambda(S)
+ \log\Big(1 + \mu x_i^*A(S)^{-1}x_i\Big)\\
\end{align*}
- Using that $X(S)\geq I_d$ we get that:
+ Using that $A(S)\geq I_d$ we get that:
\begin{displaymath}
- \mu x_i^*X(S)^{-1}x_i \leq \mu
+ \mu x_i^*A(S)^{-1}x_i \leq \mu
\end{displaymath}
Moreover:
@@ -308,7 +325,7 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\begin{displaymath}
\partial_i F_\mathcal{N}(\lambda) \geq
\frac{\log\big(1+\mu\big)}{2}
- x_i^*\bigg(\sum_{S\subset\mathcal{N}}P_\mathcal{N}(S,\lambda)X(S)^{-1}\bigg)x_i
+ x_i^*\bigg(\sum_{S\subset\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)^{-1}\bigg)x_i
\end{displaymath}
Finally, using that the inverse is a matrix convex function over symmetric
@@ -316,7 +333,7 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
\begin{align*}
\partial_i F_\mathcal{N}(\lambda) &\geq
\frac{\log\big(1+\mu\big)}{2}
- x_i^*\bigg(\sum_{S\subset\mathcal{N}}P_\mathcal{N}(S,\lambda)X(S)\bigg)^{-1}x_i\\
+ x_i^*\bigg(\sum_{S\subset\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)\bigg)^{-1}x_i\\
& \geq \frac{\log\big(1+\mu\big)}{2\mu}
\partial_i L_\mathcal{N}(\lambda)
\end{align*}
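
Both partial derivatives, and the final gradient inequality, can be checked
numerically on the toy instance (assuming the helpers above; the exact
enumeration again limits this to small $n$):

    def dF(lam, i):
        # partial_i F = sum over S not containing i of
        # P_{N\{i}}^lambda(S) * (log det A(S u {i}) - log det A(S))
        rest = [k for k in range(n) if k != i]
        subsets = chain.from_iterable(combinations(rest, r)
                                      for r in range(len(rest) + 1))
        total = 0.0
        for S in subsets:
            p = np.prod([lam[k] if k in S else 1.0 - lam[k] for k in rest])
            total += p * (logdet_A(set(S) | {i}) - logdet_A(S))
        return total

    def dL(lam, i):
        # partial_i L = mu * x_i^* Atilde(lambda)^{-1} x_i
        Atilde = np.eye(d) + mu * (X.T * lam) @ X
        return mu * X[i] @ np.linalg.solve(Atilde, X[i])

    ratio = np.log(1.0 + mu) / (2.0 * mu)
    assert all(dF(lam, i) >= ratio * dL(lam, i) - 1e-9 for i in range(n))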