From 74bf58e00a77a6d9cba5794680697bfeb70a8157 Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Thu, 25 Oct 2012 00:53:54 -0700
Subject: Taking into account Stratis' suggestions

---
 proof.tex | 109 ++++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 63 insertions(+), 46 deletions(-)

diff --git a/proof.tex b/proof.tex
index 25061bd..9288ff2 100644
--- a/proof.tex
+++ b/proof.tex
@@ -8,6 +8,7 @@
 \newtheorem{example}{Example}
 \newtheorem{prop}{Proposition}
 \newtheorem{theorem}{Theorem}
+\newcommand*{\defeq}{\stackrel{\text{def}}{=}}
 \newcommand{\var}{\mathop{\mathrm{Var}}}
 \newcommand{\condexp}[2]{\mathop{\mathbb{E}}\left[#1|#2\right]}
 \newcommand{\expt}[1]{\mathop{\mathbb{E}}\left[#1\right]}
@@ -33,18 +34,34 @@ $(\varepsilon_i)_{i\in \mathcal{N}}$ are mutually independent.
     \item prior knowledge of $\beta$: $\beta\sim\mathcal{N}(0,\kappa I_d)$
     \item $\mu = \frac{\kappa}{\sigma^2}$ is the regularization factor.
+    \item after the variables $y_i$ are disclosed, the experimenter
+      performs \emph{maximum a posteriori} estimation which, under a
+      Gaussian prior, is equivalent to solving the ridge regression
+      minimization problem:
+      \begin{displaymath}
+        \hat{\beta} = \operatorname*{arg\,min}_\beta \sum_i (y_i - \beta^*x_i)^2 +
+        \frac{1}{\mu}\norm{\beta}_2^2
+      \end{displaymath}
   \end{itemize}
 \end{itemize}

 \subsection{Economics}

 \begin{itemize}
-  \item Value function:
+  \item Value function: following experimental design theory, the value of
+    data is the decrease in uncertainty about the model, where uncertainty
+    is measured by the entropy. Thus, the value of a set $S$ of users is:
+    \begin{displaymath}
+      V(S) = \mathbb{H}(\beta) - \mathbb{H}(\beta|Y_S)
+    \end{displaymath}
+    where $Y_S = \{y_i,\,i\in S\}$ is the set of observations.
+
+    In our case (under the Gaussian prior), this reduces to (sketch below):
     \begin{align*}
       \forall S\subset\mathcal{N},\;
       V(S) & = \frac{1}{2}\log\det\left(I_d + \mu\sum_{i\in S}
      x_ix_i^*\right)\\
-      & = \frac{1}{2}\log\det X(S)
+      & \defeq \frac{1}{2}\log\det A(S)
     \end{align*}
   \item each user $i$ has a cost $c_i$
   \item the auctioneer has a budget constraint $B$
@@ -82,10 +99,10 @@ distribution over subsets of $\mathcal{N}$.

 Let $\lambda\in[0,1]^n$ and define:
 \begin{displaymath}
-  P_\mathcal{N}(S,\lambda) = \prod_{i\in S}\lambda_i
+  P_\mathcal{N}^\lambda(S) = \prod_{i\in S}\lambda_i
   \prod_{i\in\mathcal{N}\setminus S}(1-\lambda_i)
 \end{displaymath}
-$P_{\mathcal{N}}(S,\lambda)$ is the probability of picking the set $S$ if we select
+$P_\mathcal{N}^\lambda(S)$ is the probability of picking the set $S$ if we select
 a subset of $\mathcal{N}$ at random by deciding independently for each point
 to include it in the set with probability $\lambda_i$ (and to exclude it with
 probability $1-\lambda_i$).
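[Note on the hunk above: the $\log\det$ expression for $V(S)$ follows from the
entropy of a multivariate Gaussian, $\mathbb{H}(\mathcal{N}(m,\Sigma)) =
\frac{1}{2}\log\big((2\pi e)^d\det\Sigma\big)$. A minimal derivation sketch,
assuming the standard posterior covariance of Bayesian linear regression,
$\Sigma_S = \big(\kappa^{-1}I_d + \sigma^{-2}\sum_{i\in S}x_ix_i^*\big)^{-1}$:

% Sketch: both entropies are Gaussian, so the (2*pi*e)^d terms cancel.
\begin{align*}
  V(S) &= \mathbb{H}(\beta) - \mathbb{H}(\beta|Y_S)
        = \frac{1}{2}\log\det(\kappa I_d) - \frac{1}{2}\log\det\Sigma_S\\
       &= \frac{1}{2}\log\det\big(\kappa I_d\,\Sigma_S^{-1}\big)
        = \frac{1}{2}\log\det\Big(I_d
          + \frac{\kappa}{\sigma^2}\sum_{i\in S}x_ix_i^*\Big)
        = \frac{1}{2}\log\det\Big(I_d + \mu\sum_{i\in S}x_ix_i^*\Big)
\end{align*}]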
@@ -95,20 +112,19 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
   \item the \emph{multi-linear extension} of $V$ (up to a factor $2$):
     \begin{align*}
       F_\mathcal{N}(\lambda)
-      & = \mathbb{E}_{S\sim P_\mathcal{N}(S,\lambda)}\big[\log\det X(S)\big]\\
-      & = \sum_{S\subset\mathcal{N}} P_\mathcal{N}(S,\lambda) V(S)\\
-      & = \sum_{S\subset\mathcal{N}} P_{\mathcal{N}}(S,\lambda) \log\det X(S)\\
+      & = \mathbb{E}_{S\sim P_\mathcal{N}^\lambda}\big[\log\det A(S)\big]\\
+      & = 2\sum_{S\subset\mathcal{N}} P_\mathcal{N}^\lambda(S) V(S)\\
+      & = \sum_{S\subset\mathcal{N}} P_\mathcal{N}^\lambda(S) \log\det A(S)\\
     \end{align*}
   \item the \emph{concave relaxation} of $V$:
     \begin{align*}
       L_{\mathcal{N}}(\lambda)
-      & = \log\det \mathbb{E}_{S\sim P_\mathcal{N}(S,\lambda)}\big[X(S)\big]\\
+      & = \log\det \mathbb{E}_{S\sim P_\mathcal{N}^\lambda}\big[A(S)\big]\\
       & = \log\det\left(\sum_{S\subset\mathcal{N}}
-      P_\mathcal{N}(S,\lambda)X(S)\right)\\
-      & = \log\det \tilde{X}(\lambda)\\
-      & = \log\det\left(I_d
-      + \mu\sum_{i\in\mathcal{N}}
-      \lambda_ix_ix_i^*\right)
+      P_\mathcal{N}^\lambda(S)A(S)\right)\\
+      & = \log\det\left(I_d + \mu\sum_{i\in\mathcal{N}}
+      \lambda_ix_ix_i^*\right)\\
+      & \defeq \log\det \tilde{A}(\lambda)
     \end{align*}
 \end{itemize}

@@ -118,9 +134,9 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
 \end{lemma}

 \begin{proof}
-  This follows almost immediately from the concavity of the $\log\det$
-  function over symmetric positive semi-definite matrices. More precisely, if
-  $A$ and $B$ are two symmetric positive semi-definite matrices, then:
+  This follows from the concavity of the $\log\det$ function over symmetric
+  positive semi-definite matrices. More precisely, if $A$ and $B$ are two
+  symmetric positive semi-definite matrices, then:
   \begin{multline*}
     \forall\alpha\in [0, 1],\; \log\det\big(\alpha A + (1-\alpha) B\big)\\
     \geq \alpha\log\det A + (1-\alpha)\log\det B
@@ -141,7 +157,7 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
   two fractional components, returns some feasible $\lambda'$ with one fewer
   fractional component such that:
   \begin{displaymath}
-    F(\lambda) \leq F(\lambda')
+    F_\mathcal{N}(\lambda) \leq F_\mathcal{N}(\lambda')
   \end{displaymath}

   Applying this procedure recursively yields the lemma's result.
@@ -164,7 +180,7 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
   Furthermore, the function $F_\lambda$ is convex; indeed:
   \begin{align*}
     F_\lambda(\varepsilon)
-    & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}(S',\lambda)}\Big[
+    & = \mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}^\lambda}\Big[
     (\lambda_i+\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i,j\})\\
     & + (\lambda_i+\varepsilon)\Big(1-\lambda_j+\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{i\})\\
     & + (1-\lambda_i-\varepsilon)\Big(\lambda_j-\varepsilon\frac{c_i}{c_j}\Big)V(S'\cup\{j\})\\
@@ -172,7 +188,8 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
   \end{align*}
   Thus, $F_\lambda$ is a degree 2 polynomial whose dominant coefficient is:
   \begin{multline*}
-    \frac{c_i}{c_j}\mathbb{E}_{S'\sim P_{\mathcal{N}\setminus\{i,j\}}(S',\lambda)}\Big[
+    \frac{c_i}{c_j}\mathbb{E}_{S'\sim
+    P_{\mathcal{N}\setminus\{i,j\}}^\lambda}\Big[
     V(S'\cup\{i\})+V(S'\cup\{j\})\\
     -V(S'\cup\{i,j\})-V(S')\Big]
   \end{multline*}
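[Note on the hunks below: the ``marginal contribution'' expression invoked in
the derivative computation is the matrix determinant lemma applied to the
rank-one update $A(S\cup\{i\}) = A(S) + \mu x_ix_i^*$. A short sketch, in the
notation above:

% Matrix determinant lemma for the rank-one update A(S) + mu*x_i*x_i^*.
\begin{displaymath}
  \det\big(A(S) + \mu x_ix_i^*\big)
  = \big(1 + \mu x_i^*A(S)^{-1}x_i\big)\det A(S),
\end{displaymath}
so that $\log\det A(S\cup\{i\}) - \log\det A(S)
= \log\big(1 + \mu x_i^*A(S)^{-1}x_i\big)$.]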
@@ -221,12 +238,12 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
   the $i$-th component.

   For $F$, it suffices to look at the derivative of
-  $P_\mathcal{N}(S,\lambda)$:
+  $P_\mathcal{N}^\lambda(S)$:
   \begin{displaymath}
-    \partial_i P_\mathcal{N}(S,\lambda) = \left\{
+    \partial_i P_\mathcal{N}^\lambda(S) = \left\{
     \begin{aligned}
-      & P_{\mathcal{N}\setminus\{i\}}(S\setminus\{i\},\lambda)\;\textrm{if}\; i\in S \\
-      & - P_{\mathcal{N}\setminus\{i\}}(S,\lambda)\;\textrm{if}\;
+      & P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})\;\textrm{if}\; i\in S \\
+      & - P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\;\textrm{if}\;
       i\in \mathcal{N}\setminus S \\
     \end{aligned}\right.
   \end{displaymath}
@@ -235,9 +252,9 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
   \begin{multline*}
     \partial_i F_\mathcal{N} =
     \sum_{\substack{S\subset\mathcal{N}\\ i\in S}}
-    P_{\mathcal{N}\setminus\{i\}}(S\setminus\{i\},\lambda)V(S)\\
+    P_{\mathcal{N}\setminus\{i\}}^\lambda(S\setminus\{i\})V(S)\\
     - \sum_{\substack{S\subset\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
-    P_{\mathcal{N}\setminus\{i\}}(S,\lambda)V(S)\\
+    P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S)\\
   \end{multline*}

   Now, using that every $S$ such that $i\in S$ can be uniquely written as
@@ -245,9 +262,9 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
   \begin{multline*}
     \partial_i F_\mathcal{N} =
     \sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
-    P_{\mathcal{N}\setminus\{i\}}(S,\lambda)V(S\cup\{i\})\\
+    P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S\cup\{i\})\\
     - \sum_{\substack{S\subset\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
-    P_{\mathcal{N}\setminus\{i\}}(S,\lambda)V(S)\\
+    P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S)\\
   \end{multline*}

   Finally, by using the expression for the marginal contribution of $i$ to
@@ -255,47 +272,47 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
   \begin{displaymath}
     \partial_i F_\mathcal{N}(\lambda) =
     \sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
-    P_{\mathcal{N}\setminus\{i\}}(S,\lambda)
-    \log\Big(1 + \mu x_i^*X(S)^{-1}x_i\Big)
+    P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
+    \log\Big(1 + \mu x_i^*A(S)^{-1}x_i\Big)
   \end{displaymath}

   The computation of the derivative of $L_\mathcal{N}$ uses standard matrix
   calculus and gives:
   \begin{displaymath}
     \partial_i L_\mathcal{N}(\lambda)
-    = \mu x_i^* \tilde{X}(\lambda)^{-1}x_i
+    = \mu x_i^* \tilde{A}(\lambda)^{-1}x_i
   \end{displaymath}

   Using the following inequalities:
   \begin{gather*}
-    P_{\mathcal{N}\setminus\{i\}}(S,\lambda) \geq
-    P_\mathcal{N}(S,\lambda)\\
-    X(S)^{-1} \geq X(S\cup\{i\})^{-1}\\
-    P_\mathcal{N}(S\cup\{i\},\lambda)\geq P_\mathcal{N}(S,\lambda)
+    P_{\mathcal{N}\setminus\{i\}}^\lambda(S) \geq
+    P_\mathcal{N}^\lambda(S)\\
+    A(S)^{-1} \geq A(S\cup\{i\})^{-1}\\
+    P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\geq P_\mathcal{N}^\lambda(S\cup\{i\})
   \end{gather*}
   we get (averaging the two resulting lower bounds on $\partial_i F_\mathcal{N}$):
   \begin{align*}
     \partial_i F_\mathcal{N}(\lambda)
     & \geq \sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
-    P_\mathcal{N}(S,\lambda)
-    \log\Big(1 + \mu x_i^*X(S)^{-1}x_i\Big)\\
+    P_\mathcal{N}^\lambda(S)
+    \log\Big(1 + \mu x_i^*A(S)^{-1}x_i\Big)\\
     & \geq \frac{1}{2} \sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
-    P_\mathcal{N}(S,\lambda)
-    \log\Big(1 + \mu x_i^*X(S)^{-1}x_i\Big)\\
+    P_\mathcal{N}^\lambda(S)
+    \log\Big(1 + \mu x_i^*A(S)^{-1}x_i\Big)\\
     &\hspace{-3.5em}+\frac{1}{2} \sum_{\substack{S\subset\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
-    P_\mathcal{N}(S\cup\{i\},\lambda)
-    \log\Big(1 + \mu x_i^*X(S\cup\{i\})^{-1}x_i\Big)\\
+    P_\mathcal{N}^\lambda(S\cup\{i\})
+    \log\Big(1 + \mu x_i^*A(S\cup\{i\})^{-1}x_i\Big)\\
     &\geq \frac{1}{2} \sum_{S\subset\mathcal{N}}
-    P_\mathcal{N}(S,\lambda)
-    \log\Big(1 + \mu x_i^*X(S)^{-1}x_i\Big)\\
+    P_\mathcal{N}^\lambda(S)
+    \log\Big(1 + \mu x_i^*A(S)^{-1}x_i\Big)\\
   \end{align*}

-  Using that $X(S)\geq I_d$ we get that:
+  Using that $A(S)\geq I_d$ (and assuming $\norm{x_i}_2\leq 1$), we get:
   \begin{displaymath}
-    \mu x_i^*X(S)^{-1}x_i \leq \mu
+    \mu x_i^*A(S)^{-1}x_i \leq \mu
   \end{displaymath}

   Moreover:
@@ -308,7 +325,7 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
   \begin{displaymath}
     \partial_i F_\mathcal{N}(\lambda) \geq \frac{\log\big(1+\mu\big)}{2}
-    x_i^*\bigg(\sum_{S\subset\mathcal{N}}P_\mathcal{N}(S,\lambda)X(S)^{-1}\bigg)x_i
+    x_i^*\bigg(\sum_{S\subset\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)^{-1}\bigg)x_i
   \end{displaymath}

   Finally, using that the inverse is a matrix convex function over symmetric
@@ -316,7 +333,7 @@ We will consider two relaxations of the value function $V$ over $\mathcal{N}$:
   \begin{align*}
     \partial_i F_\mathcal{N}(\lambda)
     &\geq \frac{\log\big(1+\mu\big)}{2}
-    x_i^*\bigg(\sum_{S\subset\mathcal{N}}P_\mathcal{N}(S,\lambda)X(S)\bigg)^{-1}x_i\\
+    x_i^*\bigg(\sum_{S\subset\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)\bigg)^{-1}x_i\\
     & \geq \frac{\log\big(1+\mu\big)}{2\mu}
     \partial_i L_\mathcal{N}(\lambda)
   \end{align*}
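[Note on the final hunk: the last two steps can be read as Jensen's inequality
for the matrix convex map $t\mapsto t^{-1}$. A sketch, in the notation above:

% Jensen's inequality in the positive semi-definite order:
% E[A(S)^{-1}] >= (E[A(S)])^{-1}, since matrix inversion is matrix convex.
\begin{displaymath}
  \sum_{S\subset\mathcal{N}}P_\mathcal{N}^\lambda(S)A(S)^{-1}
  = \mathbb{E}_{S\sim P_\mathcal{N}^\lambda}\big[A(S)^{-1}\big]
  \geq \Big(\mathbb{E}_{S\sim P_\mathcal{N}^\lambda}\big[A(S)\big]\Big)^{-1}
  = \tilde{A}(\lambda)^{-1},
\end{displaymath}
and $x_i^*\tilde{A}(\lambda)^{-1}x_i =
\frac{1}{\mu}\partial_i L_\mathcal{N}(\lambda)$, which yields the stated
bound.]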