| -rw-r--r-- | paper.tex | 135 |
1 files changed, 134 insertions, 1 deletions
@@ -40,9 +40,142 @@ \section{Problem formulation}
+\subsection{Notations}
+
+\begin{itemize}
+    \item If $x\in\mathbf{R}^d$, $x^*$ denotes the transpose of the vector
+    $x$. If $(x, y)\in\mathbf{R}^d\times\mathbf{R}^d$, $x^*y$ is the standard
+    inner product on $\mathbf{R}^d$.
+    \item We will often use the following order over symmetric matrices: if
+    $A$ and $B$ are two $d\times d$ real symmetric matrices, we write
+    $A\leq B$ iff:
+    \begin{displaymath}
+        \forall x\in\mathbf{R}^d,\quad x^*Ax \leq x^*Bx
+    \end{displaymath}
+    That is, iff $B-A$ is symmetric positive semi-definite.
+    \item Let us recall the following property of the order defined above: if
+    $A$ and $B$ are two symmetric positive definite matrices such that
+    $A\leq B$, then $B^{-1}\leq A^{-1}$.
+\end{itemize}
+
+\subsection{Data model}
+\begin{itemize}
+    \item set of $n$ users $\mathcal{N} = \{1,\ldots, n\}$
+    \item each user has a public vector of features $x_i\in\mathbf{R}^d$ and
+    an undisclosed variable $y_i\in\mathbf{R}$
+    \item we assume that the data has been normalized: $\forall
+    i\in\mathcal{N}$, $\norm{x_i}\leq 1$
+    \item \textbf{Ridge regression model:}
+    \begin{itemize}
+        \item $y_i = \beta^*x_i + \varepsilon_i$
+        \item $\varepsilon_i \sim \mathcal{N}(0,\sigma^2)$, and the
+        $(\varepsilon_i)_{i\in \mathcal{N}}$ are mutually independent
+        \item prior knowledge of $\beta$: $\beta\sim\mathcal{N}(0,\kappa I_d)$
+        \item $\mu = \frac{\kappa}{\sigma^2}$ is the regularization factor
+        \item after the variables $y_i$ are disclosed, the experimenter
+        performs \emph{maximum a posteriori} estimation, which under the
+        Gaussian prior is equivalent to solving the ridge regression
+        minimisation problem:
+        \begin{displaymath}
+            \beta_{\text{MAP}} = \argmin_\beta \sum_i (y_i - \beta^*x_i)^2
+            + \frac{1}{\mu}\norm{\beta}_2^2
+        \end{displaymath}
+    \end{itemize}
+\end{itemize}
+
+\subsection{Economics}
+
+Value function: following experiment design theory, the value of the data is
+the decrease in uncertainty about the model, and a common measure of
+uncertainty is the entropy. The value of a set $S$ of users is thus:
+\begin{displaymath}
+    V(S) = \mathbb{H}(\beta) - \mathbb{H}(\beta|Y_S)
+\end{displaymath}
+where $Y_S = \{y_i,\,i\in S\}$ is the set of observations.
+
+In our case (under the Gaussian prior):
+\begin{align*}
+    \forall S\subset\mathcal{N},\; V(S)
+    & = \frac{1}{2}\log\det\left(I_d
+    + \mu\sum_{i\in S} x_ix_i^*\right)\\
+    & \defeq \frac{1}{2}\log\det A(S)
+\end{align*}
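+For completeness, here is a quick sketch of where this expression comes from,
+writing $\Sigma_S$ for the posterior covariance of $\beta$ given $Y_S$: under
+the Gaussian model,
+$\Sigma_S = \left(\frac{1}{\kappa}I_d +
+\frac{1}{\sigma^2}\sum_{i\in S}x_ix_i^*\right)^{-1}$, and the entropy of a
+Gaussian vector with covariance $\Sigma$ is
+$\frac{1}{2}\log\left((2\pi e)^d \det\Sigma\right)$, so
+\begin{align*}
+    V(S) & = \frac{1}{2}\log\det(\kappa I_d) - \frac{1}{2}\log\det\Sigma_S
+    = \frac{1}{2}\log\det\left(\kappa I_d\,\Sigma_S^{-1}\right)\\
+    & = \frac{1}{2}\log\det\left(I_d
+    + \frac{\kappa}{\sigma^2}\sum_{i\in S}x_ix_i^*\right)
+\end{align*}
+which is the formula above, since $\mu = \frac{\kappa}{\sigma^2}$.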
+\begin{lemma}[Marginal contribution]
+    \begin{displaymath}
+        \Delta_i V(S)\defeq V(S\cup\{i\}) - V(S)
+        = \frac{1}{2}\log\left(1 + \mu x_i^*A(S)^{-1}x_i\right)
+    \end{displaymath}
+\end{lemma}
+
+\begin{proof}
+    We have:
+    \begin{align*}
+        V(S\cup\{i\}) & = \frac{1}{2}\log\det A(S\cup\{i\})\\
+        & = \frac{1}{2}\log\det\left(A(S) + \mu x_i x_i^*\right)\\
+        & = V(S) + \frac{1}{2}\log\det\left(I_d + \mu A(S)^{-1}x_i
+        x_i^*\right)\\
+        & = V(S) + \frac{1}{2}\log\left(1 + \mu x_i^* A(S)^{-1}x_i\right)
+    \end{align*}
+    where the last equality follows from Sylvester's determinant identity.
+\end{proof}
+
+\begin{itemize}
+    \item each user $i$ has a cost $c_i$
+    \item the auctioneer has a budget constraint $B$
+    \item the optimisation problem is:
+    \begin{displaymath}
+        OPT(V,\mathcal{N}, B) = \max_{S\subset\mathcal{N}} \left\{ V(S)\,|\,
+        \sum_{i\in S}c_i\leq B\right\}
+    \end{displaymath}
+\end{itemize}
+
+The costs, however, are reported by the users themselves, who may misreport
+them: this is an auction problem. The goal is to design a mechanism (a
+selection rule together with a payment scheme) which is:
+\begin{itemize}
+    \item truthful
+    \item individually rational
+    \item budget feasible
+    \item and achieves a good approximation ratio
+\end{itemize}
+
 \section{Main result}
+\begin{algorithm}\label{mechanism}
+    \caption{Budget feasible mechanism for ridge regression}
+    \begin{algorithmic}[1]
+        \State $i^* \gets \argmax_{j\in\mathcal{N}}V(j)$
+        \State $x^* \gets \argmax_{x\in[0,1]^n}
+        \{L_{\mathcal{N}\setminus\{i^*\}}(x)\,|\, c(x)\leq B\}$
+        \Statex
+        \If{$L(x^*) < CV(i^*)$}
+            \State \textbf{return} $\{i^*\}$
+        \Else
+            \State $i \gets \argmax_{1\leq j\leq n}\frac{V(j)}{c_j}$
+            \State $S \gets \emptyset$
+            \While{$c_i\leq \frac{B}{2}\frac{V(S\cup\{i\})-V(S)}{V(S\cup\{i\})}$}
+                \State $S \gets S\cup\{i\}$
+                \State $i \gets \argmax_{j\in\mathcal{N}\setminus S}
+                \frac{V(S\cup\{j\})-V(S)}{c_j}$
+            \EndWhile
+            \State \textbf{return} $S$
+        \EndIf
+    \end{algorithmic}
+\end{algorithm}
+
+Choosing $C=\ldots$ we have:
+
+\begin{theorem}
+    The mechanism is truthful, individually rational, and budget feasible.
+    Furthermore, by choosing $C=\ldots$ we get an approximation ratio of:
+    \begin{multline*}
+        1 + C^* = \frac{5e-1 + C_\mu(4e-1)}{2C_\mu(e-1)}\\
+        + \frac{\sqrt{C_\mu^2(1+2e)^2
+        + 2C_\mu(14e^2+5e+1) + (1-5e)^2}}{2C_\mu(e-1)}
+    \end{multline*}
+\end{theorem}
+
 \section{General setup}
-\section{Conclusions}
+\section{Conclusion}
+
 \end{document}
