\documentclass{acm_proc_article-sp}
\usepackage[utf8]{inputenc}
\usepackage{amsmath,amsfonts}
\usepackage{algorithm}
\usepackage{algpseudocode}
\newtheorem{lemma}{Lemma}
\newtheorem{fact}{Fact}
\newtheorem{example}{Example}
\newtheorem{prop}{Proposition}
\newtheorem{theorem}{Theorem}
\newcommand*{\defeq}{\stackrel{\text{def}}{=}}
\newcommand{\var}{\mathop{\mathrm{Var}}}
\newcommand{\condexp}[2]{\mathop{\mathbb{E}}\left[#1|#2\right]}
\newcommand{\expt}[1]{\mathop{\mathbb{E}}\left[#1\right]}
\newcommand{\norm}[1]{\lVert#1\rVert}
\newcommand{\tr}[1]{#1^*}
\newcommand{\ip}[2]{\langle #1, #2 \rangle}
\newcommand{\mse}{\mathop{\mathrm{MSE}}}
\DeclareMathOperator{\trace}{tr}
\DeclareMathOperator*{\argmax}{arg\,max}
\title{Budgeted mechanism for optimal experiment design}
\begin{document}
\section{Intro}
\begin{itemize}
\item already existing field of experiment design: survey-like setup, what are the best points to include in your experiment. Measure of the usefulness of the data: variance-reduction or entropy-reduction
\item nowadays, there is also a big focus on purchasing data: paid surveys, Amazon Mechanical Turk, etc., which adds economic aspects to the problem of experiment design
\item recent advances (Singer, Chen) in the field of budgeted mechanisms
\item we study ridge regression, very widely used in statistical learning, and treat it as a problem of budgeted experiment design
\item we make the following contributions: ...
\item extension to a more general setup which includes a wider class of machine learning problems
\end{itemize}
\section{Problem formulation}
\subsection{Notations}
\begin{itemize}
\item If $x\in\mathbf{R}^d$, $x^*$ will denote the transpose of vector $x$. If $(x, y)\in\mathbf{R}^d\times\mathbf{R}^d$, $x^*y$ is the standard inner product of $\mathbf{R}^d$.
\item We will often use the following order over symmetric matrices: if $A$ and $B$ are two $d\times d$ real symmetric matrices, we write that $A\leq B$ iff:
\begin{displaymath}
\forall x\in\mathbf{R}^d,\quad x^*Ax \leq x^*Bx
\end{displaymath}
That is, iff $B-A$ is symmetric positive semi-definite.
\item Let us recall this property of the order defined above: if $A$ and $B$ are two symmetric positive definite matrices such that $A\leq B$, then $B^{-1}\leq A^{-1}$ (inversion reverses this order).
\end{itemize}
\subsection{Data model}
\begin{itemize}
\item set of $n$ users $\mathcal{N} = \{1,\ldots, n\}$
\item each user has a public vector of features $x_i\in\mathbf{R}^d$ and some undisclosed variable $y_i\in\mathbf{R}$
\item we assume that the data has been normalized: $\forall i\in\mathcal{N}$, $\norm{x_i}\leq 1$
\item \textbf{Ridge regression model:}
\begin{itemize}
\item $y_i = \beta^*x_i + \varepsilon_i$
\item $\varepsilon_i \sim \mathcal{N}(0,\sigma^2)$, $(\varepsilon_i)_{i\in \mathcal{N}}$ are mutually independent.
\item prior knowledge of $\beta$: $\beta\sim\mathcal{N}(0,\kappa I_d)$
\item $\mu = \frac{\kappa}{\sigma^2}$ is the regularization factor.
\item after the variables $y_i$ are disclosed, the experimenter does \emph{maximum a posteriori estimation}, which is equivalent under a Gaussian prior to solving the ridge regression minimisation problem:
\begin{displaymath}
\beta_{\text{MAP}} = \operatorname*{arg\,min}_{\beta\in\mathbf{R}^d} \sum_i (y_i - \beta^*x_i)^2 + \frac{1}{\mu}\norm{\beta}_2^2
\end{displaymath}
\end{itemize}
\end{itemize}
\subsection{Economics}
Value function: following experiment design theory, the value of data is the decrease of uncertainty about the model. A common measure of uncertainty is the entropy. Thus, the value of a set $S$ of users is:
\begin{displaymath}
V(S) = \mathbb{H}(\beta) - \mathbb{H}(\beta|Y_S)
\end{displaymath}
where $Y_S = \{y_i,\,i\in S\}$ is the set of observations.
In our case (under Gaussian prior):
\begin{align*}
\forall S\subset\mathcal{N},\; V(S) & = \frac{1}{2}\log\det\left(I_d + \mu\sum_{i\in S} x_ix_i^*\right)\\
& \defeq \frac{1}{2}\log\det A(S)
\end{align*}
\begin{lemma}[Marginal contribution]
For all $i\in\mathcal{N}\setminus S$:
\begin{displaymath}
\Delta_i V(S)\defeq V(S\cup\{i\}) - V(S) = \frac{1}{2}\log\left(1 + \mu x_i^*A(S)^{-1}x_i\right)
\end{displaymath}
\end{lemma}
\begin{proof}
We have:
\begin{align*}
V(S\cup\{i\}) & = \frac{1}{2}\log\det A(S\cup\{i\})\\
& = \frac{1}{2}\log\det\left(A(S) + \mu x_i x_i^*\right)\\
& = V(S) + \frac{1}{2}\log\det\left(I_d + \mu A(S)^{-1}x_i x_i^*\right)\\
& = V(S) + \frac{1}{2}\log\left(1 + \mu x_i^* A(S)^{-1}x_i\right)
\end{align*}
where the last equality comes from Sylvester's determinant formula.
\end{proof}
\begin{itemize}
\item each user $i$ has a cost $c_i$
\item the auctioneer has a budget constraint $B$
\item optimisation problem:
\begin{displaymath}
OPT(V,\mathcal{N}, B) = \max_{S\subset\mathcal{N}} \left\{ V(S)\,|\, \sum_{i\in S}c_i\leq B\right\}
\end{displaymath}
\end{itemize}
The problem is that the costs are reported by the users themselves, who can lie about them: this makes it an auction problem.
Goal: design a mechanism (selection and payment scheme) which is:
\begin{itemize}
\item truthful
\item individually rational
\item budget feasible
\item has a good approximation ratio
\end{itemize}
\section{Main result}
\begin{algorithm}
\caption{Budget feasible mechanism for ridge regression}\label{mechanism}
\begin{algorithmic}[1]
\State $i^* \gets \argmax_{j\in\mathcal{N}}V(j)$
\State $x^* \gets \argmax_{x\in[0,1]^n} \{L_{\mathcal{N}\setminus\{i^*\}}(x) \,|\, c(x)\leq B\}$
\Statex
\If{$L_{\mathcal{N}\setminus\{i^*\}}(x^*) < C\, V(i^*)$}
\State \textbf{return} $\{i^*\}$
\Else
\State $i \gets \argmax_{j\in\mathcal{N}}\frac{V(j)}{c_j}$
\State $S \gets \emptyset$
\While{$c_i\leq \frac{B}{2}\frac{V(S\cup\{i\})-V(S)}{V(S\cup\{i\})}$}
\State $S \gets S\cup\{i\}$
\State $i \gets \argmax_{j\in\mathcal{N}\setminus S} \frac{V(S\cup\{j\})-V(S)}{c_j}$
\EndWhile
\State \textbf{return} $S$
\EndIf
\end{algorithmic}
\end{algorithm}
Choosing $C=...$ we have:
\begin{theorem}
The mechanism is truthful, individually rational, and budget feasible. Furthermore, by choosing $C= ...$ we get an approximation ratio of:
\begin{multline*}
1 + C^* = \frac{5e-1 + C_\mu(4e-1)}{2C_\mu(e-1)}\\
+ \frac{\sqrt{C_\mu^2(1+2e)^2 + 2C_\mu(14e^2+5e+1) + (1-5e)^2}}{2C_\mu(e-1)}
\end{multline*}
\end{theorem}
\section{General setup}
\section{Conclusion}
\end{document}