\documentclass{llncs}
\usepackage[numbers]{natbib}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts}
\usepackage{algorithm, algpseudocode}
\usepackage{bbm,color,verbatim}
\input{definitions}
\usepackage[pagebackref=true,breaklinks=true,colorlinks=true]{hyperref}
\title{Budget Feasible Mechanisms\\ for Experimental Design}
\author{
  Thibaut Horel\inst{1}
  \and Stratis Ioannidis\inst{2}
  \and S. Muthukrishnan\inst{3}
}
\institute{École Normale Supérieure, \email{thibaut.horel@normalesup.org}
  \and Technicolor, \email{stratis.ioannidis@technicolor.com}
  \and Rutgers University, \email{muthu@cs.rutgers.edu}
}
\begin{document}
\maketitle
\vspace{2em}
In the classical \emph{experimental design} setting, an experimenter \E\ has access to a population of $n$ potential experiment subjects $i\in \{1,\ldots,n\}$, each associated with a vector of features $x_i\in\reals^d$.
Conducting an experiment with subject $i$ reveals an unknown value $y_i\in \reals$ to \E.
\E\ typically assumes some hypothetical relationship between $x_i$'s and $y_i$'s, \emph{e.g.}, $y_i \approx \T{\beta} x_i$, and estimates $\beta$ from experiments, \emph{e.g.}, through linear regression.
As a proxy for various practical constraints, \E{} may select only a subset of subjects on which to conduct the experiment.
We initiate the study of budgeted mechanisms for experimental design.
In this setting, \E{} has a budget $B$.
Each subject $i$ declares an associated cost $c_i >0$ to be part of the experiment, and must be paid at least her cost.
In particular, the \emph{Experimental Design Problem} (\SEDP) is to find a set $S$ of subjects for the experiment that maximizes $V(S) = \log\det(I_d+\sum_{i\in S}x_i\T{x_i})$ under the constraint $\sum_{i\in S}c_i\leq B$; our objective function corresponds to the information gain in parameter $\beta$ that is learned through linear regression methods, and is related to the so-called $D$-optimality criterion.
Further, the subjects are \emph{strategic} and may lie about their costs.
Thus, we need to design a mechanism for \SEDP{} with suitable properties.
We present a deterministic, polynomial-time, budget-feasible mechanism scheme that is approximately truthful and yields a 12.98-factor approximation to \EDP.
% By applying previous work on budget feasible mechanisms with
% a submodular objective, one could {\em only} have derived either an exponential
% time deterministic mechanism or a randomized polynomial time mechanism.
We also establish that no truthful, budget-feasible mechanism is possible within a factor $2$ approximation, and show how to generalize our approach to a wide class of learning problems, beyond linear regression.
\end{document}