\documentclass[portrait]{beamer}
\usepackage[utf8]{inputenc}
\usepackage[orientation=portrait,size=custom,width=55,height=71,scale=1]{beamerposter}
\usetheme{confposter}
\usepackage{times}

\newlength{\sepwid}
\newlength{\onecolwid}
\newlength{\twocolwid}
\newlength{\threecolwid}
\setlength{\paperwidth}{22in}
\setlength{\paperheight}{28in}
\setlength{\sepwid}{0.01\paperwidth}
\setlength{\onecolwid}{0.47\paperwidth}
\setlength{\twocolwid}{0.96\paperwidth}
\setlength{\fboxrule}{2pt}
\setlength{\fboxsep}{10pt}

\usepackage{graphicx}
\usepackage{amsmath}

\title{Budget Feasible Mechanisms for Experimental Design}
\author{Thibaut Horel$^*$ \and Stratis Ioannidis$^\dag$ \and Muthu Muthukrishnan$^\ddag$}
\institute{$^*$École Normale Supérieure, $^\dag$Technicolor, $^\ddag$Rutgers University}

\begin{document}

\setlength{\belowcaptionskip}{2ex} % White space under figures
\setlength{\belowdisplayshortskip}{2ex} % White space under equations

\begin{frame}[t]
\begin{columns}[t]

\begin{column}{\sepwid}\end{column}
\begin{column}{\sepwid}\end{column}

\begin{column}{\onecolwid}

\begin{block}{Setting}
\begin{center}
\includegraphics[scale=1]{st.pdf}
\end{center}
$N$ users, each of whom has:
\begin{itemize}
\item \textbf{public} features $x_i$ (\emph{e.g.}, age, gender, height)
\item \textbf{private} data $y_i$ (\emph{e.g.}, survey answer, medical test outcome)
\item a cost $c_i$ to reveal their data
\end{itemize}
Experimenter $E$:
\begin{itemize}
\item pays users to reveal their private data
\item has a budget $B$
\end{itemize}
\end{block}

\begin{block}{Ridge Regression}
\begin{enumerate}
\item Experimenter $E$ assumes a linear model:
\begin{displaymath}
y_i = \beta^{T}x_i + \varepsilon_i,\quad\varepsilon_i\sim\mathcal{N}(0,\sigma^2)
\end{displaymath}
and a normal prior $\beta\sim\mathcal{N}(0,\sigma^2I_d)$.
\item Experimenter $E$ selects a set $S$ of users within the budget, and learns $\beta$ through ridge regression:
\begin{displaymath}
\hat{\beta} = \arg\,\min_\beta\Big[ \sum_{i\in S}(y_i-\beta^Tx_i)^2 + \|\beta\|^2\Big]
\end{displaymath}
\end{enumerate}
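This estimator is the posterior mean: conditioned on the data of the selected users, a standard Bayesian computation gives
\begin{displaymath}
\beta \mid (y_i)_{i\in S} \sim \mathcal{N}\Big(\hat{\beta},\; \sigma^2\Big(I_d+\sum_{i\in S}x_ix_i^T\Big)^{-1}\Big),
\end{displaymath}
so, up to the constant factor $\sigma^2$, the posterior covariance is the variance-covariance matrix minimized in the next block.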
\end{block}

\begin{block}{Value of Data}
\textbf{Goal:} minimize the variance-covariance matrix of the estimator:
\begin{displaymath}
\mathrm{Var}\, \hat{\beta} = \Big(I_d+\sum_{i\in S}x_ix_i^T\Big)^{-1}
\end{displaymath}
Minimizing the volume ($\det$) of the variance is equivalent to maximizing the information gain (entropy reduction) on $\beta$:
\begin{displaymath}
V(S) = \log\det\Big(I_d+\sum_{i\in S} x_ix_i^T\Big)
\end{displaymath}
\end{block}

\begin{block}{Experimental Design}
The experimenter selects a set of users $S$ solving:
\begin{displaymath}
\begin{split}
&\text{Maximize } V(S) = \log\det\Big(I_d+\sum_{i\in S} x_ix_i^T\Big)\\
&\text{Subject to } \sum_{i\in S} c_i\leq B
\end{split}
\end{displaymath}
\begin{itemize}
\item a special case of budgeted submodular maximization
\item there exists a poly-time $\frac{e}{e-1}$-approximation algorithm
\end{itemize}
\end{block}

\end{column}

\begin{column}{\sepwid}\end{column}
\vrule{}
\begin{column}{\sepwid}\end{column}

\begin{column}{\onecolwid}

\begin{block}{Experimental Design with Strategic Users}
\begin{itemize}
\item users may misreport their costs $c_i$
\item payments and the selection rule must be adapted accordingly
\end{itemize}
This is a special case of \textbf{budget feasible mechanism design}: the payments and selection rule must satisfy
\begin{itemize}
\item individual rationality and truthfulness
\item budget feasibility
\end{itemize}
Can we still obtain a constant approximation ratio in polynomial time?
\end{block}

\begin{block}{Mechanism Blueprint for Experimental Design}
\fbox{\parbox{0.95\textwidth}{
\begin{itemize}
\item Compute $i^* = \arg\max_i V(\{i\})$
\item Compute $L^*$ (see below)
\item Compute $S_G$ greedily:
\begin{itemize}
\item repeatedly add the element with the \emph{highest marginal-value-per-cost} (illustrated below for $d=1$):
\begin{displaymath}
i = \arg\max_{j\in[N]\setminus S}\frac{V(S\cup\{j\}) - V(S)}{c_j}
\end{displaymath}
\item stop when the budget is exhausted
\end{itemize}
\item Return:
\begin{itemize}
\item $i^*$ if $V(\{i^*\})>L^*$
\item $S_G$ otherwise
\end{itemize}
\end{itemize}
}}
\vspace{0.3cm}
$L^*$ must be:
\begin{itemize}
\item close to $OPT:=\max_{S\subseteq [N]\setminus \{i^*\}} \{V(S);\ \sum_{i\in S}c_i\leq B\}$
\item monotone in $c$; computable in poly-time
\end{itemize}
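\vspace{0.3cm}
For intuition, in the one-dimensional case ($d=1$) we have $V(S)=\log\big(1+\sum_{i\in S}x_i^2\big)$, so the marginal gain driving the greedy rule is
\begin{displaymath}
V(S\cup\{j\}) - V(S) = \log\frac{1+\sum_{i\in S}x_i^2 + x_j^2}{1+\sum_{i\in S}x_i^2},
\end{displaymath}
which shrinks as $S$ grows; this diminishing-returns property is the submodularity of $V$.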
\end{block}

\begin{block}{Convex Relaxation for Experimental Design}
We introduce the following concave function (a relaxation of $V$):
\begin{displaymath}
L(\lambda) = \log\det\Big(I_d + \sum_{1\leq i\leq N} \lambda_i x_ix_i^T\Big), \qquad \lambda\in[0,1]^N.
\end{displaymath}
Then, using $L^* = \max_\lambda \big\{L(\lambda);\ \sum_{1\leq i\leq N} \lambda_i c_i\leq B,\ \lambda_{i^*} = 0\big\}$:
\vspace{0.3cm}
\fbox{\parbox{0.95\textwidth}{
\textbf{Theorem:} \emph{The above mechanism is truthful, individually rational, budget-feasible, runs in polynomial time, and its approximation \mbox{ratio} is $\simeq 12.984$.}}}
\vspace{0.5cm}
Technical challenges:
\begin{itemize}
\item $L^*$ is close to $OPT$: $L$ coincides with $V$ on indicator vectors, so $L^*\geq OPT$; the reverse bound, up to a constant, is shown via pipage rounding
\item $L^*$ can be approximated monotonically in $c$
\end{itemize}
\end{block}

\begin{block}{Generalization}
\begin{itemize}
\item Generative model of the form:
\begin{displaymath}
y_i = f(x_i) + \varepsilon_i
\end{displaymath}
\item Prior knowledge: $f$ is a random variable; the entropy $H(f)$ captures the experimenter's uncertainty.
\item Value function: information gain (entropy reduction) induced by the data observed from the users in $S$:
\begin{displaymath}
V(S) = H(f) - H\big(f \mid (y_i)_{i\in S}\big)
\end{displaymath}
\end{itemize}
$V$ is again submodular and the previous framework applies.
\end{block}

\end{column}

\begin{column}{\sepwid}\end{column}
\begin{column}{\sepwid}\end{column}

\end{columns}
\end{frame}

\end{document}