\documentclass{beamer} \usepackage[utf8x]{inputenc} \usepackage[greek,english]{babel} \usepackage[LGR,T1]{fontenc} \usepackage{amsmath} \usepackage{algpseudocode,algorithm} \usepackage{graphicx} \DeclareMathOperator*{\argmax}{arg\,max} \usetheme{Boadilla} \title[]{Budget Feasible Mechanisms for Experimental Design} \author[Thibaut Horel]{Thibaut Horel\\ Joint work with Stratis Ioannidis and S. Muthukrishnan} \institute[]{} \setbeamercovered{transparent} \setbeamertemplate{navigation symbols}{} %\AtBeginSection[] %{ %\begin{frame} %\frametitle{Outline} %\tableofcontents[currentsection] %\end{frame} %} \begin{document} \begin{frame} \maketitle \end{frame} \section{Introduction} \begin{frame}{Motivation} \begin{center} \includegraphics<1>[scale=0.5]{1.pdf} \includegraphics<2>[scale=0.5]{2.pdf} \includegraphics<3>[scale=0.5]{3.pdf} \includegraphics<4>[scale=0.5]{4.pdf} \end{center} \end{frame} \begin{frame}{Challenges} \pause \begin{itemize} \item Value of data? \vspace{1cm} \pause \item How to optimize it? \vspace{1cm} \pause \item Strategic users? \end{itemize} \end{frame} \begin{frame}{Contributions} \pause \begin{itemize} \item case of the linear regression \vspace{1cm} \pause \item deterministic mechanism \vspace{1cm} \pause \item generalization (randomized mechanism) \end{itemize} \end{frame} \section{Budget feasible mechanism design} \begin{frame}{Outline} \tableofcontents \end{frame} \begin{frame}{Outline} \tableofcontents[currentsection] \end{frame} \begin{frame}{Reverse auction} \begin{itemize} \item set of $N$ sellers: $\mathcal{A} = \{1,\ldots,N\}$; a buyer \vspace{0.3cm} \pause \item $V$ value function of the buyer, $V:2^\mathcal{A}\rightarrow \mathbb{R}^+$ \vspace{0.3cm} \pause \item $c_i\in\mathbb{R}^+$ price of seller's $i$ good \vspace{0.3cm} \pause \item $B$ budget constraint of the buyer \end{itemize} \vspace{0.5cm} \pause \begin{block}{Goal} \begin{itemize} \item Find $S\subset \mathcal{A}$ \alert{maximizing} $V(S)$ \vspace{0.3cm} \item Find \alert{payment} $p_i$ to seller $i\in S$ \end{itemize} \end{block} \end{frame} \begin{frame}{Objectives} Payments $(p_i)_{i\in S}$ must be: \pause \vspace{0.3cm} \begin{itemize} \item individually rational: $p_i\geq c_i,\;i\in S$ \vspace{0.2cm} \pause \item truthful: reporting one's true cost is a dominant strategy \vspace{0.2cm} \pause \item budget feasible: $\sum_{i\in S} p_i \leq B$ \vspace{0.2cm} \end{itemize} \pause \vspace{0.3cm} Mechanism must be: \vspace{0.3cm} \pause \begin{itemize} \item computationally efficient: polynomial time \pause \vspace{0.2cm} \item good approximation: $V(OPT) \leq \alpha V(S)$ with: \begin{displaymath} OPT = \arg\max_{S\subset \mathcal{A}} \left\{V(S)\mid \sum_{i\in S}c_i\leq B\right\} \end{displaymath} \end{itemize} \end{frame} \begin{frame}{Known results} When $V$ is submodular: \vspace{1cm} \pause \begin{itemize} \item \alert{randomized} budget feasible mechanism, approximation ratio: $7.91$ (Chen et al., 2011) \vspace{1cm} \pause \item \alert{deterministic} mechanisms for: \vspace{0.3cm} \begin{itemize} \item Knapsack: $2+\sqrt{2}$ (Chen et al., 2011) \vspace{0.3cm} \item Matching: 7.37 (Singer, 2010) \vspace{0.3cm} \item Coverage: 31 (Singer, 2012) \vspace{0.3cm} \end{itemize} \end{itemize} \end{frame} \section{Budget feasible mechanism for linear regression} \begin{frame}{Outline} \tableofcontents[currentsection] \end{frame} \begin{frame}{Linear Regression} \begin{center} \includegraphics<1-4>[scale=0.5]{5.pdf} \includegraphics<5>[scale=0.5]{6.pdf} \includegraphics<6>[scale=0.5]{7.pdf} \includegraphics<7>[scale=0.5]{8.pdf} \end{center} \begin{center} \begin{overprint} \onslide<1>\begin{center}$N$ users\end{center} \onslide<2> \begin{center} $x_i$: public features (e.g. age, gender, height, etc.) \end{center} \onslide<3> \begin{center} $y_i$: private data (e.g. disease, etc.) \end{center} \onslide<4> \begin{center} Gaussian Linear model: $y_i = \beta^Tx_i + \varepsilon_i$ \end{center} \begin{displaymath} \beta^* = \arg\min_\beta \sum_i |y_i-\beta^Tx_i|^2 \end{displaymath} \end{overprint} \end{center} \end{frame} \begin{frame}{Experimental design} \begin{itemize} \item Public vector of features $x_i\in\mathbb{R}^d$ \item Private data $y_i\in\mathbb{R}$ \end{itemize} \vspace{0.5cm} Gaussian linear model: \begin{displaymath} y_i = \beta^T x_i + \varepsilon_i,\quad\beta\in\mathbb{R}^d,\; \varepsilon_i\sim\mathcal{N}(0,\sigma^2) \end{displaymath} \pause \vspace{0.5cm} Which users to select?\pause{} Experimental design $\Rightarrow$ D-optimal criterion \vspace{0.5cm} \begin{block}{Experimental Design} \begin{displaymath} \textsf{\alert{maximize}}\quad V(S) = \log\det\left(I_d + \sum_{i\in S} x_i x_i^T\right)\quad \textsf{\alert{subject to}}\quad |S|\leq k \end{displaymath} \end{block} \end{frame} \begin{frame}{Budgeted Experimental design} \begin{displaymath} \textsf{\alert{maximize}}\quad V(S) = \log\det\left(I_d + \sum_{i\in S} x_i x_i^T\right)\quad \textsf{\alert{subject to}}\quad \sum_{i\in S}c_i\leq B \end{displaymath} \vspace{1cm} \begin{itemize} \item the non-strategic optimization problem is NP-hard \vspace{0.3cm} \pause \item $V$ is submodular \vspace{0.3cm} \pause \item previous results give a randomized budget feasible mechanism \vspace{0.3cm} \pause \item deterministic mechanism? \end{itemize} \end{frame} \begin{frame}{Main result} \begin{theorem} There exists a budget feasible, individually rational and truthful mechanism for budgeted experimental design which runs in polynomial time. Its approximation ratio is: \begin{displaymath} \frac{10e-3 + \sqrt{64e^2-24e + 9}}{2(e-1)} \simeq 12.98 \end{displaymath} \end{theorem} \end{frame} \begin{frame}{Sketch of proof} \begin{block}{Mechanism (Chen et. al, 2011) for submodular $V$} \begin{itemize} \item Find $i^* = \arg\max_i V(\{i\})$ \item Compute $S_G$ greedily \item Return: \begin{itemize} \item $\{i^*\}$ if $V(\{i^*\}) \geq \only<1-2>{V(OPT_{-i^*})}\only<3>{\alert{V(OPT_{-i^*})}}\only<4->{\alert{L^*}}$ \item $S_G$ otherwise \end{itemize} \end{itemize} \end{block} \vspace{0.5cm} \pause Valid mechanism, approximation ratio: 8.34 \vspace{0.5cm} \pause \alert{Problem:} $OPT_{-i^*}$ is NP-hard to compute \vspace{0.5cm} \pause \begin{columns}[c] \begin{column}{0.53\textwidth} \alert{Solution:} Replace $V(OPT_{-i^*})$ with \alert<7>{$L^*$}: \pause \begin{itemize} \item computable in polynomial time \pause \item close to $V(OPT_{-i^*})$ \end{itemize} \end{column} \pause \begin{column}{0.45\textwidth} \begin{itemize} \item Knapsack (Chen et al., 2011) \item Coverage (Singer, 2012) \end{itemize} \end{column} \end{columns} \end{frame} \begin{frame}{Sketch of proof (2)} \begin{displaymath} L^* = \arg\max_{\lambda\in [0,1]^n} \left\{\log\det\left(I_d + \sum_i \lambda_i x_i x_i^T\right)\mid \sum_{i=1}^n\lambda_i c_i\leq B\right\} \end{displaymath} \vspace{1cm} \begin{itemize} \item polynomial time?\pause{} convex optimization problem \pause \vspace{0.5cm} \item close to $V(OPT_{-i^*})$?\pause \vspace{0.5cm} \begin{block}{Technical lemma} \begin{displaymath} L^* \leq 2 V(OPT) + V(\{i^*\}) \end{displaymath} \end{block} \end{itemize} \end{frame} \section{Generalization} \begin{frame}{Outline} \tableofcontents[currentsection] \end{frame} \begin{frame}{Generalization} \begin{itemize} \item Generative model: $y_i = f(x_i) + \varepsilon_i,\;i\in\mathcal{A}$ \pause \item prior knowledge of the experimenter: $f$ is a \alert{random variable} \pause \item \alert{uncertainty} of the experimenter: entropy $H(f)$ \pause \item after observing $\{y_i,\; i\in S\}$, uncertainty: $H(f\mid S)$ \end{itemize} \pause \vspace{0.5cm} \begin{block}{Value function: Information gain} \begin{displaymath} V(S) = H(f) - H(f\mid S),\quad S\subset\mathcal{A} \end{displaymath} \end{block} \pause \vspace{0.5cm} $V$ is \alert{submodular} $\Rightarrow$ randomized budget feasible mechanism \end{frame} \begin{frame}{Conclusion} \begin{itemize} \item Experimental design + Auction theory = powerful framework \vspace{1cm} \pause \item deterministic mechanism for the general case? other learning tasks? \vspace{1cm} \pause \item approximation ratio $\simeq \alert{13}$. Lower bound: \alert{$2$} \end{itemize} \end{frame} \end{document}