\documentclass[10pt]{beamer} \usepackage[utf8x]{inputenc} \usepackage{amsmath,verbatim} \usepackage{algorithm,algpseudocode} \usepackage{graphicx} \DeclareMathOperator*{\argmax}{arg\,max} \DeclareMathOperator*{\argmin}{arg\,min} \usetheme{Boadilla} \newcommand{\E}{{\tt E}} \title[LATIN 2014]{Budget Feasible Mechanism\\for Experimental Design} \author[Thibaut Horel]{\textbf{Thibaut Horel}, Stratis Ioannidis, and S. Muthukrishnan} %\setbeamercovered{transparent} \setbeamertemplate{navigation symbols}{} \newcommand{\ie}{\emph{i.e.}} \newcommand{\eg}{\emph{e.g.}} \newcommand{\etc}{\emph{etc.}} \newcommand{\etal}{\emph{et al.}} \newcommand{\reals}{\ensuremath{\mathbb{R}}} \begin{document} \maketitle \begin{frame}{Motivation} \begin{center} \includegraphics<1>[scale=0.4]{stg-8.pdf} \includegraphics<2>[scale=0.4]{stg-7.pdf} \includegraphics<3>[scale=0.4]{stg-6.pdf} \includegraphics<4>[scale=0.4]{stg-5.pdf} \includegraphics<5>[scale=0.4]{stg-4.pdf} \includegraphics<6>[scale=0.4]{stg-3.pdf} \includegraphics<7>[scale=0.4]{stg-2.pdf} \includegraphics<8>[scale=0.4]{stg-1.pdf} \end{center} \end{frame} \begin{frame}{Applications} \begin{itemize} \item<1-> Applications \begin{itemize} \item Medicine/Sociology \item Online surveys \item Data markets \end{itemize} \vspace*{1cm} \item<2-> Challenges \begin{itemize} \item Which users are \alert{the most valuable}? \item What if users are \alert{strategic}? \end{itemize} \end{itemize} \end{frame} \begin{frame}{Contributions} \pause \begin{itemize} \item Formulating the problem in the Experimental Design framework \pause \item Experimental design with \alert{strategic agents}?\pause \begin{itemize} \item Budget Feasible Mechanisms [Singer, 2010] \end{itemize} \end{itemize} \pause \vspace*{1cm} For Linear Regression: \begin{itemize} \item deterministic, poly-time, truthful, budget feasible, 12.9-approximate mechanism. \pause \item Lower bound of 2. \pause \item Prior results: \pause \begin{itemize} \item deterministic exponential-time mechanism [Singer, 2010; Chen \etal, 2011] \pause \item universally truthful (randomized) mechanism [Chen \etal, 2011; Singer, 2012] \end{itemize} \end{itemize} \pause \vspace*{1cm} Generalization to more general statistical learning tasks. \end{frame} \begin{frame}{Problem} \begin{center} \includegraphics<1>[scale=0.4]{st2.pdf} \includegraphics<2>[scale=0.4]{st3.pdf} \includegraphics<3>[scale=0.4]{st4.pdf} \includegraphics<4>[scale=0.4]{st5.pdf} \includegraphics<5>[scale=0.4]{st6.pdf} \includegraphics<6>[scale=0.4]{st6a.pdf} \includegraphics<7>[scale=0.4]{st6b.pdf} \includegraphics<8>[scale=0.4]{st6c.pdf} \includegraphics<9>[scale=0.4]{st6d.pdf} \includegraphics<10->[scale=0.4]{st6e.pdf} \end{center} \vspace*{-2em} \begin{center} \begin{overprint} \onslide<1>\begin{center}$N$ ``experiment subjects'', $[N]\equiv \{1,\ldots,N\}$ \end{center} \onslide<2> \begin{center} $x_i\in \reals^d$: public features (\eg, age, gender, height, \etc) \end{center} \onslide<3> \begin{center} $y_i\in \reals$: private data (\eg, survey answer, medical test outcome, movie rating\ldots) \end{center} \onslide<4> \textbf{Gaussian Linear Model.} There exists $\beta\in \reals^d$ s.t.\vspace*{-1em} \begin{displaymath} y_i = \beta^T x_i + \varepsilon_i,\quad \varepsilon_i\sim\mathcal{N}(0,\sigma^2), \quad i\in [N] \end{displaymath} \onslide<5> \begin{center} Experimenter {\tt E} wishes to learn $\beta$. \end{center} \onslide<6> \begin{center} Each subject $i\in [N]$ has a cost $c_i\in \reals_+$ \end{center} \onslide<7> \begin{center} \E\ has a budget $B$. \end{center} \onslide<8-9> \begin{center} \E\ pays subjects\visible<9>{; $y_i$ is revealed upon payment.} \end{center} \onslide<10> \begin{center} {\tt E} estimates ${\beta}$ through \emph{ridge regression}. \end{center} \onslide<11> \begin{center} Goal: Determine who to pay how much so that $\hat{\beta}$ is as accurate as possible. \end{center} \onslide<12> \begin{center} Users are \alert{strategic} and may lie about costs $c_i$\\users cannot manipulate $x_i$ or $y_i$. \end{center} \end{overprint} \end{center} \end{frame} \begin{frame}{Value of data} Let $S\subset [N]$ be the set of selected users. \E\ has a \emph{prior} on $\beta$: $$\beta \sim \mathcal{N}(0,\sigma^2 R^{-1}). $$ \pause Information Gain: reduction of uncertainty on $\beta$ \begin{align*} I(\beta;S)&=H(\beta)-H(\beta\mid y_i,i\in S)\\ \pause & = \frac{1}{2}\log\det \Big(R+\sum_{i\in S}x_ix_i^T\Big)-\frac{1}{2}\log\det R \end{align*} \end{frame} \begin{frame}{Experimental Design} \begin{block}{\textsc{Experimental Design Problem (EDP)} } \vspace*{-0.4cm} \begin{align*} \text{Maximize}\qquad &V(S) = \log \det (I+\sum_{i\in S}x_ix_i^T) \\ \text{subj. to}\qquad &\textstyle\sum_{i\in S}c_i\leq B \end{align*} \end{block} \vspace*{1cm} \pause \begin{itemize} \item EDP is NP-hard \pause \item $V$ is submodular, monotone, non-negative, and $V(\emptyset)=0$ \pause \item $\frac{e}{e-1}$-approximable (Sviridenko 2004, Krause and Guestrin 2005) \end{itemize} \end{frame} \begin{frame}{Experimental Design with Strategic Agents} \begin{block}{Value function} \vspace*{-0.4cm} \begin{align*} \text{Maximize}\qquad &V(S) = \log \det (I+\sum_{i\in S}x_ix_i^T) \\ \text{subj. to}\qquad &\textstyle\sum_{i\in S}c_i\leq B \end{align*} \end{block} \vspace*{1cm} \pause Budgeted auction mechanism design problem. \pause \vspace*{0.5cm} Payments and allocation rule must be: \begin{itemize} \item normalized, truthful and individually rational \pause \item computable in polynomial time, give a good approximation ratio \end{itemize} \end{frame} \begin{frame}{Our Main Results} \begin{theorem} There exists a deterministic, budget feasible, individually rational and truthful mechanism for EDP which runs in polynomial time. Its approximation ratio is: \begin{displaymath} \frac{10e-3 + \sqrt{64e^2-24e + 9}}{2(e-1)} \simeq 12.98 \end{displaymath} \end{theorem} \pause \vspace*{0.5cm} \begin{theorem} There is no 2-approximate, budget feasible, individually rational and truthful mechanism for EDP. \end{theorem} \pause \vspace*{0.5cm} \alert{Bonus:} two technical approximation results. \end{frame} \begin{frame}{Blueprint for Deterministic, Poly-time Algorithm} Azar and Gamzu 2008, Singer 2010, Chen \etal\ 2011: \begin{block}{Allocation rule} \begin{itemize} \item Find $i^* = \arg\max_i V(\{i\})$ \item Compute $S_G$ greedily \item Return: \begin{itemize} \item $\{i^*\}$ if $V(\{i^*\}) \geq \alert<2>{OPT_{-i^*}}$ \item $S_G$ otherwise \end{itemize} \end{itemize} \end{block} \vspace{0.5cm} \pause \pause \begin{columns}[c] \begin{column}{0.53\textwidth} Replace $OPT_{-i^*}$ with a \alert{relaxation $L^*$}:\pause \begin{itemize} \item computable in polynomial time \pause \item close to $OPT_{-i^*}$ \pause \item monotone in costs $c$ \end{itemize} \end{column} \pause \begin{column}{0.45\textwidth} \begin{itemize} \item \textsc{Knapsack} (Chen \etal, 2011) \item \textsc{Coverage} (Singer, 2012) \end{itemize} \end{column} \end{columns} \end{frame} \begin{frame}{A relaxation for \textsc{EDP}} \alt<1>{ \begin{block}{\textsc{EDP} } \vspace*{-0.4cm} \begin{align*} \text{Maximize}\qquad &V(S) = \log \det\big(I+\sum_{i\in S}x_ix_i^T\big) \\ \text{subj. to}\qquad &\textstyle\sum_{i\in S}c_i\leq B \end{align*} \end{block} } { \begin{block}{Convex Relaxation} \vspace*{-0.4cm} \begin{align*} \text{Maximize}\qquad &\alert<2>{L(\lambda)} = \log \det \big(I+\sum_{i\in [N]}\alert<2>{\lambda_i}x_ix_i^T\big) \\ \text{subj. to}\qquad &\textstyle\sum_{i\in [N]}\alert<2>{\lambda_i}c_i\leq B, \lambda_i\in [0,1] \end{align*} \end{block} } \pause \vspace{0.5cm} $L(\lambda^*)$ is: \begin{itemize} \item Monotone in $c$ \pause \item Poly-time: $L$ is concave \end{itemize} \end{frame} \begin{frame}{First technical result: integrality gap} \begin{align*} \text{Maximize}\qquad &L(\lambda) = \log \det \big(I+\sum_{i\in [N]}\lambda_ix_ix_i^T\big) \\ \text{subj. to}\qquad &\textstyle\sum_{i\in [N]}\lambda_ic_i\leq B, \lambda_i\in [0,1] \end{align*} \pause \vspace{0.5cm} \begin{block}{Integrality gap} For $\lambda^*$ the optimal solution: \begin{displaymath} OPT\leq L(\lambda^*) \leq 2 OPT + 2V(\{i^*\}) \end{displaymath} \end{block} \pause \vspace{0.5cm} \alert{Proof:} \begin{itemize} \item relate $L$ to the multilinear extension of $V$ \item use \alert{pipage rounding} [Ageev and Sviridenko, 2004] \end{itemize} \end{frame} \begin{frame}{Second technical result: monotone approximation} \begin{block}{EDP-relaxed} \begin{align*} \text{Maximize}\qquad &L(\lambda) = \log \det \big(I+\sum_{i\in [N]}\lambda_ix_ix_i^T\big) \\ \text{subj. to}\qquad &\textstyle\sum_{i\in [N]}\lambda_ic_i\leq B, \lambda_i\in [\only<1-2>{0}\only<3->{\alert<3>{\alpha}},1] \end{align*} \end{block} \vspace*{1cm} \pause Convex optimization only gives $\delta$-approximation $\alert{\Rightarrow}$ breaks monotonicity. \vspace*{1cm} \pause Shrink feasible set: \begin{itemize} \pause \item $L$ is ``sufficiently'' monotone \pause \item a $\delta$-approximation of its optimum is $\varepsilon(\delta,\alpha)$-monotone in the costs \pause \item tune $\delta$, $\alpha$ $\Rightarrow$ $\varepsilon$-truthful mechanism \end{itemize} \end{frame} %\begin{frame}{Towards More General ML Tasks} % \begin{center} % \includegraphics<1>[scale=0.4]{st2.pdf} % \includegraphics<2>[scale=0.4]{st3.pdf} % \includegraphics<3>[scale=0.4]{st4.pdf} % \includegraphics<4-5>[scale=0.4]{st25.pdf} % \includegraphics<6->[scale=0.4]{st26.pdf} % \end{center} % % \begin{overprint} % \onslide<1> % \begin{center}$N$ experiment subjects\end{center} % \onslide<2> % \begin{center} % $x_i\in \Omega$: public features % \end{center} % \onslide<3> % \begin{center} % $y_i\in \reals$: private data % \end{center} % \onslide<4-5> % \begin{center} % \textbf{Hypothesis.} There exists $h:\Omega\to \reals$ s.t. % % \begin{displaymath} % $y_i = h(x_i) + \varepsilon_i,$ %\quad % where $\varepsilon_i$ are independent. % \visible<5>{Examples: Logistic Regression, LDA, SVMs, \etc} % % \end{displaymath} % \end{center} %%$$y_i = \beta^Tx_i + \varepsilon_i, i=1,\ldots, N$$ % %\end{center} % \onslide<6> % \begin{center} % Experimenter {\tt E} wishes to learn $h$, and has a prior distribution over $$\mathcal{H}=\{\text{mappings from }\Omega\text{ to }\reals\}.$$ % \end{center} % \onslide<7->\begin{center} %\vspace*{-0.5cm} %% \begin{block} %%{Value function: Information gain} % \begin{displaymath} % V(S) = H(h) - H(h\mid y_i,i\in S),\quad S\subseteq[N] % \end{displaymath} % % \end{block} % \visible<8>{$V$ is submodular! [Krause and Guestrin 2009]} % \end{center} % \end{overprint} % %\end{frame} \begin{frame}{Conclusion} \begin{itemize} \item Experimental design + Budget Feasible Mechanisms \vspace{1cm} \pause \item Deterministic mechanism for other ML Tasks? General submodular functions? \vspace{1cm} \pause \item Approximation ratio $\simeq \alert{13}$. Lower bound: \alert{$2$} \end{itemize} \end{frame} \begin{frame}{} \vspace{\stretch{1}} \begin{center} {\Huge Thank you!} \end{center} \vspace{\stretch{1}} \end{frame} \begin{frame}{Non-Homotropic Prior} Recall that: \begin{align*} V(S)&=H(\beta)-H(\beta\mid y_i,i\in S)\\ & = \frac{1}{2}\log\det (R^{-1}+\sum_{i\in S}x_ix_i^T) \end{align*} What if $R\neq I$? \begin{theorem} There exists a truthful, poly-time, and budget feasible mechanism for the objective function $V$ above. Furthermore, the algorithm computes a set $S^*$ such that: \begin{displaymath} OPT \leq \frac{5e-1}{e-1}\frac{2}{\mu\log(1+1/\mu)}V(S^*) + 5.1 \end{displaymath} where $\mu$ is the largest eigenvalue of $R$. \end{theorem} \end{frame} \end{document}