diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2013-02-07 14:37:17 -0800 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2013-02-07 14:37:17 -0800 |
| commit | 58ccbe548085600cc2fbb005726191438b450bc7 (patch) | |
| tree | 2f55375aff50a738072090cb1f4df7e830bbfe1d /slides/slides.tex | |
| parent | a148a6f046173cd114e009c6ef50e77d5d5941f4 (diff) | |
| download | recommendation-58ccbe548085600cc2fbb005726191438b450bc7.tar.gz | |
Slides for dimacs workshop at Stanford
Diffstat (limited to 'slides/slides.tex')
| -rw-r--r-- | slides/slides.tex | 493 |
1 files changed, 250 insertions, 243 deletions
diff --git a/slides/slides.tex b/slides/slides.tex index 81f6deb..1bccd02 100644 --- a/slides/slides.tex +++ b/slides/slides.tex @@ -4,21 +4,21 @@ \usepackage[LGR,T1]{fontenc} \usepackage{amsmath} \usepackage{algpseudocode,algorithm} +\usepackage{graphicx} \DeclareMathOperator*{\argmax}{arg\,max} \usetheme{Boadilla} -\usecolortheme{beaver} -\title[Budget Feasible Mechanisms for Experimental Design]{Budget Feasible Mechanisms for Experimental Design} -\author{xx\\ Joint work with yy and zz} +\title[]{Budget Feasible Mechanisms for Experimental Design} +\author[Thibaut Horel]{Thibaut Horel\\ Joint work with Stratis Ioannidis and S. Muthukrishnan} \institute[]{} \setbeamercovered{transparent} - -\AtBeginSection[] -{ -\begin{frame}<beamer> -\frametitle{Outline} -\tableofcontents[currentsection] -\end{frame} -} +\setbeamertemplate{navigation symbols}{} +%\AtBeginSection[] +%{ +%\begin{frame}<beamer> +%\frametitle{Outline} +%\tableofcontents[currentsection] +%\end{frame} +%} \begin{document} @@ -26,229 +26,214 @@ \maketitle \end{frame} -\begin{frame} -\tableofcontents -\end{frame} +\section{Introduction} -\section{Experimental Design} -\begin{frame}{Experimental Design} - An experimenter is about to conduct an experiment with users. - \begin{itemize} - \item $n$ users - \item $x_i\in\mathbb{R}^d$: public features of user $i$ - \item $c_i\in\mathbb{R}^+$: cost of user $i$ for taking part in the experiment - \end{itemize} +\begin{frame}{Motivation} + \begin{center} + \includegraphics<1-4>[scale=0.5]{1.pdf} + \includegraphics<5>[scale=0.5]{2.pdf} + \includegraphics<6>[scale=0.5]{3.pdf} + \includegraphics<7>[scale=0.5]{4.pdf} + \end{center} - When paid $c_i$, user $i$ reveals $y_i$: - \begin{displaymath} - y_i = \beta^T x_i + \varepsilon_i,\quad\beta\in\mathbb{R}^d,\; - \varepsilon_i\sim\mathcal{N}(0,\sigma^2) - \end{displaymath} - $\beta$ is \alert{unknown} to the experimenter. - - \begin{block}{} - \alert{Question:} budget constraint $B$, which users to pick to learn $\beta$ as - well as possible without spending more than $B$? - \end{block} + \begin{center} + \begin{overprint} + \onslide<1>\begin{center}$N$ users\end{center} + \onslide<2> + \begin{center} + $x_i\in\mathbb{R}^d$: public features (e.g. age, sex, height, etc.) + \end{center} + \onslide<3> + \begin{center} + $y_i\in\mathbb{R}$: private date (e.g. disease, etc.) + \end{center} + \onslide<4> + \begin{center} + Linear model: $y_i = \beta^Tx_i + \varepsilon_i$ + \end{center} + \begin{displaymath} + \beta^* = \arg\min_\beta \sum_i |y_i-\beta^Tx_i|^2 + \end{displaymath} + \end{overprint} + \end{center} \end{frame} -\begin{frame}{Evaluation criterion} - \begin{block}{} - \alert{Question:} How to measure the helpfulness of a set of users $S$ in learning $\beta$? - \end{block} - - \vspace{0.5cm} - Bayesian approach: +\begin{frame}{Challenges} \begin{itemize} - \item \emph{a priori} knowledge: prior distribution over $\beta$ - \item the entropy of $\beta$, $H(\beta)$ measures the uncertainty of the experimenter - \item after observing $Y_S = \{y_i,\; i\in S\}$, uncertainty becomes $H(\beta\mid Y_S)$ + \item Value of data? + \vspace{1cm} + \pause + \item How to optimize it? + \vspace{1cm} + \pause + \item Strategic users? \end{itemize} - - \vspace{0.5cm} - - The value of $S$ is the \alert{entropy reduction}: - \begin{displaymath} - V(S) = H(\beta) - H(\beta\mid Y_S) - \end{displaymath} \end{frame} -\begin{frame}{Value function} - Gaussian linear model: - \begin{displaymath} - y_i = \beta^T x_i + \varepsilon_i,\quad\beta\in\mathbb{R}^d,\; - \varepsilon_i\sim\mathcal{N}(0,\sigma^2) - \end{displaymath} - - Typical prior distribution $\beta\sim\mathcal{N}(0,\tau I_d)$ - - \vspace{0.5cm} - - \begin{block}{} - Under the Gaussian linear model and Gaussian prior distribution: - \begin{displaymath} - V(S) = \log\det\Big(I_d + \lambda\sum_{i\in S}x_i x_i^T \Big) - \end{displaymath} - \end{block} - - \vspace{0.5cm} - - \alert{Remark:} $V$ can be computed efficiently using a Cholesky decomposition +\begin{frame}{Contributions} + \begin{itemize} + \item case of the linear regression + \vspace{1cm} + \pause + \item deterministic mechanism + \vspace{1cm} + \pause + \item generalization (randomized mechanism) + \end{itemize} \end{frame} -\section{Non-strategic case} -\begin{frame}{Optimization Problem} - \begin{block}{P1} - \begin{align*} - &\text{maximize}\; V(S) = \log\det\Big(I_d + \lambda\sum_{i\in S}x_i x_i^T \Big)\\ - &\text{subject to}\; \sum_{i\in S}c_i\leq B\quad\text{(Knapsack constraint)} - \end{align*} - \end{block} +\section{Budget feasible mechanism design} - \onslide<2-> - Good news: - - $V$ is submodular (diminishing return): - \begin{displaymath} - \forall S\subseteq T\, \forall i,\; V(S\cup\{i\}) - V(S)\geq - V(T\cup\{i\}) - V(T) - \end{displaymath} - - \onslide<3-> - Bad news: - - Maximizing a submodular function under a knapsack constraint is NP-hard (even - in the specific case of $\log\det$) +\begin{frame}{Outline} +\tableofcontents \end{frame} -\begin{frame}{Approximation} - \begin{definition}[Approximation ratio] - Let $OPT$ denote the optimal solution to P1, $S$ is an $\alpha$-approximation - if: - \begin{displaymath} - V(OPT)\leq \alpha V(S) - \end{displaymath} - \end{definition} - - \vspace{0.5cm} - - \begin{theorem}[Sviridenko] - \begin{enumerate} - \item There is an algorithm which returns a set $S$ which is a $\frac{e}{e-1}$-approximation - \item No algorithm can do better unless P=NP - \end{enumerate} - \end{theorem} - - \vspace{0.5cm} - - The algorithm is a combination of an enumeration method and a greedy heuristic. +\begin{frame}{Outline} + \tableofcontents[currentsection] \end{frame} -\begin{frame}{Greedy algorithm} - \begin{block}{Greedy($V$, $B$)} - \begin{algorithmic} - \State $i \gets \argmax_{1\leq j\leq n}\frac{V(j)}{c_j}$ - \State $S \gets \emptyset$ - \While{$c_i\leq B - \sum_{j\in S} c_j$} - \State $S \gets S\cup\{i\}$ - \State $i \gets \argmax_{1\leq j\leq n} - \frac{V(S\cup\{j\})-V(S)}{c_j}$ - \EndWhile - \State \textbf{return} $S$ - \end{algorithmic} - \end{block} +\begin{frame}{Reverse auction} +\begin{itemize} + \item set of $N$ sellers $\mathcal{A} = \{1,\ldots,N\}$ and a buyer + \vspace{0.3cm} + \pause + \item $V$ value function of the buyer, $V:2^\mathcal{A}\rightarrow \mathbb{R}^+$ + \vspace{0.3cm} + \pause + \item $c_i\in\mathbb{R}^+$ price of seller's $i$ good + \vspace{0.3cm} + \pause + \item $B$ budget constraint of the buyer +\end{itemize} - \vspace{0.5cm} +\vspace{0.5cm} +\pause - \begin{lemma} - Greedy has an \alert{unbounded} approximation ratio. - \end{lemma} +\begin{block}{Goal} + \begin{itemize} + \item Find $S\subset \mathcal{A}$ \alert{maximizing} $V(S)$ + \vspace{0.3cm} + \item Find \alert{payment} $p_i$ to seller $i$ + \end{itemize} +\end{block} \end{frame} -\begin{frame} - \begin{block}{MaxGreedy} - \begin{algorithmic} - \State $i^* \gets \argmax_{1\leq j\leq n} V(j)$ - \State $S \gets$ Greedy($V$, $B$) - \If{$V(i^*)\geq V(S)$} - \State \textbf{return} $\{i^*\}$ - \Else - \State \textbf{return} $S$ - \EndIf - \end{algorithmic} - \end{block} +\begin{frame}{Objectives} + Payments $(p_i)_{i\in S}$ must be: + \pause + \vspace{0.3cm} + \begin{itemize} + \item individually rational: $p_i\geq c_i,\;i\in S$ + \vspace{0.2cm} + \pause + \item truthful: reporting one's true cost is a dominant strategy + \vspace{0.2cm} + \pause + \item budget feasible: $\sum_{i\in S} p_i \leq B$ + \vspace{0.2cm} + \end{itemize} - \vspace{0.5cm} + \pause + \vspace{0.3cm} - \begin{lemma} - MaxGreedy is a $\frac{5e}{e-1}$ approximation - \end{lemma} + Mechanism must be: + \vspace{0.3cm} + \pause + \begin{itemize} + \item computationally efficient: polynomial time + \pause + \vspace{0.2cm} + \item good approximation: $V(OPT) \leq \alpha V(S)$ with: + \begin{displaymath} + OPT = \arg\max_{S\subset \mathcal{A}} \left\{V(S)\mid \sum_{i\in S}c_i\leq B\right\} + \end{displaymath} + \end{itemize} \end{frame} -\section{Strategic case} - -\begin{frame}{Reverse auction} +\begin{frame}{Known results} + When $V$ is submodular: + \vspace{1cm} + \pause + \begin{itemize} + \item \alert{randomized} budget feasible mechanism, + approximation ratio: $7.91$ (Chen et al., 2011) + \vspace{1cm} + \pause + \item \alert{deterministic} mechanisms for: + \vspace{0.3cm} + \begin{itemize} + \item Knapsack: $2+\sqrt{2}$ (Chen et al., 2011) + \vspace{0.3cm} + \item Matching: 7.37 (Singer, 2010) + \vspace{0.3cm} + \item Coverage: 31 (Singer, 2012) + \vspace{0.3cm} + \end{itemize} + \end{itemize} +\end{frame} - \alert{Problem:} In real life, user are strategic agents. They lie about their costs. - This is a \alert{reverse auction}: the experimenter wants to buy data - from users. +\section{Budget feasible mechanism for linear regression} - \vspace{0.5cm} +\begin{frame}{Outline} + \tableofcontents[currentsection] +\end{frame} - \alert{Goal:} Design an auction mechanism: +\begin{frame}{Linear regression} + Each user $i\in\mathcal{A}$ has: \begin{itemize} - \item a selection rule $s: (c_1,\ldots,c_n) \mapsto (s_1,\ldots,s_n)$, - $s_i\in\{0,1\}$ - \item a payment rule $p: (c_1,\ldots,c_n) \mapsto (p_1,\ldots,p_n)$, - $p_i\in\mathbb{R}^+$ + \item Public feature vector $x_i\in\mathbb{R}^d$ + \item Private data $y_i\in\mathbb{R}$ \end{itemize} + \pause \vspace{0.5cm} - As usual in auction mechanism design, we want payments to be: - \begin{itemize} - \item individually rational: $p_i\geq c_i$ - \item truthful: reporting one's true cost is a dominant strategy - \item normalized: $s_i=0$ implies $p_i=0$ - \end{itemize} -\end{frame} + Gaussian linear model: + \begin{displaymath} + y_i = \beta^T x_i + \varepsilon_i,\quad\beta\in\mathbb{R}^d,\; + \varepsilon_i\sim\mathcal{N}(0,\sigma^2) + \end{displaymath} -\begin{frame}{Optimization problem for budget feasible auctions} - \begin{block}{P2} - \begin{align*} - &\text{maximize}\; V\big(s(c_1,\ldots,c_n)\big) = \log\det\Big(I_d + \lambda\sum_{i\in S}x_i x_i^T \Big)\\ - &\text{subject to}\; \sum_{i\in S}\alert{p_i}\leq B\quad\text{(budget feasible)} - \end{align*} - where the payments are further constrained to be individually rational, truthful - and normalized - \end{block} + \pause + \vspace{0.5cm} - \vspace{1cm} + Which users to select?\pause{} Experimental design $\Rightarrow$ D-optimal criterion - This is again NP-hard $\Rightarrow$ we seek good approximation ratios + \vspace{0.5cm} + + \begin{block}{Experimental Design} + \begin{displaymath} + \textsf{\alert{maximize}}\quad V(S) = \log\det\left(I_d + \sum_{i\in S} x_i x_i^T\right),\quad S\subset\mathcal{A} + \end{displaymath} + \end{block} \end{frame} -\begin{frame}{Myerson's theorem} - \begin{theorem}[Myerson] - Let $(s, p)$ be a mechanism. If: - \begin{itemize} - \item $s$ is \alert{monotonic}: if $c_i\leq c_i'$ then $s_i \geq s_i'$ - \item users are paid \alert{threshold payments}: $p_i = \inf\{c_i':i\notin s(c_i',c_{-i})\}$ - \end{itemize} - Then, the mechanism is individually rational and truthful. - \end{theorem} +\begin{frame}{Experimental design} + \begin{displaymath} + \textsf{\alert{maximize}}\quad V(S) = \log\det\left(I_d + \sum_{i\in S} x_i x_i^T\right),\quad S\subset\mathcal{A} + \end{displaymath} \vspace{1cm} - \alert{Consequence:} It suffices to find a monotonic selection rule such that - the threshold payments are within the budget constraint. + \begin{itemize} + \item the non-strategic optimization problem is NP-hard + \vspace{0.3cm} + \pause + \item $V$ is submodular + \vspace{0.3cm} + \pause + \item previous results give a randomized budget feasible mechanism + \vspace{0.3cm} + \pause + \item deterministic mechanism? + \end{itemize} \end{frame} \begin{frame}{Main result} \begin{theorem} - There is a budget feasible, individually rational and truthful mechanism for - the experimental design problem which runs in polynomial time. Its + There exists a budget feasible, individually rational and truthful mechanism for + experimental design which runs in polynomial time. Its approximation ratio is: \begin{displaymath} \frac{10e-3 + \sqrt{64e^2-24e + 9}}{2(e-1)} @@ -257,80 +242,102 @@ \end{theorem} \end{frame} - -\begin{frame}{Greedy revisited} - \begin{block}{Greedy\only<2>{\alert{'}}($V$, $B$)} - \begin{algorithmic} - \State $i \gets \argmax_{1\leq j\leq n}\frac{V(j)}{c_j}$ - \State $S \gets \emptyset$ - \While{$c_i\leq \only<1>{B - \sum_{j\in S} c_j} \only<2->{\alert{\frac{B}{2}\frac{V(S\cup\{i\})-V(S)}{V(S\cup\{i\})}}}$} - \State $S \gets S\cup\{i\}$ - \State $i \gets \argmax_{1\leq j\leq n} - \frac{V(S\cup\{j\})-V(S)}{c_j}$ - \EndWhile - \State \textbf{return} $S$ - \end{algorithmic} +\begin{frame}{Sketch of proof} + \begin{block}{Mechanism (Chen et. al, 2011)} + \begin{itemize} + \item Find $i^* = \arg\max_i V(\{i\})$ + \item Compute $S_G$ greedily + \item Return: + \begin{itemize} + \item $\{i^*\}$ if $V(\{i^*\}) \geq \only<1-2>{V(OPT_{-i^*})}\only<3>{\alert{V(OPT_{-i^*})}}\only<4->{\alert{L^*}}$ + \item $S_G$ otherwise + \end{itemize} + \end{itemize} \end{block} - \vspace{1cm} - - \alert{Problem:} The threshold payments are not budget feasible. -\end{frame} + \vspace{0.5cm} + \pause -\begin{frame}{MaxGreedy revisited} - \begin{block}{MaxGreedy} - \begin{algorithmic} - \State $i^* \gets \argmax_{1\leq j\leq n} V(j)$ - \State $S \gets$ Greedy\alert{'}($V$, $B$) - \If{$V(i^*)\geq \only<1>{V(S)}\only<2,3>{\alert{OPT_{-i^*}}}\only<4>{\alert{OPT'_{i^*}}}$} - \State \textbf{return} $\{i^*\}$ - \Else - \State \textbf{return} $S$ - \EndIf - \end{algorithmic} - \end{block} + Valid mechanism, approximation ratio: 8.34 \vspace{0.5cm} + \pause - \only<1>{\alert{Problem:} MaxGreedy is not monotonic} + \alert{Problem:} $OPT_{-i^*}$ is NP-hard to compute - \only<2,3>{\alert{Solution 1:} use $OPT_{-i^*} = \max_{S, i^*\notin S}\{V(S) \mid \sum_{j\in S} c_j\}$. Gives a 8.34 approximation} + \vspace{0.5cm} + \pause - \only<3>{\alert{Problem:} $OPT_{-i^*}$ cannot be computed in polynomial time} + \alert{Solution:} Replace $V(OPT_{-i^*})$ with $L^*$: + \pause + \begin{itemize} + \item computable in polynomial time + \pause + \item close to $V(OPT_{-i^*})$ + \end{itemize} +\end{frame} - \only<4->{\alert{Solution 2:} use a concave relaxation: +\begin{frame}{Sketch of proof (2)} \begin{displaymath} - OPT'_{i^*} = \max_{\lambda\in[0, 1]^n, \lambda_{i^*} = 0} L(\lambda) - = \max_{\lambda\in[0, 1]^n, \lambda_{i^*} = 0}\log\det\big(I_d + \sum_{i=1}^n \lambda_i x_ix_i^T\big) + L^* = \arg\max_{\lambda\in [0,1]^n} \left\{\log\det\left(I_d + \sum_i \lambda_i x_i x_i^T\right)\mid \sum_{i=1}^n\lambda_i c_i\leq B\right\} \end{displaymath} - } + \vspace{1cm} + \begin{itemize} + \item polynomial time?\pause{} convex optimization problem + \pause + \vspace{0.5cm} + \item close to $V(OPT_{-i^*})$?\pause + \vspace{0.5cm} + \begin{block}{Technical lemma} + \begin{displaymath} + L^* \leq 2 V(OPT) + V(\{i^*\}) + \end{displaymath} + \end{block} + \end{itemize} \end{frame} -\begin{frame}{Proof} -\begin{itemize} - \item Monotonicity: OK - \pause - \item Budget feasibility: OK - \pause - \item Approximation ratio: what do we lose by using a concave relaxation? Powerful - method, \alert{pipage rounding} [Ageev, Sviridenko] +\section{Generalization} + +\begin{frame}{Outline} + \tableofcontents[currentsection] +\end{frame} + +\begin{frame}{Generalization} \begin{itemize} - \item let's introduce $F(\lambda) = \mathbb{E}\Big[\log\det(I_d+\sum_{i=1}^n \mu_ix_ix_i^T)\Big]$, $\mu_i\sim\mathcal{B}(\lambda_i)$ - \item $L(\lambda) \leq 2 F(\lambda)$ (technical lemma) - \item $F$ is well-behaved: $\lambda$ can be round to a binary vector $\bar{\lambda}$ s.t $F(\lambda)\leq F(\bar{\lambda})$ - \item $OPT'_{-i^*}\leq 2 OPT_{-i^*} + V(i^*)$ - \item we can conclude by using the approximation ratio for $OPT_{-i^*}$ + \item Generative model: $y_i = f(x_i) + \varepsilon_i,\;i\in\mathcal{A}$ + \pause + \item prior knowledge of the experimenter: $f$ is a \alert{random variable} + \pause + \item \alert{uncertainty} of the experimenter: entropy $H(f)$ + \pause + \item after observing $\{y_i,\; i\in S\}$, uncertainty: $H(f\mid S)$ \end{itemize} -\end{itemize} + + \pause + \vspace{0.5cm} + + \begin{block}{Value function: entropy reduction} + \begin{displaymath} + V(S) = H(f) - H(f\mid Y_S),\quad S\subset\mathcal{A} + \end{displaymath} + \end{block} + + \pause + \vspace{0.5cm} + + $V$ is \alert{submodular} $\Rightarrow$ randomized budget feasible mechanism \end{frame} + \begin{frame}{Conclusion} \begin{itemize} - \item Experimental design + Auction theory = powerful framework when working with user data + \item Experimental design + Auction theory = powerful framework \vspace{1cm} - \item Can we extend this to learning tasks beyond linear regression? + \pause + \item deterministic mechanism for the general case? other learning tasks? \vspace{1cm} - \item $\simeq \alert{13}$ approximation ratio. Lower bound: \alert{$2$} + \pause + \item approximation ratio $\simeq \alert{13}$. Lower bound: \alert{$2$} \end{itemize} \end{frame} \end{document} |
