summaryrefslogtreecommitdiffstats
path: root/slides/BudgetFeasibleExperimentalDesign.tex
diff options
context:
space:
mode:
Diffstat (limited to 'slides/BudgetFeasibleExperimentalDesign.tex')
-rw-r--r--slides/BudgetFeasibleExperimentalDesign.tex389
1 files changed, 294 insertions, 95 deletions
diff --git a/slides/BudgetFeasibleExperimentalDesign.tex b/slides/BudgetFeasibleExperimentalDesign.tex
index 04e2f8e..ca60e04 100644
--- a/slides/BudgetFeasibleExperimentalDesign.tex
+++ b/slides/BudgetFeasibleExperimentalDesign.tex
@@ -27,6 +27,7 @@
\newcommand{\ie}{\emph{i.e.}}
\newcommand{\eg}{\emph{e.g.}}
\newcommand{\etc}{\emph{etc.}}
+\newcommand{\etal}{\emph{et al.}}
\newcommand{\reals}{\ensuremath{\mathbb{R}}}
\usefonttheme[onlymath]{serif}
@@ -83,9 +84,14 @@
\item Linear Regression
\pause
\begin{itemize}
- \item \emph{Deterministic}, truthful, budget feasible, 12.98-appriximate mechanism.
+ \item We present a \alert{deterministic, poly-time}, truthful, budget feasible, 12.98-appriximate mechanism.
\pause
- \item Singer 2010, Chen 2011:\\ \emph{Randomized}, universaly truthful, budget feasible, 7.91-approximate mechanism.
+ \vspace*{0.5cm}
+ \item Previous results:
+ \begin{itemize}
+ \item \alert{Randomized}, poly-time, universally truthful, 7.91-approximate mechanism. [Singer 2010, Chen 2011]
+ \item Deterministic, \alert{exponential time}, truthful mechanism. [Chen 2011]
+ \end{itemize}
\end{itemize}
\pause
\vspace*{1cm}
@@ -104,21 +110,23 @@
\end{frame}
-\begin{frame}{Experimental Design}
+\begin{frame}{Formulating the Problem}
\begin{center}
\includegraphics<1>[scale=0.4]{st2.pdf}
\includegraphics<2>[scale=0.4]{st3.pdf}
\includegraphics<3>[scale=0.4]{st4.pdf}
\includegraphics<4>[scale=0.4]{st5.pdf}
\includegraphics<5>[scale=0.4]{st6.pdf}
- \includegraphics<6>[scale=0.4]{st7.pdf}
- \includegraphics<7>[scale=0.4]{st8.pdf}
- \includegraphics<8>[scale=0.4]{st9.pdf}
+ \includegraphics<6>[scale=0.4]{st6a.pdf}
+ \includegraphics<7>[scale=0.4]{st6b.pdf}
+ \includegraphics<8>[scale=0.4]{st6c.pdf}
+ \includegraphics<9>[scale=0.4]{st6d.pdf}
+ \includegraphics<10->[scale=0.4]{st6e.pdf}
\end{center}
\begin{center}
\begin{overprint}
- \onslide<1>\begin{center}$N$ users (``experiment subjects'') \end{center}
+ \onslide<1>\begin{center}$N$ users (``experiment subjects''), $[N]\equiv \{1,\ldots,N\}$ \end{center}
\onslide<2>
\begin{center}
$x_i\in \reals^d$: public features (\eg, age, gender, height, \etc)
@@ -132,56 +140,157 @@
\textbf{Gaussian Linear Model.} There exists $\beta\in \reals^d$ s.t.
\begin{displaymath}
y_i = \beta^T x_i + \varepsilon_i,\quad
- \varepsilon_i\sim\mathcal{N}(0,\sigma^2), \quad i=1,\ldots,N
+ \varepsilon_i\sim\mathcal{N}(0,\sigma^2), \quad i\in [N]
\end{displaymath}
%$$y_i = \beta^Tx_i + \varepsilon_i, i=1,\ldots, N$$
%\end{center}
- \onslide<5-7>
+ \onslide<5>
\begin{center}
- Experimenter {\tt E} wishes to learn $\beta$ and can perform at most $k$ experiments.
+ Experimenter {\tt E} wishes to learn $\beta$.
\end{center}
- \onslide<8>
- \begin{center}{\tt E} computes estimate $\hat{\beta}$ of $\beta$ through ridge regression.
+ \onslide<6>\begin{center}
+ Each subject $i\in [N]$ has a cost $c_i\in \reals_+$
+ \end{center}
+ \onslide<7>
+ \begin{center}
+ \E\ has a budget $B$.
+ \end{center}
+ \onslide<8-9>
+ \begin{center}
+ \E\ pays subjects\visible<9>{; $y_i$ is revealed only if $i$ is paid.}
+ \end{center}
+ \onslide<10>
+ \begin{center}{\tt E} estimates ${\beta}$ through \emph{ridge regression}.
+ \end{center}
+ \onslide<11>
+ \begin{center}
+%\begin{enumerate}
+Goal: Determine (a) who to pay and (b) how much,\\ so that $\hat{\beta}$ is as accurate as possible.
+%\begin{align*}
+% \text{Determine (a) who to pay:}\quad &S\subseteq [N], \text{ and}\\
+% \text{(b)~ how much:}\quad &p_i\in \reals_+,~~i\in S.
+%\end{align*}
+%\end{enumerate}
+ \end{center}
+ \onslide<12->
+ \begin{center}Users are \alert{strategic}: the may lie about costs $c_i$\visible<13>{\\ Users \alert{cannot manipulate $x_i$ or $y_i$}.} %\\(tamper-proof experiments)
\end{center}
+
\end{overprint}
\end{center}
\end{frame}
-\begin{frame}{Linear (Ridge) Regression}
-%There exists $\beta\in \reals^d$ s.t.
-% \begin{displaymath}
-% y_i = \beta^T x_i + \varepsilon_i,\quad
-% \varepsilon_i\sim\mathcal{N}(0,\sigma^2), \quad i=1,\ldots,N
-% \end{displaymath}
-\visible<1->{Let $S\subset [N]\equiv \{1,\ldots,N\}$ be the set of experiments performed.\\\medskip}
-\visible<2->{Assume that \E\ has a \emph{prior} on $\beta$:
+\begin{frame}{Budget Feasible Mechanism Design [Singer 2010]}
+Let $S\subset [N]$ be the set of experiments performed, and
+ $c=[c_i]_{i\in [N]}$ be the subject costs. \\\bigskip
+\visible<2->{Let $V(S)\in\reals_+$ denote the \alert{value} of the experiments.}
+\visible<3->{$$\text{High }V(S) \Leftrightarrow \text{ estimate }\hat{\beta}(S) \text{ is close to }\beta $$}
+\visible<4->{
+%\item<1->Fix budget $B$ and value function $V:2^{[N]}\to\reals_+$
+\begin{block}{Reverse Auction Mechanism}
+ A mechanism $\mathcal{M}(c)=(S(c),p(c))$ comprises
+\begin{itemize}
+\item an \alert{allocation function} $S:\reals_+^N\to 2^{[N]}$, and
+\item a \alert{payment function} $p:\reals_+^N\to \reals_+^N$.
+\end{itemize}
+\end{block}
+}
+\end{frame}
+
+
+\begin{frame}{Budget Feasible Mechanism Design [Singer 2010] (cont'd)}
+ We seek mechanisms $\mathcal{M}=(S,p)$ that are:
+ \pause
+ \vspace{0.3cm}
+ \begin{itemize}
+ \item Normalized: $p_i=0$ if $i\notin S$.
+ \vspace{0.2cm}
+ \pause
+ \item Individually Rational: $p_i\geq c_i,\;i\in S$
+ \vspace{0.2cm}
+ \pause
+ \item Truthful: $p_i(c_i,c_{-i})-\mathbbm{1}_{i\in S(c_i,c_{-i})}\cdot c_i \geq p_i(c_i',c_{-i})-\mathbbm{1}_{i\in S(c_i',c_{-i})}\cdot c_i $
+ \vspace{0.2cm}
+ \pause
+ \item budget feasible: $\sum_{i\in S} p_i \leq B$
+ \vspace{0.2cm}
+ \end{itemize}
+
+ \pause
+ \vspace{0.3cm}
+
+ In addition, $\mathcal{M}$ must be:
+ \vspace{0.3cm}
+ \pause
+ \begin{itemize}
+ \item computationally efficient: polynomial time
+ \pause
+ \vspace{0.2cm}
+ \item constant approximation: $V(OPT) \leq \alpha V(S)$ with:
+ \begin{displaymath}
+ OPT = \arg\max_{S\subset \mathcal{A}} \left\{V(S)\mid \sum_{i\in S}c_i\leq B\right\}
+ \end{displaymath}
+ \end{itemize}
+\end{frame}
+
+
+
+
+\begin{frame}{Which Experiments Are Informative?}
+
+\alt<1>{What is the appropriate value function $V$?} {Let $S\subset [N]$ be the set of experiments performed.\\\medskip}
+%\end{overprint}
+\visible<3->{Assume that \E\ has a \emph{prior} on $\beta$:
$$\beta \sim \mathcal{N}(0,\sigma^2 R). $$}
-\visible<3->{Maximum a-posteriori estimation:
+\visible<4->{Maximum a-posteriori estimation:
\begin{align*}
\hat{\beta} &= \argmax_{\beta\in\reals^d} \mathbf{P}(\beta\mid y_i, i\in S) \\
&=\argmin_{\beta\in\reals^d} \big(\sum_{i\in S} (y_i - {\beta}^Tx_i)^2
- + \only<3-4>{\alert<4->{{\beta}^TR^{-1}\beta}\big)} \only<5>{\alert{\|\beta\|_2^2}\big)~~~~~}
+ + \only<4-5>{\alert<5->{{\beta}^TR^{-1}\beta}\big)} \only<6>{\alert{\|\beta\|_2^2}\big)~~~~~}
%= (R+{X_S}^TX_S)^{-1}X_S^Ty_S
\end{align*}
}
-\visible<4->{\hspace*{3in}\alert{Ridge Regression}\\}
-\visible<5>{\hspace*{3in}$R=I$: homotropic prior}
+\visible<5->{\hspace*{3in}\alert{Ridge Regression}\\}
+\visible<6>{\hspace*{2.8in}\alert{$R=I$: homotropic prior}}
\end{frame}
-\begin{frame}{Value Function}
-Select $S\subset [N]$, s.t. $|S|\leq k$.
-\\\bigskip
+\begin{frame}{Which Experiments Are Informative? (contd.)}
+Let $S\subset [N]$ be the set of experiments performed.\\\bigskip
+
\visible<2->{
Information Gain/D-optimality Criterion:
\begin{align*}
V(S)&=H(\beta)-H(\beta\mid y_i,i\in S)\\
-\visible<3->{& = \frac{1}{2}\log\det (R^{-1}+X_S^TX_S)}
+\visible<3->{& = \frac{1}{2}\log\det (R^{-1}+\sum_{i\in S}x_ix_i^T)}
\end{align*}
-\visible<3->{where $$X_S=[x_i^T]_{i\in S}\in \reals^{|S|\times d}$$}
+
+\visible<4->{ \emph{I.e.}: $$\alert{\text{Value of~}S=\text{Reduction of Uncertainty.}} $$}
+\vspace*{-0.7cm}
}
\end{frame}
+\begin{frame}{Full Information Setting}
+\onslide<2->{
+\begin{block}{\textsc{Experimental Design Problem (EDP)} }
+\vspace*{-0.4cm}
+\begin{align*}
+ \text{Maximize}\qquad &V(S) = \log \det (\alert<3>{I}+\sum_{i\in S}x_ix_i^T) \\
+ \text{subj. to}\qquad &\textstyle\sum_{i\in S}c_i\leq B
+\end{align*}
+\end{block}
+}
+\begin{itemize}
+\item<3->$R=I$: homotropic prior
+\item<4-> EDP is NP-hard
+\item<5-> $V$ is submodular, monotone, non-negative, and $V(\emptyset)=0$
+\item<6-> $\frac{1}{1-1/e}$-approximable (Sviridenko 2004, Krause and Guestrin 2005)
+\end{itemize}
+
+
+
+\end{frame}
+
%\begin{frame}{Classic Experimental Design}
%\begin{center}
%\includegraphics[scale=0.3]{st10.pdf}
@@ -192,7 +301,7 @@ V(S)&=H(\beta)-H(\beta\mid y_i,i\in S)\\
%\end{align*}
%\end{frame}
-
+\begin{comment}
\begin{frame}{Cardinality vs. Budget Constraint}
\begin{center}
\includegraphics<1>[scale=0.3]{st10a.pdf}
@@ -209,17 +318,6 @@ V(S)&=H(\beta)-H(\beta\mid y_i,i\in S)\\
\text{subj. to}& & |S|&\leq k
\end{align*}
- \onslide<2>\begin{center}
- Each subject $i\in [N]$ has a cost $c_i\in \reals_+$
- \end{center}
- \onslide<3>
- \begin{center}
- \E\ has a budget $B$.
- \end{center}
- \onslide<4-5>
- \begin{center}
- \E\ can pay subjects\visible<5>{; $y_i$ is revealed only if $i$ is paid.}
- \end{center}
\onslide<6->
\begin{block}{\textsc{Experimental Design Problem (EDP)} }
\vspace*{-0.4cm}
@@ -306,53 +404,22 @@ Subjects \alert{do not lie} about $y_i$ (tamper-proof experiments).}
% \end{itemize}
%\end{block}
%\end{frame}
+\end{comment}
-\begin{frame}{Objectives}
- Mechanism $\mathcal{M}=(S,p)$ must be:
- \pause
- \vspace{0.3cm}
- \begin{itemize}
- \item Normalized: $p_i=0$ if $i\notin S$.
- \vspace{0.2cm}
- \pause
- \item Individually Rational: $p_i\geq c_i,\;i\in S$
- \vspace{0.2cm}
- \pause
- \item Truthful: $p_i(c_i,c_{-i})-\mathbbm{1}_{i\in S(c_i,c_{-i})}\cdot c_i \geq p_i(c_i',c_{-i})-\mathbbm{1}_{i\in S(c_i',c_{-i})}\cdot c_i $
- \vspace{0.2cm}
- \pause
- \item budget feasible: $\sum_{i\in S} p_i \leq B$
- \vspace{0.2cm}
- \end{itemize}
-
- \pause
- \vspace{0.3cm}
-
- Mechanism must be:
- \vspace{0.3cm}
- \pause
- \begin{itemize}
- \item computationally efficient: polynomial time
- \pause
- \vspace{0.2cm}
- \item constant approximation: $V(OPT) \leq \alpha V(S)$ with:
- \begin{displaymath}
- OPT = \arg\max_{S\subset \mathcal{A}} \left\{V(S)\mid \sum_{i\in S}c_i\leq B\right\}
- \end{displaymath}
- \end{itemize}
-\end{frame}
-
-
-\begin{frame}{Known Results}
+\begin{frame}{Strategic Subjects}
When $V$ is submodular:
\vspace{1cm}
\pause
\begin{itemize}
- \item \alert{randomized}, universally truthful, budget feasible mechanism,
+ \item \alert{Randomized}, poly-time, universally truthful mechanism,
approximation ratio: $7.91$ [Singer 2010, Chen \emph{et al.}, 2011]
- \vspace{1cm}
+ \vspace{0.5cm}
\pause
- \item \alert{deterministic} mechanisms for specific submodular $V$ functions:
+ \item Deterministic, \alert{exponential time}, truthful mechanism,
+ approximation ratio: $8.34$ [Singer 2010, Chen \emph{et al.}, 2011]
+ \vspace{0.5cm}
+ \pause
+ \item \alert{Deterministic, poly-time}, truthful mechanisms for specific submodular functions $V$ :
\vspace{0.3cm}
\begin{itemize}
\item Knapsack: $2+\sqrt{2}$ [Singer 2010, Chen \emph{et al.}, 2011]
@@ -453,7 +520,7 @@ Subjects \alert{do not lie} about $y_i$ (tamper-proof experiments).}
\begin{frame}{Main result}
\end{comment}
-\section{Main Result}
+\section{Main Results}
\begin{frame}{Outline}
@@ -461,41 +528,173 @@ Subjects \alert{do not lie} about $y_i$ (tamper-proof experiments).}
\end{frame}
-\begin{frame}{Our Main Result}
- \begin{theorem}
+\begin{frame}{Main Results}
+ \begin{theorem}<2->
There exists deterministic, budget feasible, individually rational and truthful mechanism for
EDP which runs in polynomial time. Its
approximation ratio is:
\begin{displaymath}
- \frac{10e-3 + \sqrt{64e^2-24e + 9}}{2(e-1)}+\varepsilon
- \simeq 12.98 +\varepsilon
+ \frac{10e-3 + \sqrt{64e^2-24e + 9}}{2(e-1)}
+ \simeq 12.98
\end{displaymath}
\end{theorem}
+ \begin{theorem}<3->
+ There is no 2-approximate, budget feasible, individually rational and truthful mechanism for
+ EDP. \end{theorem}
+
+\end{frame}
+
+\begin{frame}{Myerson's Theorem}
+\begin{theorem}[Myerson 1981]
+ A normalized mechanism $\mathcal{M} = (S,p)$ for a single parameter auction is
+truthful iff:
+\begin{enumerate}
+\item<2->
+ $S$ is \alert{monotone}, \emph{i.e.},%\\ for any agent $i$ and $c_i' \leq c_i$, for any fixed costs $c_{-i}$ of agents in $[N]\setminus\{i\}$,
+ $$\text{for all}~c_i' \leq c_i,\quad i\in S(c_i,
+c_{-i})\text{ implies }i\in S(c_i', c_{-i}),$$ and
+\item<3->
+ subjects are paid \alert{threshold payments}, \emph{i.e.}, $$\text{for all }i\in S(c), \qquad p_i(c)=\inf\{c_i': i\in S(c_i', c_{-i})\}.$$
+\end{enumerate}
+\end{theorem}
+\uncover<4->{Focus on \alert{monotone} mechanisms.}
+
+\end{frame}
+
+\begin{frame}{A Greedy Construction for Submodular $V$}
+Construct $S_G$ by adding elements with \emph{highest marginal-value-per-cost}.\\
+\bigskip
+\visible<2->{If $S_G=S$ so far, add to it:
+\begin{align*}
+ i = \argmax_{j\in\mathcal{N}\setminus S}\frac{V(S\cup\{i\}) - V(S)}{c_i}\label{greedy}
+\end{align*}
+Stop when budget $B$ is reached.
+}
+
+\end{frame}
+
+\begin{frame}{A Randomized Mechanism for Submodular $V$ [Singer 2010]}
+ \begin{block}{Allocation $S$:}
+ \begin{itemize}
+ \item Let $i^* = \arg\max_i V(\{i\})$
+ \item Compute $S_G$ greedily over $[N]\setminus i^*$
+ \item Return $S$:
+ \begin{itemize}
+ \item Selected at random between $i^*$ and $S_G$
+ \end{itemize}
+ \end{itemize}
+ \end{block}
+ \vspace{0.5cm}
+\uncover<2->{
+[Singer 2010] Along with threshold payments, this mechanism is:
+\begin{itemize}
+\item poly-time,
+\item universally truthful,
+\item budget-feasible
+\item 7.91-approximate [Chen \etal\ 2011].
+\end{itemize}
+}
\end{frame}
-\begin{frame}{Sketch of proof}
- \begin{block}{Mechanism (Chen et. al, 2011) for submodular $V$}
+\begin{frame}{A Deterministic Mechanism for Submodular $V$ [Singer 2010]}
+ \begin{block}{Allocation $S$}
\begin{itemize}
\item Find $i^* = \arg\max_i V(\{i\})$
\item Compute $S_G$ greedily
\item Return:
\begin{itemize}
- \item $\{i^*\}$ if $V(\{i^*\}) \geq \only<1-2>{V(OPT_{-i^*})}\only<3>{\alert{V(OPT_{-i^*})}}\only<4->{\alert{L^*}}$
+ \item $\{i^*\}$ if $V(\{i^*\}) \geq \alert<3>{V(OPT_{-i^*})}$
\item $S_G$ otherwise
\end{itemize}
\end{itemize}
\end{block}
-
+ \vspace{0.5cm}
+\uncover<2->{
+[Singer 2010] Along with threshold payments, this mechanism is:
+\begin{itemize}
+\item truthful,
+\item budget-feasible
+\item 8.34-approximate [Chen \etal\ 2011].
+\end{itemize}
\vspace{0.5cm}
\pause
+ \alert{Problem:} $OPT_{-i^*}$ is NP-hard to compute
+}
+\end{frame}
- Valid mechanism, approximation ratio: 8.34
+\begin{frame}{Blueprint for Deterministic, Poly-time Algorithm}
+Azar and Gamzu 2008, Singer 2010, Chen \etal\ 2011:
+ \begin{block}{Allocation $S$}
+ \begin{itemize}
+ \item Find $i^* = \arg\max_i V(\{i\})$
+ \item Compute $S_G$ greedily
+ \item Return:
+ \begin{itemize}
+ \item $\{i^*\}$ if $V(\{i^*\}) \geq \alert{L^*}$
+ \item $S_G$ otherwise
+ \end{itemize}
+ \end{itemize}
+ \end{block}
\vspace{0.5cm}
- \pause
+ \begin{columns}[c]
+ \begin{column}{0.53\textwidth}
+Key idea:\\ Replace $OPT$ with a \alert{relaxation $L^*$}:\pause
+ \begin{itemize}
+ \item computable in polynomial time
+ \pause
+ \item close to $V(OPT_{-i^*})$
+ \pause
+ \item monotone in costs $c$
+ \end{itemize}
+ \end{column}
+ \pause
+ \begin{column}{0.45\textwidth}
+ \begin{itemize}
+ \item Knapsack (Chen \etal, 2011)
+ \item Coverage (Singer, 2012)
+ \end{itemize}
+ \end{column}
+ \end{columns}
+\end{frame}
- \alert{Problem:} $OPT_{-i^*}$ is NP-hard to compute
+\begin{frame}{Main Technical Contribution: Relaxation for EDP}
+\alt<1>{\begin{block}{\textsc{EDP} }
+\vspace*{-0.4cm}
+\begin{align*}
+ \text{Maximize}\qquad &V(S) = \log \det (I+\sum_{i\in S}x_ix_i^T) \\
+ \text{subj. to}\qquad &\textstyle\sum_{i\in S}c_i\leq B
+\end{align*}
+\end{block}
+}
+{
+\begin{block}{Convex Relaxation}
+\vspace*{-0.4cm}
+\begin{align*}
+ \text{Maximize}\qquad &L(\lambda) = \log \det (I+\sum_{i\in [N]}\lambda_ix_ix_i^T) \\
+ \text{subj. to}\qquad &\textstyle\sum_{i\in S}\lambda_ic_i\leq B, \lambda_i\in [0,1]
+\end{align*}
+\end{block}
+}
+\pause
+\pause
+ \begin{itemize}
+ \item Poly-time: $L$ is convex and self-concordant
+ \pause
+ \vspace{0.5cm}
+ \item Close to $V(OPT)$:\pause
+ \vspace{0.5cm}
+ \begin{block}{Technical Lemma}
+ \begin{displaymath}
+ L^* \leq 2 V(OPT) + V(\{i^*\})
+ \end{displaymath}
+ \end{block}
+ \end{itemize}
+ \pause
+ Proved using \emph{pipage rounding} [Ageev and Sviridenko 2004]
+\end{frame}
+\begin{comment}
\vspace{0.5cm}
\pause
@@ -518,7 +717,6 @@ Subjects \alert{do not lie} about $y_i$ (tamper-proof experiments).}
\end{column}
\end{columns}
\end{frame}
-
\begin{frame}{Sketch of proof (2)}
\begin{displaymath}
L^* = \arg\max_{\lambda\in [0,1]^n} \left\{\log\det\left(I_d + \sum_i \lambda_i x_i x_i^T\right)\mid \sum_{i=1}^n\lambda_i c_i\leq B\right\}
@@ -537,8 +735,9 @@ Subjects \alert{do not lie} about $y_i$ (tamper-proof experiments).}
\end{block}
\end{itemize}
\end{frame}
+\end{comment}
-\section{Generalization}
+\section{Extensions \& Generalization}
\begin{frame}{Outline}
\tableofcontents[currentsection]