path: root/slides/slides.tex
author    Thibaut Horel <thibaut.horel@gmail.com>    2013-02-07 14:37:17 -0800
committer Thibaut Horel <thibaut.horel@gmail.com>    2013-02-07 14:37:17 -0800
commit    58ccbe548085600cc2fbb005726191438b450bc7 (patch)
tree      2f55375aff50a738072090cb1f4df7e830bbfe1d /slides/slides.tex
parent    a148a6f046173cd114e009c6ef50e77d5d5941f4 (diff)
download  recommendation-58ccbe548085600cc2fbb005726191438b450bc7.tar.gz
Slides for DIMACS workshop at Stanford
Diffstat (limited to 'slides/slides.tex')
-rw-r--r--  slides/slides.tex | 493
1 file changed, 250 insertions(+), 243 deletions(-)
diff --git a/slides/slides.tex b/slides/slides.tex
index 81f6deb..1bccd02 100644
--- a/slides/slides.tex
+++ b/slides/slides.tex
@@ -4,21 +4,21 @@
\usepackage[LGR,T1]{fontenc}
\usepackage{amsmath}
\usepackage{algpseudocode,algorithm}
+\usepackage{graphicx}
\DeclareMathOperator*{\argmax}{arg\,max}
\usetheme{Boadilla}
-\usecolortheme{beaver}
-\title[Budget Feasible Mechanisms for Experimental Design]{Budget Feasible Mechanisms for Experimental Design}
-\author{xx\\ Joint work with yy and zz}
+\title[]{Budget Feasible Mechanisms for Experimental Design}
+\author[Thibaut Horel]{Thibaut Horel\\ Joint work with Stratis Ioannidis and S. Muthukrishnan}
\institute[]{}
\setbeamercovered{transparent}
-
-\AtBeginSection[]
-{
-\begin{frame}<beamer>
-\frametitle{Outline}
-\tableofcontents[currentsection]
-\end{frame}
-}
+\setbeamertemplate{navigation symbols}{}
+%\AtBeginSection[]
+%{
+%\begin{frame}<beamer>
+%\frametitle{Outline}
+%\tableofcontents[currentsection]
+%\end{frame}
+%}
\begin{document}
@@ -26,229 +26,214 @@
\maketitle
\end{frame}
-\begin{frame}
-\tableofcontents
-\end{frame}
+\section{Introduction}
-\section{Experimental Design}
-\begin{frame}{Experimental Design}
- An experimenter is about to conduct an experiment with users.
- \begin{itemize}
- \item $n$ users
- \item $x_i\in\mathbb{R}^d$: public features of user $i$
- \item $c_i\in\mathbb{R}^+$: cost of user $i$ for taking part in the experiment
- \end{itemize}
+\begin{frame}{Motivation}
+ \begin{center}
+ \includegraphics<1-4>[scale=0.5]{1.pdf}
+ \includegraphics<5>[scale=0.5]{2.pdf}
+ \includegraphics<6>[scale=0.5]{3.pdf}
+ \includegraphics<7>[scale=0.5]{4.pdf}
+ \end{center}
- When paid $c_i$, user $i$ reveals $y_i$:
- \begin{displaymath}
- y_i = \beta^T x_i + \varepsilon_i,\quad\beta\in\mathbb{R}^d,\;
- \varepsilon_i\sim\mathcal{N}(0,\sigma^2)
- \end{displaymath}
- $\beta$ is \alert{unknown} to the experimenter.
-
- \begin{block}{}
- \alert{Question:} budget constraint $B$, which users to pick to learn $\beta$ as
- well as possible without spending more than $B$?
- \end{block}
+ \begin{center}
+ \begin{overprint}
+ \onslide<1>\begin{center}$N$ users\end{center}
+ \onslide<2>
+ \begin{center}
+ $x_i\in\mathbb{R}^d$: public features (e.g. age, sex, height, etc.)
+ \end{center}
+ \onslide<3>
+ \begin{center}
+          $y_i\in\mathbb{R}$: private data (e.g. disease, etc.)
+ \end{center}
+ \onslide<4>
+ \begin{center}
+ Linear model: $y_i = \beta^Tx_i + \varepsilon_i$
+ \end{center}
+ \begin{displaymath}
+ \beta^* = \arg\min_\beta \sum_i |y_i-\beta^Tx_i|^2
+ \end{displaymath}
+ \end{overprint}
+ \end{center}
\end{frame}
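
[Editor's note] The least-squares objective on this slide has a standard closed-form solution; below is a minimal sketch on synthetic data (X, y and beta_true are illustrative placeholders, not taken from the repository):

    # Minimal OLS sketch for beta* = argmin_beta sum_i |y_i - beta^T x_i|^2
    import numpy as np

    rng = np.random.default_rng(0)
    N, d = 100, 3
    X = rng.normal(size=(N, d))                          # public features x_i
    beta_true = np.array([1.0, -2.0, 0.5])               # unknown model (synthetic)
    y = X @ beta_true + rng.normal(scale=0.1, size=N)    # private data y_i
    beta_star, *_ = np.linalg.lstsq(X, y, rcond=None)    # closed-form least squares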
-\begin{frame}{Evaluation criterion}
- \begin{block}{}
- \alert{Question:} How to measure the helpfulness of a set of users $S$ in learning $\beta$?
- \end{block}
-
- \vspace{0.5cm}
- Bayesian approach:
+\begin{frame}{Challenges}
\begin{itemize}
- \item \emph{a priori} knowledge: prior distribution over $\beta$
- \item the entropy of $\beta$, $H(\beta)$ measures the uncertainty of the experimenter
- \item after observing $Y_S = \{y_i,\; i\in S\}$, uncertainty becomes $H(\beta\mid Y_S)$
+ \item Value of data?
+ \vspace{1cm}
+ \pause
+ \item How to optimize it?
+ \vspace{1cm}
+ \pause
+ \item Strategic users?
\end{itemize}
-
- \vspace{0.5cm}
-
- The value of $S$ is the \alert{entropy reduction}:
- \begin{displaymath}
- V(S) = H(\beta) - H(\beta\mid Y_S)
- \end{displaymath}
\end{frame}
-\begin{frame}{Value function}
- Gaussian linear model:
- \begin{displaymath}
- y_i = \beta^T x_i + \varepsilon_i,\quad\beta\in\mathbb{R}^d,\;
- \varepsilon_i\sim\mathcal{N}(0,\sigma^2)
- \end{displaymath}
-
- Typical prior distribution $\beta\sim\mathcal{N}(0,\tau I_d)$
-
- \vspace{0.5cm}
-
- \begin{block}{}
- Under the Gaussian linear model and Gaussian prior distribution:
- \begin{displaymath}
- V(S) = \log\det\Big(I_d + \lambda\sum_{i\in S}x_i x_i^T \Big)
- \end{displaymath}
- \end{block}
-
- \vspace{0.5cm}
-
- \alert{Remark:} $V$ can be computed efficiently using a Cholesky decomposition
+\begin{frame}{Contributions}
+ \begin{itemize}
+ \item case of the linear regression
+ \vspace{1cm}
+ \pause
+ \item deterministic mechanism
+ \vspace{1cm}
+ \pause
+ \item generalization (randomized mechanism)
+ \end{itemize}
\end{frame}
-\section{Non-strategic case}
-\begin{frame}{Optimization Problem}
- \begin{block}{P1}
- \begin{align*}
- &\text{maximize}\; V(S) = \log\det\Big(I_d + \lambda\sum_{i\in S}x_i x_i^T \Big)\\
- &\text{subject to}\; \sum_{i\in S}c_i\leq B\quad\text{(Knapsack constraint)}
- \end{align*}
- \end{block}
+\section{Budget feasible mechanism design}
- \onslide<2->
- Good news:
-
- $V$ is submodular (diminishing return):
- \begin{displaymath}
- \forall S\subseteq T\, \forall i,\; V(S\cup\{i\}) - V(S)\geq
- V(T\cup\{i\}) - V(T)
- \end{displaymath}
-
- \onslide<3->
- Bad news:
-
- Maximizing a submodular function under a knapsack constraint is NP-hard (even
- in the specific case of $\log\det$)
+\begin{frame}{Outline}
+\tableofcontents
\end{frame}
-\begin{frame}{Approximation}
- \begin{definition}[Approximation ratio]
- Let $OPT$ denote the optimal solution to P1, $S$ is an $\alpha$-approximation
- if:
- \begin{displaymath}
- V(OPT)\leq \alpha V(S)
- \end{displaymath}
- \end{definition}
-
- \vspace{0.5cm}
-
- \begin{theorem}[Sviridenko]
- \begin{enumerate}
- \item There is an algorithm which returns a set $S$ which is a $\frac{e}{e-1}$-approximation
- \item No algorithm can do better unless P=NP
- \end{enumerate}
- \end{theorem}
-
- \vspace{0.5cm}
-
- The algorithm is a combination of an enumeration method and a greedy heuristic.
+\begin{frame}{Outline}
+ \tableofcontents[currentsection]
\end{frame}
-\begin{frame}{Greedy algorithm}
- \begin{block}{Greedy($V$, $B$)}
- \begin{algorithmic}
- \State $i \gets \argmax_{1\leq j\leq n}\frac{V(j)}{c_j}$
- \State $S \gets \emptyset$
- \While{$c_i\leq B - \sum_{j\in S} c_j$}
- \State $S \gets S\cup\{i\}$
- \State $i \gets \argmax_{1\leq j\leq n}
- \frac{V(S\cup\{j\})-V(S)}{c_j}$
- \EndWhile
- \State \textbf{return} $S$
- \end{algorithmic}
- \end{block}
+\begin{frame}{Reverse auction}
+\begin{itemize}
+ \item set of $N$ sellers $\mathcal{A} = \{1,\ldots,N\}$ and a buyer
+ \vspace{0.3cm}
+ \pause
+ \item $V$ value function of the buyer, $V:2^\mathcal{A}\rightarrow \mathbb{R}^+$
+ \vspace{0.3cm}
+ \pause
+    \item $c_i\in\mathbb{R}^+$ price of seller $i$'s good
+ \vspace{0.3cm}
+ \pause
+ \item $B$ budget constraint of the buyer
+\end{itemize}
- \vspace{0.5cm}
+\vspace{0.5cm}
+\pause
- \begin{lemma}
- Greedy has an \alert{unbounded} approximation ratio.
- \end{lemma}
+\begin{block}{Goal}
+ \begin{itemize}
+ \item Find $S\subset \mathcal{A}$ \alert{maximizing} $V(S)$
+ \vspace{0.3cm}
+ \item Find \alert{payment} $p_i$ to seller $i$
+ \end{itemize}
+\end{block}
\end{frame}
-\begin{frame}
- \begin{block}{MaxGreedy}
- \begin{algorithmic}
- \State $i^* \gets \argmax_{1\leq j\leq n} V(j)$
- \State $S \gets$ Greedy($V$, $B$)
- \If{$V(i^*)\geq V(S)$}
- \State \textbf{return} $\{i^*\}$
- \Else
- \State \textbf{return} $S$
- \EndIf
- \end{algorithmic}
- \end{block}
+\begin{frame}{Objectives}
+ Payments $(p_i)_{i\in S}$ must be:
+ \pause
+ \vspace{0.3cm}
+ \begin{itemize}
+ \item individually rational: $p_i\geq c_i,\;i\in S$
+ \vspace{0.2cm}
+ \pause
+ \item truthful: reporting one's true cost is a dominant strategy
+ \vspace{0.2cm}
+ \pause
+ \item budget feasible: $\sum_{i\in S} p_i \leq B$
+ \vspace{0.2cm}
+ \end{itemize}
- \vspace{0.5cm}
+ \pause
+ \vspace{0.3cm}
- \begin{lemma}
- MaxGreedy is a $\frac{5e}{e-1}$ approximation
- \end{lemma}
+ Mechanism must be:
+ \vspace{0.3cm}
+ \pause
+ \begin{itemize}
+ \item computationally efficient: polynomial time
+ \pause
+ \vspace{0.2cm}
+ \item good approximation: $V(OPT) \leq \alpha V(S)$ with:
+ \begin{displaymath}
+ OPT = \arg\max_{S\subset \mathcal{A}} \left\{V(S)\mid \sum_{i\in S}c_i\leq B\right\}
+ \end{displaymath}
+ \end{itemize}
\end{frame}
-\section{Strategic case}
-
-\begin{frame}{Reverse auction}
+\begin{frame}{Known results}
+ When $V$ is submodular:
+ \vspace{1cm}
+ \pause
+ \begin{itemize}
+ \item \alert{randomized} budget feasible mechanism,
+ approximation ratio: $7.91$ (Chen et al., 2011)
+ \vspace{1cm}
+ \pause
+ \item \alert{deterministic} mechanisms for:
+ \vspace{0.3cm}
+ \begin{itemize}
+ \item Knapsack: $2+\sqrt{2}$ (Chen et al., 2011)
+ \vspace{0.3cm}
+ \item Matching: 7.37 (Singer, 2010)
+ \vspace{0.3cm}
+ \item Coverage: 31 (Singer, 2012)
+ \vspace{0.3cm}
+ \end{itemize}
+ \end{itemize}
+\end{frame}
- \alert{Problem:} In real life, user are strategic agents. They lie about their costs.
- This is a \alert{reverse auction}: the experimenter wants to buy data
- from users.
+\section{Budget feasible mechanism for linear regression}
- \vspace{0.5cm}
+\begin{frame}{Outline}
+ \tableofcontents[currentsection]
+\end{frame}
- \alert{Goal:} Design an auction mechanism:
+\begin{frame}{Linear regression}
+ Each user $i\in\mathcal{A}$ has:
\begin{itemize}
- \item a selection rule $s: (c_1,\ldots,c_n) \mapsto (s_1,\ldots,s_n)$,
- $s_i\in\{0,1\}$
- \item a payment rule $p: (c_1,\ldots,c_n) \mapsto (p_1,\ldots,p_n)$,
- $p_i\in\mathbb{R}^+$
+ \item Public feature vector $x_i\in\mathbb{R}^d$
+ \item Private data $y_i\in\mathbb{R}$
\end{itemize}
+ \pause
\vspace{0.5cm}
- As usual in auction mechanism design, we want payments to be:
- \begin{itemize}
- \item individually rational: $p_i\geq c_i$
- \item truthful: reporting one's true cost is a dominant strategy
- \item normalized: $s_i=0$ implies $p_i=0$
- \end{itemize}
-\end{frame}
+ Gaussian linear model:
+ \begin{displaymath}
+ y_i = \beta^T x_i + \varepsilon_i,\quad\beta\in\mathbb{R}^d,\;
+ \varepsilon_i\sim\mathcal{N}(0,\sigma^2)
+ \end{displaymath}
-\begin{frame}{Optimization problem for budget feasible auctions}
- \begin{block}{P2}
- \begin{align*}
- &\text{maximize}\; V\big(s(c_1,\ldots,c_n)\big) = \log\det\Big(I_d + \lambda\sum_{i\in S}x_i x_i^T \Big)\\
- &\text{subject to}\; \sum_{i\in S}\alert{p_i}\leq B\quad\text{(budget feasible)}
- \end{align*}
- where the payments are further constrained to be individually rational, truthful
- and normalized
- \end{block}
+ \pause
+ \vspace{0.5cm}
- \vspace{1cm}
+ Which users to select?\pause{} Experimental design $\Rightarrow$ D-optimal criterion
- This is again NP-hard $\Rightarrow$ we seek good approximation ratios
+ \vspace{0.5cm}
+
+ \begin{block}{Experimental Design}
+ \begin{displaymath}
+ \textsf{\alert{maximize}}\quad V(S) = \log\det\left(I_d + \sum_{i\in S} x_i x_i^T\right),\quad S\subset\mathcal{A}
+ \end{displaymath}
+ \end{block}
\end{frame}
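
[Editor's note] The D-optimal value in the block above can be evaluated directly; a short sketch of V(S) = log det(I_d + sum_{i in S} x_i x_i^T), using the Cholesky-based computation mentioned on the pre-revision slides (X and S are placeholders):

    import numpy as np

    def value(X, S):
        # X: (N, d) array of feature vectors x_i; S: iterable of selected user indices
        d = X.shape[1]
        Xs = X[sorted(S)]
        M = np.eye(d) + Xs.T @ Xs                  # I_d + sum_{i in S} x_i x_i^T
        L = np.linalg.cholesky(M)                  # M is symmetric positive definite
        return 2.0 * np.sum(np.log(np.diag(L)))    # log det M = 2 * sum(log diag(L))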
-\begin{frame}{Myerson's theorem}
- \begin{theorem}[Myerson]
- Let $(s, p)$ be a mechanism. If:
- \begin{itemize}
- \item $s$ is \alert{monotonic}: if $c_i\leq c_i'$ then $s_i \geq s_i'$
- \item users are paid \alert{threshold payments}: $p_i = \inf\{c_i':i\notin s(c_i',c_{-i})\}$
- \end{itemize}
- Then, the mechanism is individually rational and truthful.
- \end{theorem}
+\begin{frame}{Experimental design}
+ \begin{displaymath}
+ \textsf{\alert{maximize}}\quad V(S) = \log\det\left(I_d + \sum_{i\in S} x_i x_i^T\right),\quad S\subset\mathcal{A}
+ \end{displaymath}
\vspace{1cm}
- \alert{Consequence:} It suffices to find a monotonic selection rule such that
- the threshold payments are within the budget constraint.
+ \begin{itemize}
+ \item the non-strategic optimization problem is NP-hard
+ \vspace{0.3cm}
+ \pause
+ \item $V$ is submodular
+ \vspace{0.3cm}
+ \pause
+ \item previous results give a randomized budget feasible mechanism
+ \vspace{0.3cm}
+ \pause
+ \item deterministic mechanism?
+ \end{itemize}
\end{frame}
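
[Editor's note] The submodularity (diminishing returns) claim on this slide can be checked numerically on small random instances; an illustrative sanity check, not a proof:

    import numpy as np

    def V(X, S):
        d = X.shape[1]
        Xs = X[sorted(S)]
        return np.linalg.slogdet(np.eye(d) + Xs.T @ Xs)[1]    # log-det value

    rng = np.random.default_rng(1)
    X = rng.normal(size=(6, 2))
    S, T, i = {0}, {0, 1, 2}, 4                                # S subset of T, i in neither
    assert V(X, S | {i}) - V(X, S) >= V(X, T | {i}) - V(X, T) - 1e-12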
\begin{frame}{Main result}
\begin{theorem}
- There is a budget feasible, individually rational and truthful mechanism for
- the experimental design problem which runs in polynomial time. Its
+ There exists a budget feasible, individually rational and truthful mechanism for
+ experimental design which runs in polynomial time. Its
approximation ratio is:
\begin{displaymath}
\frac{10e-3 + \sqrt{64e^2-24e + 9}}{2(e-1)}
@@ -257,80 +242,102 @@
\end{theorem}
\end{frame}
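
[Editor's note] For reference, the ratio in the theorem evaluates numerically to about 12.98, consistent with the "approximately 13" quoted on the conclusion slide:

    import math
    e = math.e
    ratio = (10 * e - 3 + math.sqrt(64 * e**2 - 24 * e + 9)) / (2 * (e - 1))
    print(ratio)   # ~ 12.98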
-
-\begin{frame}{Greedy revisited}
- \begin{block}{Greedy\only<2>{\alert{'}}($V$, $B$)}
- \begin{algorithmic}
- \State $i \gets \argmax_{1\leq j\leq n}\frac{V(j)}{c_j}$
- \State $S \gets \emptyset$
- \While{$c_i\leq \only<1>{B - \sum_{j\in S} c_j} \only<2->{\alert{\frac{B}{2}\frac{V(S\cup\{i\})-V(S)}{V(S\cup\{i\})}}}$}
- \State $S \gets S\cup\{i\}$
- \State $i \gets \argmax_{1\leq j\leq n}
- \frac{V(S\cup\{j\})-V(S)}{c_j}$
- \EndWhile
- \State \textbf{return} $S$
- \end{algorithmic}
+\begin{frame}{Sketch of proof}
+  \begin{block}{Mechanism (Chen et al., 2011)}
+ \begin{itemize}
+ \item Find $i^* = \arg\max_i V(\{i\})$
+ \item Compute $S_G$ greedily
+ \item Return:
+ \begin{itemize}
+ \item $\{i^*\}$ if $V(\{i^*\}) \geq \only<1-2>{V(OPT_{-i^*})}\only<3>{\alert{V(OPT_{-i^*})}}\only<4->{\alert{L^*}}$
+ \item $S_G$ otherwise
+ \end{itemize}
+ \end{itemize}
\end{block}
- \vspace{1cm}
-
- \alert{Problem:} The threshold payments are not budget feasible.
-\end{frame}
+ \vspace{0.5cm}
+ \pause
-\begin{frame}{MaxGreedy revisited}
- \begin{block}{MaxGreedy}
- \begin{algorithmic}
- \State $i^* \gets \argmax_{1\leq j\leq n} V(j)$
- \State $S \gets$ Greedy\alert{'}($V$, $B$)
- \If{$V(i^*)\geq \only<1>{V(S)}\only<2,3>{\alert{OPT_{-i^*}}}\only<4>{\alert{OPT'_{i^*}}}$}
- \State \textbf{return} $\{i^*\}$
- \Else
- \State \textbf{return} $S$
- \EndIf
- \end{algorithmic}
- \end{block}
+ Valid mechanism, approximation ratio: 8.34
\vspace{0.5cm}
+ \pause
- \only<1>{\alert{Problem:} MaxGreedy is not monotonic}
+ \alert{Problem:} $OPT_{-i^*}$ is NP-hard to compute
- \only<2,3>{\alert{Solution 1:} use $OPT_{-i^*} = \max_{S, i^*\notin S}\{V(S) \mid \sum_{j\in S} c_j\}$. Gives a 8.34 approximation}
+ \vspace{0.5cm}
+ \pause
- \only<3>{\alert{Problem:} $OPT_{-i^*}$ cannot be computed in polynomial time}
+ \alert{Solution:} Replace $V(OPT_{-i^*})$ with $L^*$:
+ \pause
+ \begin{itemize}
+ \item computable in polynomial time
+ \pause
+ \item close to $V(OPT_{-i^*})$
+ \end{itemize}
+\end{frame}
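
[Editor's note] The greedy subroutine S_G is only named on this slide; below is a hedged sketch of one standard instantiation, following the proportional-share stopping rule that appears in the pre-revision Greedy'(V, B) pseudocode (V, c and B are placeholders for the value function, reported costs and budget):

    def greedy(V, c, B):
        # Greedy selection by marginal value per unit cost, with a B/2 proportional-share stop
        S, remaining = set(), set(range(len(c)))
        while remaining:
            i = max(remaining, key=lambda j: (V(S | {j}) - V(S)) / c[j])
            gain = V(S | {i}) - V(S)
            if V(S | {i}) <= 0 or c[i] > (B / 2) * gain / V(S | {i}):
                break                      # i's cost exceeds its share of B/2: stop
            S.add(i)
            remaining.remove(i)
        return S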
- \only<4->{\alert{Solution 2:} use a concave relaxation:
+\begin{frame}{Sketch of proof (2)}
\begin{displaymath}
- OPT'_{i^*} = \max_{\lambda\in[0, 1]^n, \lambda_{i^*} = 0} L(\lambda)
- = \max_{\lambda\in[0, 1]^n, \lambda_{i^*} = 0}\log\det\big(I_d + \sum_{i=1}^n \lambda_i x_ix_i^T\big)
+        L^* = \max_{\lambda\in [0,1]^n} \left\{\log\det\left(I_d + \sum_i \lambda_i x_i x_i^T\right)\mid \sum_{i=1}^n\lambda_i c_i\leq B\right\}
\end{displaymath}
- }
+ \vspace{1cm}
+ \begin{itemize}
+ \item polynomial time?\pause{} convex optimization problem
+ \pause
+ \vspace{0.5cm}
+ \item close to $V(OPT_{-i^*})$?\pause
+ \vspace{0.5cm}
+ \begin{block}{Technical lemma}
+ \begin{displaymath}
+ L^* \leq 2 V(OPT) + V(\{i^*\})
+ \end{displaymath}
+ \end{block}
+ \end{itemize}
\end{frame}
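
[Editor's note] The relaxation L* above is a concave maximization over the box [0,1]^n with one budget constraint; a minimal sketch of solving it, assuming the cvxpy modeling library (the slide only states that it is a convex optimization problem):

    import cvxpy as cp
    import numpy as np

    def relaxed_value(X, c, B):
        n, d = X.shape
        lam = cp.Variable(n)
        M = np.eye(d) + sum(lam[i] * np.outer(X[i], X[i]) for i in range(n))
        prob = cp.Problem(cp.Maximize(cp.log_det(M)),
                          [lam >= 0, lam <= 1, c @ lam <= B])
        prob.solve()
        return prob.value   # L*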
-\begin{frame}{Proof}
-\begin{itemize}
- \item Monotonicity: OK
- \pause
- \item Budget feasibility: OK
- \pause
- \item Approximation ratio: what do we lose by using a concave relaxation? Powerful
- method, \alert{pipage rounding} [Ageev, Sviridenko]
+\section{Generalization}
+
+\begin{frame}{Outline}
+ \tableofcontents[currentsection]
+\end{frame}
+
+\begin{frame}{Generalization}
\begin{itemize}
- \item let's introduce $F(\lambda) = \mathbb{E}\Big[\log\det(I_d+\sum_{i=1}^n \mu_ix_ix_i^T)\Big]$, $\mu_i\sim\mathcal{B}(\lambda_i)$
- \item $L(\lambda) \leq 2 F(\lambda)$ (technical lemma)
- \item $F$ is well-behaved: $\lambda$ can be round to a binary vector $\bar{\lambda}$ s.t $F(\lambda)\leq F(\bar{\lambda})$
- \item $OPT'_{-i^*}\leq 2 OPT_{-i^*} + V(i^*)$
- \item we can conclude by using the approximation ratio for $OPT_{-i^*}$
+ \item Generative model: $y_i = f(x_i) + \varepsilon_i,\;i\in\mathcal{A}$
+ \pause
+ \item prior knowledge of the experimenter: $f$ is a \alert{random variable}
+ \pause
+ \item \alert{uncertainty} of the experimenter: entropy $H(f)$
+ \pause
+    \item after observing $Y_S=\{y_i,\; i\in S\}$, uncertainty: $H(f\mid Y_S)$
\end{itemize}
-\end{itemize}
+
+ \pause
+ \vspace{0.5cm}
+
+ \begin{block}{Value function: entropy reduction}
+ \begin{displaymath}
+ V(S) = H(f) - H(f\mid Y_S),\quad S\subset\mathcal{A}
+ \end{displaymath}
+ \end{block}
+
+ \pause
+ \vspace{0.5cm}
+
+ $V$ is \alert{submodular} $\Rightarrow$ randomized budget feasible mechanism
\end{frame}
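
[Editor's note] In the Gaussian linear model of the previous section, the entropy-reduction value above reduces to the log-det criterion (with lambda = tau / sigma^2 and up to a constant factor 1/2 that the slides drop); a short sketch under that assumption:

    import numpy as np

    def entropy_reduction(X, S, tau=1.0, sigma=1.0):
        # H(beta) - H(beta | Y_S) for beta ~ N(0, tau I_d) and noise N(0, sigma^2)
        d = X.shape[1]
        Xs = X[sorted(S)]
        return 0.5 * np.linalg.slogdet(np.eye(d) + (tau / sigma**2) * Xs.T @ Xs)[1]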
+
\begin{frame}{Conclusion}
\begin{itemize}
- \item Experimental design + Auction theory = powerful framework when working with user data
+ \item Experimental design + Auction theory = powerful framework
\vspace{1cm}
- \item Can we extend this to learning tasks beyond linear regression?
+ \pause
+ \item deterministic mechanism for the general case? other learning tasks?
\vspace{1cm}
- \item $\simeq \alert{13}$ approximation ratio. Lower bound: \alert{$2$}
+ \pause
+ \item approximation ratio $\simeq \alert{13}$. Lower bound: \alert{$2$}
\end{itemize}
\end{frame}
\end{document}