1 files changed, 294 insertions, 95 deletions
diff --git a/slides/BudgetFeasibleExperimentalDesign.tex b/slides/BudgetFeasibleExperimentalDesign.tex
index 04e2f8e..ca60e04 100644
--- a/slides/BudgetFeasibleExperimentalDesign.tex
+++ b/slides/BudgetFeasibleExperimentalDesign.tex
@@ -27,6 +27,7 @@
 \newcommand{\ie}{\emph{i.e.}}
 \newcommand{\eg}{\emph{e.g.}}
 \newcommand{\etc}{\emph{etc.}}
+\newcommand{\etal}{\emph{et al.}}
 \newcommand{\reals}{\ensuremath{\mathbb{R}}}
 
 \usefonttheme[onlymath]{serif}
@@ -83,9 +84,14 @@
         \item Linear Regression 
 	    \pause
 	    \begin{itemize}
-                \item \emph{Deterministic}, truthful, budget feasible, 12.98-appriximate mechanism. 
+                \item We present a \alert{deterministic, poly-time}, truthful, budget feasible, 12.98-appriximate mechanism. 
 		\pause
-		\item Singer 2010, Chen 2011:\\ \emph{Randomized}, universaly truthful, budget feasible, 7.91-approximate mechanism.
+		\vspace*{0.5cm}
+		\item Previous results:
+			\begin{itemize}
+			\item  \alert{Randomized}, poly-time, universally truthful,  7.91-approximate mechanism. [Singer 2010, Chen 2011]
+			\item Deterministic, \alert{exponential time}, truthful mechanism. [Chen 2011] 
+			\end{itemize}
 	    \end{itemize}
             \pause
 	\vspace*{1cm}
@@ -104,21 +110,23 @@
 \end{frame}
 
 
-\begin{frame}{Experimental Design}
+\begin{frame}{Formulating the Problem}
    \begin{center}
         \includegraphics<1>[scale=0.4]{st2.pdf}
         \includegraphics<2>[scale=0.4]{st3.pdf}
         \includegraphics<3>[scale=0.4]{st4.pdf}
         \includegraphics<4>[scale=0.4]{st5.pdf}
         \includegraphics<5>[scale=0.4]{st6.pdf}
-        \includegraphics<6>[scale=0.4]{st7.pdf}
-        \includegraphics<7>[scale=0.4]{st8.pdf}
-        \includegraphics<8>[scale=0.4]{st9.pdf}
+        \includegraphics<6>[scale=0.4]{st6a.pdf}
+        \includegraphics<7>[scale=0.4]{st6b.pdf}
+        \includegraphics<8>[scale=0.4]{st6c.pdf}
+        \includegraphics<9>[scale=0.4]{st6d.pdf}
+        \includegraphics<10->[scale=0.4]{st6e.pdf}
     \end{center}
 
     \begin{center}
         \begin{overprint}
-        \onslide<1>\begin{center}$N$ users (``experiment subjects'') \end{center}
+        \onslide<1>\begin{center}$N$ users (``experiment subjects''), $[N]\equiv \{1,\ldots,N\}$ \end{center}
             \onslide<2>
             \begin{center}
                 $x_i\in \reals^d$: public features (\eg, age, gender, height, \etc)
@@ -132,56 +140,157 @@
                 \textbf{Gaussian Linear Model.}  There exists $\beta\in \reals^d$ s.t.
     \begin{displaymath}
         y_i = \beta^T x_i + \varepsilon_i,\quad
-        \varepsilon_i\sim\mathcal{N}(0,\sigma^2), \quad i=1,\ldots,N
+        \varepsilon_i\sim\mathcal{N}(0,\sigma^2), \quad i\in [N]
     \end{displaymath}
 %$$y_i = \beta^Tx_i + \varepsilon_i, i=1,\ldots, N$$ 
             %\end{center}
-	    \onslide<5-7>
+	    \onslide<5>
 	    \begin{center}
-		Experimenter {\tt E} wishes to learn $\beta$ and can perform  at most $k$ experiments. 
+		Experimenter {\tt E} wishes to learn $\beta$. 
 	    \end{center}
-	    \onslide<8>
-	    \begin{center}{\tt E} computes estimate $\hat{\beta}$ of $\beta$ through ridge regression.
+       \onslide<6>\begin{center}
+	Each subject $i\in [N]$ has a cost $c_i\in \reals_+$
+	\end{center}
+            \onslide<7>
+             \begin{center}
+                \E\ has a budget $B$.
+            \end{center}
+	    \onslide<8-9>
+	    \begin{center}
+		\E\ pays subjects\visible<9>{; $y_i$ is revealed only if $i$ is paid.}
+	    \end{center} 
+	    \onslide<10>
+	    \begin{center}{\tt E}  estimates ${\beta}$  through \emph{ridge regression}.
+	    \end{center}
+	    \onslide<11>
+	    \begin{center}
+%\begin{enumerate}
+Goal: Determine (a) who to pay and (b) how much,\\ so that $\hat{\beta}$ is as accurate as possible.
+%\begin{align*}
+% \text{Determine (a) who to pay:}\quad &S\subseteq [N], \text{ and}\\
+% \text{(b)~ how much:}\quad &p_i\in \reals_+,~~i\in S.
+%\end{align*}
+%\end{enumerate}
+	    \end{center}
+	    \onslide<12->
+	    \begin{center}Users are \alert{strategic}: the may lie about costs $c_i$\visible<13>{\\ Users \alert{cannot manipulate $x_i$ or $y_i$}.} %\\(tamper-proof experiments)
 	    \end{center}
+	    
         \end{overprint}
     \end{center}
 
 \end{frame}
 
-\begin{frame}{Linear (Ridge) Regression}
-%There exists $\beta\in \reals^d$ s.t.
-%    \begin{displaymath}
-%        y_i = \beta^T x_i + \varepsilon_i,\quad
-%        \varepsilon_i\sim\mathcal{N}(0,\sigma^2), \quad i=1,\ldots,N
-%    \end{displaymath}
-\visible<1->{Let $S\subset [N]\equiv \{1,\ldots,N\}$ be the set of experiments performed.\\\medskip}
-\visible<2->{Assume that \E\ has a \emph{prior} on $\beta$:
+\begin{frame}{Budget Feasible Mechanism Design [Singer 2010]}
+Let $S\subset [N]$ be the set of experiments performed, and
+  $c=[c_i]_{i\in [N]}$ be the subject costs.  \\\bigskip
+\visible<2->{Let $V(S)\in\reals_+$ denote the \alert{value} of the experiments.}
+\visible<3->{$$\text{High }V(S) \Leftrightarrow \text{ estimate }\hat{\beta}(S) \text{ is close to }\beta $$}
+\visible<4->{
+%\item<1->Fix budget $B$ and value function $V:2^{[N]}\to\reals_+$
+\begin{block}{Reverse Auction Mechanism}
+ A mechanism $\mathcal{M}(c)=(S(c),p(c))$ comprises
+\begin{itemize}
+\item an \alert{allocation function} $S:\reals_+^N\to 2^{[N]}$, and
+\item a \alert{payment function} $p:\reals_+^N\to \reals_+^N$.
+\end{itemize}
+\end{block}
+}
+\end{frame}
+
+
+\begin{frame}{Budget Feasible Mechanism Design [Singer 2010] (cont'd)}
+    We seek mechanisms $\mathcal{M}=(S,p)$  that are: 
+    \pause
+    \vspace{0.3cm}
+    \begin{itemize}
+	\item Normalized: $p_i=0$ if $i\notin S$.
+            \vspace{0.2cm}
+            \pause
+        \item Individually Rational: $p_i\geq c_i,\;i\in S$
+            \vspace{0.2cm}
+            \pause
+        \item Truthful: $p_i(c_i,c_{-i})-\mathbbm{1}_{i\in S(c_i,c_{-i})}\cdot c_i \geq p_i(c_i',c_{-i})-\mathbbm{1}_{i\in S(c_i',c_{-i})}\cdot c_i $
+            \vspace{0.2cm}
+            \pause
+        \item budget feasible: $\sum_{i\in S} p_i \leq B$
+            \vspace{0.2cm}
+    \end{itemize}
+
+    \pause
+    \vspace{0.3cm}
+
+    In addition, $\mathcal{M}$ must be:
+    \vspace{0.3cm}
+    \pause
+    \begin{itemize}
+        \item computationally efficient: polynomial time
+            \pause
+            \vspace{0.2cm}
+        \item constant approximation: $V(OPT) \leq \alpha V(S)$  with:
+            \begin{displaymath}
+                OPT = \arg\max_{S\subset \mathcal{A}} \left\{V(S)\mid \sum_{i\in S}c_i\leq B\right\}
+            \end{displaymath}
+    \end{itemize}
+\end{frame}
+
+
+
+
+\begin{frame}{Which Experiments Are Informative?}
+
+\alt<1>{What is the appropriate value function $V$?} {Let $S\subset [N]$ be the set of experiments performed.\\\medskip}
+%\end{overprint}
+\visible<3->{Assume that \E\ has a \emph{prior} on $\beta$:
 $$\beta \sim \mathcal{N}(0,\sigma^2 R). $$}
-\visible<3->{Maximum a-posteriori estimation:
+\visible<4->{Maximum a-posteriori estimation:
 \begin{align*}
     \hat{\beta} &= \argmax_{\beta\in\reals^d} \mathbf{P}(\beta\mid y_i, i\in S) \\
 &=\argmin_{\beta\in\reals^d} \big(\sum_{i\in S} (y_i - {\beta}^Tx_i)^2
-    + \only<3-4>{\alert<4->{{\beta}^TR^{-1}\beta}\big)} \only<5>{\alert{\|\beta\|_2^2}\big)~~~~~} 
+    + \only<4-5>{\alert<5->{{\beta}^TR^{-1}\beta}\big)} \only<6>{\alert{\|\beta\|_2^2}\big)~~~~~} 
 %= (R+{X_S}^TX_S)^{-1}X_S^Ty_S 
 \end{align*}
 }
-\visible<4->{\hspace*{3in}\alert{Ridge Regression}\\}
-\visible<5>{\hspace*{3in}$R=I$: homotropic prior}
+\visible<5->{\hspace*{3in}\alert{Ridge Regression}\\}
+\visible<6>{\hspace*{2.8in}\alert{$R=I$: homotropic prior}}
 \end{frame}
 
-\begin{frame}{Value Function}
-Select $S\subset [N]$, s.t. $|S|\leq k$. 
-\\\bigskip
+\begin{frame}{Which Experiments Are Informative? (contd.)}
+Let $S\subset [N]$ be the set of experiments performed.\\\bigskip
+
 \visible<2->{
 Information Gain/D-optimality Criterion:
 \begin{align*}
 V(S)&=H(\beta)-H(\beta\mid y_i,i\in S)\\
-\visible<3->{& = \frac{1}{2}\log\det (R^{-1}+X_S^TX_S)}
+\visible<3->{& = \frac{1}{2}\log\det (R^{-1}+\sum_{i\in S}x_ix_i^T)}
 \end{align*}
-\visible<3->{where $$X_S=[x_i^T]_{i\in S}\in \reals^{|S|\times d}$$}
+
+\visible<4->{ \emph{I.e.}: $$\alert{\text{Value of~}S=\text{Reduction of Uncertainty.}} $$}
+\vspace*{-0.7cm}		
 }
 \end{frame}
 
+\begin{frame}{Full Information Setting}
+\onslide<2->{
+\begin{block}{\textsc{Experimental Design Problem (EDP)} }
+\vspace*{-0.4cm}		
+\begin{align*}
+			\text{Maximize}\qquad &V(S) = \log \det (\alert<3>{I}+\sum_{i\in S}x_ix_i^T) \\
+			\text{subj. to}\qquad &\textstyle\sum_{i\in S}c_i\leq B
+\end{align*}
+\end{block}
+}
+\begin{itemize}
+\item<3->$R=I$: homotropic prior
+\item<4-> EDP is NP-hard 
+\item<5-> $V$ is submodular, monotone, non-negative, and $V(\emptyset)=0$
+\item<6-> $\frac{1}{1-1/e}$-approximable (Sviridenko 2004, Krause and Guestrin 2005)
+\end{itemize}
+	    
+
+
+\end{frame}
+
 %\begin{frame}{Classic Experimental Design}
 %\begin{center}
 %\includegraphics[scale=0.3]{st10.pdf}
@@ -192,7 +301,7 @@ V(S)&=H(\beta)-H(\beta\mid y_i,i\in S)\\
 %\end{align*}
 %\end{frame}
 	
-
+\begin{comment}
 \begin{frame}{Cardinality vs. Budget Constraint}
 \begin{center}
 \includegraphics<1>[scale=0.3]{st10a.pdf}
@@ -209,17 +318,6 @@ V(S)&=H(\beta)-H(\beta\mid y_i,i\in S)\\
 \text{subj. to}& & |S|&\leq k
 \end{align*}
  
-       \onslide<2>\begin{center}
-	Each subject $i\in [N]$ has a cost $c_i\in \reals_+$
-	\end{center}
-            \onslide<3>
-             \begin{center}
-                \E\ has a budget $B$.
-            \end{center}
-	    \onslide<4-5>
-	    \begin{center}
-		\E\ can pay subjects\visible<5>{; $y_i$ is revealed only if $i$ is paid.}
-	    \end{center} 
            \onslide<6->
 \begin{block}{\textsc{Experimental Design Problem (EDP)} }
 \vspace*{-0.4cm}		
@@ -306,53 +404,22 @@ Subjects \alert{do not lie} about $y_i$ (tamper-proof experiments).}
 %    \end{itemize}
 %\end{block}
 %\end{frame}
+\end{comment}
 
-\begin{frame}{Objectives}
-    Mechanism $\mathcal{M}=(S,p)$  must be: 
-    \pause
-    \vspace{0.3cm}
-    \begin{itemize}
-	\item Normalized: $p_i=0$ if $i\notin S$.
-            \vspace{0.2cm}
-            \pause
-        \item Individually Rational: $p_i\geq c_i,\;i\in S$
-            \vspace{0.2cm}
-            \pause
-        \item Truthful: $p_i(c_i,c_{-i})-\mathbbm{1}_{i\in S(c_i,c_{-i})}\cdot c_i \geq p_i(c_i',c_{-i})-\mathbbm{1}_{i\in S(c_i',c_{-i})}\cdot c_i $
-            \vspace{0.2cm}
-            \pause
-        \item budget feasible: $\sum_{i\in S} p_i \leq B$
-            \vspace{0.2cm}
-    \end{itemize}
-
-    \pause
-    \vspace{0.3cm}
-
-    Mechanism must be:
-    \vspace{0.3cm}
-    \pause
-    \begin{itemize}
-        \item computationally efficient: polynomial time
-            \pause
-            \vspace{0.2cm}
-        \item constant approximation: $V(OPT) \leq \alpha V(S)$  with:
-            \begin{displaymath}
-                OPT = \arg\max_{S\subset \mathcal{A}} \left\{V(S)\mid \sum_{i\in S}c_i\leq B\right\}
-            \end{displaymath}
-    \end{itemize}
-\end{frame}
-
-
-\begin{frame}{Known Results}
+\begin{frame}{Strategic Subjects}
     When $V$ is submodular:
     \vspace{1cm}
     \pause
     \begin{itemize}
-        \item \alert{randomized}, universally truthful, budget feasible mechanism,
+        \item \alert{Randomized}, poly-time, universally truthful  mechanism,
             approximation ratio: $7.91$ [Singer 2010, Chen \emph{et al.}, 2011]
-        \vspace{1cm}
+        \vspace{0.5cm}
         \pause
-        \item \alert{deterministic} mechanisms for specific submodular $V$ functions:
+        \item Deterministic, \alert{exponential time}, truthful  mechanism,
+            approximation ratio: $8.34$ [Singer 2010, Chen \emph{et al.}, 2011]
+        \vspace{0.5cm}
+        \pause
+        \item \alert{Deterministic, poly-time}, truthful mechanisms for specific submodular functions $V$ :
             \vspace{0.3cm}
             \begin{itemize}
                 \item Knapsack: $2+\sqrt{2}$ [Singer 2010, Chen \emph{et al.}, 2011]
@@ -453,7 +520,7 @@ Subjects \alert{do not lie} about $y_i$ (tamper-proof experiments).}
 \begin{frame}{Main result}
 \end{comment} 
 
-\section{Main Result}
+\section{Main Results}
 
 
 \begin{frame}{Outline}
@@ -461,41 +528,173 @@ Subjects \alert{do not lie} about $y_i$ (tamper-proof experiments).}
 \end{frame}
 
 
-\begin{frame}{Our Main Result}
-   \begin{theorem}
+\begin{frame}{Main Results}
+   \begin{theorem}<2->
         There exists deterministic,  budget feasible, individually rational and truthful mechanism for
         EDP which runs in polynomial time. Its
         approximation ratio is:
         \begin{displaymath}
-        \frac{10e-3 + \sqrt{64e^2-24e + 9}}{2(e-1)}+\varepsilon
-        \simeq 12.98 +\varepsilon
+        \frac{10e-3 + \sqrt{64e^2-24e + 9}}{2(e-1)}
+        \simeq 12.98
         \end{displaymath}
     \end{theorem}
+   \begin{theorem}<3->
+        There is no 2-approximate, budget feasible, individually rational and truthful mechanism for
+        EDP.     \end{theorem}
+
+\end{frame}
+
+\begin{frame}{Myerson's Theorem}
+\begin{theorem}[Myerson 1981]
+ A normalized mechanism $\mathcal{M} = (S,p)$ for a single parameter auction is
+truthful iff:
+\begin{enumerate}
+\item<2-> 
+ $S$ is \alert{monotone}, \emph{i.e.},%\\ for any agent $i$ and $c_i' \leq c_i$, for any fixed costs $c_{-i}$ of agents in $[N]\setminus\{i\}$,
+ $$\text{for all}~c_i' \leq c_i,\quad i\in S(c_i,
+c_{-i})\text{ implies }i\in S(c_i', c_{-i}),$$ and 
+\item<3->
+ subjects are paid \alert{threshold payments}, \emph{i.e.}, $$\text{for all }i\in S(c), \qquad p_i(c)=\inf\{c_i': i\in S(c_i', c_{-i})\}.$$
+\end{enumerate}
+\end{theorem}
+\uncover<4->{Focus on  \alert{monotone} mechanisms.}
+
+\end{frame}
+
+\begin{frame}{A Greedy Construction for Submodular $V$}
+Construct $S_G$ by adding elements with \emph{highest marginal-value-per-cost}.\\
+\bigskip
+\visible<2->{If $S_G=S$ so far, add to it: 
+\begin{align*}
+    i = \argmax_{j\in\mathcal{N}\setminus S}\frac{V(S\cup\{i\}) - V(S)}{c_i}\label{greedy}
+\end{align*}
+Stop when budget $B$ is reached.
+}
+
+\end{frame}
+
+\begin{frame}{A Randomized  Mechanism for Submodular $V$ [Singer 2010]}
+    \begin{block}{Allocation $S$:}
+        \begin{itemize}
+        \item Let $i^* = \arg\max_i V(\{i\})$
+        \item Compute $S_G$ greedily over $[N]\setminus i^*$
+	\item Return $S$:
+		\begin{itemize}
+		\item Selected at random between $i^*$ and $S_G$
+		\end{itemize}
+	\end{itemize}
+    \end{block}
+    \vspace{0.5cm}
+\uncover<2->{
+[Singer 2010] Along with threshold payments, this mechanism is:
+\begin{itemize}
+\item poly-time,
+\item universally truthful, 
+\item budget-feasible
+\item 7.91-approximate [Chen \etal\ 2011].
+\end{itemize}
+}
 \end{frame}
 
-\begin{frame}{Sketch of proof}
-    \begin{block}{Mechanism (Chen et. al, 2011) for submodular $V$}
+\begin{frame}{A Deterministic Mechanism for Submodular $V$ [Singer 2010]}
+    \begin{block}{Allocation $S$}
         \begin{itemize}
         \item Find $i^* = \arg\max_i V(\{i\})$
         \item Compute $S_G$ greedily
         \item Return:
             \begin{itemize}
-                \item $\{i^*\}$ if $V(\{i^*\}) \geq \only<1-2>{V(OPT_{-i^*})}\only<3>{\alert{V(OPT_{-i^*})}}\only<4->{\alert{L^*}}$
+                \item $\{i^*\}$ if $V(\{i^*\}) \geq \alert<3>{V(OPT_{-i^*})}$ 
                 \item $S_G$ otherwise
             \end{itemize}
         \end{itemize}
     \end{block}
-
+    \vspace{0.5cm}
+\uncover<2->{
+[Singer 2010] Along with threshold payments, this mechanism is:
+\begin{itemize}
+\item truthful, 
+\item budget-feasible
+\item 8.34-approximate [Chen \etal\ 2011].
+\end{itemize}
     \vspace{0.5cm}
     \pause
+    \alert{Problem:} $OPT_{-i^*}$ is NP-hard to compute
+}
+\end{frame}
 
-    Valid mechanism, approximation ratio: 8.34
 
+\begin{frame}{Blueprint for Deterministic, Poly-time Algorithm}
+Azar and Gamzu 2008, Singer 2010, Chen \etal\ 2011:
+    \begin{block}{Allocation $S$}
+        \begin{itemize}
+        \item Find $i^* = \arg\max_i V(\{i\})$
+        \item Compute $S_G$ greedily
+        \item Return:
+            \begin{itemize}
+                \item $\{i^*\}$ if $V(\{i^*\}) \geq \alert{L^*}$ 
+                \item $S_G$ otherwise
+            \end{itemize}
+        \end{itemize}
+    \end{block}
     \vspace{0.5cm}
-    \pause
+    \begin{columns}[c]
+        \begin{column}{0.53\textwidth}
+Key idea:\\ Replace $OPT$ with a \alert{relaxation $L^*$}:\pause
+            \begin{itemize}
+		   \item computable in polynomial time
+                    \pause
+                \item close to $V(OPT_{-i^*})$
+		    \pause
+		\item monotone in costs $c$
+            \end{itemize}
+ 	\end{column}
+	\pause
+        \begin{column}{0.45\textwidth}
+   	\begin{itemize}
+                \item Knapsack (Chen \etal, 2011)
+                \item Coverage (Singer, 2012)
+            \end{itemize}
+	\end{column}
+       \end{columns}
+\end{frame}
 
-    \alert{Problem:} $OPT_{-i^*}$ is NP-hard to compute
+\begin{frame}{Main Technical Contribution: Relaxation for EDP}
+\alt<1>{\begin{block}{\textsc{EDP} }
+\vspace*{-0.4cm}		
+\begin{align*}
+			\text{Maximize}\qquad &V(S) = \log \det (I+\sum_{i\in S}x_ix_i^T) \\
+			\text{subj. to}\qquad &\textstyle\sum_{i\in S}c_i\leq B
+\end{align*}
+\end{block}
+}
+{
+\begin{block}{Convex Relaxation}
+\vspace*{-0.4cm}		
+\begin{align*}
+			\text{Maximize}\qquad &L(\lambda) = \log \det (I+\sum_{i\in [N]}\lambda_ix_ix_i^T) \\
+			\text{subj. to}\qquad &\textstyle\sum_{i\in S}\lambda_ic_i\leq B, \lambda_i\in [0,1]
+\end{align*}
+\end{block}
+}
+\pause
+\pause
+    \begin{itemize}
+        \item Poly-time: $L$ is convex and self-concordant
+            \pause
+            \vspace{0.5cm}
+        \item Close to $V(OPT)$:\pause
+            \vspace{0.5cm}
+            \begin{block}{Technical Lemma}
+                \begin{displaymath}
+                    L^* \leq 2 V(OPT) + V(\{i^*\})
+                \end{displaymath}
+            \end{block}
+    \end{itemize}
+   \pause
+    Proved using \emph{pipage rounding} [Ageev and Sviridenko 2004]
+\end{frame}
 
+\begin{comment}
     \vspace{0.5cm}
     \pause
 
@@ -518,7 +717,6 @@ Subjects \alert{do not lie} about $y_i$ (tamper-proof experiments).}
         \end{column}
     \end{columns}
 \end{frame}
-
 \begin{frame}{Sketch of proof (2)}
     \begin{displaymath}
         L^* = \arg\max_{\lambda\in [0,1]^n} \left\{\log\det\left(I_d + \sum_i \lambda_i x_i x_i^T\right)\mid \sum_{i=1}^n\lambda_i c_i\leq B\right\}
@@ -537,8 +735,9 @@ Subjects \alert{do not lie} about $y_i$ (tamper-proof experiments).}
             \end{block}
     \end{itemize}
 \end{frame}
+\end{comment}
 
-\section{Generalization}
+\section{Extensions \& Generalization}
 
 \begin{frame}{Outline}
     \tableofcontents[currentsection]