\documentclass{beamer}
\usepackage[utf8x]{inputenc}
\usepackage[greek,english]{babel}
\usepackage[LGR,T1]{fontenc}
\usepackage{amsmath}
\usepackage{algpseudocode,algorithm}
\usepackage{graphicx}
\DeclareMathOperator*{\argmax}{arg\,max}
\usetheme{Boadilla}
\title[]{Budget Feasible Mechanisms for Experimental Design}
\author[Thibaut Horel]{Thibaut Horel\\ Joint work with Stratis Ioannidis and S. Muthukrishnan}
\institute[]{}
\setbeamercovered{transparent}
\setbeamertemplate{navigation symbols}{}
%\AtBeginSection[]
%{
%\begin{frame}<beamer>
%\frametitle{Outline}
%\tableofcontents[currentsection]
%\end{frame}
%}

\begin{document}

\begin{frame}
\maketitle
\end{frame}

\section{Introduction}

\begin{frame}{Motivation}
    \begin{center}
        \includegraphics<1>[scale=0.5]{1.pdf}
        \includegraphics<2>[scale=0.5]{2.pdf}
        \includegraphics<3>[scale=0.5]{3.pdf}
        \includegraphics<4>[scale=0.5]{4.pdf}
    \end{center}
\end{frame}
\begin{frame}{Challenges}
    \pause
    \begin{itemize}
        \item Value of data?
            \vspace{1cm}
            \pause
        \item How to optimize it?
            \vspace{1cm}
            \pause
        \item Strategic users?
    \end{itemize}
\end{frame}

\begin{frame}{Contributions}
    \pause
    \begin{itemize}
        \item case of the linear regression
            \vspace{1cm}
            \pause
        \item deterministic mechanism
            \vspace{1cm}
            \pause
        \item generalization (randomized mechanism)
    \end{itemize}
\end{frame}

\section{Budget feasible mechanism design}

\begin{frame}{Outline}
\tableofcontents
\end{frame}

\begin{frame}{Outline}
    \tableofcontents[currentsection]
\end{frame}

\begin{frame}{Reverse auction}
\begin{itemize}
    \item set of $N$ sellers: $\mathcal{A} = \{1,\ldots,N\}$; a buyer
        \vspace{0.3cm}
        \pause
    \item $V$ value function of the buyer, $V:2^\mathcal{A}\rightarrow \mathbb{R}^+$
        \vspace{0.3cm}
        \pause
    \item $c_i\in\mathbb{R}^+$ price of seller's $i$ good
        \vspace{0.3cm}
        \pause
    \item $B$ budget constraint of the buyer
\end{itemize}

\vspace{0.5cm}
\pause

\begin{block}{Goal}
    \begin{itemize}
        \item Find $S\subset \mathcal{A}$ \alert{maximizing} $V(S)$
            \vspace{0.3cm}
        \item Find \alert{payment} $p_i$ to seller $i\in S$
    \end{itemize}
\end{block}
\end{frame}

\begin{frame}{Objectives}
    Payments $(p_i)_{i\in S}$ must be: 
    \pause
    \vspace{0.3cm}
    \begin{itemize}
        \item individually rational: $p_i\geq c_i,\;i\in S$
            \vspace{0.2cm}
            \pause
        \item truthful: reporting one's true cost is a dominant strategy
            \vspace{0.2cm}
            \pause
        \item budget feasible: $\sum_{i\in S} p_i \leq B$
            \vspace{0.2cm}
    \end{itemize}

    \pause
    \vspace{0.3cm}

    Mechanism must be:
    \vspace{0.3cm}
    \pause
    \begin{itemize}
        \item computationally efficient: polynomial time
            \pause
            \vspace{0.2cm}
        \item good approximation: $V(OPT) \leq \alpha V(S)$  with:
            \begin{displaymath}
                OPT = \arg\max_{S\subset \mathcal{A}} \left\{V(S)\mid \sum_{i\in S}c_i\leq B\right\}
            \end{displaymath}
    \end{itemize}
\end{frame}

\begin{frame}{Known results}
    When $V$ is submodular:
    \vspace{1cm}
    \pause
    \begin{itemize}
        \item \alert{randomized} budget feasible mechanism,
            approximation ratio: $7.91$ (Chen et al., 2011)
        \vspace{1cm}
        \pause
        \item \alert{deterministic} mechanisms for:
            \vspace{0.3cm}
            \begin{itemize}
                \item Knapsack: $2+\sqrt{2}$ (Chen et al., 2011)
                    \vspace{0.3cm}
                \item Matching: 7.37 (Singer, 2010)
                    \vspace{0.3cm}
                \item Coverage: 31 (Singer, 2012)
                    \vspace{0.3cm}
            \end{itemize}
    \end{itemize}
\end{frame}

\section{Budget feasible mechanism for linear regression}

\begin{frame}{Outline}
    \tableofcontents[currentsection]
\end{frame}

\begin{frame}{Linear Regression}
    \begin{center}
        \includegraphics<1-4>[scale=0.5]{5.pdf}
        \includegraphics<5>[scale=0.5]{6.pdf}
        \includegraphics<6>[scale=0.5]{7.pdf}
        \includegraphics<7>[scale=0.5]{8.pdf}
    \end{center}

    \begin{center}
        \begin{overprint}
        \onslide<1>\begin{center}$N$ users\end{center}
            \onslide<2>
            \begin{center}
                $x_i$: public features (e.g. age, gender, height, etc.)
            \end{center}
            \onslide<3>
            \begin{center}
                $y_i$: private data (e.g. disease, etc.)
            \end{center}
            \onslide<4>
            \begin{center}
                Gaussian Linear model: $y_i = \beta^Tx_i + \varepsilon_i$
            \end{center}
            \begin{displaymath}
                \beta^* = \arg\min_\beta \sum_i |y_i-\beta^Tx_i|^2
            \end{displaymath}
        \end{overprint}
    \end{center}
\end{frame}

\begin{frame}{Experimental design}
    \begin{itemize}
        \item Public vector of features $x_i\in\mathbb{R}^d$
        \item Private data $y_i\in\mathbb{R}$
    \end{itemize}

    \vspace{0.5cm}

    Gaussian linear model:
    \begin{displaymath}
        y_i = \beta^T x_i + \varepsilon_i,\quad\beta\in\mathbb{R}^d,\;
        \varepsilon_i\sim\mathcal{N}(0,\sigma^2)
    \end{displaymath}

    \pause 
    \vspace{0.5cm} 

    Which users to select?\pause{} Experimental design $\Rightarrow$ D-optimal criterion

    \vspace{0.5cm}

    \begin{block}{Experimental Design}
    \begin{displaymath}
        \textsf{\alert{maximize}}\quad V(S) = \log\det\left(I_d + \sum_{i\in S} x_i x_i^T\right)\quad \textsf{\alert{subject to}}\quad |S|\leq k
    \end{displaymath}
    \end{block}
\end{frame}

\begin{frame}{Budgeted Experimental design}
    \begin{displaymath}
        \textsf{\alert{maximize}}\quad V(S) = \log\det\left(I_d + \sum_{i\in S} x_i x_i^T\right)\quad \textsf{\alert{subject to}}\quad \sum_{i\in S}c_i\leq B
    \end{displaymath}

    \vspace{1cm}

    \begin{itemize}
        \item the non-strategic optimization problem is NP-hard
            \vspace{0.3cm}
            \pause
        \item $V$ is submodular
            \vspace{0.3cm}
            \pause
        \item previous results give a randomized budget feasible mechanism
            \vspace{0.3cm}
            \pause
        \item deterministic mechanism?
    \end{itemize}
\end{frame}

\begin{frame}{Main result}
    \begin{theorem}
        There exists a budget feasible, individually rational and truthful mechanism for
        budgeted experimental design which runs in polynomial time. Its
        approximation ratio is:
        \begin{displaymath}
        \frac{10e-3 + \sqrt{64e^2-24e + 9}}{2(e-1)}
        \simeq 12.98 
        \end{displaymath}
    \end{theorem}
\end{frame}

\begin{frame}{Sketch of proof}
    \begin{block}{Mechanism (Chen et. al, 2011) for submodular $V$}
        \begin{itemize}
        \item Find $i^* = \arg\max_i V(\{i\})$
        \item Compute $S_G$ greedily
        \item Return:
            \begin{itemize}
                \item $\{i^*\}$ if $V(\{i^*\}) \geq \only<1-2>{V(OPT_{-i^*})}\only<3>{\alert{V(OPT_{-i^*})}}\only<4->{\alert{L^*}}$
                \item $S_G$ otherwise
            \end{itemize}
        \end{itemize}
    \end{block}

    \vspace{0.5cm}
    \pause

    Valid mechanism, approximation ratio: 8.34

    \vspace{0.5cm}
    \pause

    \alert{Problem:} $OPT_{-i^*}$ is NP-hard to compute

    \vspace{0.5cm}
    \pause

    \begin{columns}[c]
        \begin{column}{0.53\textwidth}
            \alert{Solution:} Replace $V(OPT_{-i^*})$ with \alert<7>{$L^*$}:
            \pause
            \begin{itemize}
                \item computable in polynomial time
                    \pause
                \item close to $V(OPT_{-i^*})$
            \end{itemize}
        \end{column}
        \pause
        \begin{column}{0.45\textwidth}
            \begin{itemize}
                \item Knapsack (Chen et al., 2011)
                \item Coverage (Singer, 2012)
            \end{itemize}
        \end{column}
    \end{columns}
\end{frame}

\begin{frame}{Sketch of proof (2)}
    \begin{displaymath}
        L^* = \arg\max_{\lambda\in [0,1]^n} \left\{\log\det\left(I_d + \sum_i \lambda_i x_i x_i^T\right)\mid \sum_{i=1}^n\lambda_i c_i\leq B\right\}
    \end{displaymath}
    \vspace{1cm}
    \begin{itemize}
        \item polynomial time?\pause{} convex optimization problem
            \pause
            \vspace{0.5cm}
        \item close to $V(OPT_{-i^*})$?\pause
            \vspace{0.5cm}
            \begin{block}{Technical lemma}
                \begin{displaymath}
                    L^* \leq 2 V(OPT) + V(\{i^*\})
                \end{displaymath}
            \end{block}
    \end{itemize}
\end{frame}

\section{Generalization}

\begin{frame}{Outline}
    \tableofcontents[currentsection]
\end{frame}

\begin{frame}{Generalization}
    \begin{itemize}
        \item Generative model: $y_i = f(x_i) + \varepsilon_i,\;i\in\mathcal{A}$
        \pause
        \item prior knowledge of the experimenter: $f$ is a \alert{random variable}
        \pause
        \item \alert{uncertainty} of the experimenter: entropy $H(f)$
        \pause
        \item after observing $\{y_i,\; i\in S\}$, uncertainty: $H(f\mid S)$
    \end{itemize}

    \pause
    \vspace{0.5cm}

    \begin{block}{Value function: Information gain}
        \begin{displaymath}
            V(S) = H(f) - H(f\mid S),\quad S\subset\mathcal{A}
        \end{displaymath}
    \end{block}

    \pause
    \vspace{0.5cm}

    $V$ is \alert{submodular} $\Rightarrow$ randomized budget feasible mechanism
\end{frame}


\begin{frame}{Conclusion}
    \begin{itemize}
        \item Experimental design + Auction theory = powerful framework
        \vspace{1cm}
        \pause
        \item deterministic mechanism for the general case? other learning tasks?
            \vspace{1cm}
            \pause
        \item  approximation ratio $\simeq \alert{13}$. Lower bound: \alert{$2$}
    \end{itemize}
\end{frame}
\end{document}