\documentclass[portrait]{beamer}
\usepackage[utf8]{inputenc}
\usepackage[orientation=portrait,size=custom,width=55,height=71,scale=1]{beamerposter}
\usetheme{confposter}
\usepackage{times}

\newlength{\sepwid}
\newlength{\onecolwid}
\newlength{\twocolwid}
\newlength{\threecolwid}
\setlength{\paperwidth}{22in}
\setlength{\paperheight}{28in}
\setlength{\sepwid}{0.01\paperwidth}
\setlength{\onecolwid}{0.47\paperwidth}
\setlength{\twocolwid}{0.96\paperwidth}
\setlength{\fboxrule}{2pt}
\setlength{\fboxsep}{10pt}

\usepackage{graphicx}
\usepackage{amsmath}

\title{Budget Feasible Mechanisms for Experimental Design}
\author{Thibaut Horel$^*$ \and Stratis Ioannidis$^\dag$ \and Muthu Muthukrishnan$^\ddag$}
\institute{$^*$École Normale Supérieure, $^\dag$Technicolor, $^\ddag$Rutgers University}

\begin{document}

\setlength{\belowcaptionskip}{2ex} % White space under figures
\setlength{\belowdisplayshortskip}{2ex} % White space under equations

\begin{frame}[t]
\begin{columns}[t]

\begin{column}{\sepwid}\end{column}
\begin{column}{\sepwid}\end{column}

\begin{column}{\onecolwid}

\begin{block}{Setting}
\begin{center}
\includegraphics[scale=1]{st.pdf}
\end{center}
$N$ users, each of whom has:
\begin{itemize}
\item \textbf{public} features $x_i$ (\emph{e.g.}, age, gender, height)
\item \textbf{private} data $y_i$ (\emph{e.g.}, survey answer, medical test outcome)
\item a cost $c_i$ to reveal their data
\end{itemize}
Experimenter $E$:
\begin{itemize}
\item pays users to reveal their private data
\item has a budget $B$
\end{itemize}
\end{block}

\begin{block}{Ridge Regression}
\begin{enumerate}
\item Experimenter $E$ assumes a linear model:
\begin{displaymath}
y_i = \beta^{T}x_i + \varepsilon_i,\quad\varepsilon_i\sim\mathcal{N}(0,\sigma^2)
\end{displaymath}
and a normal prior $\beta\sim\mathcal{N}(0,\sigma^2I_d)$.
\item Experimenter $E$ selects a set $S$ of users within the budget, and learns $\beta$ through ridge regression:
\begin{displaymath}
\hat{\beta} = \arg\,\min_\beta\Big[ \sum_{i\in S}(y_i-\beta^Tx_i)^2 + \|\beta\|^2\Big]
\end{displaymath}
\end{enumerate}
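This estimator is the posterior mean: conditioned on the data of the selected users, a standard Bayesian computation gives
\begin{displaymath}
\beta \mid (y_i)_{i\in S} \sim \mathcal{N}\Big(\hat{\beta},\; \sigma^2\Big(I_d+\sum_{i\in S}x_ix_i^T\Big)^{-1}\Big),
\end{displaymath}
so, up to the constant factor $\sigma^2$, the posterior covariance is the variance-covariance matrix minimized in the next block.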
\end{block}

\begin{block}{Value of Data}
\textbf{Goal:} minimize the variance-covariance matrix of the estimator:
\begin{displaymath}
\mathrm{Var}\, \hat{\beta} = \Big(I_d+\sum_{i\in S}x_ix_i^T\Big)^{-1}
\end{displaymath}
Minimizing the volume ($\det$) of the variance is equivalent to maximizing the information gain (entropy reduction) on $\beta$:
\begin{displaymath}
V(S) = \log\det\Big(I_d+\sum_{i\in S} x_ix_i^T\Big)
\end{displaymath}
\end{block}

\begin{block}{Experimental Design}
The experimenter selects a set of users $S$ solving:
\begin{displaymath}
\begin{split}
&\text{Maximize } V(S) = \log\det\Big(I_d+\sum_{i\in S} x_ix_i^T\Big)\\
&\text{Subject to } \sum_{i\in S} c_i\leq B
\end{split}
\end{displaymath}
\begin{itemize}
\item a special case of budgeted submodular maximization
\item there exists a poly-time $\frac{e}{e-1}$-approximation algorithm
\end{itemize}
\end{block}

\end{column}

\begin{column}{\sepwid}\end{column}
\vrule{}
\begin{column}{\sepwid}\end{column}

\begin{column}{\onecolwid}

\begin{block}{Experimental Design with Strategic Users}
\begin{itemize}
\item users may misreport their costs $c_i$
\item payments and the selection rule must be adapted accordingly
\end{itemize}
This is a special case of \textbf{budget feasible mechanism design}: the payments and selection rule must satisfy
\begin{itemize}
\item individual rationality and truthfulness
\item budget feasibility
\end{itemize}
Can we still obtain a constant approximation ratio in polynomial time?
\end{block}

\begin{block}{Mechanism Blueprint for Experimental Design}
\fbox{\parbox{0.95\textwidth}{
\begin{itemize}
\item Compute $i^* = \arg\max_i V(\{i\})$
\item Compute $L^*$ (see below)
\item Compute $S_G$ greedily:
\begin{itemize}
\item repeatedly add the element with the \emph{highest marginal-value-per-cost} (illustrated below for $d=1$):
\begin{displaymath}
i = \arg\max_{j\in[N]\setminus S}\frac{V(S\cup\{j\}) - V(S)}{c_j}
\end{displaymath}
\item stop when the budget is exhausted
\end{itemize}
\item Return:
\begin{itemize}
\item $i^*$ if $V(\{i^*\})>L^*$
\item $S_G$ otherwise
\end{itemize}
\end{itemize}
}}
\vspace{0.3cm}
$L^*$ must be:
\begin{itemize}
\item close to $OPT:=\max_{S\subseteq [N]\setminus \{i^*\}} \{V(S);\ \sum_{i\in S}c_i\leq B\}$
\item monotone in $c$; computable in poly-time
\end{itemize}
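\vspace{0.3cm}
For intuition, in the one-dimensional case ($d=1$) we have $V(S)=\log\big(1+\sum_{i\in S}x_i^2\big)$, so the marginal gain driving the greedy rule is
\begin{displaymath}
V(S\cup\{j\}) - V(S) = \log\frac{1+\sum_{i\in S}x_i^2 + x_j^2}{1+\sum_{i\in S}x_i^2},
\end{displaymath}
which shrinks as $S$ grows; this diminishing-returns property is the submodularity of $V$.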
\end{block}

\begin{block}{Convex Relaxation for Experimental Design}
We introduce the following concave function (a relaxation of $V$):
\begin{displaymath}
L(\lambda) = \log\det\Big(I_d + \sum_{1\leq i\leq N} \lambda_i x_ix_i^T\Big), \qquad \lambda\in[0,1]^N.
\end{displaymath}
Then, using $L^* = \max_\lambda \big\{L(\lambda);\ \sum_{1\leq i\leq N} \lambda_i c_i\leq B,\ \lambda_{i^*} = 0\big\}$:
\vspace{0.3cm}
\fbox{\parbox{0.95\textwidth}{
\textbf{Theorem:} \emph{The above mechanism is truthful, individually rational, budget-feasible, runs in polynomial time, and its approximation \mbox{ratio} is $\simeq 12.984$.}}}
\vspace{0.5cm}
Technical challenges:
\begin{itemize}
\item $L^*$ is close to $OPT$: $L$ coincides with $V$ on indicator vectors, so $L^*\geq OPT$; the reverse bound, up to a constant, is shown via pipage rounding
\item $L^*$ can be approximated monotonically in $c$
\end{itemize}
\end{block}

\begin{block}{Generalization}
\begin{itemize}
\item Generative model of the form:
\begin{displaymath}
y_i = f(x_i) + \varepsilon_i
\end{displaymath}
\item Prior knowledge: $f$ is a random variable; the entropy $H(f)$ captures the experimenter's uncertainty.
\item Value function: information gain (entropy reduction) induced by the data observed from the users in $S$:
\begin{displaymath}
V(S) = H(f) - H\big(f \mid (y_i)_{i\in S}\big)
\end{displaymath}
\end{itemize}
$V$ is again submodular and the previous framework applies.
\end{block}

\end{column}

\begin{column}{\sepwid}\end{column}
\begin{column}{\sepwid}\end{column}

\end{columns}
\end{frame}

\end{document}