diff options
Diffstat (limited to 'poster_wine_2013/main.tex')
| -rw-r--r-- | poster_wine_2013/main.tex | 208 |
1 files changed, 208 insertions, 0 deletions
diff --git a/poster_wine_2013/main.tex b/poster_wine_2013/main.tex new file mode 100644 index 0000000..2bf9a9e --- /dev/null +++ b/poster_wine_2013/main.tex @@ -0,0 +1,208 @@ +\documentclass[portrait]{beamer} +\usepackage[utf8]{inputenc} +\usepackage[orientation=portrait,size=custom,width=42,height=48,scale=1.5]{beamerposter} +\usetheme{confposter} +\usepackage{times} + +\newlength{\sepwid} +\newlength{\onecolwid} +\newlength{\twocolwid} +\newlength{\threecolwid} +\setlength{\paperwidth}{42in} +\setlength{\paperheight}{48in} +\setlength{\sepwid}{0.01\paperwidth} +\setlength{\onecolwid}{0.47\paperwidth} +\setlength{\twocolwid}{0.96\paperwidth} +\setlength{\fboxrule}{2pt} +\setlength{\fboxsep}{10pt} +\usepackage{graphicx} +\usepackage{amsmath} + +\title{Budget Feasible Mechanisms for Experimental Design} +\author{Thibaut Horel$^*$ \and Stratis Ioannidis$^\dag$ \and Muthu Muthukrishnan$^\ddag$} +\institute{$^*$Harvard University, $^\dag$Technicolor, $^\ddag$Rutgers University} + +\begin{document} + +\setlength{\belowcaptionskip}{2ex} % White space under figures +\setlength\belowdisplayshortskip{2ex} % White space under equations + +\begin{frame}[t] +\begin{columns}[t] +\begin{column}{\sepwid}\end{column} +\begin{column}{\sepwid}\end{column} + +\begin{column}{\onecolwid} + +\begin{block}{Setting} + \begin{center} + \includegraphics[scale=1.8]{st.pdf} +\end{center} +$N$ users, each of them has: +\begin{itemize} + \item \textbf{public} features $x_i$ (\emph{e.g.} age, gender, height, etc.) 
+ \item \textbf{private} data $y_i$ (\emph{e.g.} survey answer, medical test outcome) + \item cost $c_i$ to reveal their data +\end{itemize} + +Experimenter \textsf{E}: +\begin{itemize} + \item pays users to reveal their private data + \item has a budget constraint $B$ +\end{itemize} +\end{block} +\begin{block}{Ridge Regression} + \begin{enumerate} + \item Experimenter \textsf{E} assumes a linear model: +\begin{displaymath} + y_i = \beta^{T}x_i + \varepsilon_i,\quad\varepsilon_i\sim\mathcal{N}(0,\sigma^2) +\end{displaymath} +and a normal prior: $\beta\sim\mathcal{N}(0,\sigma^2I_d)$. +\item Experimenter \textsf{E} selects a set $S$ of users within his budget constraint, + and learns $\beta$ through MAP estimation. Under a linear model, this leads + to ridge regression: +\begin{displaymath} + \hat{\beta} = \arg\min_\beta\Bigg[ \sum_{i\in S}(y_i-\beta^Tx_i)^2 + \|\beta\|^2\Bigg] +\end{displaymath} +\end{enumerate} +\end{block} + +\begin{block}{Value of Data} + \textbf{Goal:} minimize the variance-covariance matrix of the + estimator: + \begin{displaymath} + \mathrm{Var}\, \hat{\beta} = \Big(I_d+\sum_{i\in S}x_ix_i^T\Big)^{-1} + \end{displaymath} + Minimizing the volume ($\det$) of the variance is equivalent to maximizing the information gain (entropy reduction) on $\beta$: + \begin{displaymath} + V(S) = \log\det\Big(I_d+\sum_{i\in S} x_ix_i^T\Big) + \end{displaymath} +Marginal value of an experiment: +\begin{displaymath} + V(S\cup\{i\})-V(S) = \log(1+x_i^TA_S^{-1}x_i ), \;\text{where } A_S = I_d+\sum_{i\in S}x_ix_i^T +\end{displaymath} +\begin{itemize} + \item the value function is increasing: experiments always help + \item the value function is submodular: $V(S\cup\{i\})-V(S)\leq V(T\cup\{i\})-V(T)$ if $T\subseteq S$ +\end{itemize} +\end{block} + +\begin{block}{Experimental Design} +The experimenter selects a set of users $S$, solution to: +\begin{displaymath} + \begin{split} + &\text{Maximize } V(S) = \log\det\Big(I_d+\sum_{i\in S} x_ix_i^T\Big)\\ + &\text{Subject to } 
\sum_{i\in S} c_i\leq B +\end{split} +\end{displaymath} +This is known as $D$-optimal experimental design: +\begin{itemize} + \item NP-hard problem (in dimension 1, equivalent to \textsc{Knapsack}) + \item specific case of budgeted submodular maximization + \item there exists a poly-time $\frac{e}{e-1}$-approximate algorithm +\end{itemize} + + +\end{block} +\end{column} + +\begin{column}{\sepwid}\end{column} +\vrule{} +\begin{column}{\sepwid}\end{column} + + \begin{column}{\onecolwid} + \begin{block}{Experimental Design with Strategic Users} + \begin{itemize} + \item users might lie about the reported costs $c_i$ + \item payments and selection rule must be adapted accordingly + \end{itemize} + Specific case of \textbf{budget feasible mechanism design}; the payments and selection rule must satisfy: + \begin{itemize} + \item individual rationality and truthfulness + \item budget feasibility + \end{itemize} + Can we still obtain a constant approximation ratio in polynomial time under these additional constraints? 
+ \end{block} +\vspace{1em} + \begin{block}{Mechanism blueprint for Experimental Design} + \fbox{\parbox{0.95\textwidth}{ + \begin{itemize} + \item Compute $i^* = \arg\max_i V(\{i\})$ + \item Compute $L^*$ (see below) + \item Compute $S_G$ greedily: + \begin{itemize} + \item repeatedly add element with \emph{highest + marginal-value-per-cost}: + \begin{align*} + i = \arg\max_{j\in[N]\setminus + S}\frac{V(S\cup\{j\}) - V(S)}{c_j}\label{greedy} + \end{align*} + \item stop when the budget is exhausted + \end{itemize} + \item Return: + \begin{itemize} + \item $i^*$ if $V(\{i^*\})>L^*$ + \item $S_G$ otherwise + \end{itemize} + \end{itemize} + }} + + \vspace{1em} + + $L^*$ must be: + \begin{itemize} + \item close to $OPT:=\max_{S\subseteq [N]\setminus \{i^*\}}\big\{V(S);\sum_{i\in S}c_i\leq B\big\}$, to obtain a good approximation + \item monotone in $c$ for truthfulness + \item computable in poly-time + \end{itemize} + \end{block} + + \begin{block}{Convex Relaxation for Experimental Design} + We introduce the following concave function: + \begin{displaymath} + L(\lambda) = \log\det\Big(I_d + + \sum_{1\leq i\leq N} \lambda_i x_ix_i^T\Big). + \end{displaymath} + Then using $L^* = \max_\lambda \big\{L(\lambda); \sum_{1\leq i\leq N} \lambda_ic_i\leq B,\; \lambda_{i^*} = 0,\;0\leq\lambda_i\leq 1\big\}$: + + \vspace{1em} + + \fbox{\parbox{0.95\textwidth}{ + \textbf{Theorem:} \emph{The above mechanism is truthful, individually rational, budget-feasible, runs in polynomial time, and its approximation \mbox{ratio} is $\simeq 12.984$. Furthermore there is a lower bound of $2$ on the approximation ratio.}}} + + \vspace{1em} + + Technical challenges: + \begin{itemize} + \item $L^*$ is close to $OPT$. Let $\lambda^*$ be such that $L(\lambda^*) := L^*$, then: + \begin{displaymath} + OPT\leq L(\lambda^*)\leq 2 F(\lambda^*)\leq 2 OPT + 2 V(\{i^*\}) + \end{displaymath} + where $F$ is the multi-linear extension of $V$. This is known as \textbf{pipage rounding}. 
+ \item $L^*$ can be approximated monotonically in $c$: add the constraint $\lambda_i\geq\alpha$ and solve the convex optimization with $\alpha$ \emph{small enough}. + \end{itemize} + \end{block} + \begin{block}{Generalization} + \begin{itemize} + \item The experimenter assumes a generative model of the form: + \begin{displaymath} + y_i = f(x_i) + \varepsilon_i + \end{displaymath} + and wishes to learn the unknown $f$. +\item Prior knowledge: $f$ is a random variable; the entropy $H(f)$ captures the + experimenter's uncertainty. +\item Value function: information gain (entropy reduction) induced by the observed data of the users in $S$: + \begin{displaymath} + V(S) = H(f) - H(f\,|\,S) + \end{displaymath} + \end{itemize} + $V$ is again submodular and the previous framework applies. + \end{block} + \end{column} +\begin{column}{\sepwid}\end{column} +\begin{column}{\sepwid}\end{column} +\end{columns} + +\end{frame} + +\end{document} |
