diff options
Diffstat (limited to 'poster_wine_2013/main.tex')
| -rw-r--r-- | poster_wine_2013/main.tex | 208 |
1 files changed, 208 insertions, 0 deletions
diff --git a/poster_wine_2013/main.tex b/poster_wine_2013/main.tex new file mode 100644 index 0000000..2bf9a9e --- /dev/null +++ b/poster_wine_2013/main.tex @@ -0,0 +1,208 @@ +\documentclass[portrait]{beamer} +\usepackage[utf8]{inputenc} +\usepackage[orientation=portrait,size=custom,width=42,height=48,scale=1.5]{beamerposter} +\usetheme{confposter} +\usepackage{times} + +\newlength{\sepwid} +\newlength{\onecolwid} +\newlength{\twocolwid} +\newlength{\threecolwid} +\setlength{\paperwidth}{42in} +\setlength{\paperheight}{48in} +\setlength{\sepwid}{0.01\paperwidth} +\setlength{\onecolwid}{0.47\paperwidth} +\setlength{\twocolwid}{0.96\paperwidth} +\setlength{\fboxrule}{2pt} +\setlength{\fboxsep}{10pt} +\usepackage{graphicx} +\usepackage{amsmath} + +\title{Budget Feasible Mechanisms for Experimental Design} +\author{Thibaut Horel$^*$ \and Stratis Ioannidis$^\dag$ \and Muthu Muthukrishnan$^\ddag$} +\institute{$^*$Harvard University, $^\dag$Technicolor, $^\ddag$Rutgers University} + +\begin{document} + +\setlength{\belowcaptionskip}{2ex} % White space under figures +\setlength\belowdisplayshortskip{2ex} % White space under equations + +\begin{frame}[t] +\begin{columns}[t] +\begin{column}{\sepwid}\end{column} +\begin{column}{\sepwid}\end{column} + +\begin{column}{\onecolwid} + +\begin{block}{Setting} + \begin{center} + \includegraphics[scale=1.8]{st.pdf} +\end{center} +$N$ users, each of them has: +\begin{itemize} + \item \textbf{public} features $x_i$ (\emph{e.g.} age, gender, height, etc.) 
+ \item \textbf{private} data $y_i$ (\emph{e.g.} survey answer, medical test outcome) + \item cost $c_i$ to reveal their data +\end{itemize} + +Experimenter \textsf{E}: +\begin{itemize} + \item pays users to reveal their private data + \item has a budget constraint $B$ +\end{itemize} +\end{block} +\begin{block}{Ridge Regression} + \begin{enumerate} + \item Experimenter \textsf{E} assumes a linear model: +\begin{displaymath} + y_i = \beta^{T}x_i + \varepsilon_i,\quad\varepsilon_i\sim\mathcal{N}(0,\sigma^2) +\end{displaymath} +and a normal prior: $\beta\sim\mathcal{N}(0,\sigma^2I_d)$. +\item Experimenter \textsf{E} selects a set $S$ of users within his budget constraint, + and learns $\beta$ through MAP estimation. Under a linear model, this leads + to ridge regression: +\begin{displaymath} + \hat{\beta} = \arg\min_\beta\Bigg[ \sum_{i\in S}(y_i-\beta^Tx_i)^2 + \|\beta\|^2\Bigg] +\end{displaymath} +\end{enumerate} +\end{block} + +\begin{block}{Value of Data} + \textbf{Goal:} minimize the variance-covariance matrix of the + estimator: + \begin{displaymath} + \mathrm{Var}\, \hat{\beta} = \Big(I_d+\sum_{i\in S}x_ix_i^T\Big)^{-1} + \end{displaymath} + Minimizing the volume ($\det$) of the variance is equivalent to maximizing the information gain (entropy reduction) on $\beta$: + \begin{displaymath} + V(S) = \log\det\Big(I_d+\sum_{i\in S} x_ix_i^T\Big) + \end{displaymath} +Marginal value of an experiment: +\begin{displaymath} + V(S\cup\{i\})-V(S) = \log(1+x_i^TA_S^{-1}x_i ), \;\text{where } A_S = I_d+\sum_{i\in S}x_ix_i^T +\end{displaymath} +\begin{itemize} + \item the value function is increasing: experiments always help + \item the value function is submodular: $V(S\cup\{i\})-V(S)\leq V(T\cup\{i\})-V(T)$ if $T\subseteq S$ +\end{itemize} +\end{block} + +\begin{block}{Experimental Design} +The experimenter selects a set of users $S$, solution to: +\begin{displaymath} + \begin{split} + &\text{Maximize } V(S) = \log\det\Big(I_d+\sum_{i\in S} x_ix_i^T\Big)\\ + &\text{Subject to } 
\sum_{i\in S} c_i\leq B +\end{split} +\end{displaymath} +This is known as $D$-optimal experimental design: +\begin{itemize} + \item NP-hard problem (in dimension 1, equivalent to \textsc{Knapsack}) + \item specific case of budgeted submodular maximization + \item there exists a poly-time $\frac{e}{e-1}$-approximate algorithm +\end{itemize} + + +\end{block} +\end{column} + +\begin{column}{\sepwid}\end{column} +\vrule{} +\begin{column}{\sepwid}\end{column} + + \begin{column}{\onecolwid} + \begin{block}{Experimental Design with Strategic Users} + \begin{itemize} + \item users might lie about the reported costs $c_i$ + \item payments and selection rule must be adapted accordingly + \end{itemize} + Specific case of \textbf{budget feasible mechanism design}; the payments and selection rule must satisfy: + \begin{itemize} + \item individual rationality and truthfulness + \item budget feasibility + \end{itemize} + Can we still obtain a constant approximation ratio in polynomial time under these additional constraints? 
+ \end{block} +\vspace{1em} + \begin{block}{Mechanism blueprint for Experimental Design} + \fbox{\parbox{0.95\textwidth}{ + \begin{itemize} + \item Compute $i^* = \arg\max_i V(\{i\})$ + \item Compute $L^*$ (see below) + \item Compute $S_G$ greedily: + \begin{itemize} + \item repeatedly add element with \emph{highest + marginal-value-per-cost}: + \begin{align*} + i = \arg\max_{j\in[N]\setminus + S}\frac{V(S\cup\{j\}) - V(S)}{c_j}\label{greedy} + \end{align*} + \item stop when the budget is exhausted + \end{itemize} + \item Return: + \begin{itemize} + \item $i^*$ if $V(\{i^*\})>L^*$ + \item $S_G$ otherwise + \end{itemize} + \end{itemize} + }} + + \vspace{1em} + + $L^*$ must be: + \begin{itemize} + \item close to $OPT:=\max_{S\subseteq [N]\setminus \{i^*\}}\big\{V(S);\sum_{i\in S}c_i\leq B\big\}$, to obtain a good approximation + \item monotone in $c$ for truthfulness + \item computable in poly-time + \end{itemize} + \end{block} + + \begin{block}{Convex Relaxation for Experimental Design} + We introduce the following concave function: + \begin{displaymath} + L(\lambda) = \log\det\Big(I_d + + \sum_{1\leq i\leq N} \lambda_i x_ix_i^T\Big). + \end{displaymath} + Then using $L^* = \max_\lambda \big\{L(\lambda); \sum_{1\leq i\leq N} \lambda_ic_i\leq B,\; \lambda_{i^*} = 0,\;0\leq\lambda_i\leq 1\big\}$: + + \vspace{1em} + + \fbox{\parbox{0.95\textwidth}{ + \textbf{Theorem:} \emph{The above mechanism is truthful, individually rational, budget-feasible, runs in polynomial time, and its approximation \mbox{ratio} is $\simeq 12.984$. Furthermore there is a lower bound of $2$ on the approximation ratio.}}} + + \vspace{1em} + + Technical challenges: + \begin{itemize} + \item $L^*$ is close to $OPT$. Let $\lambda^*$ be such that $L(\lambda^*) := L^*$, then: + \begin{displaymath} + OPT\leq L(\lambda^*)\leq 2 F(\lambda^*)\leq 2 OPT + 2 V(\{i^*\}) + \end{displaymath} + where $F$ is the multi-linear extension of $V$. This is known as \textbf{pipage rounding}. 
+ \item $L^*$ can be approximated monotonically in $c$: add the constraint $\lambda_i\geq\alpha$ and solve the convex optimization with $\alpha$ \emph{small enough}. + \end{itemize} + \end{block} + \begin{block}{Generalization} + \begin{itemize} + \item The experimenter assumes a generative model of the form: + \begin{displaymath} + y_i = f(x_i) + \varepsilon_i + \end{displaymath} + and wishes to learn the unknown $f$. +\item Prior knowledge: $f$ is a random variable; the entropy $H(f)$ captures the + experimenter's uncertainty. +\item Value function: information gain (entropy reduction) induced by the observed data of the users in $S$: + \begin{displaymath} + V(S) = H(f) - H(f\,|\,S) + \end{displaymath} + \end{itemize} + $V$ is again submodular and the previous framework applies. + \end{block} + \end{column} +\begin{column}{\sepwid}\end{column} +\begin{column}{\sepwid}\end{column} +\end{columns} + +\end{frame} + +\end{document} |
