\documentclass[portrait]{beamer}
\usepackage[utf8]{inputenc}
\usepackage[orientation=portrait,size=custom,width=42,height=48,scale=1.5]{beamerposter}
\usetheme{confposter}
\usepackage{times}

\newlength{\sepwid}
\newlength{\onecolwid}
\newlength{\twocolwid}
\newlength{\threecolwid}
\setlength{\paperwidth}{42in}
\setlength{\paperheight}{48in}
\setlength{\sepwid}{0.01\paperwidth}
\setlength{\onecolwid}{0.47\paperwidth}
\setlength{\twocolwid}{0.96\paperwidth}
\setlength{\fboxrule}{2pt}
\setlength{\fboxsep}{10pt}
\usepackage{graphicx}
\usepackage{amsmath}

\title{Budget Feasible Mechanisms for Experimental Design}
\author{Thibaut Horel$^*$ \and Stratis Ioannidis$^\dag$ \and Muthu Muthukrishnan$^\ddag$}
\institute{$^*$Harvard University, $^\dag$Technicolor, $^\ddag$Rutgers University}

\begin{document}

\setlength{\belowcaptionskip}{2ex} % White space under figures
\setlength\belowdisplayshortskip{2ex} % White space under equations

\begin{frame}[t]
\begin{columns}[t]
\begin{column}{\sepwid}\end{column}
\begin{column}{\sepwid}\end{column}

\begin{column}{\onecolwid}

\begin{block}{Setting}
    \begin{center}
    \includegraphics[scale=1.8]{st.pdf}
\end{center}
$N$ users, each of whom has:
\begin{itemize}
    \item \textbf{public} features $x_i$ (\emph{e.g.}, age, gender, height)
    \item \textbf{private} data $y_i$ (\emph{e.g.}, a survey answer or a medical test outcome)
    \item a cost $c_i$ for revealing their data
\end{itemize}

Experimenter \textsf{E}:
\begin{itemize}
    \item pays users to reveal their private data
    \item has a budget constraint $B$
\end{itemize}
\end{block}
\begin{block}{Ridge Regression}
    \begin{enumerate}
        \item The experimenter \textsf{E} assumes a linear model:
\begin{displaymath}
    y_i = \beta^{T}x_i + \varepsilon_i,\quad\varepsilon_i\sim\mathcal{N}(0,\sigma^2)
\end{displaymath}
and a normal prior: $\beta\sim\mathcal{N}(0,\sigma^2I_d)$.
\item \textsf{E} selects a set $S$ of users within the budget constraint,
    and learns $\beta$ through MAP estimation. Under the linear model, this
    amounts to ridge regression (see the derivation below):
\begin{displaymath}
    \hat{\beta} = \arg\,\min_\beta\Bigg[ \sum_{i\in S}(y_i-\beta^Tx_i)^2 + \|\beta\|^2\Bigg]
\end{displaymath}
\end{enumerate}
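For completeness, a short derivation of the MAP estimate under this model
(Gaussian likelihood and Gaussian prior as above):
\begin{displaymath}
    \hat{\beta} = \arg\,\max_\beta\big[\log p(y_S\,|\,\beta) + \log p(\beta)\big]
    = \arg\,\min_\beta\Bigg[\frac{1}{2\sigma^2}\sum_{i\in S}(y_i-\beta^Tx_i)^2
    + \frac{1}{2\sigma^2}\|\beta\|^2\Bigg],
\end{displaymath}
and the common factor $1/(2\sigma^2)$ can be dropped without changing the minimizer.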
\end{block}

\begin{block}{Value of Data}
    \textbf{Goal:} make the variance-covariance matrix of the
    estimator as small as possible:
    \begin{displaymath}
        \mathrm{Var}\, \hat{\beta} = \sigma^2\Big(I_d+\sum_{i\in S}x_ix_i^T\Big)^{-1}
    \end{displaymath}
    Minimizing the volume ($\det$) of this matrix is equivalent, up to constants,
    to maximizing the information gain (entropy reduction) on $\beta$:
    \begin{displaymath}
        V(S) = \log\det\Big(I_d+\sum_{i\in S} x_ix_i^T\Big)
    \end{displaymath}
Marginal value of an experiment:
\begin{displaymath}
    V(S\cup\{i\})-V(S) = \log(1+x_i^TA_S^{-1}x_i    ), \;\text{where } A_S = I_d+\sum_{i\in S}x_ix_i^T
\end{displaymath}
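This expression follows from the matrix determinant lemma,
$\det(A_S+x_ix_i^T) = (1+x_i^TA_S^{-1}x_i)\det(A_S)$:
taking logarithms and subtracting $\log\det A_S$ gives the marginal value above.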
\begin{itemize}
    \item the value function is increasing: additional experiments never hurt
    \item the value function is submodular (diminishing returns): $V(T\cup\{i\})-V(T)\geq V(S\cup\{i\})-V(S)$ if $T\subseteq S$; this is immediate from the marginal value, since $A_T\preceq A_S$ implies $x_i^TA_S^{-1}x_i\leq x_i^TA_T^{-1}x_i$
\end{itemize}
\end{block}

\begin{block}{Experimental Design}
The experimenter selects a set of users $S$ solving:
\begin{displaymath}
    \begin{split}
    &\text{Maximize } V(S) = \log\det\Big(I_d+\sum_{i\in S} x_ix_i^T\Big)\\
        &\text{Subject to } \sum_{i\in S} c_i\leq B
\end{split}
\end{displaymath}
This is known as $D$-optimal experimental design:
\begin{itemize}
    \item NP-hard problem (already in dimension $d=1$, where it is equivalent to \textsc{Knapsack}; see below)
    \item a special case of budgeted submodular maximization
    \item there exists a poly-time $\frac{e}{e-1}$-approximate algorithm
\end{itemize}
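
To see the \textsc{Knapsack} equivalence, note that in dimension $d=1$
\begin{displaymath}
    V(S) = \log\Big(1+\sum_{i\in S}x_i^2\Big),
\end{displaymath}
so maximizing $V(S)$ under the budget is exactly maximizing the total
value $\sum_{i\in S}x_i^2$ subject to $\sum_{i\in S}c_i\leq B$.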


\end{block}
\end{column}

\begin{column}{\sepwid}\end{column}
\vrule{}
\begin{column}{\sepwid}\end{column}

    \begin{column}{\onecolwid}
        \begin{block}{Experimental Design with Strategic Users}
            \begin{itemize}
                \item users may misreport their costs $c_i$
                \item payments and selection rule must be adapted accordingly
            \end{itemize}
            This is a special case of \textbf{budget feasible mechanism design}; the payment and selection rules must satisfy:
            \begin{itemize}
                \item individual rationality and truthfulness
                \item budget feasibility
            \end{itemize}
            Can we still obtain a constant approximation ratio in polynomial time under these additional constraints?
        \end{block}
\vspace{1em}
        \begin{block}{Mechanism Blueprint for Experimental Design}
            \fbox{\parbox{0.95\textwidth}{
            \begin{itemize}
                \item Compute $i^* = \arg\max_i V(\{i\})$
                \item Compute $L^*$ (see below)
                \item Compute $S_G$ greedily:
                    \begin{itemize}
                        \item repeatedly add the element with the \emph{highest
                            marginal value per cost}:
                            \begin{displaymath}
                                i = \arg\max_{j\in[N]\setminus
                                S}\frac{V(S\cup\{j\}) - V(S)}{c_j}
                            \end{displaymath}
                        \item stop when the budget is exhausted
                    \end{itemize}
                \item Return:
                    \begin{itemize}
                        \item $i^*$ if $V(\{i^*\})>L^*$
                        \item $S_G$ otherwise
                    \end{itemize}
            \end{itemize}
    }}
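
    The comparison with $i^*$ is essential: greedy alone can be arbitrarily
    bad. As an illustration, with two users whose feature vectors are
    orthogonal the value is additive; if user $1$ has value $B$ and cost $B$
    while user $2$ has value $2$ and cost $1$, greedy picks user $2$ first
    (better value per cost) and can then no longer afford user $1$, achieving
    value $2$ against $OPT=B$.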

    \vspace{1em}

            $L^*$ must be:
            \begin{itemize}
                \item close to $OPT:=\max_{S\subseteq [N]\setminus \{i^*\}}\big\{V(S);\sum_{i\in S}c_i\leq B\big\}$, to obtain a good approximation
                \item monotone in $c$, so that the selection rule is monotone and truthfulness can be achieved with threshold payments
                \item computable in poly-time
            \end{itemize}
        \end{block}
        
        \begin{block}{Convex Relaxation for Experimental Design}
            We introduce the following concave function ($\log\det$ is concave
            over positive definite matrices):
            \begin{displaymath}
                L(\lambda) = \log\det\Big(I_d
                + \sum_{1\leq i\leq N} \lambda_i x_ix_i^T\Big).
            \end{displaymath}
            Then using $L^* = \max_\lambda \big\{L(\lambda); \sum_{1\leq i\leq N} \lambda_ic_i\leq B,\; \lambda_{i^*} = 0,\;0\leq\lambda_i\leq 1\big\}$:

            \vspace{1em}

            \fbox{\parbox{0.95\textwidth}{
            \textbf{Theorem:} \emph{The above mechanism is truthful, individually rational, budget-feasible, runs in polynomial time, and its approximation \mbox{ratio} is $\simeq 12.984$. Furthermore, there is a lower bound of $2$ on the approximation ratio.}}}

            \vspace{1em}

            Technical challenges:
            \begin{itemize}
                \item $L^*$ is close to $OPT$. Let $\lambda^*$ be the maximizer, so that $L(\lambda^*) = L^*$; then:
                    \begin{displaymath}
                        OPT\leq L(\lambda^*)\leq 2 F(\lambda^*)\leq 2 OPT + 2 V(\{i^*\})
                    \end{displaymath}
                    where $F$ is the multilinear extension of $V$ (defined below); the last inequality follows by rounding the fractional solution, known as \textbf{pipage rounding}.
                \item $L^*$ can be approximated by a quantity monotone in $c$: add the constraint $\lambda_i\geq\alpha$ and solve the convex optimization for $\alpha$ \emph{small enough}.
            \end{itemize}
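            For reference, the multilinear extension used above is
            \begin{displaymath}
                F(\lambda) = \sum_{S\subseteq[N]} V(S)\prod_{i\in S}\lambda_i\prod_{i\notin S}(1-\lambda_i),
            \end{displaymath}
            the expected value of $V$ on a random set containing each user $i$ independently with probability $\lambda_i$.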
        \end{block}
        \begin{block}{Generalization}
            \begin{itemize}
                \item The experimenter assumes a generative model of the form: 
    \begin{displaymath}
        y_i = f(x_i) + \varepsilon_i
    \end{displaymath}
    and wishes to learn the unknown $f$.
\item Prior knowledge: $f$ is a random function; its entropy $H(f)$ captures the
    experimenter's uncertainty.
\item Value function: information gain (entropy reduction) induced by the observed data of the users in $S$:
    \begin{displaymath}
        V(S) = H(f) - H(f\,|\,S)
    \end{displaymath}
    \end{itemize}
    $V$ is again submodular and the previous framework applies.
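    As a sanity check, in the linear Gaussian model above this recovers the
    earlier objective up to a factor $\frac{1}{2}$:
    \begin{displaymath}
        H(\beta) - H(\beta\,|\,S) = \frac{1}{2}\log\det\Big(I_d+\sum_{i\in S}x_ix_i^T\Big),
    \end{displaymath}
    since observing the data of $S$ shrinks the covariance of $\beta$ from $\sigma^2 I_d$ to $\sigma^2 A_S^{-1}$.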
        \end{block}
    \end{column}
\begin{column}{\sepwid}\end{column}
\begin{column}{\sepwid}\end{column}
\end{columns}

\end{frame}

\end{document}