1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
|
\documentclass[portrait]{beamer}
\usepackage[utf8]{inputenc}
\usepackage[orientation=portrait,size=custom,width=55,height=71,scale=1]{beamerposter}
\usetheme{confposter}
\usepackage{times}
\newlength{\sepwid}
\newlength{\onecolwid}
\newlength{\twocolwid}
\newlength{\threecolwid}
\setlength{\paperwidth}{22in}
\setlength{\paperheight}{28in}
\setlength{\sepwid}{0.01\paperwidth}
\setlength{\onecolwid}{0.47\paperwidth}
\setlength{\twocolwid}{0.96\paperwidth}
\setlength{\fboxrule}{2pt}
\setlength{\fboxsep}{10pt}
\usepackage{graphicx}
\usepackage{amsmath}
\title{Budget Feasible Mechanisms for Experimental Design}
\author{Thibaut Horel$^*$ \and Stratis Ioannidis$^\dag$ \and Muthu Muthukrishnan$^\ddag$}
\institute{$^*$École Normale Supérieure, $^\dag$Technicolor, $^\ddag$Rutgers University}
\begin{document}
\setlength{\belowcaptionskip}{2ex} % White space under figures
\setlength\belowdisplayshortskip{2ex} % White space under equations
\begin{frame}[t]
\begin{columns}[t]
\begin{column}{\sepwid}\end{column}
\begin{column}{\sepwid}\end{column}
\begin{column}{\onecolwid}
\begin{block}{Setting}
\begin{center}
\includegraphics[scale=1]{st.pdf}
\end{center}
$N$ users, each of them has:
\begin{itemize}
\item \textbf{public} features $x_i$ (\emph{e.g.} age, gender, height, etc.)
\item \textbf{private} data $y_i$ (\emph{e.g.} survey answer, medical test outcome)
\item cost $c_i$ to reveal their data
\end{itemize}
Experimenter \textsf{E}:
\begin{itemize}
\item pays users to reveal their private data
\item has a budget constraint $B$
\end{itemize}
\end{block}
\begin{block}{Ridge Regression}
\begin{enumerate}
\item Experimenter $E$ assumes a linear model:
\begin{displaymath}
y_i = \beta^{T}x_i + \varepsilon_i,\quad\varepsilon_i\sim\mathcal{N}(0,\sigma^2)
\end{displaymath}
and a normal prior: $\beta\sim\mathcal{N}(0,\sigma^2I_d)$.
\item Experimenter $E$ selects a set $S$ of users within his budget constraint,
and learns $\beta$ through ridge regression:
\begin{displaymath}
\hat{\beta} = \sigma^{-2}\arg\,\min_\beta\Big[ \sum_{i\in S}(y_i-\beta^Tx_i)^2 + \|\beta\|^2\Big]
\end{displaymath}
\end{enumerate}
\end{block}
\begin{block}{Value of Data}
\textbf{Goal} minimize the variance-covariance matrix of the
estimator:
\begin{displaymath}
\mathrm{Var}\, \hat{\beta} = \Big(I_d+\sum_{i\in S}x_ix_i^T\Big)^{-1}
\end{displaymath}
Minimizing the volume ($\det$) of the variance is equivalent to maximizing the information gain (entropy reduction) on $\beta$:
\begin{displaymath}
V(S) = \log\det\Big(I_d+\sum_{i\in S} x_ix_i^T\Big)
\end{displaymath}
\end{block}
\begin{block}{Experimental Design}
The experimenter selects set of user $S$ solution to:
\begin{displaymath}
\begin{split}
&\text{Maximize } V(S) = \log\det\Big(I_d+\sum_{i\in S} x_ix_i^T\Big)\\
&\text{Subject to } \sum_{i\in S} c_i\leq B
\end{split}
\end{displaymath}
\begin{itemize}
\item specific case of budgeted submodular maximization
\item there exists a poly-time $\frac{e}{e-1}$-approximate algorithm
\end{itemize}
\end{block}
\end{column}
\begin{column}{\sepwid}\end{column}
\vrule{}
\begin{column}{\sepwid}\end{column}
\begin{column}{\onecolwid}
\begin{block}{Experimental Design with Strategic Users}
\begin{itemize}
\item users might lie about the reported costs $c_i$
\item payments and selection rule must be adapted accordingly
\end{itemize}
Specific case of \textbf{budget feasible mechanism design}; the payments and selection rule must satisfy:
\begin{itemize}
\item individual rationality and truthfulness
\item budget feasibility
\end{itemize}
Can we still obtain a constant approximation ratio in polynomial time?
\end{block}
\begin{block}{Mechanism blueprint for Experimental Design}
\fbox{\parbox{0.95\textwidth}{
\begin{itemize}
\item Compute $i^* = \arg\max_i V(\{i\})$
\item Compute $L^*$ (see below)
\item Compute $S_G$ greedily:
\begin{itemize}
\item repeatedly add element with \emph{highest
marginal-value-per-cost}:
\begin{align*}
i = \argmax_{j\in[N]\setminus
S}\frac{V(S\cup\{i\}) - V(S)}{c_i}\label{greedy}
\end{align*}
\item stop when the budget is exhausted
\end{itemize}
\item Return:
\begin{itemize}
\item $i^*$ if $V(\{i^*\})>L^*$
\item $S_G$ otherwise
\end{itemize}
\end{itemize}
}}
\vspace{0.3cm}
$L^*$ must be:
\begin{itemize}
\item close to $OPT:=\max_{S\subseteq [N]\setminus \{i^*\}} \{V(S);
\sum_{i\in S}c_i\leq B\}$
\item monotone in $c$; computable in poly-time
\end{itemize}
\end{block}
\begin{block}{Convex Relaxation for Experimental Design}
We introduce the following concave function:
\begin{displaymath}
L(\lambda) = \sum_{1\leq i\leq N}\log\det\Big(I_d
+ \sum_{1\leq i\leq N} \lambda_i x_ix_i^T\Big).
\end{displaymath}
Then using $L^* = \max_\lambda \big\{L(\lambda); \sum_{1\leq i\leq N} c_i\leq B,\; \lambda_{i^*} = 0\big\}$:
\vspace{0.3cm}
\fbox{\parbox{0.95\textwidth}{
\textbf{Theorem:} \emph{The above mechanism is truthful, individually rational, budget-feasible, runs in polynomial time and its approximation \mbox{ratio} is $\simeq 12.984$.}}}
\vspace{0.5cm}
Technical challenges:
\begin{itemize}
\item $L^*$ is close to $OPT$ (shown via pipage rounding)
\item $L^*$ can be approximated monotonously in $c$
\end{itemize}
\end{block}
\begin{block}{Generalization}
\begin{itemize}
\item Generative model of the form:
\begin{displaymath}
y_i = f(x_i) + \varepsilon_i
\end{displaymath}
\item Prior knowledge: $f$ is a random variable, the entropy $H(f)$ captures the
experimenter's uncertainty.
\item Value function: information gain (entropy reduction) induced by the observed data of the users in $S$:
\begin{displaymath}
V(S) = H(f) - H(f\,|\,S)
\end{displaymath}
\end{itemize}
$V$ is again submodular and the previous framework applies.
\end{block}
\end{column}
\begin{column}{\sepwid}\end{column}
\begin{column}{\sepwid}\end{column}
\end{columns}
\end{frame}
\end{document}
|