% Source file: paper.tex (git blob ba848a411d579c7ada8ee0bba7dc0b7e404639fe)
\documentclass{acm_proc_article-sp}
\usepackage[utf8]{inputenc}
\usepackage{amsmath,amsfonts}
\usepackage{algorithm}
\usepackage{algpseudocode}
\newtheorem{lemma}{Lemma}
\newtheorem{fact}{Fact}
\newtheorem{example}{Example}
\newtheorem{prop}{Proposition}
\newtheorem{theorem}{Theorem}
\newcommand*{\defeq}{\stackrel{\text{def}}{=}}
\newcommand{\var}{\mathop{\mathrm{Var}}}
\newcommand{\condexp}[2]{\mathop{\mathbb{E}}\left[#1|#2\right]}
\newcommand{\expt}[1]{\mathop{\mathbb{E}}\left[#1\right]}
\newcommand{\norm}[1]{\lVert#1\rVert}
\newcommand{\tr}[1]{#1^*}
\newcommand{\ip}[2]{\langle #1, #2 \rangle}
\newcommand{\mse}{\mathop{\mathrm{MSE}}}
\DeclareMathOperator{\trace}{tr}
\DeclareMathOperator*{\argmax}{arg\,max}
\title{Budgeted mechanism for optimal experiment design}
\begin{document}

\section{Intro}

\begin{itemize}
    \item already existing field of experiment design: survey-like setup, what
    are the best points to include in your experiment. Measure of the
    usefulness of the data: variance-reduction or entropy-reduction
    \item nowadays, there is also a big focus on purchasing data: paid surveys,
    mechanical turk, etc. that add economic aspects to the problem of
    experiment design
    \item recent advances (Singer, Chen) in the field of budgeted mechanisms
    \item we study ridge regression, very widely used in statistical learning,
    and treat it as a problem of budgeted experiment design
    \item we make the following contributions: ...
    \item extension to a more general setup which includes a wider class of
    machine learning problems
\end{itemize}

\section{Problem formulation}

\subsection{Notations}

\begin{itemize}
    \item If $x\in\mathbf{R}^d$, $x^*$ will denote the transpose of vector $x$.
        If $(x, y)\in\mathbf{R}^d\times\mathbf{R}^d$, $x^*y$ is the standard
        inner product of $\mathbf{R}^d$.
    \item We will often use the following order over symmetric matrices: if $A$
        and $B$ are two $d\times d$ real symmetric matrices, we write that
        $A\leq B$ iff:
        \begin{displaymath}
            \forall x\in\mathbf{R}^d,\quad x^*Ax \leq x^*Bx 
        \end{displaymath}
        That is, iff $B-A$ is symmetric positive semi-definite.
    \item Let us recall this property of the order defined above: if $A$ and
        $B$ are two symmetric positive definite matrices such that $A\leq B$,
        then $A^{-1}\leq B^{-1}$.
\end{itemize}

\subsection{Data model}
\begin{itemize}
    \item set of $n$ users $\mathcal{N} = \{1,\ldots, n\}$
    \item each user has a public vector of features $x_i\in\mathbf{R}^d$ and some
        undisclosed variable $y_i\in\mathbf{R}$
    \item we assume that the data has been normalized: $\forall
        i\in\mathcal{N}$, $\norm{x_i}\leq 1$
    \item \textbf{Ridge regression model:}
        \begin{itemize}
            \item $y_i = \beta^*x_i + \varepsilon_i$
            \item $\varepsilon_i \sim \mathcal{N}(0,\sigma^2)$, 
                $(\varepsilon_i)_{i\in \mathcal{N}}$ are mutually independent.
            \item prior knowledge of $\beta$: $\beta\sim\mathcal{N}(0,\kappa I_d)$
            \item $\mu = \frac{\kappa}{\sigma^2}$ is the regularization factor.
            \item after the variables $y_i$ are disclosed, the experimenter
                does \emph{maximum a posteriori estimation}, which is
                equivalent under the Gaussian prior to solving the ridge
                regression minimisation problem:
                \begin{displaymath}
                    \beta_{\text{MAP}} = \operatorname*{arg\,min}_{\beta}
                    \sum_i (y_i - \beta^*x_i)^2
                    + \frac{1}{\mu}\norm{\beta}_2^2
                \end{displaymath}
        \end{itemize}
\end{itemize}

\subsection{Economics}

Value function: following experiment design theory, the value of
data is the decrease in uncertainty about the model. A common measure of
uncertainty is the entropy. Thus, the value of a set $S$ of users is:
\begin{displaymath}
    V(S) = \mathbb{H}(\beta) - \mathbb{H}(\beta|Y_S)
\end{displaymath}
where $Y_S = \{y_i,\,i\in S\}$ is the set of observations.

In our case (under Gaussian prior):
\begin{align*}
    \forall S\subset\mathcal{N},\; V(S)
    & = \frac{1}{2}\log\det\left(I_d
        + \mu\sum_{i\in S} x_ix_i^*\right)\\
    & \defeq \frac{1}{2}\log\det A(S)
\end{align*}

\begin{lemma}[Marginal contribution]
    \begin{displaymath}
        \Delta_i V(S)\defeq V(S\cup\{i\}) - V(S) 
        = \frac{1}{2}\log\left(1 + \mu x_i^*A(S)^{-1}x_i\right)
    \end{displaymath}
\end{lemma}

\begin{proof}
    We have:
    \begin{align*}
        V(S\cup\{i\}) & = \frac{1}{2}\log\det A(S\cup\{i\})\\
        & = \frac{1}{2}\log\det\left(A(S) + \mu x_i x_i^*\right)\\
        & = V(S) + \frac{1}{2}\log\det\left(I_d + \mu A(S)^{-1}x_i
    x_i^*\right)\\
        & = V(S) + \frac{1}{2}\log\left(1 + \mu x_i^* A(S)^{-1}x_i\right)
    \end{align*}
    where the last equality comes from Sylvester's determinant formula.
\end{proof}
\begin{itemize}
    \item each user $i$ has a cost $c_i$
    \item the auctioneer has a budget constraint $B$
    \item optimisation problem:
        \begin{displaymath}
            OPT(V,\mathcal{N}, B) = \max_{S\subset\mathcal{N}} \left\{ V(S)\,|\,
            \sum_{i\in S}c_i\leq B\right\}
        \end{displaymath}
\end{itemize}

The problem is that the costs are reported by the users, who may lie: this is
an auction problem. The goal is to design a mechanism (a selection and payment
scheme) which is:
\begin{itemize}
    \item truthful
    \item individually rational
    \item budget feasible
    \item has a good approximation ratio
\end{itemize}

\section{Main result}

\begin{algorithm}
    \caption{Budget feasible mechanism for ridge regression}
    \label{mechanism}
    \begin{algorithmic}[1]
    \State $i^* \gets \argmax_{j\in\mathcal{N}}V(j)$
    \State $x^* \gets \argmax_{x\in[0,1]^n} \{L_{\mathcal{N}\setminus\{i^*\}}(x)
                                    \,|\, c(x)\leq B\}$
        \Statex
        \If{$L(x^*) < CV(i^*)$}
            \State \textbf{return} $\{i^*\}$
        \Else
            \State $i \gets \argmax_{1\leq j\leq n}\frac{V(j)}{c_j}$
            \State $S \gets \emptyset$
            \While{$c_i\leq \frac{B}{2}\frac{V(S\cup\{i\})-V(S)}{V(S\cup\{i\})}$}
                \State $S \gets S\cup\{i\}$
                \State $i \gets \argmax_{j\in\mathcal{N}\setminus S}
                \frac{V(S\cup\{j\})-V(S)}{c_j}$
            \EndWhile
            \State \textbf{return} $S$
        \EndIf
    \end{algorithmic}
\end{algorithm}

Choosing $C=...$ we have:

\begin{theorem}
    The mechanism is truthful, individually rational, budget feasible.
    Furthermore, by choosing $C= ...$ we get an approximation ratio of:
    \begin{multline*}
        1 + C^* = \frac{5e-1 + C_\mu(4e-1)}{2C_\mu(e-1)}\\
        + \frac{\sqrt{C_\mu^2(1+2e)^2
        + 2C_\mu(14e^2+5e+1) + (1-5e)^2}}{2C_\mu(e-1)}
    \end{multline*}
\end{theorem}

\section{General setup}

\section{Conclusion}

\end{document}