Diffstat (limited to 'paper.tex')
-rw-r--r--  paper.tex  | 49
1 files changed, 45 insertions, 4 deletions
@@ -21,15 +21,17 @@ \title{Budgeted mechanism for optimal experiment design}
 \begin{document}
+\maketitle
+
 \section{Intro}
 \begin{itemize}
 \item already existing field of experiment design: survey-like setup, what
-  are the best points to include in your experiment. Measure of the
-  usefulness of the data: variance-reduction or entropy-reduction
+  are the best points to include in your experiment? Measure of the
+  usefulness of the data: variance-reduction or entropy-reduction.
 \item nowadays, there is also a big focus on purchasing data: paid surveys,
   mechanical turk, etc. that add economic aspects to the problem of
-  experiment desgin
+  experiment design
 \item recent advances (Singer, Chen) in the field of budgeted mechanisms
 \item we study ridge regression, very widely used in statistical learning,
   and treat it as a problem of budgeted experiment design
@@ -59,6 +61,45 @@ \end{itemize}
 \subsection{Data model}
+
+There is a set of $n$ users, $\mathcal{N} = \{1,\ldots, n\}$. Each user
+$i\in\mathcal{N}$ has a public vector of features $x_i\in\mathbf{R}^d$ and an
+undisclosed piece of information $y_i\in\mathbf{R}$. We assume that the data
+has already been normalized so that $\norm{x_i}_2\leq 1$ for all
+$i\in\mathcal{N}$.
+
+The experimenter is going to select a set of users and ask them to reveal
+their private piece of information. We are interested in a \emph{survey
+setup}: the experimenter has not seen the data yet, but he wants to know
+which users he should select. His goal is to learn the model underlying the
+data. Here, we assume a linear model:
+\begin{displaymath}
+  \forall i\in\mathcal{N},\quad y_i = \langle\beta^*, x_i\rangle + \varepsilon_i
+\end{displaymath}
+where $\beta^*\in\mathbf{R}^d$ is the (unknown) model parameter and
+$\varepsilon_i\in\mathbf{R}$ follows a normal distribution of mean $0$ and
+variance $\sigma^2$.
+
+After observing the data, the experimenter could simply perform linear
+regression to learn the model parameter $\beta^*$. However, in a more
+general setup, the experimenter has prior knowledge about $\beta^*$:
+a distribution over $\mathbf{R}^d$. After observing the data, the
+experimenter performs \emph{maximum a posteriori estimation}: computing the
+point which maximizes the posterior distribution of $\beta$ given the
+observations.
+
+Here, we will assume, as is often done, that the prior distribution is
+a multivariate normal distribution of mean zero and covariance matrix
+$\kappa I_d$. Maximum a posteriori estimation then leads to the following
+minimization problem:
+\begin{displaymath}
+  \hat\beta = \argmin_{\beta\in\mathbf{R}^d}
+  \sum_i (y_i - \langle\beta, x_i\rangle)^2
+  + \frac{1}{\mu}\norm{\beta}_2^2
+\end{displaymath}
+which is the well-known \emph{ridge regression}; $\mu
+= \frac{\kappa}{\sigma^2}$ is the regularization parameter. Ridge regression
+can thus be seen as linear regression with a regularization term which
+prevents $\beta$ from having a large $L_2$-norm.
+
+
 \begin{itemize}
 \item set of $n$ users $\mathcal{N} = \{1,\ldots, n\}$
 \item each user has a public vector of features $x_i\in\mathbf{R}^d$ and some
@@ -141,7 +182,7 @@ problem. Goal design a mechanism (selection and payment scheme) which is:
 \section{Main result}
 \begin{algorithm}\label{mechanism}
-  \caption{Budget feasible mechanism for ridge regression}
+  \caption{Mechanism for ridge regression}
   \begin{algorithmic}[1]
     \State $i^* \gets \argmax_{j\in\mathcal{N}}V(j)$
     \State $x^* \gets \argmax_{x\in[0,1]^n} \{L_{\mathcal{N}\setminus\{i^*\}}(x) |
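Reviewer's note: the commit above states the MAP-to-ridge step without the intermediate computation. The standard derivation (ours, not text from the paper) under the stated Gaussian prior $\mathcal{N}(0, \kappa I_d)$ and noise variance $\sigma^2$ is:

```latex
% Posterior is proportional to likelihood times prior; take -log and drop
% constants to turn the MAP maximization into a least-squares minimization.
\begin{align*}
  \hat\beta
  &= \argmax_{\beta\in\mathbf{R}^d}
     \prod_i \exp\Bigl(-\frac{(y_i - \langle\beta, x_i\rangle)^2}{2\sigma^2}\Bigr)
     \cdot \exp\Bigl(-\frac{\norm{\beta}_2^2}{2\kappa}\Bigr) \\
  &= \argmin_{\beta\in\mathbf{R}^d}
     \frac{1}{2\sigma^2}\sum_i (y_i - \langle\beta, x_i\rangle)^2
     + \frac{1}{2\kappa}\norm{\beta}_2^2
   = \argmin_{\beta\in\mathbf{R}^d}
     \sum_i (y_i - \langle\beta, x_i\rangle)^2
     + \frac{\sigma^2}{\kappa}\norm{\beta}_2^2,
\end{align*}
```

so the regularization weight on $\norm{\beta}_2^2$ is $\sigma^2/\kappa = 1/\mu$ with $\mu = \kappa/\sigma^2$, i.e. the ridge objective.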
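The ridge objective added by this commit has a well-known closed-form minimizer, $\hat\beta = (X^\top X + \tfrac{1}{\mu} I)^{-1} X^\top y$, which is easy to sanity-check numerically. The sketch below uses our own toy instance of the data model (names `n`, `d`, `kappa`, `sigma` are ours, not from the paper):

```python
import numpy as np

# Toy instance of the data model: n users, each with a public feature
# vector x_i normalized so that ||x_i||_2 <= 1, and a private label
# y_i = <beta*, x_i> + noise with noise variance sigma^2.
rng = np.random.default_rng(0)
n, d = 200, 5
sigma, kappa = 0.1, 1.0  # noise std dev; prior covariance is kappa * I_d
X = rng.normal(size=(n, d))
X /= np.maximum(1.0, np.linalg.norm(X, axis=1, keepdims=True))  # ||x_i|| <= 1
beta_star = rng.normal(size=d)
y = X @ beta_star + sigma * rng.normal(size=n)

# Ridge / MAP estimate: minimizing
#   sum_i (y_i - <beta, x_i>)^2 + (1/mu) * ||beta||_2^2,  mu = kappa / sigma^2,
# yields the normal equations (X^T X + (1/mu) I) beta = X^T y.
mu = kappa / sigma**2
beta_hat = np.linalg.solve(X.T @ X + (1.0 / mu) * np.eye(d), X.T @ y)
```

With mild noise and a weak regularizer (`1/mu = 0.01` here), `beta_hat` lands close to `beta_star`, and one can verify it satisfies the first-order condition `X.T @ (y - X @ beta_hat) == beta_hat / mu`.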
