summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2012-10-25 15:16:10 -0700
committerThibaut Horel <thibaut.horel@gmail.com>2012-10-25 15:16:10 -0700
commite2e3bcb87f8090a1f1811af391f5edb5ec121df7 (patch)
tree4362ca6e0e477655b0193433abe2895e7afca748
parent93c362fe4dbbeb1cb381fe68af4ca821f215d6a3 (diff)
downloadrecommendation-e2e3bcb87f8090a1f1811af391f5edb5ec121df7.tar.gz
Basic structure in paper.tex
-rw-r--r--paper.tex135
1 files changed, 134 insertions, 1 deletions
diff --git a/paper.tex b/paper.tex
index 466ee8e..ba848a4 100644
--- a/paper.tex
+++ b/paper.tex
@@ -40,9 +40,142 @@
\section{Problem formulation}
+\subsection{Notations}
+
+\begin{itemize}
+ \item If $x\in\mathbf{R}^d$, $x^*$ will denote the transpose of vector $x$.
+ If $(x, y)\in\mathbf{R}^d\times\mathbf{R}^d$, $x^*y$ is the standard
+ inner product of $\mathbf{R}^d$.
+ \item We will often use the following order over symmetric matrices: if $A$
+ and $B$ are two $d\times d$ real symmetric matrices, we write that
+ $A\leq B$ iff:
+ \begin{displaymath}
+ \forall x\in\mathbf{R}^d,\quad x^*Ax \leq x^*Bx
+ \end{displaymath}
+ That is, iff $B-A$ is symmetric semi-definite positive.
+ \item Let us recall this property of the ordered defined above: if $A$ and
+ $B$ are two symmetric definite positive matrices such that $A\leq B$,
+ then $A^{-1}\leq B^{-1}$.
+\end{itemize}
+
+\subsection{Data model}
+\begin{itemize}
+ \item set of $n$ users $\mathcal{N} = \{1,\ldots, n\}$
+ \item each user has a public vector of features $x_i\in\mathbf{R}^d$ and some
+ undisclosed variable $y_i\in\mathbf{R}$
+ \item we assume that the data has been normalized: $\forall
+ i\in\mathcal{N}$, $\norm{x_i}\leq 1$
+ \item \textbf{Ridge regression model:}
+ \begin{itemize}
+ \item $y_i = \beta^*x_i + \varepsilon_i$
+ \item $\varepsilon_i \sim \mathcal{N}(0,\sigma^2)$,
+ $(\varepsilon_i)_{i\in \mathcal{N}}$ are mutually independent.
+ \item prior knowledge of $\beta$: $\beta\sim\mathcal{N}(0,\kappa I_d)$
+ \item $\mu = \frac{\kappa}{\sigma^2}$ is the regularization factor.
+ \item after the variables $y_i$ are disclosed, the experimenter
+ does \emph{maximum a posteriori estimation} which is equivalent
+ under Gaussian prior to solving the ridge regression
+ maximisation problem:
+ \begin{displaymath}
+ \beta_{\text{max}} = \argmax \sum_i (y_i - \beta^*x_i)^2
+ + \frac{1}{\mu}\sum_i \norm{\beta}_2^2
+ \end{displaymath}
+ \end{itemize}
+\end{itemize}
+
+\subsection{Economics}
+
+Value function: following the experiment design theory, the value of
+data is the decrease of uncertainty about the model. Common measure of
+uncertainty, the entropy. Thus, the value of a set $S$ of users is:
+\begin{displaymath}
+ V(S) = \mathbb{H}(\beta) - \mathbb{H}(\beta|Y_S)
+\end{displaymath}
+where $Y_S = \{y_i,\,i\in S\}$ is the set of observations.
+
+In our case (under Gaussian prior):
+\begin{align*}
+ \forall S\subset\mathcal{N},\; V(S)
+ & = \frac{1}{2}\log\det\left(I_d
+ + \mu\sum_{i\in S} x_ix_i^*\right)\\
+ & \defeq \frac{1}{2}\log\det A(S)
+\end{align*}
+
+\begin{lemma}[Marginal contribution]
+ \begin{displaymath}
+ \Delta_i V(S)\defeq V(S\cup\{i\}) - V(S)
+ = \frac{1}{2}\log\left(1 + \mu x_i^*A(S)^{-1}x_i\right)
+ \end{displaymath}
+\end{lemma}
+
+\begin{proof}
+ We have:
+ \begin{align*}
+ V(S\cup\{i\}) & = \frac{1}{2}\log\det A(S\cup\{i\})\\
+ & = \frac{1}{2}\log\det\left(A(S) + \mu x_i x_i^*\right)\\
+ & = V(S) + \frac{1}{2}\log\det\left(I_d + \mu A(S)^{-1}x_i
+ x_i^*\right)\\
+ & = V(S) + \frac{1}{2}\log\left(1 + \mu x_i^* A(S)^{-1}x_i\right)
+ \end{align*}
+ where the last equality comes from Sylvester's determinant formula.
+\end{proof}
+\begin{itemize}
+ \item each user $i$ has a cost $c_i$
+ \item the auctioneer has a budget constraint $B$
+ \item optimisation problem:
+ \begin{displaymath}
+ OPT(V,\mathcal{N}, B) = \max_{S\subset\mathcal{N}} \left\{ V(S)\,|\,
+ \sum_{i\in S}c_i\leq B\right\}
+ \end{displaymath}
+\end{itemize}
+
+Problem, the costs are reported by users, they can lie. It is an auction
+problem. Goal design a mechanism (selection and payment scheme) which is:
+\begin{itemize}
+ \item truthful
+ \item individually rational
+ \item budget feasible
+ \item has a good approximation ratio
+\end{itemize}
+
\section{Main result}
+\begin{algorithm}\label{mechanism}
+ \caption{Budget feasible mechanism for ridge regression}
+ \begin{algorithmic}[1]
+ \State $i^* \gets \argmax_{j\in\mathcal{N}}V(j)$
+ \State $x^* \gets \argmax_{x\in[0,1]^n} \{L_{\mathcal{N}\setminus\{i^*\}}(x)
+ \,|\, c(x)\leq B\}$
+ \Statex
+ \If{$L(x^*) < CV(i^*)$}
+ \State \textbf{return} $\{i^*\}$
+ \Else
+ \State $i \gets \argmax_{1\leq j\leq n}\frac{V(j)}{c_j}$
+ \State $S \gets \emptyset$
+ \While{$c_i\leq \frac{B}{2}\frac{V(S\cup\{i\})-V(S)}{V(S\cup\{i\})}$}
+ \State $S \gets S\cup\{i\}$
+ \State $i \gets \argmax_{j\in\mathcal{N}\setminus S}
+ \frac{V(S\cup\{j\})-V(S)}{c_j}$
+ \EndWhile
+ \State \textbf{return} $S$
+ \EndIf
+ \end{algorithmic}
+\end{algorithm}
+
+Choosing $C=...$ we have:
+
+\begin{theorem}
+ The mechanism is truthful, individually rational, budget feasible.
+ Furthermore, by choosing $C= ...$ we get an approximation ratio of:
+ \begin{multline*}
+ 1 + C^* = \frac{5e-1 + C_\mu(4e-1)}{2C_\mu(e-1)}\\
+ + \frac{\sqrt{C_\mu^2(1+2e)^2
+ + 2C_\mu(14e^2+5e+1) + (1-5e)^2}}{2C_\mu(e-1)}
+ \end{multline*}
+\end{theorem}
+
\section{General setup}
-\section{Conclusions}
+\section{Conclusion}
+
\end{document}