summaryrefslogtreecommitdiffstats
path: root/slides
diff options
context:
space:
mode:
Diffstat (limited to 'slides')
-rw-r--r--slides/slides.tex348
1 files changed, 194 insertions, 154 deletions
diff --git a/slides/slides.tex b/slides/slides.tex
index ec2e5be..11ec280 100644
--- a/slides/slides.tex
+++ b/slides/slides.tex
@@ -2,18 +2,14 @@
\usepackage[utf8x]{inputenc}
\usepackage[greek,english]{babel}
\usepackage[LGR,T1]{fontenc}
-\usepackage{lmodern}
\usepackage{amsmath}
-\usepackage{graphicx}
-\usepackage{tikz}
-\newcommand{\tr}[1]{#1^*}
-\newcommand{\expt}[1]{\mathop{\mathbb{E}}\left[#1\right]}
-\newcommand{\mse}{\mathop{\mathrm{MSE}}}
+\usepackage{algpseudocode,algorithm}
+\DeclareMathOperator*{\argmax}{arg\,max}
\usetheme{Boadilla}
\usecolortheme{beaver}
-\title[Data monetization]{Data monetization: pricing user data\\with the Shapley Value}
-\author{Thibaut \textsc{Horel}}
-\institute[Technicolor]{Work with {\greektext Στρατής \textsc{Ιωαννίδης}}}
+\title[Budget Feasible Mechanisms for Experimental Design]{Budget Feasible Mechanisms for Experimental Design}
+\author{xx\\ Joint work with yy and zz}
+\institute[]{}
\setbeamercovered{transparent}
\AtBeginSection[]
@@ -30,192 +26,236 @@
\maketitle
\end{frame}
-\section{Problem}
-\begin{frame}{Problem Overview}
-\begin{center}
-\includegraphics[width=0.8\textwidth]{schema.pdf}
-\end{center}
+\begin{frame}
+\tableofcontents
\end{frame}
-\begin{frame}{Motivation}
-
-Why is it useful?
-
-\begin{itemize}
-\item sell/buy user data
-\item compensate users in your database
-\end{itemize}
-
-\end{frame}
+\section{Experimental Design}
+\begin{frame}{Experimental Design}
+ An experimenter is about to conduct an experiment with users.
+ \begin{itemize}
+ \item $n$ users
+ \item $x_i\in\mathbb{R}^d$: public features of user $i$
+ \item $c_i\in\mathbb{R}^+$: cost of user $i$ for taking part in the experiment
+ \end{itemize}
-\begin{frame}{Challenges}
+ When paid $c_i$, user $i$ reveals $y_i$:
+ \begin{displaymath}
+ y_i = \beta^T x_i + \varepsilon_i,\quad\beta\in\mathbb{R}^d,\;
+ \varepsilon_i\sim\mathcal{N}(0,\sigma^2)
+ \end{displaymath}
+ $\beta$ is \alert{unknown} to the experimenter.
-We need to understand:
-\begin{itemize}
-\item the recommender system
-\item the revenue structure
-\item the relation between the revenue and the value of the database
-\item how to compensate individual users
-\end{itemize}
+ \begin{block}{}
+ \alert{Question:} budget constraint $B$, which users to pick to learn $\beta$ as
+ well as possible without spending more than $B$?
+ \end{block}
\end{frame}
-\section{Shapley value}
+\begin{frame}{Evaluation criterion}
+ \begin{block}{}
+ \alert{Question:} How to measure the helpfulness of a set of users $S$ in learning $\beta$?
+ \end{block}
+
+ \vspace{0.5cm}
-\begin{frame}{Individual value}
- \begin{itemize}
- \item $S$: set of players
- \item $S'\subset S$: coalition
- \item $V: \mathcal{P}(S)\to \mathbf{R}$: \alert{value} function:
+ Bayesian approach:
\begin{itemize}
- \item $V(\emptyset) = 0$
- \item $V(S')$: value of the coalition $S'$
+ \item \emph{a priori} knowledge: prior distribution over $\beta$
+ \item the entropy of $\beta$, $H(\beta)$ measures the uncertainty of the experimenter
+ \item after observing $Y_S = \{y_i,\; i\in S\}$, uncertainty becomes $H(\beta\mid Y_S)$
\end{itemize}
- \end{itemize}
- \alert{Question:} How to split the value of a subset across all its constituents?
+ \vspace{0.5cm}
- \alert{Goal:} $\phi_i(S',V)$: value of player $i$ in $S'$
+ The value of $S$ is the \alert{entropy reduction}:
+ \begin{displaymath}
+ V(S) = H(\beta) - H(\beta\mid Y_S)
+ \end{displaymath}
\end{frame}
-\begin{frame}{Solution attempts}
- \begin{itemize}
- \visible<1->{\item $\phi_i(S',V) = V(\{i\})$?} \visible<2->{``The whole is greater than the sum of its parts''
- $$V(S')\neq \sum_{i\in S'} V(\{i\})$$}
- \visible<3->{\item $\phi_i(S',V) = V(S') - V(S'\setminus\{i\})$: the \alert{marginal contribution}?} \visible<4->{depends on the time the player joins the coalition}
- \end{itemize}
+\begin{frame}{Value function}
+ Gaussian linear model:
+ \begin{displaymath}
+ y_i = \beta^T x_i + \varepsilon_i,\quad\beta\in\mathbb{R}^d,\;
+ \varepsilon_i\sim\mathcal{N}(0,\sigma^2)
+ \end{displaymath}
+
+ Typical prior distribution $\beta\sim\mathcal{N}(0,\tau I_d)$
+
+ \vspace{0.5cm}
+
+ \begin{block}{}
+ Under the Gaussian linear model and Gaussian prior distribution:
+ \begin{displaymath}
+ V(S) = \log\det\Big(I_d + \lambda\sum_{i\in S}x_i x_i^T \Big)
+ \end{displaymath}
+ \end{block}
+
+ \vspace{0.5cm}
+
+ \alert{Remark:} $V$ can be computed efficiently using a Cholesky decomposition
\end{frame}
-\begin{frame}{Axioms}
- \begin{description}
- \item[Efficiency:] No money lost in the splitting process
- $$V(S) = \sum_{i\in S} \phi_i(S,V)$$
-\pause
- \item[Symmetry:] Equal pay for equal contribution
+\section{Non-strategic case}
+\begin{frame}{Optimization Problem}
+ \begin{block}{P1}
+ \begin{align*}
+ &\text{maximize}\; V(S) = \log\det\Big(I_d + \lambda\sum_{i\in S}x_i x_i^T \Big)\\
+ &\text{subject to}\; \sum_{i\in S}c_i\leq B\quad\text{(Knapsack constraint)}
+ \end{align*}
+ \end{block}
+
+ \onslide<2->
+ Good news:
+
+ $V$ is submodular (diminishing return):
+ \begin{displaymath}
+ \forall S\subseteq T\, \forall i,\; V(S\cup\{i\}) - V(S)\geq
+ V(T\cup\{i\}) - V(T)
+ \end{displaymath}
- \alert{if} $\forall S'\subset S\setminus\{i,j\}, V(S'\cup\{i\}) = V(S'\cup\{j\})$
+ \onslide<3->
+ Bad news:
- \alert{then} $\phi_i(S,V) = \phi_j(S,V)$
-\pause
- \item[Fairness:] $j$'s contribution to $i$ equals $i$'s contribution to $j$
- $$\phi_i(S,V) - \phi_i(S\setminus\{j\}) = \phi_j(S,V)- \phi_j(S\setminus\{i\})$$
- \end{description}
+ Maximizing a submodular function under a knapsack constraint is NP-hard (even
+ in the specific case of $\log\det$)
\end{frame}
-\begin{frame}{Solution: the Shapley Value}
+\begin{frame}{Approximation}
+ \begin{definition}[Approximation ratio]
+ Let $OPT$ denote the optimal solution to P1, $S$ is an $\alpha$-approximation
+ if:
+ \begin{displaymath}
+ V(OPT)\leq \alpha V(S)
+ \end{displaymath}
+ \end{definition}
+
+ \vspace{0.5cm}
+
+ \begin{theorem}[Sviridenko]
+ \begin{enumerate}
+ \item There is an algorithm which returns a set $S$ which is a $\frac{e}{e-1}$-approximation
+ \item No algorithm can do better unless P=NP
+ \end{enumerate}
+ \end{theorem}
+
+ \vspace{0.5cm}
- \begin{theorem}[Shapley, 1953]
- There is only one function satisfying Efficiency, Symmetry and
- Fairness:
- $$\phi_i(S,V) = \frac{1}{\vert S\vert!}\sum_{\pi\in\mathfrak{S}(S)}\Delta_i\big(V,S(i,\pi)\big)$$
- \end{theorem}
- \begin{itemize}
- \item $\mathfrak{S}(S)$: set of permutations of $S$
- \item $S(i,\pi)$: set of players before $i$ in $\pi$
- \item $\Delta_i\big(V,T\big) =
- V\big(T\big)-V\big(T\setminus\{i\})$: marginal contribution of $i$
- to $T$
- \end{itemize}
+ The algorithm is a combination of an enumeration method and a greedy heuristic.
\end{frame}
-\begin{frame}{Properties}
- \begin{itemize}
- \item
- \alert{if} $V$ is superadditive: $V(S\cup T) \geq V(S) + V(T)$
-\pause
+\begin{frame}{Greedy algorithm}
+ \begin{block}{Greedy($V$, $B$)}
+ \begin{algorithmic}
+ \State $i \gets \argmax_{1\leq j\leq n}\frac{V(j)}{c_j}$
+ \State $S \gets \emptyset$
+ \While{$c_i\leq B - \sum_{j\in S} c_j$}
+ \State $S \gets S\cup\{i\}$
+ \State $i \gets \argmax_{1\leq j\leq n}
+ \frac{V(S\cup\{j\})-V(S)}{c_j}$
+ \EndWhile
+ \State \textbf{return} $S$
+ \end{algorithmic}
+ \end{block}
- \alert{then} the Shapley Value is individually rational:
- $\phi_i(S,V) > V(\{i\})$
-\pause
- \item
- \alert{if} $V$ is supermodular: $\Delta_i(V,S)\leq\Delta_i(V,T),
- \forall S\subset T$
-\pause
+ \vspace{0.5cm}
- \alert{then} the Shapley Value lies in the core of
- the game:
- $$\sum_{i\in S'}\phi_i(S,V) \geq V(S'), \forall S'\subset S$$
-\pause
- \item Hervé Moulin \& Scott Shenker (1999): Group-strategyproof auction
- \end{itemize}
+ \begin{lemma}
+ Greedy has an \alert{unbounded} approximation ratio.
+ \end{lemma}
\end{frame}
-\section{Example scenario}
+\begin{frame}
+ \begin{block}{MaxGreedy}
+ \begin{algorithmic}
+ \State $i^* \gets \argmax_{1\leq j\leq n} V(j)$
+ \State $S \gets$ Greedy($V$, $B$)
+ \If{$V(i^*)\geq V(S)$}
+ \State \textbf{return} $\{i^*\}$
+ \Else
+ \State \textbf{return} $S$
+ \EndIf
+ \end{algorithmic}
+ \end{block}
-\begin{frame}{Setup}
- \begin{itemize}
- \item database $X$: $n\times p$ matrix
-
- \item $x_i\in \mathbf{R}^p$ $i$-th row of $X$: data of user $i$
+ \vspace{0.5cm}
- \item for each user: $y_i$ variable of interest, could be:
- \begin{itemize}
- \item already observed
- \item about to be observed
- \end{itemize}
- \end{itemize}
+ \begin{lemma}
+ MaxGreedy is a $\frac{5e}{e-1}$ approximation
+ \end{lemma}
\end{frame}
-\begin{frame}{Recommender system}
-\alert{Goal:} predict the variable of interest $y$ of a new user $x$
+\section{Strategic case}
-\alert{Linear model:} relation between data and variable of interest:
-$$y = \langle\beta,x\rangle + \varepsilon, \quad \varepsilon\simeq\mathcal{N}(0,\sigma^2)$$
+\begin{frame}{Reverse auction}
-\alert{Linear regression:} estimator $\tilde{\beta} =
-(\tr{X}X)^{-1}\tr{X}Y$
+ \alert{Problem:} In real life, user are strategic agents. They lie about their costs.
+ This is a \alert{reverse auction}: the experimenter wants to buy data
+ from users.
-\alert{Prediction:} $\tilde{y} = \langle\tilde{\beta},x\rangle$
-\end{frame}
+ \vspace{0.5cm}
+
+ \alert{Goal:} Design an auction mechanism:
+ \begin{itemize}
+ \item a selection rule $s: (c_1,\ldots,c_n) \mapsto (s_1,\ldots,s_n)$,
+ $s_i\in\{0,1\}$
+ \item a payment rule $p: (c_1,\ldots,c_n) \mapsto (p_1,\ldots,p_n)$,
+ $p_i\in\mathbb{R}^+$
+ \end{itemize}
-\begin{frame}{Revenue structure}
-\alert{Prediction error:} $\mse(X,x) = \expt{(\tilde{y} - y)^2} =
-\sigma^2\tr{x}(\tr{X}X)^{-1}x + \sigma^2$
-\vspace{1cm}
-\begin{block}{Property}
- \emph{The prediction error decreases as the size of the database increases}
-\end{block}
-\vspace{1cm}
-\alert{$\Rightarrow$} choose a decreasing function of the prediction
-error
-\pause
+ \vspace{0.5cm}
-\alert{Problem:} $\mse$ not submodular
+ As usual in auction mechanism design, we want payments to be:
+ \begin{itemize}
+ \item individually rational: $p_i\geq c_i$
+ \item truthful: reporting one's true cost is a dominant strategy
+ \item normalized: $s_i=0$ implies $p_i=0$
+ \end{itemize}
\end{frame}
-\begin{frame}{Revenue structure (2)}
-Exponentially decreasing revenue:
-$$V(X,x) = \exp\big(-\mse(X,x)\big)$$
+\begin{frame}{Optimization problem for budget feasible auctions}
+ \begin{block}{P2}
+ \begin{align*}
+ &\text{maximize}\; V\big(s(c_1,\ldots,c_n)\big) = \log\det\Big(I_d + \lambda\sum_{i\in S}x_i x_i^T \Big)\\
+ &\text{subject to}\; \sum_{i\in S}\alert{p_i}\leq B\quad\text{(budget feasible)}
+ \end{align*}
+ where the payments are further constrained to be individually rational, truthful
+ and normalized
+ \end{block}
-Value of database $X$:
-$$V(X) = \int_{x\in\mathbb{R}^d} V(X,x) dx$$
+ \vspace{1cm}
+
+ This is again NP-hard $\Rightarrow$ we seek good approximation ratios
\end{frame}
-\begin{frame}{Properties}
-\begin{block}{Closed-form expression}
-$$V(X) = C\sqrt{\det(\tr{X}X)}$$
-\end{block}
+\begin{frame}{Myerson's theorem}
+ \begin{theorem}[Myerson]
+ Let $(s, p)$ be a mechanism. If:
+ \begin{itemize}
+ \item $s$ is \alert{monotonic}: if $c_i\leq c_i'$ then $s_i \geq s_i'$
+ \item users are paid \alert{threshold payments}: $p_i = \inf\{c_i':i\notin s(c_i',c_{-i})\}$
+ \end{itemize}
+ Then, the mechanism is individually rational and truthful.
+ \end{theorem}
-\vspace{1cm}
-\pause
-\begin{block}{Supermodularity}
-$$X\mapsto\log\big(V(X)\big)$$ is supermodular
-\end{block}
-\pause
-\alert{$\Rightarrow$} we can apply the Shapley value theory!
-\end{frame}
+ \vspace{1cm}
-\begin{frame}{Conclusion}
+ \alert{Consequence:} It suffices to find a monotonic selection rule such that
+ the threshold payments are within the budget constraint.
+\end{frame}
-\begin{itemize}
-\item general framework to study the pricing problem
-\item the case of the linear regression
-\end{itemize}
-\vspace{1cm}
-\pause
-Future directions
-\begin{itemize}
-\item different revenue models
-\item fluid Shapley Value (Aumann-Shapley) for a continuum of players
-\end{itemize}
+\begin{frame}{Main result}
+ \begin{theorem}
+ There is a budget feasible, individually rational and truthful mechanism for
+ the experimental design problem which runs in polynomial time. Its
+ approximation ratio is:
+ \begin{displaymath}
+ \frac{10e-3 + \sqrt{64e^2-24e + 9}}{2(e-1)}
+ \simeq 12.98
+ \end{displaymath}
+ \end{theorem}
\end{frame}
-\end{document} \ No newline at end of file
+\end{document}
+
+