diff options
| -rw-r--r-- | slides.tex | 145 |
1 files changed, 123 insertions, 22 deletions
@@ -6,9 +6,12 @@ \usepackage{amsmath} \usepackage{graphicx} \usepackage{tikz} +\newcommand{\tr}[1]{#1^*} +\newcommand{\expt}[1]{\mathop{\mathbb{E}}\left[#1\right]} +\newcommand{\mse}{\mathop{\mathrm{MSE}}} \usetheme{Boadilla} \usecolortheme{beaver} -\title[Data monetization]{Data monetization: pricing user data with the Shapley Value} +\title[Data monetization]{Data monetization: pricing user data\\with the Shapley Value} \author{Thibaut \textsc{Horel}} \institute[Technicolor]{Work with {\greektext Στρατής \textsc{Ιωαννίδης}}} \setbeamercovered{transparent} @@ -27,56 +30,73 @@ \maketitle \end{frame} -\begin{frame}{Outline} -\tableofcontents -\end{frame} - \section{Problem} \begin{frame}{Problem Overview} - +\begin{center} +\includegraphics[width=0.8\textwidth]{schema.pdf} +\end{center} \end{frame} -\begin{frame}{Purpose} +\begin{frame}{Motivation} + +Why is it useful? + +\begin{itemize} +\item sell/buy user data +\item compensate users in your database +\end{itemize} \end{frame} \begin{frame}{Challenges} +We need to understand: +\begin{itemize} +\item the recommender system +\item the revenue structure +\item the relation between the revenue and the value of the database +\item how to compensate individual users +\end{itemize} \end{frame} \section{Shapley value} \begin{frame}{Individual value} \begin{itemize} - \item $S$ the set of players, a subset $S'\subset S$ is a \alert{coalition} - \item $V: \mathcal{P}(S)\to \mathbf{R}$ the \alert{value} function: + \item $S$: set of players + \item $S'\subset S$: coalition + \item $V: \mathcal{P}(S)\to \mathbf{R}$: \alert{value} function: \begin{itemize} - \item $V(S')$ is the value of the coalition $S'$ \item $V(\emptyset) = 0$ + \item $V(S')$: value of the coalition $S'$ \end{itemize} \end{itemize} \alert{Question:} How to split the value of a subset across all its constituents? - \alert{Goal:} design a function $\phi_i(S',V)$, the value of player $i$ in $S'$ + \alert{Goal:} $\phi_i(S',V)$: value of player $i$ in $S'$ +\end{frame} +\begin{frame}{Solution attempts} \begin{itemize} - \visible<2->{\item $\phi_i(S',V) = V(\{i\})$?} \visible<3->{``The whole is greater than the sum of its parts'' + \visible<1->{\item $\phi_i(S',V) = V(\{i\})$?} \visible<2->{``The whole is greater than the sum of its parts'' $$V(S')\neq \sum_{i\in S'} V(\{i\})$$} - \visible<4->{\item $\phi_i(S',V) = V(S') - V(S'\setminus\{i\})$: the \alert{marginal contribution}?} \visible<5->{depends on the time the player joins the coalition} + \visible<3->{\item $\phi_i(S',V) = V(S') - V(S'\setminus\{i\})$: the \alert{marginal contribution}?} \visible<4->{depends on the time the player joins the coalition} \end{itemize} \end{frame} \begin{frame}{Axioms} \begin{description} - \item[Efficiency:] The whole value must be split + \item[Efficiency:] No money lost in the splitting process $$V(S) = \sum_{i\in S} \phi_i(S,V)$$ +\pause \item[Symmetry:] Equal pay for equal contribution \alert{if} $\forall S'\subset S\setminus\{i,j\}, V(S'\cup\{i\}) = V(S'\cup\{j\})$ \alert{then} $\phi_i(S,V) = \phi_j(S,V)$ - \item[Fairness:] $i$'s contribution to $j$ equals $j$'s contribution to $i$ +\pause + \item[Fairness:] $j$'s contribution to $i$ equals $i$'s contribution to $j$ $$\phi_i(S,V) - \phi_i(S\setminus\{j\}) = \phi_j(S,V)- \phi_j(S\setminus\{i\})$$ \end{description} \end{frame} @@ -84,37 +104,118 @@ \begin{frame}{Solution: the Shapley Value} \begin{theorem}[Shapley, 1953] - + There is only one function satisfying Efficiency, Symmetry and + Fairness: + $$\phi_i(S,V) = \frac{1}{\vert S\vert!}\sum_{\pi\in\mathfrak{S}(S)}\Delta_i\big(V,S(i,\pi)\big)$$ \end{theorem} + \begin{itemize} + \item $\mathfrak{S}(S)$: set of permutations of $S$ + \item $S(i,\pi)$: set of players before $i$ in $\pi$ + \item $\Delta_i\big(V,T\big) = + V\big(T\big)-V\big(T\setminus\{i\})$: marginal contribution of $i$ + to $T$ + \end{itemize} \end{frame} \begin{frame}{Properties} + \begin{itemize} + \item + \alert{if} $V$ is superadditive: $V(S\cup T) \geq V(S) + V(T)$ +\pause + \alert{then} the Shapley Value is individually rational: + $\phi_i(S,V) > V(\{i\})$ +\pause + \item + \alert{if} $V$ is supermodular: $\Delta_i(V,S)\leq\Delta_i(V,T), + \forall S\subset T$ +\pause + + \alert{then} the Shapley Value lies in the core of + the game: + $$\sum_{i\in S'}\phi_i(S,V) \geq V(S'), \forall S'\subset S$$ +\pause + \item Hervé Moulin \& Scott Shenker (1999): Group-strategyproof auction + \end{itemize} \end{frame} -\section{A special case} +\section{Example scenario} -\begin{frame}{User model} +\begin{frame}{Setup} + \begin{itemize} + \item database $X$: $n\times p$ matrix + + \item $x_i\in \mathbf{R}^p$ $i$-th row of $X$: data of user $i$ + \item for each user: $y_i$ variable of interest, could be: + \begin{itemize} + \item already observed + \item about to be observed + \end{itemize} + \end{itemize} \end{frame} \begin{frame}{Recommender system} +\alert{Goal:} predict the variable of interest $y$ of a new user $x$ + +\alert{Linear model:} relation between data and variable of interest: +$$y = \langle\beta,x\rangle + \varepsilon, \quad \varepsilon\simeq\mathcal{N}(0,\sigma^2)$$ + +\alert{Linear regression:} estimator $\tilde{\beta} = +(\tr{X}X)^{-1}\tr{X}Y$ +\alert{Prediction:} $\tilde{y} = \langle\tilde{\beta},x\rangle$ \end{frame} -\begin{frame}{Value function (1)} +\begin{frame}{Revenue structure} +\alert{Prediction error:} $\mse(X,x) = \expt{(\tilde{y} - y)^2} = +\sigma^2\tr{x}(\tr{X}X)^{-1}x + \sigma^2$ +\vspace{1cm} +\begin{block}{Property} + \emph{The prediction error decreases as the size of the database increases} +\end{block} +\vspace{1cm} +\alert{$\Rightarrow$} choose a decreasing function of the prediction +error +\pause +\alert{Problem:} $\mse$ not submodular \end{frame} -\begin{frame}{Properties} +\begin{frame}{Revenue structure (2)} +Exponentially decreasing revenue: +$$V(X,x) = \exp\big(-\mse(X,x)\big)$$ +Value of database $X$: +$$V(X) = \int_{x\in\mathbb{R}^d} V(X,x) dx$$ \end{frame} -\begin{frame}{Value function (2)} +\begin{frame}{Properties} +\begin{block}{Closed-form expression} +$$V(X) = C\sqrt{\det(\tr{X}X)}$$ +\end{block} +\vspace{1cm} +\pause +\begin{block}{Supermodularity} +$$X\mapsto\log\big(V(X)\big)$$ is supermodular +\end{block} +\pause +\alert{$\Rightarrow$} we can apply the Shapley value theory! \end{frame} -\begin{frame}{Conclusion and future directions} +\begin{frame}{Conclusion} +\begin{itemize} +\item general framework to study the pricing problem +\item the case of the linear regression +\end{itemize} +\vspace{1cm} +\pause +Future directions +\begin{itemize} +\item different revenue models +\item fluid Shapley Value (Aumann-Shapley) for a continuum of players +\end{itemize} \end{frame} \end{document}
\ No newline at end of file |
