summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--slides.tex145
1 files changed, 123 insertions, 22 deletions
diff --git a/slides.tex b/slides.tex
index 7d19984..ec2e5be 100644
--- a/slides.tex
+++ b/slides.tex
@@ -6,9 +6,12 @@
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage{tikz}
+\newcommand{\tr}[1]{#1^*}
+\newcommand{\expt}[1]{\mathop{\mathbb{E}}\left[#1\right]}
+\newcommand{\mse}{\mathop{\mathrm{MSE}}}
\usetheme{Boadilla}
\usecolortheme{beaver}
-\title[Data monetization]{Data monetization: pricing user data with the Shapley Value}
+\title[Data monetization]{Data monetization: pricing user data\\with the Shapley Value}
\author{Thibaut \textsc{Horel}}
\institute[Technicolor]{Work with {\greektext Στρατής \textsc{Ιωαννίδης}}}
\setbeamercovered{transparent}
@@ -27,56 +30,73 @@
\maketitle
\end{frame}
-\begin{frame}{Outline}
-\tableofcontents
-\end{frame}
-
\section{Problem}
\begin{frame}{Problem Overview}
-
+\begin{center}
+\includegraphics[width=0.8\textwidth]{schema.pdf}
+\end{center}
\end{frame}
-\begin{frame}{Purpose}
+\begin{frame}{Motivation}
+
+Why is it useful?
+
+\begin{itemize}
+\item sell/buy user data
+\item compensate users in your database
+\end{itemize}
\end{frame}
\begin{frame}{Challenges}
+We need to understand:
+\begin{itemize}
+\item the recommender system
+\item the revenue structure
+\item the relation between the revenue and the value of the database
+\item how to compensate individual users
+\end{itemize}
\end{frame}
\section{Shapley value}
\begin{frame}{Individual value}
\begin{itemize}
- \item $S$ the set of players, a subset $S'\subset S$ is a \alert{coalition}
- \item $V: \mathcal{P}(S)\to \mathbf{R}$ the \alert{value} function:
+ \item $S$: set of players
+ \item $S'\subset S$: coalition
+ \item $V: \mathcal{P}(S)\to \mathbf{R}$: \alert{value} function:
\begin{itemize}
- \item $V(S')$ is the value of the coalition $S'$
\item $V(\emptyset) = 0$
+ \item $V(S')$: value of the coalition $S'$
\end{itemize}
\end{itemize}
\alert{Question:} How to split the value of a subset across all its constituents?
- \alert{Goal:} design a function $\phi_i(S',V)$, the value of player $i$ in $S'$
+ \alert{Goal:} $\phi_i(S',V)$: value of player $i$ in $S'$
+\end{frame}
+\begin{frame}{Solution attempts}
\begin{itemize}
- \visible<2->{\item $\phi_i(S',V) = V(\{i\})$?} \visible<3->{``The whole is greater than the sum of its parts''
+ \visible<1->{\item $\phi_i(S',V) = V(\{i\})$?} \visible<2->{``The whole is greater than the sum of its parts''
$$V(S')\neq \sum_{i\in S'} V(\{i\})$$}
- \visible<4->{\item $\phi_i(S',V) = V(S') - V(S'\setminus\{i\})$: the \alert{marginal contribution}?} \visible<5->{depends on the time the player joins the coalition}
+ \visible<3->{\item $\phi_i(S',V) = V(S') - V(S'\setminus\{i\})$: the \alert{marginal contribution}?} \visible<4->{depends on the time the player joins the coalition}
\end{itemize}
\end{frame}
\begin{frame}{Axioms}
\begin{description}
- \item[Efficiency:] The whole value must be split
+ \item[Efficiency:] No money lost in the splitting process
$$V(S) = \sum_{i\in S} \phi_i(S,V)$$
+\pause
\item[Symmetry:] Equal pay for equal contribution
\alert{if} $\forall S'\subset S\setminus\{i,j\}, V(S'\cup\{i\}) = V(S'\cup\{j\})$
\alert{then} $\phi_i(S,V) = \phi_j(S,V)$
- \item[Fairness:] $i$'s contribution to $j$ equals $j$'s contribution to $i$
+\pause
+ \item[Fairness:] $j$'s contribution to $i$ equals $i$'s contribution to $j$
$$\phi_i(S,V) - \phi_i(S\setminus\{j\}) = \phi_j(S,V)- \phi_j(S\setminus\{i\})$$
\end{description}
\end{frame}
@@ -84,37 +104,118 @@
\begin{frame}{Solution: the Shapley Value}
\begin{theorem}[Shapley, 1953]
-
+ There is only one function satisfying Efficiency, Symmetry and
+ Fairness:
+ $$\phi_i(S,V) = \frac{1}{\vert S\vert!}\sum_{\pi\in\mathfrak{S}(S)}\Delta_i\big(V,S(i,\pi)\big)$$
\end{theorem}
+ \begin{itemize}
+ \item $\mathfrak{S}(S)$: set of permutations of $S$
+ \item $S(i,\pi)$: set of players before $i$ in $\pi$
+ \item $\Delta_i\big(V,T\big) =
+ V\big(T\big)-V\big(T\setminus\{i\})$: marginal contribution of $i$
+ to $T$
+ \end{itemize}
\end{frame}
\begin{frame}{Properties}
+ \begin{itemize}
+ \item
+ \alert{if} $V$ is superadditive: $V(S\cup T) \geq V(S) + V(T)$
+\pause
+ \alert{then} the Shapley Value is individually rational:
+ $\phi_i(S,V) > V(\{i\})$
+\pause
+ \item
+ \alert{if} $V$ is supermodular: $\Delta_i(V,S)\leq\Delta_i(V,T),
+ \forall S\subset T$
+\pause
+
+ \alert{then} the Shapley Value lies in the core of
+ the game:
+ $$\sum_{i\in S'}\phi_i(S,V) \geq V(S'), \forall S'\subset S$$
+\pause
+ \item Hervé Moulin \& Scott Shenker (1999): Group-strategyproof auction
+ \end{itemize}
\end{frame}
-\section{A special case}
+\section{Example scenario}
-\begin{frame}{User model}
+\begin{frame}{Setup}
+ \begin{itemize}
+ \item database $X$: $n\times p$ matrix
+
+ \item $x_i\in \mathbf{R}^p$ $i$-th row of $X$: data of user $i$
+ \item for each user: $y_i$ variable of interest, could be:
+ \begin{itemize}
+ \item already observed
+ \item about to be observed
+ \end{itemize}
+ \end{itemize}
\end{frame}
\begin{frame}{Recommender system}
+\alert{Goal:} predict the variable of interest $y$ of a new user $x$
+
+\alert{Linear model:} relation between data and variable of interest:
+$$y = \langle\beta,x\rangle + \varepsilon, \quad \varepsilon\simeq\mathcal{N}(0,\sigma^2)$$
+
+\alert{Linear regression:} estimator $\tilde{\beta} =
+(\tr{X}X)^{-1}\tr{X}Y$
+\alert{Prediction:} $\tilde{y} = \langle\tilde{\beta},x\rangle$
\end{frame}
-\begin{frame}{Value function (1)}
+\begin{frame}{Revenue structure}
+\alert{Prediction error:} $\mse(X,x) = \expt{(\tilde{y} - y)^2} =
+\sigma^2\tr{x}(\tr{X}X)^{-1}x + \sigma^2$
+\vspace{1cm}
+\begin{block}{Property}
+ \emph{The prediction error decreases as the size of the database increases}
+\end{block}
+\vspace{1cm}
+\alert{$\Rightarrow$} choose a decreasing function of the prediction
+error
+\pause
+\alert{Problem:} $\mse$ not submodular
\end{frame}
-\begin{frame}{Properties}
+\begin{frame}{Revenue structure (2)}
+Exponentially decreasing revenue:
+$$V(X,x) = \exp\big(-\mse(X,x)\big)$$
+Value of database $X$:
+$$V(X) = \int_{x\in\mathbb{R}^d} V(X,x) dx$$
\end{frame}
-\begin{frame}{Value function (2)}
+\begin{frame}{Properties}
+\begin{block}{Closed-form expression}
+$$V(X) = C\sqrt{\det(\tr{X}X)}$$
+\end{block}
+\vspace{1cm}
+\pause
+\begin{block}{Supermodularity}
+$$X\mapsto\log\big(V(X)\big)$$ is supermodular
+\end{block}
+\pause
+\alert{$\Rightarrow$} we can apply the Shapley value theory!
\end{frame}
-\begin{frame}{Conclusion and future directions}
+\begin{frame}{Conclusion}
+\begin{itemize}
+\item general framework to study the pricing problem
+\item the case of the linear regression
+\end{itemize}
+\vspace{1cm}
+\pause
+Future directions
+\begin{itemize}
+\item different revenue models
+\item fluid Shapley Value (Aumann-Shapley) for a continuum of players
+\end{itemize}
\end{frame}
\end{document} \ No newline at end of file