summaryrefslogtreecommitdiffstats
path: root/slides/slides.tex
diff options
context:
space:
mode:
Diffstat (limited to 'slides/slides.tex')
-rw-r--r--slides/slides.tex221
1 files changed, 221 insertions, 0 deletions
diff --git a/slides/slides.tex b/slides/slides.tex
new file mode 100644
index 0000000..ec2e5be
--- /dev/null
+++ b/slides/slides.tex
@@ -0,0 +1,221 @@
+\documentclass{beamer}
+\usepackage[utf8x]{inputenc}
+\usepackage[greek,english]{babel}
+\usepackage[LGR,T1]{fontenc}
+\usepackage{lmodern}
+\usepackage{amsmath}
+\usepackage{graphicx}
+\usepackage{tikz}
+\newcommand{\tr}[1]{#1^*}
+\newcommand{\expt}[1]{\mathop{\mathbb{E}}\left[#1\right]}
+\newcommand{\mse}{\mathop{\mathrm{MSE}}}
+\usetheme{Boadilla}
+\usecolortheme{beaver}
+\title[Data monetization]{Data monetization: pricing user data\\with the Shapley Value}
+\author{Thibaut \textsc{Horel}}
+\institute[Technicolor]{Work with {\greektext Στρατής \textsc{Ιωαννίδης}}}
+\setbeamercovered{transparent}
+
+\AtBeginSection[]
+{
+\begin{frame}<beamer>
+\frametitle{Outline}
+\tableofcontents[currentsection]
+\end{frame}
+}
+
+\begin{document}
+
+\begin{frame}
+\maketitle
+\end{frame}
+
+\section{Problem}
+\begin{frame}{Problem Overview}
+\begin{center}
+\includegraphics[width=0.8\textwidth]{schema.pdf}
+\end{center}
+\end{frame}
+
+\begin{frame}{Motivation}
+
+Why is it useful?
+
+\begin{itemize}
+\item sell/buy user data
+\item compensate users in your database
+\end{itemize}
+
+\end{frame}
+
+\begin{frame}{Challenges}
+
+We need to understand:
+\begin{itemize}
+\item the recommender system
+\item the revenue structure
+\item the relation between the revenue and the value of the database
+\item how to compensate individual users
+\end{itemize}
+\end{frame}
+
+\section{Shapley value}
+
+\begin{frame}{Individual value}
+ \begin{itemize}
+ \item $S$: set of players
+ \item $S'\subset S$: coalition
+ \item $V: \mathcal{P}(S)\to \mathbf{R}$: \alert{value} function:
+ \begin{itemize}
+ \item $V(\emptyset) = 0$
+ \item $V(S')$: value of the coalition $S'$
+ \end{itemize}
+ \end{itemize}
+
+ \alert{Question:} How to split the value of a subset across all its constituents?
+
+ \alert{Goal:} $\phi_i(S',V)$: value of player $i$ in $S'$
+\end{frame}
+
+\begin{frame}{Solution attempts}
+ \begin{itemize}
+ \visible<1->{\item $\phi_i(S',V) = V(\{i\})$?} \visible<2->{``The whole is greater than the sum of its parts''
+ $$V(S')\neq \sum_{i\in S'} V(\{i\})$$}
+ \visible<3->{\item $\phi_i(S',V) = V(S') - V(S'\setminus\{i\})$: the \alert{marginal contribution}?} \visible<4->{depends on the time the player joins the coalition}
+ \end{itemize}
+\end{frame}
+
+\begin{frame}{Axioms}
+ \begin{description}
+ \item[Efficiency:] No money lost in the splitting process
+ $$V(S) = \sum_{i\in S} \phi_i(S,V)$$
+\pause
+ \item[Symmetry:] Equal pay for equal contribution
+
+ \alert{if} $\forall S'\subset S\setminus\{i,j\}, V(S'\cup\{i\}) = V(S'\cup\{j\})$
+
+ \alert{then} $\phi_i(S,V) = \phi_j(S,V)$
+\pause
+ \item[Fairness:] $j$'s contribution to $i$ equals $i$'s contribution to $j$
+ $$\phi_i(S,V) - \phi_i(S\setminus\{j\}) = \phi_j(S,V)- \phi_j(S\setminus\{i\})$$
+ \end{description}
+\end{frame}
+
+\begin{frame}{Solution: the Shapley Value}
+
+ \begin{theorem}[Shapley, 1953]
+ There is only one function satisfying Efficiency, Symmetry and
+ Fairness:
+ $$\phi_i(S,V) = \frac{1}{\vert S\vert!}\sum_{\pi\in\mathfrak{S}(S)}\Delta_i\big(V,S(i,\pi)\big)$$
+ \end{theorem}
+ \begin{itemize}
+ \item $\mathfrak{S}(S)$: set of permutations of $S$
+ \item $S(i,\pi)$: set of players before $i$ in $\pi$
+ \item $\Delta_i\big(V,T\big) =
+ V\big(T\big)-V\big(T\setminus\{i\})$: marginal contribution of $i$
+ to $T$
+ \end{itemize}
+\end{frame}
+
+\begin{frame}{Properties}
+ \begin{itemize}
+ \item
+ \alert{if} $V$ is superadditive: $V(S\cup T) \geq V(S) + V(T)$
+\pause
+
+ \alert{then} the Shapley Value is individually rational:
+ $\phi_i(S,V) > V(\{i\})$
+\pause
+ \item
+ \alert{if} $V$ is supermodular: $\Delta_i(V,S)\leq\Delta_i(V,T),
+ \forall S\subset T$
+\pause
+
+ \alert{then} the Shapley Value lies in the core of
+ the game:
+ $$\sum_{i\in S'}\phi_i(S,V) \geq V(S'), \forall S'\subset S$$
+\pause
+ \item Hervé Moulin \& Scott Shenker (1999): Group-strategyproof auction
+ \end{itemize}
+\end{frame}
+
+\section{Example scenario}
+
+\begin{frame}{Setup}
+ \begin{itemize}
+ \item database $X$: $n\times p$ matrix
+
+ \item $x_i\in \mathbf{R}^p$ $i$-th row of $X$: data of user $i$
+
+ \item for each user: $y_i$ variable of interest, could be:
+ \begin{itemize}
+ \item already observed
+ \item about to be observed
+ \end{itemize}
+ \end{itemize}
+\end{frame}
+
+\begin{frame}{Recommender system}
+\alert{Goal:} predict the variable of interest $y$ of a new user $x$
+
+\alert{Linear model:} relation between data and variable of interest:
+$$y = \langle\beta,x\rangle + \varepsilon, \quad \varepsilon\simeq\mathcal{N}(0,\sigma^2)$$
+
+\alert{Linear regression:} estimator $\tilde{\beta} =
+(\tr{X}X)^{-1}\tr{X}Y$
+
+\alert{Prediction:} $\tilde{y} = \langle\tilde{\beta},x\rangle$
+\end{frame}
+
+\begin{frame}{Revenue structure}
+\alert{Prediction error:} $\mse(X,x) = \expt{(\tilde{y} - y)^2} =
+\sigma^2\tr{x}(\tr{X}X)^{-1}x + \sigma^2$
+\vspace{1cm}
+\begin{block}{Property}
+ \emph{The prediction error decreases as the size of the database increases}
+\end{block}
+\vspace{1cm}
+\alert{$\Rightarrow$} choose a decreasing function of the prediction
+error
+\pause
+
+\alert{Problem:} $\mse$ not submodular
+\end{frame}
+
+\begin{frame}{Revenue structure (2)}
+Exponentially decreasing revenue:
+$$V(X,x) = \exp\big(-\mse(X,x)\big)$$
+
+Value of database $X$:
+$$V(X) = \int_{x\in\mathbb{R}^d} V(X,x) dx$$
+\end{frame}
+
+\begin{frame}{Properties}
+\begin{block}{Closed-form expression}
+$$V(X) = C\sqrt{\det(\tr{X}X)}$$
+\end{block}
+
+\vspace{1cm}
+\pause
+\begin{block}{Supermodularity}
+$$X\mapsto\log\big(V(X)\big)$$ is supermodular
+\end{block}
+\pause
+\alert{$\Rightarrow$} we can apply the Shapley value theory!
+\end{frame}
+
+\begin{frame}{Conclusion}
+
+\begin{itemize}
+\item general framework to study the pricing problem
+\item the case of the linear regression
+\end{itemize}
+\vspace{1cm}
+\pause
+Future directions
+\begin{itemize}
+\item different revenue models
+\item fluid Shapley Value (Aumann-Shapley) for a continuum of players
+\end{itemize}
+\end{frame}
+\end{document} \ No newline at end of file