From d010cdae55a7e19d78a24e46c1db3058d69347fc Mon Sep 17 00:00:00 2001 From: Thibaut Horel Date: Wed, 15 Aug 2012 18:27:30 -0700 Subject: Repository restructuration (has to be clean now that it is shared ;) --- slides/schema.pdf | Bin 0 -> 844491 bytes slides/slides.tex | 221 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 221 insertions(+) create mode 100644 slides/schema.pdf create mode 100644 slides/slides.tex (limited to 'slides') diff --git a/slides/schema.pdf b/slides/schema.pdf new file mode 100644 index 0000000..c44eb38 Binary files /dev/null and b/slides/schema.pdf differ diff --git a/slides/slides.tex b/slides/slides.tex new file mode 100644 index 0000000..ec2e5be --- /dev/null +++ b/slides/slides.tex @@ -0,0 +1,221 @@ +\documentclass{beamer} +\usepackage[utf8x]{inputenc} +\usepackage[greek,english]{babel} +\usepackage[LGR,T1]{fontenc} +\usepackage{lmodern} +\usepackage{amsmath} +\usepackage{graphicx} +\usepackage{tikz} +\newcommand{\tr}[1]{#1^*} +\newcommand{\expt}[1]{\mathop{\mathbb{E}}\left[#1\right]} +\newcommand{\mse}{\mathop{\mathrm{MSE}}} +\usetheme{Boadilla} +\usecolortheme{beaver} +\title[Data monetization]{Data monetization: pricing user data\\with the Shapley Value} +\author{Thibaut \textsc{Horel}} +\institute[Technicolor]{Work with {\greektext Στρατής \textsc{Ιωαννίδης}}} +\setbeamercovered{transparent} + +\AtBeginSection[] +{ +\begin{frame} +\frametitle{Outline} +\tableofcontents[currentsection] +\end{frame} +} + +\begin{document} + +\begin{frame} +\maketitle +\end{frame} + +\section{Problem} +\begin{frame}{Problem Overview} +\begin{center} +\includegraphics[width=0.8\textwidth]{schema.pdf} +\end{center} +\end{frame} + +\begin{frame}{Motivation} + +Why is it useful? + +\begin{itemize} +\item sell/buy user data +\item compensate users in your database +\end{itemize} + +\end{frame} + +\begin{frame}{Challenges} + +We need to understand: +\begin{itemize} +\item the recommender system +\item the revenue structure +\item the relation between the revenue and the value of the database +\item how to compensate individual users +\end{itemize} +\end{frame} + +\section{Shapley value} + +\begin{frame}{Individual value} + \begin{itemize} + \item $S$: set of players + \item $S'\subset S$: coalition + \item $V: \mathcal{P}(S)\to \mathbf{R}$: \alert{value} function: + \begin{itemize} + \item $V(\emptyset) = 0$ + \item $V(S')$: value of the coalition $S'$ + \end{itemize} + \end{itemize} + + \alert{Question:} How to split the value of a subset across all its constituents? + + \alert{Goal:} $\phi_i(S',V)$: value of player $i$ in $S'$ +\end{frame} + +\begin{frame}{Solution attempts} + \begin{itemize} + \visible<1->{\item $\phi_i(S',V) = V(\{i\})$?} \visible<2->{``The whole is greater than the sum of its parts'' + $$V(S')\neq \sum_{i\in S'} V(\{i\})$$} + \visible<3->{\item $\phi_i(S',V) = V(S') - V(S'\setminus\{i\})$: the \alert{marginal contribution}?} \visible<4->{depends on the time the player joins the coalition} + \end{itemize} +\end{frame} + +\begin{frame}{Axioms} + \begin{description} + \item[Efficiency:] No money lost in the splitting process + $$V(S) = \sum_{i\in S} \phi_i(S,V)$$ +\pause + \item[Symmetry:] Equal pay for equal contribution + + \alert{if} $\forall S'\subset S\setminus\{i,j\}, V(S'\cup\{i\}) = V(S'\cup\{j\})$ + + \alert{then} $\phi_i(S,V) = \phi_j(S,V)$ +\pause + \item[Fairness:] $j$'s contribution to $i$ equals $i$'s contribution to $j$ + $$\phi_i(S,V) - \phi_i(S\setminus\{j\}) = \phi_j(S,V)- \phi_j(S\setminus\{i\})$$ + \end{description} +\end{frame} + +\begin{frame}{Solution: the Shapley Value} + + \begin{theorem}[Shapley, 1953] + There is only one function satisfying Efficiency, Symmetry and + Fairness: + $$\phi_i(S,V) = \frac{1}{\vert S\vert!}\sum_{\pi\in\mathfrak{S}(S)}\Delta_i\big(V,S(i,\pi)\big)$$ + \end{theorem} + \begin{itemize} + \item $\mathfrak{S}(S)$: set of permutations of $S$ + \item $S(i,\pi)$: set of players before $i$ in $\pi$ + \item $\Delta_i\big(V,T\big) = + V\big(T\big)-V\big(T\setminus\{i\})$: marginal contribution of $i$ + to $T$ + \end{itemize} +\end{frame} + +\begin{frame}{Properties} + \begin{itemize} + \item + \alert{if} $V$ is superadditive: $V(S\cup T) \geq V(S) + V(T)$ +\pause + + \alert{then} the Shapley Value is individually rational: + $\phi_i(S,V) > V(\{i\})$ +\pause + \item + \alert{if} $V$ is supermodular: $\Delta_i(V,S)\leq\Delta_i(V,T), + \forall S\subset T$ +\pause + + \alert{then} the Shapley Value lies in the core of + the game: + $$\sum_{i\in S'}\phi_i(S,V) \geq V(S'), \forall S'\subset S$$ +\pause + \item Hervé Moulin \& Scott Shenker (1999): Group-strategyproof auction + \end{itemize} +\end{frame} + +\section{Example scenario} + +\begin{frame}{Setup} + \begin{itemize} + \item database $X$: $n\times p$ matrix + + \item $x_i\in \mathbf{R}^p$ $i$-th row of $X$: data of user $i$ + + \item for each user: $y_i$ variable of interest, could be: + \begin{itemize} + \item already observed + \item about to be observed + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame}{Recommender system} +\alert{Goal:} predict the variable of interest $y$ of a new user $x$ + +\alert{Linear model:} relation between data and variable of interest: +$$y = \langle\beta,x\rangle + \varepsilon, \quad \varepsilon\simeq\mathcal{N}(0,\sigma^2)$$ + +\alert{Linear regression:} estimator $\tilde{\beta} = +(\tr{X}X)^{-1}\tr{X}Y$ + +\alert{Prediction:} $\tilde{y} = \langle\tilde{\beta},x\rangle$ +\end{frame} + +\begin{frame}{Revenue structure} +\alert{Prediction error:} $\mse(X,x) = \expt{(\tilde{y} - y)^2} = +\sigma^2\tr{x}(\tr{X}X)^{-1}x + \sigma^2$ +\vspace{1cm} +\begin{block}{Property} + \emph{The prediction error decreases as the size of the database increases} +\end{block} +\vspace{1cm} +\alert{$\Rightarrow$} choose a decreasing function of the prediction +error +\pause + +\alert{Problem:} $\mse$ not submodular +\end{frame} + +\begin{frame}{Revenue structure (2)} +Exponentially decreasing revenue: +$$V(X,x) = \exp\big(-\mse(X,x)\big)$$ + +Value of database $X$: +$$V(X) = \int_{x\in\mathbb{R}^d} V(X,x) dx$$ +\end{frame} + +\begin{frame}{Properties} +\begin{block}{Closed-form expression} +$$V(X) = C\sqrt{\det(\tr{X}X)}$$ +\end{block} + +\vspace{1cm} +\pause +\begin{block}{Supermodularity} +$$X\mapsto\log\big(V(X)\big)$$ is supermodular +\end{block} +\pause +\alert{$\Rightarrow$} we can apply the Shapley value theory! +\end{frame} + +\begin{frame}{Conclusion} + +\begin{itemize} +\item general framework to study the pricing problem +\item the case of the linear regression +\end{itemize} +\vspace{1cm} +\pause +Future directions +\begin{itemize} +\item different revenue models +\item fluid Shapley Value (Aumann-Shapley) for a continuum of players +\end{itemize} +\end{frame} +\end{document} \ No newline at end of file -- cgit v1.2.3-70-g09d2