1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
|
\documentclass{beamer}
\usepackage[utf8x]{inputenc}
\usepackage[greek,english]{babel}
\usepackage[LGR,T1]{fontenc}
\usepackage{lmodern}
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage{tikz}
\newcommand{\tr}[1]{#1^*}
\newcommand{\expt}[1]{\mathop{\mathbb{E}}\left[#1\right]}
\newcommand{\mse}{\mathop{\mathrm{MSE}}}
\usetheme{Boadilla}
\usecolortheme{beaver}
\title[Data monetization]{Data monetization: pricing user data\\with the Shapley Value}
\author{Thibaut \textsc{Horel}}
\institute[Technicolor]{Work with {\greektext Στρατής \textsc{Ιωαννίδης}}}
\setbeamercovered{transparent}
\AtBeginSection[]
{
\begin{frame}<beamer>
\frametitle{Outline}
\tableofcontents[currentsection]
\end{frame}
}
\begin{document}
\begin{frame}
\maketitle
\end{frame}
\section{Problem}
\begin{frame}{Problem Overview}
\begin{center}
\includegraphics[width=0.8\textwidth]{schema.pdf}
\end{center}
\end{frame}
\begin{frame}{Motivation}
Why is it useful?
\begin{itemize}
\item sell/buy user data
\item compensate users in your database
\end{itemize}
\end{frame}
\begin{frame}{Challenges}
We need to understand:
\begin{itemize}
\item the recommender system
\item the revenue structure
\item the relation between the revenue and the value of the database
\item how to compensate individual users
\end{itemize}
\end{frame}
\section{Shapley value}
\begin{frame}{Individual value}
\begin{itemize}
\item $S$: set of players
\item $S'\subset S$: coalition
\item $V: \mathcal{P}(S)\to \mathbf{R}$: \alert{value} function:
\begin{itemize}
\item $V(\emptyset) = 0$
\item $V(S')$: value of the coalition $S'$
\end{itemize}
\end{itemize}
\alert{Question:} How to split the value of a subset across all its constituents?
\alert{Goal:} $\phi_i(S',V)$: value of player $i$ in $S'$
\end{frame}
\begin{frame}{Solution attempts}
\begin{itemize}
\visible<1->{\item $\phi_i(S',V) = V(\{i\})$?} \visible<2->{``The whole is greater than the sum of its parts''
$$V(S')\neq \sum_{i\in S'} V(\{i\})$$}
\visible<3->{\item $\phi_i(S',V) = V(S') - V(S'\setminus\{i\})$: the \alert{marginal contribution}?} \visible<4->{depends on the time the player joins the coalition}
\end{itemize}
\end{frame}
\begin{frame}{Axioms}
\begin{description}
\item[Efficiency:] No money lost in the splitting process
$$V(S) = \sum_{i\in S} \phi_i(S,V)$$
\pause
\item[Symmetry:] Equal pay for equal contribution
\alert{if} $\forall S'\subset S\setminus\{i,j\}, V(S'\cup\{i\}) = V(S'\cup\{j\})$
\alert{then} $\phi_i(S,V) = \phi_j(S,V)$
\pause
\item[Fairness:] $j$'s contribution to $i$ equals $i$'s contribution to $j$
$$\phi_i(S,V) - \phi_i(S\setminus\{j\}) = \phi_j(S,V)- \phi_j(S\setminus\{i\})$$
\end{description}
\end{frame}
\begin{frame}{Solution: the Shapley Value}
\begin{theorem}[Shapley, 1953]
There is only one function satisfying Efficiency, Symmetry and
Fairness:
$$\phi_i(S,V) = \frac{1}{\vert S\vert!}\sum_{\pi\in\mathfrak{S}(S)}\Delta_i\big(V,S(i,\pi)\big)$$
\end{theorem}
\begin{itemize}
\item $\mathfrak{S}(S)$: set of permutations of $S$
\item $S(i,\pi)$: set of players before $i$ in $\pi$
\item $\Delta_i\big(V,T\big) =
V\big(T\big)-V\big(T\setminus\{i\})$: marginal contribution of $i$
to $T$
\end{itemize}
\end{frame}
\begin{frame}{Properties}
\begin{itemize}
\item
\alert{if} $V$ is superadditive: $V(S\cup T) \geq V(S) + V(T)$
\pause
\alert{then} the Shapley Value is individually rational:
$\phi_i(S,V) > V(\{i\})$
\pause
\item
\alert{if} $V$ is supermodular: $\Delta_i(V,S)\leq\Delta_i(V,T),
\forall S\subset T$
\pause
\alert{then} the Shapley Value lies in the core of
the game:
$$\sum_{i\in S'}\phi_i(S,V) \geq V(S'), \forall S'\subset S$$
\pause
\item Hervé Moulin \& Scott Shenker (1999): Group-strategyproof auction
\end{itemize}
\end{frame}
\section{Example scenario}
\begin{frame}{Setup}
\begin{itemize}
\item database $X$: $n\times p$ matrix
\item $x_i\in \mathbf{R}^p$ $i$-th row of $X$: data of user $i$
\item for each user: $y_i$ variable of interest, could be:
\begin{itemize}
\item already observed
\item about to be observed
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{Recommender system}
\alert{Goal:} predict the variable of interest $y$ of a new user $x$
\alert{Linear model:} relation between data and variable of interest:
$$y = \langle\beta,x\rangle + \varepsilon, \quad \varepsilon\simeq\mathcal{N}(0,\sigma^2)$$
\alert{Linear regression:} estimator $\tilde{\beta} =
(\tr{X}X)^{-1}\tr{X}Y$
\alert{Prediction:} $\tilde{y} = \langle\tilde{\beta},x\rangle$
\end{frame}
\begin{frame}{Revenue structure}
\alert{Prediction error:} $\mse(X,x) = \expt{(\tilde{y} - y)^2} =
\sigma^2\tr{x}(\tr{X}X)^{-1}x + \sigma^2$
\vspace{1cm}
\begin{block}{Property}
\emph{The prediction error decreases as the size of the database increases}
\end{block}
\vspace{1cm}
\alert{$\Rightarrow$} choose a decreasing function of the prediction
error
\pause
\alert{Problem:} $\mse$ not submodular
\end{frame}
\begin{frame}{Revenue structure (2)}
Exponentially decreasing revenue:
$$V(X,x) = \exp\big(-\mse(X,x)\big)$$
Value of database $X$:
$$V(X) = \int_{x\in\mathbb{R}^d} V(X,x) dx$$
\end{frame}
\begin{frame}{Properties}
\begin{block}{Closed-form expression}
$$V(X) = C\sqrt{\det(\tr{X}X)}$$
\end{block}
\vspace{1cm}
\pause
\begin{block}{Supermodularity}
$$X\mapsto\log\big(V(X)\big)$$ is supermodular
\end{block}
\pause
\alert{$\Rightarrow$} we can apply the Shapley value theory!
\end{frame}
\begin{frame}{Conclusion}
\begin{itemize}
\item general framework to study the pricing problem
\item the case of the linear regression
\end{itemize}
\vspace{1cm}
\pause
Future directions
\begin{itemize}
\item different revenue models
\item fluid Shapley Value (Aumann-Shapley) for a continuum of players
\end{itemize}
\end{frame}
\end{document}
|