diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2012-08-15 18:27:30 -0700 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2012-08-15 18:27:30 -0700 |
| commit | d010cdae55a7e19d78a24e46c1db3058d69347fc (patch) | |
| tree | 52703dde8733e956348484bbca1dada9272b5f9f /slides.tex | |
| parent | 64806a3659109cf595fe3347cc49522c8f536660 (diff) | |
| download | recommendation-d010cdae55a7e19d78a24e46c1db3058d69347fc.tar.gz | |
Repository restructuration (has to be clean now that it is shared ;)
Diffstat (limited to 'slides.tex')
| -rw-r--r-- | slides.tex | 221 |
1 files changed, 0 insertions, 221 deletions
diff --git a/slides.tex b/slides.tex deleted file mode 100644 index ec2e5be..0000000 --- a/slides.tex +++ /dev/null @@ -1,221 +0,0 @@ -\documentclass{beamer} -\usepackage[utf8x]{inputenc} -\usepackage[greek,english]{babel} -\usepackage[LGR,T1]{fontenc} -\usepackage{lmodern} -\usepackage{amsmath} -\usepackage{graphicx} -\usepackage{tikz} -\newcommand{\tr}[1]{#1^*} -\newcommand{\expt}[1]{\mathop{\mathbb{E}}\left[#1\right]} -\newcommand{\mse}{\mathop{\mathrm{MSE}}} -\usetheme{Boadilla} -\usecolortheme{beaver} -\title[Data monetization]{Data monetization: pricing user data\\with the Shapley Value} -\author{Thibaut \textsc{Horel}} -\institute[Technicolor]{Work with {\greektext Στρατής \textsc{Ιωαννίδης}}} -\setbeamercovered{transparent} - -\AtBeginSection[] -{ -\begin{frame}<beamer> -\frametitle{Outline} -\tableofcontents[currentsection] -\end{frame} -} - -\begin{document} - -\begin{frame} -\maketitle -\end{frame} - -\section{Problem} -\begin{frame}{Problem Overview} -\begin{center} -\includegraphics[width=0.8\textwidth]{schema.pdf} -\end{center} -\end{frame} - -\begin{frame}{Motivation} - -Why is it useful? - -\begin{itemize} -\item sell/buy user data -\item compensate users in your database -\end{itemize} - -\end{frame} - -\begin{frame}{Challenges} - -We need to understand: -\begin{itemize} -\item the recommender system -\item the revenue structure -\item the relation between the revenue and the value of the database -\item how to compensate individual users -\end{itemize} -\end{frame} - -\section{Shapley value} - -\begin{frame}{Individual value} - \begin{itemize} - \item $S$: set of players - \item $S'\subset S$: coalition - \item $V: \mathcal{P}(S)\to \mathbf{R}$: \alert{value} function: - \begin{itemize} - \item $V(\emptyset) = 0$ - \item $V(S')$: value of the coalition $S'$ - \end{itemize} - \end{itemize} - - \alert{Question:} How to split the value of a subset across all its constituents? - - \alert{Goal:} $\phi_i(S',V)$: value of player $i$ in $S'$ -\end{frame} - -\begin{frame}{Solution attempts} - \begin{itemize} - \visible<1->{\item $\phi_i(S',V) = V(\{i\})$?} \visible<2->{``The whole is greater than the sum of its parts'' - $$V(S')\neq \sum_{i\in S'} V(\{i\})$$} - \visible<3->{\item $\phi_i(S',V) = V(S') - V(S'\setminus\{i\})$: the \alert{marginal contribution}?} \visible<4->{depends on the time the player joins the coalition} - \end{itemize} -\end{frame} - -\begin{frame}{Axioms} - \begin{description} - \item[Efficiency:] No money lost in the splitting process - $$V(S) = \sum_{i\in S} \phi_i(S,V)$$ -\pause - \item[Symmetry:] Equal pay for equal contribution - - \alert{if} $\forall S'\subset S\setminus\{i,j\}, V(S'\cup\{i\}) = V(S'\cup\{j\})$ - - \alert{then} $\phi_i(S,V) = \phi_j(S,V)$ -\pause - \item[Fairness:] $j$'s contribution to $i$ equals $i$'s contribution to $j$ - $$\phi_i(S,V) - \phi_i(S\setminus\{j\}) = \phi_j(S,V)- \phi_j(S\setminus\{i\})$$ - \end{description} -\end{frame} - -\begin{frame}{Solution: the Shapley Value} - - \begin{theorem}[Shapley, 1953] - There is only one function satisfying Efficiency, Symmetry and - Fairness: - $$\phi_i(S,V) = \frac{1}{\vert S\vert!}\sum_{\pi\in\mathfrak{S}(S)}\Delta_i\big(V,S(i,\pi)\big)$$ - \end{theorem} - \begin{itemize} - \item $\mathfrak{S}(S)$: set of permutations of $S$ - \item $S(i,\pi)$: set of players before $i$ in $\pi$ - \item $\Delta_i\big(V,T\big) = - V\big(T\big)-V\big(T\setminus\{i\})$: marginal contribution of $i$ - to $T$ - \end{itemize} -\end{frame} - -\begin{frame}{Properties} - \begin{itemize} - \item - \alert{if} $V$ is superadditive: $V(S\cup T) \geq V(S) + V(T)$ -\pause - - \alert{then} the Shapley Value is individually rational: - $\phi_i(S,V) > V(\{i\})$ -\pause - \item - \alert{if} $V$ is supermodular: $\Delta_i(V,S)\leq\Delta_i(V,T), - \forall S\subset T$ -\pause - - \alert{then} the Shapley Value lies in the core of - the game: - $$\sum_{i\in S'}\phi_i(S,V) \geq V(S'), \forall S'\subset S$$ -\pause - \item Hervé Moulin \& Scott Shenker (1999): Group-strategyproof auction - \end{itemize} -\end{frame} - -\section{Example scenario} - -\begin{frame}{Setup} - \begin{itemize} - \item database $X$: $n\times p$ matrix - - \item $x_i\in \mathbf{R}^p$ $i$-th row of $X$: data of user $i$ - - \item for each user: $y_i$ variable of interest, could be: - \begin{itemize} - \item already observed - \item about to be observed - \end{itemize} - \end{itemize} -\end{frame} - -\begin{frame}{Recommender system} -\alert{Goal:} predict the variable of interest $y$ of a new user $x$ - -\alert{Linear model:} relation between data and variable of interest: -$$y = \langle\beta,x\rangle + \varepsilon, \quad \varepsilon\simeq\mathcal{N}(0,\sigma^2)$$ - -\alert{Linear regression:} estimator $\tilde{\beta} = -(\tr{X}X)^{-1}\tr{X}Y$ - -\alert{Prediction:} $\tilde{y} = \langle\tilde{\beta},x\rangle$ -\end{frame} - -\begin{frame}{Revenue structure} -\alert{Prediction error:} $\mse(X,x) = \expt{(\tilde{y} - y)^2} = -\sigma^2\tr{x}(\tr{X}X)^{-1}x + \sigma^2$ -\vspace{1cm} -\begin{block}{Property} - \emph{The prediction error decreases as the size of the database increases} -\end{block} -\vspace{1cm} -\alert{$\Rightarrow$} choose a decreasing function of the prediction -error -\pause - -\alert{Problem:} $\mse$ not submodular -\end{frame} - -\begin{frame}{Revenue structure (2)} -Exponentially decreasing revenue: -$$V(X,x) = \exp\big(-\mse(X,x)\big)$$ - -Value of database $X$: -$$V(X) = \int_{x\in\mathbb{R}^d} V(X,x) dx$$ -\end{frame} - -\begin{frame}{Properties} -\begin{block}{Closed-form expression} -$$V(X) = C\sqrt{\det(\tr{X}X)}$$ -\end{block} - -\vspace{1cm} -\pause -\begin{block}{Supermodularity} -$$X\mapsto\log\big(V(X)\big)$$ is supermodular -\end{block} -\pause -\alert{$\Rightarrow$} we can apply the Shapley value theory! -\end{frame} - -\begin{frame}{Conclusion} - -\begin{itemize} -\item general framework to study the pricing problem -\item the case of the linear regression -\end{itemize} -\vspace{1cm} -\pause -Future directions -\begin{itemize} -\item different revenue models -\item fluid Shapley Value (Aumann-Shapley) for a continuum of players -\end{itemize} -\end{frame} -\end{document}
\ No newline at end of file |
