\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{paralist} 

% these are compressed lists to help fit into a 1 page limit
\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}
 
\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}
 
\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}

\DeclareMathOperator{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator{\Pr}{\mathbb{P}}
\newcommand{\ex}[2][]{\E_{#1}\left[#2\right]}
\newcommand{\prob}[2][]{\Pr_{#1}\left[#2\right]}

\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbb{R}}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}

\author{Thibaut Horel \and Paul Tylkin}
\title{Economics 2099 Problem Set 2 -- Solutions}

\begin{document}

\maketitle
\section*{Exercise 4.13}

\section*{Exercise 4.14*}

\section*{Exercise 4.19}

\section*{Additional Problem}

We prove that $\ex[\mathbf{F}]{\max_i
v_i}\leq\frac{e}{e-1}\ex[\mathbf{F}^I]{\max_i v_i}$. We first prove it for
finite support distributions by reduction to Lemma 4.13 in the lecture notes.
We then extend the result to general distributions using a density argument
(general distributions can be approximated arbitrarily well by finite support
distributions).

\paragraph{Finite support distributions}
Let us first assume that $\mathbf{F}$ has a finite support. We write $A_i$ the
set of atoms of $\mathbf{F}_i$ (the $i$-th marginal of $\mathbf{F}$) so that
both $\mathbf{F}$ and $\mathbf{F}^I$ define a probability distribution over
$A_1\times\ldots\times A_n$. We now define $A$ to be the disjoint union of
$A_1$ to $A_n$:
\begin{displaymath}
    A = \coprod_{i=1}^n A_i
    = \big\{ (a, i)\, |\, x\in A_i,\; 1\leq i \leq n\big\}
\end{displaymath}

Note that there is a canonical injection from $A_1\times\ldots\times A_n$ to
$\mathcal{P}(A)$:
\begin{displaymath}
    (a_1,\ldots, a_n)\mapsto \big\{ (a_1, 1),\ldots, (a_n, n)\big\}
\end{displaymath}
so that $\mathbf{F}$ and $\mathbf{F}^I$ naturally define a probability
distribution over subsets of $A$:
\begin{displaymath}
    \Pr^A_{\mathbf{F}}\big( \{(a_1, i_1),\ldots,(a_k, i_k)\}\big) = 
    \begin{cases}
        \Pr_{\mathbf{F}} (a_1,\ldots, a_k)&\text{if } \{i_1,\ldots,i_k\}
    = \{1,\ldots, n\}\\
    0&\text{otherwise}
    \end{cases}
\end{displaymath}
and similarly for $\Pr^A_{\mathbf{F^I}}$. That is, the only subsets of $A$ which
have a non-zero probability are the ones which ``correspond'' to a $n$-tuple in
$A_1\times\ldots\times A_n$ (they are in the image of the canonical injection
given above).

Similarly, the function $\max_i v_i$ defined over $A_1\times\ldots\times A_n$
can be extended to subsets of $A$ by:
\begin{displaymath}
    g(\{(a_1, i_1),\ldots, (a_k, i_k)\}) \eqdef \max\{a_1,\ldots, a_k\}
\end{displaymath}

The consequence of this construction\footnote{This construction might appear
    a bit technical. What we would naturally do is to consider the set of all
    atoms across all coordinates, but this would not work in cases where the
    same atom can appear at different coordinates. This is why we have to
    consider the disjoin union, with ``independent'' copies of the atoms.} is
    the following key property:
\begin{displaymath}
    \E_{S\sim\Pr_{\mathbf{F}}^A}[g(S)] = \E_{\mathbf{F}}[\max_i v_i]
\end{displaymath}
and similarly for $\mathbf{F}^I$. Now, we note that $g$ is
a maximum-weight-element set function, where the weight of $(a, i)\in A$ is
simply $a$. Hence we can apply Lemma 4.13 to $g$ for subsets of $A$ and obtain:
\begin{displaymath}
    \E_{\mathbf{F}}[\max_i v_i] = 
    \E_{S\sim\Pr_{\mathbf{F}}^A}[g(S)]
    \geq \frac{e}{e-1}
    \E_{S\sim\Pr_{\mathbf{F^I}}^A}[g(S)]=
    \E_{\mathbf{F^I}}[\max_i v_i]
\end{displaymath}

\paragraph{General distributions} We first look at a continuous distribution
$\mathbf{F}$ with bounded support. On the support of $\mathbf{F}$, it is
possible to approximate its CDF arbitrarily well in $L^\infty$ norm (hence in
$L^2$ norm since we are on a bounded set) by simple functions (convex
combination of step functions). But a step function is simply the CDF of
a dirac distribution. Hence, we can approximate $\mathbf{F}$ arbitrarily well
in $L^2$ norm with finite support distributions. But convergence in $L^2$ norm
implies weak convergence by Cauchy-Schwarz inequality. That is, we have
a sequence $F_n$ of finite support distribution such that:
\begin{displaymath}
    \E_{F_n}[\max_i v_i] \xrightarrow[n\rightarrow \infty]{} \E_F[\max_i v_i]
\end{displaymath}
we can apply the previous result for each of the $F_n$ and take the limit to
obtain again:
\begin{displaymath}
    \E_{\mathbf{F}}[\max_i v_i] \geq \E_{\mathbf{F^I}}[\max_i v_i]
\end{displaymath}

When $\mathbf{F}$ does not have bounded support, we can truncate $\mathbf{F}$
by considering its restriction to an increasing sequence of bounded rectangles:
$K_n = \prod_{i=1}^n [-n, n]$. We can apply the previous result to each of the
truncated distributions and again take the limit as $n\rightarrow\infty$. The
validity of this last limit will depend on the exact technical assumptions we
make on $\mathbf{F}$ (basically we need $\mathbf{F}$ to be quickly decreasing
outside of bounded intervals).

More generally, the previous approach will apply as long as we are considering
a distribution $\mathbf{F}$ in a space of distributions for which finite
support distributions are dense for the weak topology. There are various
assumptions with various level of technicality which imply such a density. We
can choose to apply one or the other based on the assumptions we have on
$\mathbf{F}$.

\end{document}