\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{algorithm}
\usepackage{algpseudocode}

\newenvironment{enumerate*}%
{\vspace{-2ex} \begin{enumerate}%
  \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
{\end{enumerate}}
\newenvironment{itemize*}%
{\vspace{-2ex} \begin{itemize}%
  \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
{\end{itemize}}
\newenvironment{description*}%
{\vspace{-2ex} \begin{description}%
  \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
{\end{description}}

\DeclareMathOperator{\E}{\mathbb{E}}
\DeclareMathOperator{\Enc}{\mathrm{Enc}}
\DeclareMathOperator{\Dec}{\mathrm{Dec}}
\DeclareMathOperator{\Ext}{\mathrm{Ext}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\DeclareMathOperator*{\argmin}{\arg\min}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\poly}{\text{poly}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\cl}[1]{\text{\textbf{#1}}}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}

\algrenewcommand\algorithmicensure{\textbf{Output:}}
\algrenewcommand\algorithmicrequire{\textbf{Input:}}

\author{Thibaut Horel}
\title{CS 225 Problem Set 5 -- Solutions}

\begin{document}
\maketitle

\section*{Problem 6.1}

\paragraph{1.} Let us denote by $\mathcal{U}$ the universe on which $X$ and $Y$ are defined, and define $p_X(i) = \P[X=i]$ for $i\in \mathcal{U}$ (and similarly for $Y$). Let us partition $\mathcal{U} = S_X\cup S_Y$ where $S_X = \{i\in\mathcal{U} : p_X(i) > p_Y(i)\}$ (and $S_Y$ is the complement of $S_X$). For a $[0,1]$-valued function $f$ defined over $\mathcal{U}$, we have:
\begin{displaymath}
\big | \E[f(X)] - \E[f(Y)]\big| = \left|\sum_{i\in S_X} \big(p_X(i) - p_Y(i)\big) f(i) + \sum_{i\in S_Y} \big(p_X(i) - p_Y(i)\big) f(i)\right|
\end{displaymath}
Notice that the terms in the first sum are positive and the terms in the second sum are negative. Hence, to maximize the above expression over all such $f$, we should either set $f(i) = 1$ for all $i\in S_X$ and $0$ elsewhere, or vice versa. In the first case we obtain the value:
\begin{displaymath}
V_1\eqdef \sum_{i\in S_X} \big(p_X(i) - p_Y(i)\big)
\end{displaymath}
and in the second case:
\begin{displaymath}
V_2\eqdef \sum_{i\in S_Y} \big(p_Y(i) - p_X(i)\big)
\end{displaymath}
Using that $p_X$ and $p_Y$ are probability distributions, we observe that $V_1 = V_2$: indeed, $V_1 - V_2 = \sum_{i\in\mathcal{U}} \big(p_X(i) - p_Y(i)\big) = 1 - 1 = 0$. Hence:
\begin{equation}\label{eq:foo}
\max_f\big | \E[f(X)] - \E[f(Y)]\big| = \sum_{i\in S_X} \big(p_X(i) - p_Y(i)\big)= \sum_{i\in S_Y} \big(p_Y(i) - p_X(i)\big)
\end{equation}
We have now shown:
\begin{displaymath}
\Delta(X, Y) = \max_f\big | \E[f(X)] - \E[f(Y)]\big|
\end{displaymath}
Indeed, we clearly have $\Delta(X, Y) \leq \max_f\big | \E[f(X)] - \E[f(Y)]\big|$, since $\Delta(X, Y)$ corresponds to restricting the maximum to $\{0,1\}$-valued functions (indicators of events), and the computation above shows that the inequality is in fact an equality, since the maximum over all $f$ is attained by a $\{0,1\}$-valued function.

Finally, we observe that:
\begin{displaymath}
\frac{1}{2}|X-Y|_1 = \frac{1}{2}\sum_{i\in\mathcal{U}}|p_X(i)-p_Y(i)| =\frac{1}{2}\sum_{i\in S_X}\big(p_X(i)-p_Y(i)\big) + \frac{1}{2}\sum_{i\in S_Y}\big(p_Y(i)-p_X(i)\big) =\frac{V_1}{2}+\frac{V_2}{2}
\end{displaymath}
Using \eqref{eq:foo}, this implies:
\begin{displaymath}
\frac{1}{2}|X-Y|_1 = \max_f\big | \E[f(X)] - \E[f(Y)]\big|
\end{displaymath}
which concludes the proof of this part.
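As a quick numerical sanity check of this identity, the following short Python snippet (purely illustrative; the two distributions are arbitrary) computes $\Delta(X,Y)$ by brute force over all events $T$, the quantity $\frac{1}{2}|X-Y|_1$, and the value attained by the indicator of $S_X$; the three numbers coincide.
\begin{verbatim}
# Sanity check for Problem 6.1.1 on a small universe: Delta(X, Y),
# half the L1 distance, and the value achieved by the indicator of
# S_X = {i : p_X(i) > p_Y(i)} all coincide. (The two distributions
# below are arbitrary toy examples.)
from itertools import chain, combinations

p_X = {0: 0.5, 1: 0.3, 2: 0.2, 3: 0.0}
p_Y = {0: 0.2, 1: 0.3, 2: 0.1, 3: 0.4}
U = list(p_X)

def events(universe):
    # All subsets T of the universe.
    return chain.from_iterable(combinations(universe, r)
                               for r in range(len(universe) + 1))

# Delta(X, Y): maximize |P[X in T] - P[Y in T]| over all events T.
delta = max(abs(sum(p_X[i] for i in T) - sum(p_Y[i] for i in T))
            for T in events(U))

half_l1 = 0.5 * sum(abs(p_X[i] - p_Y[i]) for i in U)

# Value achieved by the 0/1-valued f equal to 1 exactly on S_X.
S_X = [i for i in U if p_X[i] > p_Y[i]]
v1 = sum(p_X[i] - p_Y[i] for i in S_X)

print(delta, half_l1, v1)  # all three equal 0.4 (up to float rounding)
\end{verbatim}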
\paragraph{2.} Let us consider a $w$ such that $X_w \eqdef X|_{W=w}$ is not a $\big(k-l-\log\frac{1}{\eps}\big)$ source. Then there exists an $x$ such that:
\begin{displaymath}
\P[X_w=x]>\frac{1}{2^{k-l-\log\frac{1}{\eps}}}
\end{displaymath}
But we have:
\begin{displaymath}
\P[X_w=x] = \frac{\P[X=x\wedge W=w]}{\P[W=w]}\leq \frac{1}{2^k}\frac{1}{\P[W=w]}
\end{displaymath}
where the inequality uses that $(W,X)$ is a $k$ source. The above two inequalities together imply:
\begin{displaymath}
\P[W=w]\leq \frac{\eps}{2^l}
\end{displaymath}
Denoting by $B$ the set of $w$s such that $X_w$ is not a $\big(k-l-\log\frac{1}{\eps}\big)$ source, we obtain:
\begin{displaymath}
\P[W\in B] = \sum_{w\in B}\P[W=w]\leq |B|\frac{\eps}{2^l}\leq \eps
\end{displaymath}
since $|B|\leq 2^l$. This concludes the proof.

\paragraph{3.} This is a consequence of \textbf{2.} Let us denote by $B$ the set of $x_1\in\{0,1\}^{n_1}$ such that $X_2|_{X_1=x_1}$ is not an $\big(n_2-\Delta-\log\frac{1}{\eps}\big)$ source. Define $X_2'$ to be the following variable:
\begin{displaymath}
X_2' = \begin{cases} U_{n_2}&\text{if $X_1\in B$}\\ X_2&\text{otherwise} \end{cases}
\end{displaymath}
$X_1$ is an $(n_1-\Delta)$ source. Indeed:
\begin{displaymath}
\P[X_1=x_1] = \sum_{x_2} \P[X_1=x_1\wedge X_2 = x_2]\leq 2^{n_2}\frac{1}{2^{n-\Delta}} = \frac{1}{2^{n_1-\Delta}}
\end{displaymath}
where the inequality uses that $X$ is an $(n-\Delta)$ source (and $n = n_1 + n_2$). For all $x_1$, $X_2'|_{X_1=x_1}$ is an $\big(n_2-\Delta-\log\frac{1}{\eps}\big)$ source: either $x_1\notin B$ and this holds by definition of $B$, or $x_1\in B$ and then $X_2'|_{X_1=x_1}$ is uniform, i.e., an $n_2$ source. Hence, $(X_1,X_2')$ is an $\big(n_1-\Delta,\, n_2-\Delta-\log\frac{1}{\eps}\big)$ block source. Finally:
\begin{displaymath}
\Delta\big((X_1,X_2),(X_1,X_2')\big) \leq \P[X_1\in B]\leq \eps
\end{displaymath}
where the last inequality uses \textbf{2.} (applied with $W = X_1$ and min-entropy $n-\Delta$). Hence $(X_1,X_2')$ satisfies all the requirements.

\section*{Problem 6.5}

\paragraph{1.} I use the expander walk averaging sampler of Theorem 4.23. This sampler uses $m+ \frac{C_1}{\eps^2}\log\frac{1}{\delta\eps}$ random bits for an output of length $m$ and uses $\frac{C_2}{\eps^2}\log\frac{1}{\delta}$ samples. By Corollary 6.24, this yields a $(k, \eps)$ extractor for $k= n+\log\delta+\log\frac{1}{\eps}$, where the extractor's input length $n$ is the number of random bits used by the sampler and its seed length is the logarithm of the number of samples. We want seed length $d\leq \log n$, i.e., at most $n$ samples. Hence we have to take $\delta$ large enough that:
\begin{displaymath}
\log\frac{1}{\delta}\leq \frac{n\eps^2}{C_2}
\end{displaymath}
We also want $m\geq (1-\alpha)n$. Since $n = m + \frac{C_1}{\eps^2}\log\frac{1}{\delta\eps}$, this is equivalent to:
\begin{displaymath}
\alpha m\geq (1-\alpha)\frac{C_1}{\eps^2}\log\frac{1}{\delta\eps}
\end{displaymath}
Again, this can be achieved by taking $\delta$ large enough (we take $\delta$ to be the larger of this threshold and the one obtained above). Finally, we need to verify that $k = \beta n$ for some constant $\beta<1$. On the one hand, for this choice of $\delta$ the quantity $\log\frac{1}{\delta}$ grows linearly with $n$, so in particular $\log\frac{1}{\delta} > \log\frac{1}{\eps}$ and $k = n - \log\frac{1}{\delta} + \log\frac{1}{\eps} < n$. On the other hand:
\begin{displaymath}
k = n + \log \delta + \log\frac{1}{\eps} \geq n -\frac{n\eps^2}{C_2} = n(1-\eps^2/C_2)
\end{displaymath}
so $1-\eps^2/C_2 \leq \beta < 1$, which concludes the proof.
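To make the above juggling of constraints concrete, here is a small Python computation (the constants $C_1$, $C_2$ and the values of $\alpha$, $\eps$ and $n$ are arbitrary placeholders, chosen only to illustrate the bookkeeping; all logarithms are base $2$) that picks a valid $\delta$ and checks the three requirements numerically.
\begin{verbatim}
import math

# Illustrative parameter check for Problem 6.5.1; C1, C2, alpha, eps, n
# are arbitrary placeholder values, not from the problem.
C1, C2 = 2.0, 2.0
alpha, eps, n = 0.1, 0.05, 10 ** 7

# delta must be large enough, i.e. log(1/delta) small enough, for both
# constraints; take a little slack below the allowed maximum.
log_inv_delta = 0.9 * min(
    n * eps ** 2 / C2,                             # at most n samples
    alpha * n * eps ** 2 / C1 - math.log2(1 / eps)  # m >= (1 - alpha) n
)

num_samples = (C2 / eps ** 2) * log_inv_delta
m = n - (C1 / eps ** 2) * (log_inv_delta + math.log2(1 / eps))
k = n - log_inv_delta + math.log2(1 / eps)

assert num_samples <= n               # seed length log(#samples) <= log n
assert m >= (1 - alpha) * n           # long output
assert n > k >= n * (1 - eps ** 2 / C2)  # k = beta n with beta < 1
print(num_samples, m, k)
\end{verbatim}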
\paragraph{3.} Using Corollary 6.24, the extractor of Theorem 6.36 implies the existence of a $(K/B,\eps)$ averaging sampler (where $K = 2^k$ and $B = 2^b$) using $b$ random bits and $\text{poly}(b/\eps)$ samples, provided that $k\leq 2m$ (where $m$ is the bit length of the samples). Assuming w.l.o.g. that the success probability of the algorithm $A$ for our $\cl{BPP}$ problem is at least $\frac{2}{3}$, choosing $\eps=\frac{1}{6}$ and $K/B=\frac{1}{2^l}$, averaging the output of $A$ over all $\text{poly}(b)$ samples of the sampler (we fix the input $x$ of $A$ and use the samples as random coins), and accepting if and only if this average is larger than $\frac{1}{2}$, the error probability is now at most $\frac{1}{2^l}$. We need to set $m = r$ (the number of random bits used by $A$). Setting $k = 2m$, this implies $b = k + l = O(r) + l$. The number of samples used is $\text{poly}(b) = \text{poly}(r, l)$. Since $r$ and $l$ are polynomial in $n$, the resulting algorithm is still polynomial in $n$.

\paragraph{} Starting from an algorithm $A$ with probability of success $2/3$ and using $r = r(n)$ random bits, the first part of this question shows that using $q = Cr +l$ random bits (for some constant $C$) yields an error probability of $2^{-l}$. Setting $l = (Cr)^{100}$, we have $2^{-l}2^q = 2^{Cr}\leq 2^{q^{0.01}}$ (indeed, $q \geq l = (Cr)^{100}$, hence $q^{0.01}\geq Cr$), i.e., the algorithm errs on at most $2^{q^{0.01}}$ choices of its $q$ random bits. The running time is still polynomial since $l$ is polynomial in $r$.

\section*{Problem 6.9}

We first condense the source using Theorem 6.34, yielding a source of length $(1+\alpha)k +O(\log \frac{n}{\eps_1})$ which is $\eps_1$-close to a $\big(k+O(\log\frac{n}{\eps_1})\big)$ source, using a seed of length $O(\log \frac{n}{\eps_1})$. We will set $\eps_1$ and $\alpha$ later. We now partition the condensed source into $t$ blocks of size $k/t + O(\log\frac{n}{\eps_1})$ (from now on, the $O(\cdot)$ notation may hide factors depending on $t$, which is fine since $t$ is a constant). Applying Lemma 6.31 $t$ times (we proved it in Problem 6.1), the resulting source is $(\eps_1 + t\eps_1)$-close to a $t\times k_1$ block source, with:
\begin{displaymath}
k_1 = \frac{1+\alpha}{t}k - \alpha k + O\left(\log \frac{n}{\eps_1}\right)
\end{displaymath}
Note that for this to work we need $\alpha < \frac{1}{t}$; this will be satisfied when we set $\alpha$ later.

Using Theorem 6.18, we know that for every $k'$ and $\eps'$ there exists a $(k', \eps')$ extractor with seed length $n$ (for input length $n$) and output length $k' + n - O(\log\frac{1}{\eps'})$. We use this extractor (with $k' = k_1$ and $\eps' = \eps_1$) in Lemma 6.28 to do block extraction on the block source constructed above (using the same extractor for each block). The number of random bits we need for this is $\frac{k}{t} + O(\log \frac{n}{\eps_1})$. The number of extracted bits is:
\begin{displaymath}
m = t\times\big(k_1 - \log\frac{1}{\eps_1}\big)\geq k-\alpha k t + O\left(\log\frac{n}{\eps_1}\right)
\end{displaymath}
If we choose $\alpha = \frac{1}{2t}$, we will have $m\geq \frac{k}{2}$ as required (this also implies $\alpha< \frac{1}{t}$, as was needed above). The resulting source will be $(\eps_1 + t\eps_1 + t\eps_1)$-close to a uniform source. This implies that we need to set $\eps_1 = \frac{\eps}{1+2t}$. Finally, the number of random bits needed is $O(\log\frac{n}{\eps_1})$ for the condenser, and $\frac{k}{t} + O(\log\frac{n}{\eps_1})$ to bootstrap the block extraction. Given our choice of $\eps_1$, this is $\frac{k}{t} + O(\log\frac{n}{\eps})$ as required.
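The entropy bookkeeping above can be checked with a few lines of Python (a toy computation: the $O(\log\frac{n}{\eps_1})$ terms are dropped, and the values of $t$ and $k$ are arbitrary):
\begin{verbatim}
# Toy check of the entropy accounting in Problem 6.9, with the
# O(log(n/eps_1)) terms dropped; t and k are arbitrary values.
t = 4
k = 10_000
alpha = 1 / (2 * t)           # the choice made in the solution
assert alpha < 1 / t          # condition needed for the block-source step

k1 = (1 + alpha) * k / t - alpha * k   # min-entropy per block
m = t * k1                             # total number of extracted bits
print(k1, m, m >= k / 2)               # prints 1562.5 6250.0 True
\end{verbatim}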
\section*{Problem 6.10}

\paragraph{1.} I define the following encryption scheme with $n=m=l$:
\begin{displaymath}
\Enc(k, u) = \Ext(k)\oplus u,\quad \Dec(k, e) = \Ext(k)\oplus e
\end{displaymath}
It is clear that for any message $u$, we have $\Dec(k, \Enc(k, u)) = \Ext(k)\oplus\big(\Ext(k)\oplus u\big) = u$. Furthermore, by definition of $\Ext$, for any $K\in\mathcal{C}$, $\Ext(K)$ is $\eps$-close to uniform over $\{0,1\}^m$, hence for any pair of messages $(u,v)$:
\begin{align*}
\Delta(\Enc(K,u),\Enc(K,v)) &= \Delta(\Ext(K)\oplus u, \Ext(K)\oplus v) \leq \Delta(\Ext(K)\oplus u, U_m) + \Delta(\Ext(K)\oplus v, U_m)\\
&\leq \Delta(\Ext(K), U_m\oplus u) + \Delta(\Ext(K), U_m\oplus v)\leq 2\eps
\end{align*}
where we used the triangle inequality, the fact that $\Delta$ can only decrease when applying the same function to both its arguments, and the fact that $U_m\oplus u$ is still uniform for any fixed $u$.

\paragraph{2. (a)} I use the probabilistic method, similarly to the proof of Proposition 6.12 (there might be a way to reduce to it directly, but I haven't found how). Define $m' = m - 2\log\frac{1}{\eps} - c_1$ (for some constant $c_1$), and consider a uniformly random function $f$ from the ciphertext space to $\{0,1\}^{m'}$. We have:
\begin{displaymath}
\P[f\text{ is not an $\eps$-extractor}] = \P_f\left[\exists (k, T) \text{ s.t. } \P_{U_m}[f(\Enc(k,U_m))\in T] - \P_{U_{m'}}[U_{m'}\in T]>\eps\right]
\end{displaymath}
We fix $k$ and $T$ (and we will union bound later). We have:
\begin{displaymath}
\P_f\big[\P_{U_m}[f(\Enc(k,U_m))\in T] - \P_{U_{m'}}[U_{m'}\in T]>\eps\big] = \P_f\big[\P_{U_m}[f(\Enc(k,U_m))\in T]-\mu>\eps\big]
\end{displaymath}
where I defined $\mu = \frac{|T|}{2^{m'}}$. For fixed $u\in\{0,1\}^m$, $f(\Enc(k, u))$ is uniform over $\{0,1\}^{m'}$ (since $f$ is totally random); moreover, since decryption must recover $u$, the map $u\mapsto\Enc(k,u)$ is injective, so the values $f(\Enc(k,u))$ for distinct $u$ are independent. Hence:
\begin{displaymath}
\P_f\big[\P_{U_m}[f(\Enc(k,U_m))\in T]-\mu>\eps\big] = \P\left[\frac{1}{2^m}\sum_{u\in\{0,1\}^m} Y_u - \mu>\eps\right]
\end{displaymath}
where the $Y_u$ are independent Bernoulli variables of parameter $\mu$. Using the Chernoff bound, we get that the above probability is bounded by $\exp(-2^m\eps^2/4)$. Taking a union bound over the $2^n$ keys $k$ and the $2^{2^{m'}}$ sets $T\subseteq\{0,1\}^{m'}$, the probability that $f$ is not an $\eps$-extractor is bounded above by:
\begin{displaymath}
p\eqdef 2^{2^{m'}}2^n\exp(-2^m\eps^2/4)
\end{displaymath}
Using that $m' = m - 2\log \frac{1}{\eps} - c_1$, we have $2^{m'} = \frac{2^m\eps^2}{c_2}$ where $c_2= 2^{c_1}$, hence:
\begin{displaymath}
p = \exp\left(\frac{2^m\eps^2}{c_2}\ln 2 + n\ln 2 - \frac{2^m\eps^2}{4}\right) = \exp\left(n\ln 2 - 2^m\eps^2\left(\frac{1}{4}-\frac{\ln 2}{c_2}\right)\right)
\end{displaymath}
Finally, using that $2^m\geq c_3\frac{n}{\eps^2}$, we can choose the constants ($c_1$ large enough that $c_2 > 4\ln 2$, and then $c_3$ large enough) such that the argument of the exponential is negative, i.e.\ $p<1$. This implies the existence of a function $f$ which is a deterministic $\eps$-extractor.

\paragraph{2. (b)} Still denoting by $m'$ the output length of $\Ext'$, we want to upper bound $\Delta(\Ext'(\Enc(K, 0^m)), U_{m'})$ for $K\in\mathcal{C}$. We have:
\begin{align*}
\Delta(\Ext'(\Enc(K, 0^m)), U_{m'})&\leq \Delta(\Ext'(\Enc(K, 0^m)), \Ext'(\Enc(K, U_m))) + \Delta(\Ext'(\Enc(K, U_m)), U_{m'})\\
&\leq \eps +\Delta(\Ext'(\Enc(K, U_m)), U_{m'})\\
&\leq 2\eps
\end{align*}
where the first inequality is the triangle inequality, and the second follows by averaging over the values $u$ taken by $U_m$ in the first term (statistical distance is convex): for all $u\in\{0,1\}^m$,
\begin{displaymath}
\Delta(\Ext'(\Enc(K, 0^m)), \Ext'(\Enc(K, u))) \leq \Delta(\Enc(K, 0^m), \Enc(K, u))\leq \eps
\end{displaymath}
using that $\Delta$ cannot increase when applying $\Ext'$ to both arguments and that $\Enc$ is $\eps$-secure.
Finally, the last inequality follows by averaging over the values $k$ taken by $K$ in the second term: for each fixed $k$, the distance $\Delta(\Ext'(\Enc(k, U_m)), U_{m'})$ is at most $\eps$ since $\Ext'$ is an $\eps$-extractor, and by convexity of the statistical distance this remains true after averaging over the values taken by $K$.
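For concreteness, here is a toy Python instantiation of the scheme from part 1 (illustrative only: the ``extractor'' below is just a truncated hash, standing in for a deterministic extractor for $\mathcal{C}$ and carrying no extraction guarantee), which checks that decryption inverts encryption.
\begin{verbatim}
# Toy instantiation of Enc(k, u) = Ext(k) xor u from Problem 6.10.1.
# The "extractor" is a placeholder (truncated SHA-256), not an actual
# deterministic extractor for the class C; it only illustrates the
# mechanics of the scheme.
import hashlib
import os

m = 16  # message length in bytes

def ext(key: bytes) -> bytes:
    # Placeholder for Ext; no extraction guarantee is claimed.
    return hashlib.sha256(key).digest()[:m]

def xor(a: bytes, b: bytes) -> bytes:
    return bytes(x ^ y for x, y in zip(a, b))

def enc(key: bytes, msg: bytes) -> bytes:
    return xor(ext(key), msg)   # Enc(k, u) = Ext(k) xor u

def dec(key: bytes, ct: bytes) -> bytes:
    return xor(ext(key), ct)    # Dec(k, e) = Ext(k) xor e

key = os.urandom(32)
msg = os.urandom(m)
assert dec(key, enc(key, msg)) == msg  # Dec(k, Enc(k, u)) = u
print("round trip ok")
\end{verbatim}

\end{document}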