\documentclass[10pt]{article} \usepackage{fullpage} \usepackage[utf8x]{inputenc} \usepackage{amsmath,amsfonts,amsthm} \usepackage[english]{babel} \usepackage[capitalize, noabbrev]{cleveref} \usepackage{algorithm} \usepackage{algpseudocode} \newenvironment{enumerate*}% {\vspace{-2ex} \begin{enumerate} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{enumerate}} \newenvironment{itemize*}% {\vspace{-2ex} \begin{itemize} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{itemize}} \newenvironment{description*}% {\vspace{-2ex} \begin{description} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{description}} \DeclareMathOperator{\E}{\mathbb{E}} \DeclareMathOperator{\Enc}{\mathrm{Enc}} \DeclareMathOperator{\Dec}{\mathrm{Dec}} \DeclareMathOperator{\Ext}{\mathrm{Ext}} \DeclareMathOperator{\List}{\mathrm{LIST}} \let\P\relax \DeclareMathOperator{\P}{\mathbb{P}} \DeclareMathOperator*{\argmin}{\arg\min} \newcommand{\ex}[1]{\E\left[#1\right]} \newcommand{\prob}[1]{\P\left[#1\right]} \newcommand{\inprod}[1]{\left\langle #1 \right\rangle} \newcommand{\R}{\mathbf{R}} \newcommand{\F}{\mathbb{F}} \newcommand{\poly}{\text{poly}} \newcommand{\N}{\mathbf{N}} \newcommand{\C}{\mathcal{C}} \newcommand{\eps}{\varepsilon} \newcommand{\cl}[1]{\text{\textbf{#1}}} \newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}} \newcommand{\llbracket}{[\![} \newtheorem{theorem}{Theorem} \newtheorem{lemma}{Lemma} \algrenewcommand\algorithmicensure{\textbf{Output:}} \algrenewcommand\algorithmicrequire{\textbf{Input:}} \author{Thibaut Horel} \title{CS 225 Take-Home Final -- Solutions} \begin{document} \maketitle \section*{Problem 2.11} \paragraph{2.} At a high level, we will use that $\cl{prBPP}=\cl{prP}$ to guess a good sequence of random bits for algorithm $A$. 
The good sequence of random bits will be guessed bit by bit by formulating at each step a promise problem whose instances are pairs $(x, r)$ where $r$ is the prefix of the random sequence constructed so far (as suggested by the hint). Let us denote by $A(x, r)$ the output of algorithm $A$ for input $x$ and random bits $r$, $r\in\{0,1\}^{q(n)}$ where $q(n)$ is some polynomial in $n$. The guarantee I have about $A$ is: \begin{displaymath} \P_r[V(x, A(x, r)) = 1]\geq \frac{2}{3} \end{displaymath} Write $q = q(n)$, let $\gamma = \frac{1}{12q}$ and $\alpha_k = \frac{2}{3} - 2k\gamma$, so that $\alpha_0 = \frac{2}{3}$ and $\alpha_q = \frac{1}{2}$ (the threshold must decay slightly at each step because the promise-problem decider gives no guarantee on instances violating the promise). I will inductively construct prefixes $r_k\in\{0,1\}^k$ for $0\leq k\leq q$ such that: \begin{equation} \label{eq:foo} \P_{t_k}[V(x, A(x, r_k\cdot t_k)) = 1] \geq \alpha_k \end{equation} where $t_k$ is uniformly distributed over $\{0,1\}^{q - k}$. For $k=0$, $r_0$ is the empty string and \eqref{eq:foo} is exactly the guarantee above. For $k=q$ there are no random choices left, so \eqref{eq:foo} forces $V(x, A(x, r_q)) = 1$. Let us assume that $r_k$ has been constructed for a given $k<q$, and write $p_b(x, r_k) = \P_{t_{k+1}}[V(x, A(x, r_k\cdot b\cdot t_{k+1})) = 1]$ for $b\in\{0,1\}$, where $t_{k+1}$ is uniform over $\{0,1\}^{q - k - 1}$. Then I define the following promise problem: \begin{displaymath} \Pi_Y = \left\{ (x, r_k)\,|\, p_0(x, r_k) \geq \alpha_k - \gamma\right\} \end{displaymath} \begin{displaymath} \Pi_N = \left\{ (x, r_k)\,|\, p_0(x, r_k) \leq \alpha_k - 2\gamma\right\} \end{displaymath} Clearly, $(\Pi_Y, \Pi_N)$ is a $\cl{prBPP}$ problem: the classes are disjoint, and given $(x, r_k)$ a $\cl{prBPP}$ algorithm estimates $p_0(x, r_k)$ to within $\gamma/2$ with probability at least $\frac{2}{3}$ by sampling $O(q^2)$ independent copies of $t_{k+1}$ (a Chernoff bound; this is poly time since $A$ and $V$ are poly time and $1/\gamma$ is polynomial in $n$) and accepts iff the estimate is at least $\alpha_k - \frac{3\gamma}{2}$. Since $\cl{prBPP} = \cl{prP}$, there is a deterministic polynomial time algorithm $D$ which accepts every instance of $\Pi_Y$ and rejects every instance of $\Pi_N$ (its answer on instances violating the promise may be arbitrary). For the $r_k$ constructed inductively at step $k$ and satisfying \eqref{eq:foo}: \begin{itemize} \item either $D$ accepts $(x, r_k)$, in which case I define $r_{k+1} = r_k\cdot 0$. Since $D$ rejects every instance of $\Pi_N$, acceptance implies $(x, r_k)\notin\Pi_N$, i.e.\ $p_0(x, r_k) > \alpha_k - 2\gamma = \alpha_{k+1}$, so $r_{k+1}$ satisfies the inductive property \eqref{eq:foo} for $k+1$. \item or $D$ rejects $(x, r_k)$. Then I define $r_{k+1} = r_k \cdot 1$. Rejection implies $(x, r_k)\notin \Pi_Y$, i.e.\ $p_0(x, r_k) < \alpha_k - \gamma$. Conditioning on the first bit of $t_k$ in \eqref{eq:foo} gives $\frac{1}{2}\left(p_0(x, r_k) + p_1(x, r_k)\right) \geq \alpha_k$, hence $p_1(x, r_k) \geq 2\alpha_k - p_0(x, r_k) > \alpha_k + \gamma \geq \alpha_{k+1}$, so $r_{k+1}$ satisfies \eqref{eq:foo} at step $k+1$. \end{itemize} I have just shown how to construct $r_k$ inductively in deterministic polynomial time. Since there are $q(n)$ steps, the overall running time to construct $r_{q(n)}$ is still polynomial. As noted above, once $r_{q(n)}$ is constructed, the (deterministic) algorithm for the NP search problem simply outputs $A(x, r_{q(n)})$. \paragraph{4.}First, if $\cl{prBPP} = \cl{prP}$, then in particular $\cl{BPP} = \cl{P}$. Since \textsc{Primality} is in $\cl{BPP}$, I will henceforth assume that I have a deterministic polynomial time algorithm for \textsc{Primality} (alternatively, we now know that \textsc{Primality} is in $\cl{P}$ since AKS). I define the following NP search problem. Given input $x\in\{0, 1\}^n$, interpreted as a positive integer, a valid solution is $y\in\{0, 1\}^{n+1}$ such that $y$ is a prime number in the interval $[x, 2x)$. Since \textsc{Primality} is in $\cl{P}$, there is a deterministic polynomial time verifier $V(x, y)$ for this NP search problem: given input $(x, y)$, output 1 if $y$ is prime and $x\leq y< 2x$, and 0 otherwise. Using the prime number theorem, I claim that this NP search problem can be solved in probabilistic polynomial time.
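Before the analysis, here is a minimal Python sketch of the claimed probabilistic algorithm. The deterministic Miller--Rabin witness set below is a standalone stand-in for the polynomial-time primality test assumed above, and the trial count is illustrative:

```python
import random

# Witnesses making Miller-Rabin deterministic for n < 3.3 * 10**24.
_WITNESSES = (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37)

def is_prime(n):
    """Stand-in for the deterministic primality test assumed in the text."""
    if n < 2:
        return False
    for p in _WITNESSES:
        if n % p == 0:
            return n == p
    d, s = n - 1, 0
    while d % 2 == 0:
        d, s = d // 2, s + 1
    for a in _WITNESSES:
        x = pow(a, d, n)
        if x in (1, n - 1):
            continue
        for _ in range(s - 1):
            x = x * x % n
            if x == n - 1:
                break
        else:
            return False
    return True

def find_prime(x, trials):
    """Randomized NP search: sample y in [x, 2x) and check the verifier V(x, y)."""
    for _ in range(trials):
        y = random.randrange(x, 2 * x)
        if is_prime(y):  # V(x, y) = 1: y is a prime in [x, 2x)
            return y
    return None  # failure; happens with probability exp(-Omega(trials / log x))
```

Taking the number of trials to be $\Theta(n)$ makes the failure probability a constant, as quantified next.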
Let us denote by $\rho(x)$ the number of prime numbers in the interval $[x, 2x)$: \begin{displaymath} \rho(x) = \pi(2x) - \pi(x) = \frac{2x}{\log 2x} + o\left(\frac{x}{\log 2x}\right) - \frac{x}{\log x} + o\left(\frac{x}{\log x}\right)\sim \frac{x}{\log x} \end{displaymath} As a consequence, for $x$ large enough, the number of prime numbers in the interval $[x, 2x)$ is larger than $\frac{2}{3}\frac{x}{\log x}.$\footnote{This result could be obtained with something much weaker and easier to prove than the prime number theorem: as soon as we have sufficiently good bounds on $\pi(x)$ we can obtain a sufficiently good bound on the number of prime numbers in $[x, 2x)$. Proving reasonably good bounds on $\pi(x)$ is not too hard.} The probabilistic algorithm for the NP search problem now works as follows: pick a uniformly random integer in $[x, 2x)$ and output it. By the computation performed above, for $x$ large enough, the success probability of a single trial is at least $\frac{2}{3}\frac{1}{\log x}$. By repeating this algorithm independently $\Theta(\log x) = \Theta(n)$ times and outputting the first candidate accepted by the verifier $V$, the failure probability is at most $\left(1 - \frac{2}{3\log x}\right)^{\Theta(\log x)}\leq \frac{1}{3}$, i.e.\ the success probability is amplified to at least $\frac{2}{3}$. The running time is polynomial in $n$ (which is the bit length of $x$). I now use part \textbf{2.} to derandomize this probabilistic algorithm for the NP search problem and obtain a deterministic polynomial time algorithm that finds a prime number in $[N, 2N)$ for $N$ large enough. \section*{Problem 6.4} \paragraph{1.} Let us assume that $\Ext$ is a $(k, \eps)$ Rényi extractor, and consider a $k$-source $X$. Since $H_{\infty}(X) \geq k$, in particular $H_2(X)\geq k$. By the Rényi extraction property, $H_2(\Ext(X, U_d))\geq m-\eps$.
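Three elementary facts about a distribution $Z$ on a universe of size $M$ (here $M = 2^m$) are used in the bound that follows: $\|Z\|_2^2 = CP(Z) = 2^{-H_2(Z)}$, the Cauchy--Schwarz bound $\|v\|_1^2\leq M\|v\|_2^2$, and $\|Z - U\|_2^2 = \|Z\|_2^2 - \frac{1}{M}$ for $U$ uniform. A quick numeric sanity check on an arbitrary example distribution:

```python
import math

# An arbitrary example distribution Z on a universe of size M = 4.
Z = [0.4, 0.3, 0.2, 0.1]
M = len(Z)
U = [1.0 / M] * M  # uniform distribution on the same universe

# ||Z||_2^2 equals the collision probability, which equals 2^(-H2(Z)).
l2_sq = sum(p * p for p in Z)
h2 = -math.log2(l2_sq)
assert abs(l2_sq - 2 ** (-h2)) < 1e-12

# Cauchy-Schwarz over the M points: ||v||_1^2 <= M * ||v||_2^2.
v = [p - u for p, u in zip(Z, U)]
assert sum(abs(x) for x in v) ** 2 <= M * sum(x * x for x in v) + 1e-12

# Expanding the square: ||Z - U||_2^2 = ||Z||_2^2 - 1/M.
assert abs(sum(x * x for x in v) - (l2_sq - 1.0 / M)) < 1e-12
```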
We want to use this to bound $\Delta(\Ext(X, U_d), U_m)$. Writing $M = 2^m$ for the size of the output universe: \begin{align*} \Delta(\Ext(X, U_d), U_m)^2 &= \frac{1}{4}\|\Ext(X, U_d) - U_m\|_1^2 \leq \frac{M}{4}\|\Ext(X, U_d) - U_m\|_2^2 = \frac{M}{4}\left(\|\Ext(X, U_d)\|_2^2 - \frac{1}{M}\right)\\ &\leq \frac{M}{4}\left(\frac{2^{\eps}}{M} - \frac{1}{M}\right) = \frac{2^{\eps}-1}{4}\leq \frac{\eps}{4} \end{align*} where the first inequality is Cauchy--Schwarz over the $M$ points of $\{0,1\}^m$, the equality expands the square and uses $\|U_m\|_2^2 = \inprod{\Ext(X, U_d), U_m} = \frac{1}{M}$, and the last inequality uses $2^{\eps}\leq 1+\eps$ for $0\leq\eps\leq 1$ (by convexity of $\eps\mapsto 2^{\eps}$; for $\eps>1$ the conclusion below is trivial since statistical distance is at most $1$). The penultimate inequality used: \begin{displaymath} \|\Ext(X, U_d)\|_2^2 = 2^{-H_2(\Ext(X, U_d))}\leq 2^{-(m-\eps)} = \frac{2^{\eps}}{M} \end{displaymath} because of the Rényi extraction property. We obtained $\Delta(\Ext(X, U_d), U_m)\leq \frac{\sqrt{\eps}}{2}\leq \sqrt{\eps}$ for an arbitrary $k$-source $X$, which is exactly the definition of a $(k,\sqrt{\eps})$ extractor. \paragraph{2.}We know that there exists an almost pairwise independent family with $d = O(m+\log\frac{n}{\eps})$ random bits. A proof similar to the one of Theorem 6.18 shows that, defining $\Ext(x, h) = (h, h(x))$, if $X$ has Rényi entropy at least $k$, then: \begin{displaymath} CP(H, H(X))\leq \frac{1}{D}\left(\frac{1}{K}+\frac{1+\eps}{M}\right) \end{displaymath} $\cdots$ \paragraph{3.}$\cdots$ \section*{Problem 7.7} I will focus on obtaining a $(1/2-\eps)$ local correcting algorithm running in time $\poly(m, q)$. This is sufficient by Lemma 7.39 and the systematic encoding of Reed-Muller codes of Lemma 7.48. Following a question asked by Alex in class and answered by Salil, I will follow the same approach as Algorithm 7.45 but use a degree 2 curve instead of a line. Given an input $x\in \F^m$ and an oracle $g:\F^m\to\F$ which disagrees with some polynomial $p$ of degree $d$ on at most a $1/2-\eps$ fraction of the points: \begin{enumerate} \item choose $y = (y_1, y_2)\in (\F^m)^2$ uniformly at random and define the curve $C_y = \{x + t y_1 +t^2 y_2\,|\, t\in \F\}$. We will write $c_y(t) = x+ty_1+t^2y_2$ such that $C_y = \{c_y(t)\,|\,t\in \F\}$.
\item query $g$ on the $q$ points of $C_y$ to obtain $g_y:\F\to\F$ defined by $g_y(t) = g(c_y(t))$ \item run the $q$-ary Reed-Solomon list-decoding algorithm with parameter $\eta$ (the setting of the parameters is explained below) on $g_y$ to obtain a polynomial $h$ \item output $h(0)$ (recall that $c_y(0) = x$) \end{enumerate} First note that $p_y \eqdef p\circ c_y$ is a univariate polynomial on $\F$ of degree at most $2d$. In step 3., we need to make sure that the parameter $\eta$ with which we call the Reed-Solomon list-decoding algorithm is such that: \begin{itemize} \item $\eta\leq 1-2\sqrt{\frac{2d}{q}}$ so that Reed-Solomon list-decoding is possible \item $\eta < \frac{1}{2}\left(1- \frac{2d}{q}\right)$, i.e.\ $\eta$ is less than half the minimum distance of the Reed-Solomon code of degree $2d$, such that there is at most one polynomial in the decoded list \item with probability at most $\frac{1}{3}$, $g_y$ disagrees with $p_y$ on a fraction more than $\eta$ of the points in $\F$, such that the unique polynomial in the decoded list is $p_y$ with probability at least $\frac{2}{3}$. \end{itemize} To see that finding such an $\eta$ is possible we need to understand the quantity $P\eqdef\P_y[\delta(g_y, p_y)\geq \eta]$, where $\delta$ denotes the fraction of points of $\F$ on which the two functions disagree. We write: \begin{displaymath} P = \P_y\left[\frac{1}{q}\sum_{t\in \F} X_y^t\geq \eta\right] \leq \P_y\left[\frac{1}{q-1}\sum_{t\in \F^*} X_y^t\geq \eta - \frac{1}{q}\right] \end{displaymath} where we defined $X_y^t = I[g(c_y(t)) \neq p(c_y(t))]$ with $I[E]$ the indicator variable of event $E$ and $\F^* \eqdef \F\setminus\{0\}$; the inequality bounds the contribution of the term $t=0$ by $\frac{1}{q}$ (indeed $c_y(0) = x$ is not random, so $X_y^0$ is a constant). It is easy to see that for a fixed $t\neq 0$ and a random choice of $y$, $c_y(t)$ is uniformly distributed in $\F^m$, so that $X_y^t$ is a Bernoulli variable of parameter at most $\frac{1}{2}-\eps$. Furthermore, I claim that $(X^t_y)_{t\in\F^*}$ is a pairwise independent family. Indeed for distinct nonzero $t\neq u$: \begin{displaymath} \P_y[X^t_y = 1\text{ and }X^u_y = 1] = \sum_{(z_1,z_2)\in (\F^m)^2} \P_y[c_y(t) = z_1\text{ and } c_y(u) = z_2] I[g(z_1)\neq p(z_1)]I[g(z_2)\neq p(z_2)] \leq \left(\frac{1}{2}-\eps\right)^2 \end{displaymath} This is because there are at most $q^{2m}(1/2-\eps)^2$ pairs $(z_1, z_2)$ such that $I[g(z_1)\neq p(z_1)]I[g(z_2)\neq p(z_2)] = 1$. And by a standard polynomial interpolation argument (the linear system $ty_1 + t^2 y_2 = z_1 - x$, $uy_1 + u^2 y_2 = z_2 - x$ has a unique solution $(y_1, y_2)$ since its Vandermonde determinant $tu(u-t)$ is nonzero), for any pair $z_1,z_2$: \begin{displaymath} \P_y[c_y(t) = z_1\text{ and } c_y(u) = z_2] =\frac{1}{q^{2m}} \end{displaymath} A similar argument applies to the other joint events of $X_y^t$ and $X_y^u$, which establishes pairwise independence. Hence, we can apply the tail bound for pairwise independent variables of Proposition 3.28: \begin{displaymath} P\leq \frac{1}{(q-1)\left(\eta - \frac{1}{q} - (1/2-\eps)\right)^2} \end{displaymath} It is now easy to see that choosing $\eta$ such that $\eta - (1/2-\eps)$ is a positive constant makes $P = O(1/q)\leq \frac{1}{3}$ for $q$ large enough, and that it is then possible to choose $d = \gamma q$ with $\gamma$ a small enough constant (in particular $\gamma < \eps$) such that we have both $\eta\leq 1- 2\sqrt{\frac{2d}{q}}$ and $\eta< \frac{1}{2}\left(1-\frac{2d}{q}\right)$ as required above. \section*{Problem 7.8} \paragraph{1.} Let us consider an $\cl{RP}$ algorithm $A$ for language $L$ running in time $n^c$ for inputs of bit-length $n$ and denote by $A(x, r)$ the output of $A$ on input $x$ and random bits $r$. Since $A$ runs in time $n^c$ we can assume $|r| \leq n^c$. By definition of $\cl{RP}$, for a fixed $x$, $A(x, \cdot)$ is computed by a circuit $T_x$ of size at most $n^c$ (absorbing into the constant $c$ the polynomial overhead of converting a time-$n^c$ algorithm into a circuit) such that: \begin{itemize} \item if $x\in L$, $T_x$ accepts on a fraction at least $\frac{1}{2}$ of the strings of length $n^c$ \item if $x\notin L$, $T_x$ accepts none of the strings of length $n^c$ \end{itemize} A deterministic algorithm for $L$ works as follows: \begin{itemize} \item construct a $(n^c,\frac{1}{2})$ hitting set $H$ and evaluate $T_x$ on each element of $H$ \item if $T_x$ accepts one of the elements, output “$x\in L$” \item otherwise output “$x\notin L$” \end{itemize} The correctness of the algorithm follows directly from the definition of a hitting set and the property of $T_x$ described above. The running time is $s(n^c)$ to construct $H$ and $n^c s(n^c)$ to evaluate $T_x$ on all the elements of $H$ ($H$ has at most $s(n^c)$ elements since it is constructed in time $s(n^c)$). \paragraph{2.} The hitting set $H_m$ is simply $\{G_m(u)\,|\, u\in\{0,1\}^d\}$. The time to construct $H_m$ is $2^d s$ as required.
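Part 2's construction can be sketched in a few lines of Python; the generator \texttt{G} and circuit \texttt{T} below are toy stand-ins for illustration only (a real instantiation would use a pseudorandom generator secure against size-$t$ circuits):

```python
from itertools import product

def hitting_set(G, d):
    """H_m = { G_m(u) : u in {0,1}^d }, built in time 2^d times the cost of G."""
    return {G(u) for u in product((0, 1), repeat=d)}

def G(u):
    # Toy "generator" stretching d = 3 bits to m = 6 bits (NOT pseudorandom).
    return u + tuple(u[i] ^ u[(i + 1) % 3] for i in range(3))

def T(x):
    # Toy circuit standing in for T_x; accepts half of {0,1}^6.
    return x[0] == 1

H = hitting_set(G, 3)

# Part 1's deterministic decision: accept iff T accepts some element of H.
accepted = any(T(x) for x in H)
```

The final \texttt{any} is exactly part 1's deterministic algorithm: if $T_x$ accepts at least half of its inputs and $H$ is a hitting set, some element of $H$ is accepted; if $T_x$ accepts nothing, none is.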
Now to see that it is a hitting set: assume that it is not, that is, there is a circuit $T$ of size $t$ which accepts a fraction $>\eps$ of the $2^m$ strings in $\{0,1\}^m$ but which accepts none of the strings in $H_m$. Then: \begin{displaymath} |\P[T(G_m(U_d)) = 1] - \P[T(U_m) = 1]|> \eps \end{displaymath} since the first probability is $0$ and the second is greater than $\eps$; that is, $T$ has advantage greater than $\eps$, which contradicts that $G_m$ is a pseudorandom generator. \paragraph{4.} \paragraph{Definition} I say that $G$ is a $(t, k,\eps)$ black-box construction of hitting set generators if for every oracle $f:[n]\to\{0,1\}$, $G^f:[D]\to[M]$ is a deterministic algorithm, and if there exists a randomized oracle algorithm $Red$ running in time $t$ such that the following holds: for every $f:[n]\to\{0,1\}$ and every $T:[M]\to\{0,1\}$ which accepts less than a $1-\eps$ fraction of the elements of $[M]$, if: \begin{displaymath} \forall x\in [D], T(G^f(x)) = 1 \end{displaymath} then there exists an advice $z\in [K]$ such that: \begin{displaymath} \forall x\in[n],\quad \P[Red^T(x, z) = f(x)] \geq \frac{2}{3} \end{displaymath} \paragraph{Comment} Note that I have reversed the role of $T$ compared to the definition of a hitting set generator (by taking the negation of the algorithm $T$). This is to make the connection with dispersers clearer. The definition then simply says that $T$ should reject at least one element in the image of $G^f$; if it doesn't, then we can “compute” the hard function $f$. \paragraph{Dispersers} I will show the connection with dispersers via the list-decoding framework. It is immediate that the list-decoding definition of a disperser is the following: $\Gamma:[N]\times [D]\to [M]$ is a $(k, \eps)$ disperser iff: \begin{displaymath} \forall T\subseteq [M],\; |T|< (1-\eps)M\Rightarrow |\List_{\Gamma}(T, 1)|