summaryrefslogtreecommitdiffstats
path: root/final/main.tex
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2015-05-16 17:42:52 +0200
committerThibaut Horel <thibaut.horel@gmail.com>2015-05-16 17:42:52 +0200
commit4ab5229c9e164b39e3e7563c5762e1ef0c68054d (patch)
treea65d858107fbe1d68aa9cee7f2aa3429f78cc68c /final/main.tex
parentb826034077c581658a6308b6ebceacb292a5ea9b (diff)
downloadcs225-master.tar.gz
Adding final solutionsHEADmaster
Diffstat (limited to 'final/main.tex')
-rw-r--r--final/main.tex361
1 files changed, 361 insertions, 0 deletions
diff --git a/final/main.tex b/final/main.tex
new file mode 100644
index 0000000..fd7c343
--- /dev/null
+++ b/final/main.tex
@@ -0,0 +1,361 @@
+\documentclass[10pt]{article}
+\usepackage{fullpage}
+\usepackage[utf8x]{inputenc}
+\usepackage{amsmath,amsfonts,amsthm}
+\usepackage[english]{babel}
+\usepackage[capitalize, noabbrev]{cleveref}
+\usepackage{algorithm}
+\usepackage{algpseudocode}
+
+\newenvironment{enumerate*}%
+ {\vspace{-2ex} \begin{enumerate} %
+ \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
+ {\end{enumerate}}
+
+\newenvironment{itemize*}%
+ {\vspace{-2ex} \begin{itemize} %
+ \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
+ {\end{itemize}}
+
+\newenvironment{description*}%
+ {\vspace{-2ex} \begin{description} %
+ \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
+ {\end{description}}
+
+
+\DeclareMathOperator{\E}{\mathbb{E}}
+\DeclareMathOperator{\Enc}{\mathrm{Enc}}
+\DeclareMathOperator{\Dec}{\mathrm{Dec}}
+\DeclareMathOperator{\Ext}{\mathrm{Ext}}
+\DeclareMathOperator{\List}{\mathrm{LIST}}
+\let\P\relax
+\DeclareMathOperator{\P}{\mathbb{P}}
+\DeclareMathOperator*{\argmin}{\arg\min}
+\newcommand{\ex}[1]{\E\left[#1\right]}
+\newcommand{\prob}[1]{\P\left[#1\right]}
+\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
+\newcommand{\R}{\mathbf{R}}
+\newcommand{\F}{\mathbb{F}}
+\newcommand{\poly}{\text{poly}}
+\newcommand{\N}{\mathbf{N}}
+\newcommand{\C}{\mathcal{C}}
+\newcommand{\eps}{\varepsilon}
+\newcommand{\cl}[1]{\text{\textbf{#1}}}
+\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
+\newcommand{\llbracket}{[\![}
+
+\newtheorem{theorem}{Theorem}
+\newtheorem{lemma}{Lemma}
+\algrenewcommand\algorithmicensure{\textbf{Output:}}
+\algrenewcommand\algorithmicrequire{\textbf{Input:}}
+
+\author{Thibaut Horel}
+\title{CS 225 Take-Home Final -- Solutions}
+
+\begin{document}
+\maketitle
+
+\section*{Problem 2.11}
+
+\paragraph{2.} At a high level, we will use that $\cl{prBPP}=\cl{prP}$ to guess
+a good sequence of random bits for algorithm $A$. The good sequence of random
+bits will be guessed bit by bit by formulating at each step a promise problem
+whose instances are pairs $(x, r)$ where $r$ is the prefix of the random
+sequence constructed so far (as suggested by the hint).
+
+Let us denote $A(x, r)$ the output of algorithm $A$ for input $x$ and random
+bits $r$, $r\in\{0,1\}^{q(n)}$ where $q(n)$ is some polynomial in $n$. The
+guarantee I have about $A$ is:
+\begin{displaymath}
+ \P_r[V(x, A(x, r)) = 1]\geq \frac{2}{3}
+\end{displaymath}
+
+I will inductively construct a sequence $r_k$ for $1\leq k\leq q(n)$ such that:
+\begin{equation}
+ \label{eq:foo}
+ \P_{t_k}[V(x, A(x, r_k\cdot t_k)) = 1] \geq \frac{2}{3}
+\end{equation}
+where $t_k$ is uniformly distributed over $\{0,1\}^{q(n) - k}$. In particular
+for $k=q(n)$ there will be no random choices left and I will have $V(x, A(x,
+r_{q(n)})) = 1$.
+
+Let us assume that $r_k$ has been constructed for a given $k$. Then I define
+the following promise problem:
+\begin{displaymath}
+ \Pi_Y = \left\{ (x, r_k)\,|\, \P_{t_k}[V(x, A(x, r_k\cdot 0\cdot t_k)) = 1] \geq
+ \frac{2}{3}\right\}
+\end{displaymath}
+\begin{displaymath}
+ \Pi_N = \left\{ (x, r_k)\,|\, \P_{t_k}[V(x, A(x, r_k\cdot 0\cdot t_k)) = 0] \geq
+ \frac{2}{3}\right\}
+\end{displaymath}
+where $t_k$ denotes a uniform random variable over $\{0,1\}^{q(n) - k -1}$.
+Clearly, $(\Pi_Y, \Pi_N)$ is a $\cl{prBPP}$ problem: the classes are disjoint,
+and given $(x, r_k)$ a $\cl{prBPP}$ algorithm simply chooses $t_k$ uniformly at
+random and outputs “accept” or “reject” depending on whether or not $V(x, A(x,
+r_k\cdot 0\cdot t_k)) = 1$ (this is poly time since $A$ and $V$ are poly time).
+
+Since $\cl{prBPP} = \cl{prP}$, I can decide in deterministic polynomial time
+whether or not a given $(x, r_k)$ belongs to $\Pi_Y$ or $\Pi_N$. For the $r_k$
+constructed inductively at step $k$ and satisfying \eqref{eq:foo}:
+\begin{itemize}
+ \item either $(x, r_k)\in \Pi_Y$, in which case I define $r_{k+1}
+ = r_k\cdot 0$. By definition of $\Pi_Y$, $r_{k+1}$ satisfies the
+ inductive property \eqref{eq:foo} for $k+1$.
+ \item or $(x, r_k)\in \Pi_N$. Then I define $r_{k+1} = r_k \cdot 1$. Since
+ $r_k$ satisfies \eqref{eq:foo} and $(x, r_k)\notin \Pi_Y$, a simple
+ conditioning argument (on the first bit of $t_k$ in \eqref{eq:foo})
+ shows that $r_{k+1}$ satisfies \eqref{eq:foo} at step $k+1$.
+\end{itemize}
+I have just shown how to construct $r_k$ inductively in deterministic
+polynomial time. Since there are $q(n)$ steps, the overall running time to
+construct $r_{q(n)}$ is still polynomial. As noted above, once $r_{q(n)}$ is
+constructed, the (deterministic) algorithm for the NP search problem simply
+outputs $A(x, r_{q(n)})$.
+
+\paragraph{4.}First, if $\cl{prBPP} = \cl{prP}$, then in particular $\cl{BPP}
+= \cl{P}$. Since \textsc{Primality} is in $\cl{BPP}$, I will
+henceforth assume that I have a deterministic polynomial-time algorithm for
+\textsc{Primality} (alternatively, we now know that \textsc{Primality} is in
+$\cl{P}$ since AKS).
+
+I define the following NP search problem. Given input $x\in\{0, 1\}^n$ a valid
+solution is $y\in\{0, 1\}^{n+1}$ such that $y$ is a prime number in the
+interval $[x, 2x)$. Since \textsc{Primality} is in $\cl{P}$, there is
+ a deterministic polynomial time verifier $V(x, y)$ for this NP search
+ problem: given input $(x, y)$, output 1 if $y$ is prime and $x\leq y< 2x$,
+ 0 otherwise.
+
+Using the prime number theorem, I claim that this NP search problem can be
+solved in probabilistic polynomial time. Let us denote by $\rho(x)$ the number
+of prime numbers in the interval $[x, 2x)$:
+\begin{displaymath}
+ \rho(x) = \pi(2x) - \pi(x) = \frac{2x}{\log 2x} + o\left(\frac{x}{\log
+ 2x}\right) - \frac{x}{\log x} + o\left(\frac{x}{\log x}\right)\sim
+ \frac{x}{\log x}
+\end{displaymath}
+As a consequence, for $x$ large enough, the number of prime numbers in the
+interval $[x, 2x)$ is larger than $\frac{2}{3}\frac{x}{\log x}.$\footnote{This
+ result could be obtained with something much weaker and easier to prove
+ than the prime number theorem: as soon as we have sufficiently good
+ bounds on $\pi(x)$ we can obtain a sufficiently good bound on the number of
+ prime numbers in $[x, 2x)$. Proving reasonably good bounds on $\pi(x)$ is
+ not too hard.} The probabilistic algorithm for the NP search problem now
+ works as follows: pick a random number in $[x, 2x)$ and output it. By the
+    computation performed above, for $x$ large enough, the probability of
+    success of this algorithm is at least $\frac{2}{3}\frac{1}{\log x}$. By
+    repeating this algorithm $\log x = n$ times and using a union bound, we
+    amplify the success probability to at least $\frac{2}{3}$. The running time will
+ be polynomial in $n$ (which is the bit length of $x$).
+
+I now use part \textbf{2.} to derandomize this probabilistic algorithm for the
+NP search problem and obtain a deterministic polynomial time algorithm that
+finds a prime number in $[N, 2N)$ for $N$ large enough.
+
+\section*{Problem 6.4}
+
+\paragraph{1.} Let us assume that $\Ext$ is a $(k, \eps)$ Rényi extractor, and
+consider a $k$-source $X$. Since $H_{\infty}(X) \geq k$, in particular
+$H_2(X)\geq k$. By the Rényi extraction property, $H_2(\Ext(X, U_d))\geq m-\eps$.
+We want to use this to bound $\Delta(\Ext(X, U_d), U_m)$:
+\begin{align*}
+    \Delta(\Ext(X, U_d), U_m)^2 &= \frac{1}{4}\|\Ext(X, U_d) - U_m\|_1^2
+    \leq \frac{2^m}{4}\|\Ext(X, U_d) - U_m\|_2^2\leq
+    \frac{2^m}{4}\left(\|\Ext(X, U_d)\|_2^2 - \frac{1}{2^m}\right)\\
+    &\leq \frac{2^m}{4}\left(2^{-(m-\eps)} - 2^{-m}\right)
+    = \frac{2^{\eps}-1}{4}\leq \eps
+\end{align*}
+where the first inequality is Cauchy--Schwarz (for $v\in\R^{2^m}$,
+$\|v\|_1^2\leq 2^m\|v\|_2^2$), the second expands the square and uses that
+the entries of $\Ext(X, U_d)$ sum to one, and the last one uses
+$2^{\eps}-1\leq 4\eps$. The penultimate inequality used:
+\begin{displaymath}
+    \|\Ext(X, U_d)\|_2^2 = 2^{-H_2(\Ext(X, U_d))}\leq 2^{-(m-\eps)}
+\end{displaymath}
+because of the Rényi extraction property.
+
+We obtained $\Delta(\Ext(X, U_d), U_m)\leq \sqrt{\eps}$ for an arbitrary
+$k$-source $X$, which is exactly the definition of a $(k,\sqrt{\eps})$
+extractor.
+
+\paragraph{2.}We know that there exists an almost pairwise independent family
+with $d = O(m+\log\frac{n}{\eps})$ random bits. A proof similar to the one of
+Theorem 6.18 shows that, defining $\Ext(x, h) = (h, h(x))$, if $X$ has
+Rényi entropy at least $k$, then:
+\begin{displaymath}
+ CP(H, H(X))\leq \frac{1}{D}\left(\frac{1}{K}+\frac{1+\eps}{M}\right)
+\end{displaymath}
+$\cdots$
+\paragraph{3.}$\cdots$
+
+\section*{Problem 7.7}
+
+I will focus on obtaining a $(1/2-\eps)$ local correcting algorithm running in
+time $\poly(m, q)$. This is sufficient by Lemma 7.39 and the systematic
+encoding of Reed-Muller codes of Lemma 7.48.
+
+Following a question made by Alex in class and answered by Salil, I will follow
+the same approach as Algorithm 7.45 but use a degree 2 curve instead of a line.
+Given an input $x\in \F^m$ and oracle $g:\F^m\to\F$ which disagrees with some
+polynomial $p$ of degree $d$ on at most a fraction $1/2-\eps$ of the points:
+\begin{enumerate}
+ \item choose $y = (y_1, y_2)\in (\F^m)^2$ uniformly at random and define
+ the curve $C_y = \{x + t y_1 +t^2 y_2\,|\, t\in \F\}$. We will write
+ $c_y(t) = x+ty_1+t^2y_2$ such that $C_y = \{c_y(t)\,|\,t\in \F\}$.
+ \item query $g$ on the $q$ points of $C_y$ to obtain $g_y:\F\to\F$
+    \item run the $q$-ary Reed-Solomon list-decoding algorithm with parameter
+        $\eta$ (the setting of the parameters is explained below) on $g_y$
+        to obtain a polynomial $h$ (named $h$ to avoid a clash with the
+        field size $q$)
+    \item output $h(0)$
+\end{enumerate}
+
+First note that $p_y \eqdef p\circ c_y$ is a univariate polynomial on $\F$ of
+degree $2d$, and that $g_y$ agrees with $p_y$ wherever $g$ agrees with $p$ on
+$C_y$. In step 3., we need to make sure that the parameter $\eta$ with which
+we call the Reed-Solomon list-decoding algorithm is such that:
+\begin{itemize}
+ \item $\eta\leq 1-2\sqrt{\frac{2d}{q}}$ so that Reed-Solomon list-decoding
+ is possible
+    \item $\eta \leq \frac{1}{2}\left(1 - \frac{2d}{q}\right)$, half the
+        minimum distance of the Reed-Solomon code of degree $2d$, such that
+        there is a unique polynomial in the decoded list.
+\item with probability at most $\frac{1}{3}$, $g_y$ disagrees
+ with $p_y$ on a fraction more than $\eta$ of the points in $\F$, such
+ that the unique polynomial in the decoded list is $p_y$ with
+ probability at least $\frac{2}{3}$.
+\end{itemize}
+
+To see that finding such an $\eta$ is possible we need to understand the
+quantity $P\eqdef\P_y[d(g_y, p_y)\geq \eta]$. We write:
+\begin{displaymath}
+ P = \P_y\left[\frac{1}{q}\sum_{t\in \F} X_y^t\geq \eta\right]
+\end{displaymath}
+where we defined $X_y^t = I[g(c_y(t)) \neq p(c_y(t))]$ with $I[E]$ the
+indicator variable of event $E$. It is easy to see that for a fixed $t\neq 0$
+and a random choice of $y$, $c_y(t)$ is uniformly distributed in $\F^m$, so that
+$X_y^t$ is a Bernoulli variable of parameter $\frac{1}{2}-\eps$.
+
+Furthermore, I claim that $(X^t_y)_{t\in\F}$ is a pairwise independent family.
+Indeed for distinct $t, u\neq 0$:
+\begin{displaymath}
+    \P_y[X^t_y = 1\text{ and }X^u_y = 1]
+    = \sum_{(z_1,z_2)\in (\F^m)^2} \P_y[c_y(t) = z_1\text{ and } c_y(u) = z_2]
+    I[g(z_1)\neq p(z_1)]I[g(z_2)\neq p(z_2)] = (1/2-\eps)^2
+\end{displaymath}
+This is because there are exactly $q^{2m}(1/2-\eps)^2$ pairs $(z_1, z_2)$ such
+that $I[g(z_1)\neq p(z_1)]I[g(z_2)\neq p(z_2)] = 1$. And by a standard
+polynomial interpolation argument, for any pair $z_1,z_2$:
+\begin{displaymath}
+    \P_y[c_y(t) = z_1\text{ and } c_y(u) = z_2] =\frac{1}{q^{2m}}
+\end{displaymath}
+(the map $(y_1, y_2)\mapsto (ty_1+t^2y_2, uy_1+u^2y_2)$ is a bijection of
+$(\F^m)^2$ since its coefficient matrix has determinant $tu(u-t)\neq 0$).
+A similar argument applies to the other events associated with $X_y^t$ and
+$X_y^u$.
+
+Hence, we can apply the tail-bound for pairwise independent variables of
+Proposition 3.28:
+\begin{displaymath}
+ P\leq \frac{1}{q(\eta - (1/2-\eps))^2}
+\end{displaymath}
+Now it is easy to see that for $\eta$ large enough such that $P\leq
+\frac{1}{3}$, it is possible to choose $d = \gamma q$ with $\gamma$ small
+enough such that we have both $\eta\leq 1- 2\sqrt{\frac{2d}{q}}$ and $\eta\leq
+1-\frac{2d}{q}$ as required above.
+
+\section*{Problem 7.8}
+
+\paragraph{1.}
+Let us consider an $\cl{RP}$ algorithm $A$ for language $L$
+running in time $n^c$ for inputs of bit-length $n$ and denote by $A(x, r)$ the
+output of $A$ on input $x$ and random bits $r$. Since $A$ runs in time $n^c$ we
+can assume $|r| \leq n^c$. By definition of $\cl{RP}$, for fixed $x$, the
+circuit $T_x(\cdot) = A(x, \cdot)$ is of size at most $n^c$ and such that:
+\begin{itemize}
+ \item if $x\in L$, $T_x$ accepts on a fraction at least $\frac{1}{2}$ of the
+ strings of length $n^c$
+ \item if $x\notin L$, $T_x$ accepts none of the strings of length $n^c$
+\end{itemize}
+A deterministic algorithm for $L$ works as follows:
+\begin{itemize}
+ \item construct a $(n^c,\frac{1}{2})$ hitting set $H$ and evaluate $T_x$ on each
+ element of $H$
+    \item if $T_x$ accepts one of the elements, output “$x\in L$”
+ \item otherwise output “$x\notin L$”
+\end{itemize}
+the correctness of the algorithm follows directly from the definition of
+a hitting set and the property of $T_x$ described above.
+
+The running time is $s(n^c)$ to construct $H$ and $n^c s(n^c)$ to evaluate $T$
+on all the elements in $H$ ($H$ has at most $s(n^c)$ elements since it is
+constructed in time $s(n^c)$).
+
+\paragraph{2.} The hitting set $H_m$ is simply $\{G_m(u)\,|\, u\in\{0,1\}^d\}$.
+The time to construct $H_m$ is $2^d s$ as required. Now to see that it is
+a hitting set:
+
+Assume that it is not, that is, there is a circuit $T$ of size $t$ which accepts
+a fraction $>\eps$ of the $2^m$ strings in $\{0,1\}^m$ but which accepts none of
+the strings in $H_m$; then:
+\begin{displaymath}
+ |\P[T(G(U_d)) = 1] - \P[T(U_m) = 1]|> \eps
+\end{displaymath}
+that is, $T$ has advantage greater than $\eps$ which contradicts that $G_m$ is
+a pseudorandom generator.
+
+\paragraph{4.}
+
+\paragraph{Definition}
+
+I say that $G$ is a $(t, k,\eps)$ black box construction of
+hitting set generators if for every oracle $f:[n]\to\{0,1\}$, $G^f:[D]\to[M]$
+is a deterministic algorithm and if there exists a randomized oracle algorithm
+$Red$ running in time $t$ such that for every $f:[n]\to\{0,1\}$ and every
+$T:[M]\to\{0,1\}$ accepting less than a $1-\eps$ fraction of the elements
+of $[M]$, if:
+\begin{displaymath}
+ \forall x\in [D], T(G^f(x)) = 1
+\end{displaymath}
+then there exists an advice $z\in [K]$ such that:
+\begin{displaymath}
+ \forall x\in[n],\quad \P[Red^T(x, z) = f(x)] \geq \frac{2}{3}
+\end{displaymath}
+
+\paragraph{Comment} Note that I have reversed the definition of $T$ in the
+definition of the hitting set generator (by taking the negation of the
+algorithm $T$). This is to make the connection with dispersers clearer. Then the
+definition simply says that $T$ should reject at least one element in the image
+of $G^f$. If it doesn't then it means that we can “compute” the hard function
+$f$.
+
+\paragraph{Dispersers} I will show the connection with dispersers via the list
+decoding framework. It is immediate to see that the list decoding definition
+of a disperser is the following: $\Gamma:[N]\times [D]\to [M]$ is a $(k, \eps)$
+disperser iff:
+\begin{displaymath}
+ \forall T\subseteq [M],\; |T|< (1-\eps)M\Rightarrow |\List_{\Gamma}(T, 1)|<K
+\end{displaymath}
+
+\paragraph{Equivalence} For a black box construction $G$, define $\Gamma(f, y)
+\eqdef G^f(y)$ where on the left-hand side we see $f$ as a string of $n$ bits
+(i.e. $N = 2^n$). Then $G$ is a $(\infty, k,\eps)$ black-box construction iff
+$\Gamma$ is a $(k, \eps)$ disperser.
+
+\begin{proof}
+
+ $\Rightarrow$ Suppose $G$ is a $(\infty, k,\eps)$ black box
+ construction for hitting set generators. Consider $f\in\List_{\Gamma}(T,
+ 1)$ for some $T\subseteq[M]$ of size smaller than $(1-\eps)M$. Interpreting
+ $T$ as the truth table of an algorithm $T$, then $f\in\List_{\Gamma}(T, 1)$
+ iff:
+ \begin{displaymath}
+ \forall x\in [D], \Gamma(f, x)\in T
+ \end{displaymath}
+ or in other words $T(G^f(x)) = 1, \forall x\in[D]$. But by the black-box
+ definition, this implies the existence of $z\in [K]$ such that
+ $Red^T(\cdot, z)$ computes $f$ everywhere. Hence the number of different
+ possible $f$ is bounded by $K$ which shows that $\Gamma$ satisfies the list
+ decoding property of a disperser.
+
+ $\Leftarrow$ Suppose that $\Gamma$ is a disperser, then for a $T$ such that
+ $|T|<(1-\eps)M$ we have $|\List_{\Gamma}(T, 1)|< K$. Denoting
+ $f_1,\dots,f_K$ the elements in the list, I define $Red^T(x, z) = f_z(x)$
+ for $z\in [K]$. Then if $G^f(x) = \Gamma(f, x)\in T$ for all $x$, it means
+ that $f$ is in the list, in which case the advice is simply the index of
+ $f$ in the list, showing that $G^f$ is a hitting set generator.
+\end{proof}
+\end{document}