\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{algorithm}
\usepackage{algpseudocode}

\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}
 
\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}
 
\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}


\DeclareMathOperator{\E}{\mathbb{E}}
\DeclareMathOperator{\Enc}{\mathrm{Enc}}
\DeclareMathOperator{\Dec}{\mathrm{Dec}}
\DeclareMathOperator{\Ext}{\mathrm{Ext}}
\DeclareMathOperator{\List}{\mathrm{LIST}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\DeclareMathOperator*{\argmin}{\arg\min}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\F}{\mathbb{F}}
\newcommand{\poly}{\text{poly}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\cl}[1]{\text{\textbf{#1}}}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\algrenewcommand\algorithmicensure{\textbf{Output:}}
\algrenewcommand\algorithmicrequire{\textbf{Input:}}

\author{Thibaut Horel}
\title{CS 225 Take-Home Final -- Solutions}

\begin{document}
\maketitle

\section*{Problem 2.11}

\paragraph{2.} At a high level, we will use the assumption
$\cl{prBPP}=\cl{prP}$ to find a good sequence of random bits for algorithm $A$.
The good sequence of random bits will be constructed bit by bit, by formulating
at each step a promise problem whose instances are pairs $(x, r)$, where $r$ is
the prefix of the random sequence constructed so far (as suggested by the
hint).

Let us denote by $A(x, r)$ the output of algorithm $A$ on input $x$ and random
bits $r\in\{0,1\}^{q(n)}$, where $q(n)$ is some polynomial in the input length
$n$. The guarantee I have about $A$ is:
\begin{displaymath}
    \P_r[V(x, A(x, r)) = 1]\geq \frac{2}{3}
\end{displaymath}

I will inductively construct prefixes $r_k\in\{0,1\}^k$ for $0\leq k\leq q(n)$,
starting from the empty prefix $r_0$, such that:
\begin{equation}
    \label{eq:foo}
    \P_{t_k}[V(x, A(x, r_k\cdot t_k)) = 1] \geq \frac{2}{3} - \frac{k}{3q(n)}
\end{equation}
where $t_k$ is uniformly distributed over $\{0,1\}^{q(n) - k}$. The base case
$k=0$ is exactly the guarantee on $A$ stated above. For $k=q(n)$ there are no
random choices left, so the probability in \eqref{eq:foo} is either $0$ or
$1$; since it is at least $\frac{2}{3}-\frac{1}{3} = \frac{1}{3}>0$, it equals
$1$, i.e.\ $V(x, A(x, r_{q(n)})) = 1$.

Let us assume that $r_k$ has been constructed for a given $k<q(n)$. I define
the following promise problem (one for each value of $k$):
\begin{displaymath}
    \Pi_Y = \left\{ (x, r_k)\,|\, \P_{t_k}[V(x, A(x, r_k\cdot 0\cdot t_k)) = 1] \geq
    \frac{2}{3} - \frac{k}{3q(n)}\right\}
\end{displaymath}
\begin{displaymath}
    \Pi_N = \left\{ (x, r_k)\,|\, \P_{t_k}[V(x, A(x, r_k\cdot 0\cdot t_k)) = 1] \leq
    \frac{2}{3} - \frac{k+1}{3q(n)}\right\}
\end{displaymath}
where $t_k$ now denotes a uniform random variable over $\{0,1\}^{q(n) - k -1}$.
Clearly, $(\Pi_Y, \Pi_N)$ is a $\cl{prBPP}$ problem: the two sets are disjoint
(the thresholds differ by $\frac{1}{3q(n)}$), and given $(x, r_k)$
a probabilistic polynomial time algorithm can sample $O(q(n)^2)$ independent
copies of $t_k$, estimate $\P_{t_k}[V(x, A(x, r_k\cdot 0\cdot t_k)) = 1]$ up to
additive error $\frac{1}{12q(n)}$ with failure probability at most
$\frac{1}{3}$ (by a Chernoff bound), and accept iff the estimate is at least
$\frac{2}{3} - \frac{k}{3q(n)} - \frac{1}{6q(n)}$ (this is poly time since $A$
and $V$ are poly time).

Since $\cl{prBPP} = \cl{prP}$, there is a deterministic polynomial time
algorithm that accepts every instance of $\Pi_Y$ and rejects every instance of
$\Pi_N$ (its behavior on instances violating the promise can be arbitrary).
I run it on $(x, r_k)$, where $r_k$ was constructed inductively and satisfies
\eqref{eq:foo}:
\begin{itemize}
    \item if it accepts, I define $r_{k+1} = r_k\cdot 0$. Acceptance implies
        $(x, r_k)\notin \Pi_N$, so the probability in \eqref{eq:foo} for
        $r_{k+1}$ is more than $\frac{2}{3} - \frac{k+1}{3q(n)}$, which is the
        inductive property \eqref{eq:foo} at step $k+1$.
    \item if it rejects, I define $r_{k+1} = r_k \cdot 1$. Rejection implies
        $(x, r_k)\notin \Pi_Y$, so appending $0$ gives probability strictly
        less than $\frac{2}{3} - \frac{k}{3q(n)}$. Conditioning \eqref{eq:foo}
        on the first bit of $t_k$, the probabilities obtained by appending $0$
        and $1$ average to at least $\frac{2}{3} - \frac{k}{3q(n)}$, so
        appending $1$ gives probability more than $\frac{2}{3}
        - \frac{k}{3q(n)} \geq \frac{2}{3} - \frac{k+1}{3q(n)}$, i.e.\
        \eqref{eq:foo} holds at step $k+1$.
\end{itemize}
I have just shown how to construct $r_k$ inductively in deterministic
polynomial time. Since there are $q(n)$ steps, the overall running time to
construct $r_{q(n)}$ is still polynomial. As noted above, once $r_{q(n)}$ is
constructed, the (deterministic) algorithm for the NP search problem simply
outputs $A(x, r_{q(n)})$.
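
For concreteness, here is a short Python sketch of this bit-by-bit
construction. It is only an illustration: the callables \texttt{A}, \texttt{V}
and \texttt{decide\_pi} are assumed as black boxes, the last one standing for
the deterministic decider for $(\Pi_Y, \Pi_N)$ provided by
$\cl{prBPP}=\cl{prP}$ (these names are mine, not part of the problem).
\begin{verbatim}
def find_good_randomness(x, q_n, decide_pi):
    """Build a good random string r of length q_n bit by bit.

    decide_pi(x, r) -- assumed deterministic decider (from prBPP = prP) for
    the promise problem at step k = len(r): it accepts whenever appending 0
    to the prefix r keeps the success probability above the current
    threshold, and rejects whenever it falls below the next threshold.
    """
    r = []
    for _ in range(q_n):
        r.append(0 if decide_pi(x, r) else 1)
    return r


def deterministic_search(x, q_n, A, V, decide_pi):
    """Deterministic algorithm for the NP search problem (sketch)."""
    r = find_good_randomness(x, q_n, decide_pi)
    y = A(x, r)          # run A with the derandomized coin tosses
    assert V(x, y) == 1  # guaranteed by the invariant at k = q(n)
    return y
\end{verbatim}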

\paragraph{4.} First, if $\cl{prBPP} = \cl{prP}$, then in particular $\cl{BPP}
= \cl{P}$. Since \textsc{Primality} is in $\cl{BPP}$, I will henceforth assume
that I have a deterministic polynomial time algorithm for \textsc{Primality}
(alternatively, we now know unconditionally that \textsc{Primality} is in
$\cl{P}$ thanks to the AKS algorithm).

I define the following NP search problem. Given input $x\in\{0, 1\}^n$, a valid
solution is $y\in\{0, 1\}^{n+1}$ such that $y$ is a prime number in the
interval $[x, 2x)$. Since \textsc{Primality} is in $\cl{P}$, there is
a deterministic polynomial time verifier $V(x, y)$ for this NP search problem:
given input $(x, y)$, output 1 if $y$ is prime and $x\leq y< 2x$, and 0
otherwise.

Using the prime number theorem, I claim that this NP search problem can be
solved in probabilistic polynomial time. Let us denote by $\rho(x)$ the number
of prime numbers in the interval $[x, 2x)$:
\begin{displaymath}
    \rho(x) = \pi(2x) - \pi(x) = \frac{2x}{\log 2x} - \frac{x}{\log x}
    + o\left(\frac{x}{\log x}\right)\sim
    \frac{x}{\log x}
\end{displaymath}
As a consequence, for $x$ large enough, the number of prime numbers in the
interval $[x, 2x)$ is larger than $\frac{2}{3}\frac{x}{\log x}$.\footnote{This
result could be obtained with something much weaker and easier to prove than
the prime number theorem: any sufficiently good bounds on $\pi(x)$ yield
a sufficiently good lower bound on the number of prime numbers in $[x, 2x)$,
and proving reasonably good bounds on $\pi(x)$ is not too hard.} The
probabilistic algorithm for the NP search problem now works as follows: pick
a uniformly random number in $[x, 2x)$ and output it. By the computation
performed above, for $x$ large enough, the probability of success of a single
trial is at least $\frac{2}{3}\frac{1}{\log x}$. By repeating this algorithm
$O(\log x) = O(n)$ times independently and outputting the first prime found,
the failure probability drops below $\frac{1}{3}$, i.e.\ the success
probability is amplified to at least $\frac{2}{3}$. The running time is
polynomial in $n$ (the bit length of $x$).
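
For concreteness, here is a Python sketch of this probabilistic search
algorithm. It is only an illustration: the Miller-Rabin test below is
a randomized stand-in for the primality test assumed above, and the constant
in the number of trials is arbitrary.
\begin{verbatim}
import random


def is_probable_prime(n, rounds=40):
    """Miller-Rabin primality test (stand-in for the assumed test)."""
    if n < 2:
        return False
    for p in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37):
        if n % p == 0:
            return n == p
    d, s = n - 1, 0
    while d % 2 == 0:
        d //= 2
        s += 1
    for _ in range(rounds):
        a = random.randrange(2, n - 1)
        y = pow(a, d, n)
        if y in (1, n - 1):
            continue
        for _ in range(s - 1):
            y = pow(y, 2, n)
            if y == n - 1:
                break
        else:
            return False
    return True


def find_prime_in_interval(x):
    """Randomized NP search algorithm: look for a prime in [x, 2x)."""
    n = x.bit_length()
    for _ in range(10 * n):            # O(n) independent trials suffice
        y = random.randrange(x, 2 * x)
        if is_probable_prime(y):
            return y
    return None                        # fails with small probability
\end{verbatim}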

I now use part \textbf{2.} to derandomize this probabilistic algorithm for the
NP search problem and obtain a deterministic polynomial time algorithm that
finds a prime number in $[N, 2N)$ for $N$ large enough.

\section*{Problem 6.4}

\paragraph{1.} Let us assume that $\Ext$ is a $(k, \eps)$ Rényi extractor, and
consider a $k$-source $X$. Since $H_{\infty}(X) \geq k$, in particular
$H_2(X)\geq k$. By the Rényi extraction property, $H_2(\Ext(X, U_d))\geq m-\eps$.
We want to use this to bound $\Delta(\Ext(X, U_d), U_m)$:
\begin{align*}
    \Delta(\Ext(X, U_d), U_m)^2 &= \frac{1}{4}\|\Ext(X, U_d) - U_m\|_1^2
    \leq \frac{2^m}{4}\|\Ext(X, U_d) - U_m\|_2^2
    = \frac{2^m}{4}\left(\|\Ext(X, U_d)\|_2^2 - 2^{-m}\right)\\
    &\leq \frac{2^m}{4}\left(2^{-(m-\eps)} - 2^{-m}\right)
    = \frac{2^{\eps}-1}{4}\leq \eps
\end{align*}
where the first inequality is Cauchy-Schwarz over the $2^m$ coordinates, the
equality on the first line expands the square and uses $\|U_m\|_2^2 = 2^{-m}$
and $\inprod{\Ext(X, U_d), U_m} = 2^{-m}$, and the last inequality uses
$2^{\eps}-1\leq \eps$ for $0\leq\eps\leq 1$ (for $\eps>1$ the conclusion below
is trivial since a statistical distance is always at most $1$). The
penultimate inequality used:
\begin{displaymath}
    \|\Ext(X, U_d)\|_2^2 = 2^{-H_2(\Ext(X, U_d))}\leq 2^{-(m-\eps)}
\end{displaymath}
because of the Rényi extraction property.

We obtained $\Delta(\Ext(X, U_d), U_m)\leq \sqrt{\eps}$ for an arbitrary
$k$-source $X$, which is exactly the definition of a $(k,\sqrt{\eps})$
extractor.
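
As a quick numerical sanity check of the chain of inequalities above (only an
illustration, not part of the solution), the following Python snippet builds
a distribution on $\{0,1\}^m$ close to uniform, sets $\eps = m - H_2$, and
verifies that the statistical distance to uniform is at most $\sqrt{\eps}$.
\begin{verbatim}
import numpy as np

m = 8
M = 2 ** m
rng = np.random.default_rng(0)

# A distribution close to uniform: uniform plus a small random perturbation.
p = np.full(M, 1.0 / M) + rng.uniform(0, 0.2 / M, size=M)
p /= p.sum()

H2 = -np.log2(np.sum(p ** 2))            # Renyi entropy of order 2
eps = m - H2                             # so that H_2(p) = m - eps
delta = 0.5 * np.abs(p - 1.0 / M).sum()  # statistical distance to uniform

print(f"eps = {eps:.4f}, Delta = {delta:.4f}, sqrt(eps) = {np.sqrt(eps):.4f}")
assert delta <= np.sqrt(eps)             # the bound proved above
\end{verbatim}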

\paragraph{2.} We know that there exists an almost pairwise independent family
that can be sampled with $d = O(m+\log\frac{n}{\eps})$ random bits. A proof
similar to that of Theorem 6.18 shows that, defining $\Ext(x, h) = (h, h(x))$,
if $X$ has Rényi entropy at least $k$, then:
\begin{displaymath}
    CP(H, H(X))\leq \frac{1}{D}\left(\frac{1}{K}+\frac{1+\eps}{M}\right)
\end{displaymath}
$\cdots$
\paragraph{3.}$\cdots$

\section*{Problem 7.7}

I will focus on obtaining a $(1/2-\eps)$ local correcting algorithm running in
time $\poly(m, q)$. This is sufficient by Lemma 7.39 and the systematic
encoding of Reed-Muller codes of Lemma 7.48.

Following a question asked by Alex in class and answered by Salil, I will follow
the same approach as Algorithm 7.45 but use a degree 2 curve instead of a line.
Given an input $x\in \F^m$ and an oracle $g:\F^m\to\F$ which disagrees with
some polynomial $p$ of degree $d$ on at most a $1/2-\eps$ fraction of the
points:
\begin{enumerate}
    \item choose $y = (y_1, y_2)\in (\F^m)^2$ uniformly at random and define
        the curve $C_y = \{x + t y_1 +t^2 y_2\,|\, t\in \F\}$. We will write
        $c_y(t) = x+ty_1+t^2y_2$, so that $C_y = \{c_y(t)\,|\,t\in \F\}$.
    \item query $g$ at the $q$ points $c_y(t)$, $t\in\F$, to obtain
        $g_y:\F\to\F$ defined by $g_y(t) = g(c_y(t))$
    \item run the $q$-ary Reed-Solomon list-decoding algorithm with parameter
        $\eta$ (the setting of the parameters is explained below) on $g_y$
        to obtain a polynomial $h$
    \item output $h(0)$
\end{enumerate}

First note that $p_y \eqdef p\circ c_y$ is a univariate polynomial over $\F$ of
degree at most $2d$, and that $g_y$ should be thought of as a corrupted version
of $p_y$. In step 3., we need to make sure that the parameter $\eta$ with which
we call the Reed-Solomon list-decoding algorithm is such that:
\begin{itemize}
    \item $\eta\leq 1-2\sqrt{\frac{2d}{q}}$ so that Reed-Solomon list-decoding
        from a fraction $\eta$ of errors is possible
    \item $\eta < \frac{1}{2}\left(1- \frac{2d}{q}\right)$, i.e.\ less than half
        the relative minimum distance of the Reed-Solomon code of degree $2d$,
        so that at most one codeword lies within relative distance $\eta$ of
        $g_y$, i.e.\ there is a unique polynomial in the decoded list
    \item with probability at most $\frac{1}{3}$ over the choice of $y$, $g_y$
        disagrees with $p_y$ on more than an $\eta$ fraction of the points of
        $\F$, so that the unique polynomial in the decoded list is $p_y$ with
        probability at least $\frac{2}{3}$ (and then $h(0) = p_y(0) = p(x)$,
        as desired).
\end{itemize}

To see that finding such an $\eta$ is possible we need to understand the
quantity $P\eqdef\P_y[d(g_y, p_y)\geq \eta]$, where $d(\cdot,\cdot)$ denotes
the relative Hamming distance. We write:
\begin{displaymath}
    P = \P_y\left[\frac{1}{q}\sum_{t\in \F} X_y^t\geq \eta\right]
\end{displaymath}
where we defined $X_y^t = I[g(c_y(t)) \neq p(c_y(t))]$ with $I[E]$ the
indicator variable of event $E$. Let $\delta\leq \frac{1}{2}-\eps$ denote the
fraction of points of $\F^m$ on which $g$ and $p$ disagree. For a fixed
$t\neq 0$ and a random choice of $y$, $c_y(t)$ is uniformly distributed in
$\F^m$, so that $X_y^t$ is a Bernoulli variable of parameter $\delta$. (For
$t=0$ we have $c_y(0) = x$, so $X_y^0$ is deterministic and contributes at most
$\frac{1}{q}$ to the fraction above.)

Furthermore, I claim that $(X^t_y)_{t\in\F\setminus\{0\}}$ is a pairwise
independent family. Indeed, for distinct nonzero $t$ and $u$, the map
$(y_1, y_2)\mapsto (c_y(t), c_y(u))$ is a bijection of $(\F^m)^2$: it is
affine, and its linear part acts coordinatewise via the matrix
$\left(\begin{smallmatrix} t & t^2\\ u & u^2\end{smallmatrix}\right)$, which is
invertible since its determinant $tu(u-t)$ is nonzero. Hence the pair
$(c_y(t), c_y(u))$ is uniformly distributed over $(\F^m)^2$ and:
\begin{displaymath}
    \P_y[X^t_y = 1\text{ and }X^u_y = 1]
    = \sum_{(z_1,z_2)\in (\F^m)^2} \P_y[c_y(t) = z_1\text{ and } c_y(u) = z_2]\,
    I[g(z_1)\neq p(z_1)]\,I[g(z_2)\neq p(z_2)] = \delta^2
\end{displaymath}
This is because $\P_y[c_y(t) = z_1\text{ and } c_y(u) = z_2] = \frac{1}{q^{2m}}$
for every pair $(z_1, z_2)$, and there are exactly $(\delta q^m)^2$ pairs
$(z_1, z_2)$ such that $I[g(z_1)\neq p(z_1)]I[g(z_2)\neq p(z_2)] = 1$. The same
argument applies to the other events associated with $X_y^t$ and $X_y^u$.

Hence, separating the contribution of $t=0$ (which is at most $\frac{1}{q}$)
and applying the tail bound for pairwise independent variables of Proposition
3.28 to the $q-1$ remaining variables:
\begin{displaymath}
    P\leq \P_y\left[\frac{1}{q-1}\sum_{t\in\F\setminus\{0\}} X_y^t\geq \eta
    - \frac{1}{q}\right]
    \leq \frac{1}{(q-1)\left(\eta - \frac{1}{q} - \delta\right)^2}
    \leq \frac{1}{(q-1)\left(\eta - \frac{1}{q} - (1/2-\eps)\right)^2}
\end{displaymath}
It remains to check that the three requirements on $\eta$ can be met
simultaneously. Take for instance $\eta = \frac{1}{2}-\frac{\eps}{2}$ and
$d = \gamma q$ with $\gamma \leq \min(\eps/4, 1/32)$. Then $\eta\leq
\frac{1}{2}\leq 1-2\sqrt{\frac{2d}{q}}$, $\eta < \frac{1}{2} - \gamma
= \frac{1}{2}\left(1-\frac{2d}{q}\right)$, and $\eta - \frac{1}{q}
- (1/2-\eps) = \frac{\eps}{2} - \frac{1}{q}\geq \frac{\eps}{4}$ as soon as
$q\geq \frac{4}{\eps}$, so that $P\leq \frac{16}{(q-1)\eps^2}\leq \frac{1}{3}$
for $q = \Omega(1/\eps^2)$, as required above.
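
For concreteness, here is a Python sketch of one run of the corrector
described above. It is only an illustration: \texttt{rs\_list\_decode} stands
for the assumed Reed-Solomon list-decoding algorithm and its interface here is
hypothetical; the field $\F$ is identified with $\{0,\dots,q-1\}$ for a prime
$q$.
\begin{verbatim}
import random


def locally_correct(x, g, q, d, eta, rs_list_decode):
    """One run of the degree-2 curve corrector (sketch).

    x              -- point of F^m at which to correct (tuple of ints mod q)
    g(z)           -- oracle for the corrupted polynomial, z a point of F^m
    q, d           -- field size and degree of the underlying polynomial p
    rs_list_decode -- assumed list decoder: given the values
                      (g_y(0), ..., g_y(q-1)), a degree bound and eta, returns
                      the list of polynomials of that degree agreeing with g_y
                      on at least a (1 - eta) fraction of F, each as a callable
    """
    m = len(x)
    y1 = tuple(random.randrange(q) for _ in range(m))
    y2 = tuple(random.randrange(q) for _ in range(m))

    def curve(t):  # c_y(t) = x + t*y1 + t^2*y2, coordinatewise mod q
        return tuple((x[i] + t * y1[i] + t * t * y2[i]) % q for i in range(m))

    g_y = [g(curve(t)) for t in range(q)]       # q oracle queries
    candidates = rs_list_decode(g_y, degree=2 * d, eta=eta)
    # With the parameters chosen above, the list contains a unique polynomial,
    # which with probability >= 2/3 is p restricted to the curve.
    h = candidates[0]
    return h(0)                                 # c_y(0) = x, so h(0) = p(x)
\end{verbatim}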

\section*{Problem 7.8}

\paragraph{1.}
Let us consider an $\cl{RP}$ algorithm $A$ for a language $L$
running in time $n^c$ on inputs of bit-length $n$, and denote by $A(x, r)$ the
output of $A$ on input $x$ and random bits $r$. Since $A$ runs in time $n^c$ we
can assume $|r| \leq n^c$. By definition of $\cl{RP}$, for fixed $x$, the
circuit $T_x(\cdot) = A(x, \cdot)$ has size at most $n^c$ (adjusting $c$ if
necessary to account for the conversion of the algorithm into a circuit) and
is such that:
\begin{itemize}
    \item if $x\in L$, $T_x$ accepts at least a $\frac{1}{2}$ fraction of the
        strings of length $n^c$
    \item if $x\notin L$, $T_x$ accepts none of the strings of length $n^c$
\end{itemize}
A deterministic algorithm for $L$ works as follows:
\begin{itemize}
    \item construct an $(n^c,\frac{1}{2})$ hitting set $H$ and evaluate $T_x$ on each
        element of $H$
    \item if $T_x$ accepts at least one element of $H$, output “$x\in L$”
    \item otherwise output “$x\notin L$”
\end{itemize}
The correctness of the algorithm follows directly from the definition of
a hitting set and the properties of $T_x$ described above.

The running time is $s(n^c)$ to construct $H$ and $n^c s(n^c)$ to evaluate
$T_x$ on all the elements of $H$ ($H$ has at most $s(n^c)$ elements since it is
constructed in time $s(n^c)$).
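
To summarize part \textbf{1.}, here is a small Python sketch of the resulting
deterministic decision procedure; the constructor
\texttt{construct\_hitting\_set} and the circuit \texttt{T\_x} are assumed as
black boxes (the names are mine, not from the problem statement).
\begin{verbatim}
def decide_L(x, c, T_x, construct_hitting_set):
    """Deterministic decision procedure for L from an RP algorithm (sketch).

    T_x(r)                -- the circuit r -> A(x, r) described above
    construct_hitting_set -- assumed (n^c, 1/2) hitting set constructor: it
                             returns, in time s(n^c), a set of strings of
                             length n^c hit by every size-n^c circuit that
                             accepts at least half of its inputs
    """
    n = len(x)
    H = construct_hitting_set(n ** c)
    # x is in L iff T_x accepts some element of the hitting set.
    return any(T_x(r) for r in H)
\end{verbatim}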

\paragraph{2.} The hitting set $H_m$ is simply $\{G_m(u)\,|\, u\in\{0,1\}^d\}$.
The time to construct $H_m$ is $2^d s$ as required. Now to see that it is
a hitting set:

Assume that it is not, that is, there is a circuit $T$ of size $t$ which accepts
a $>\eps$ fraction of the $2^m$ strings in $\{0,1\}^m$ but accepts none of
the strings in $H_m$. Then $\P[T(G_m(U_d)) = 1] = 0$ while $\P[T(U_m) = 1]
> \eps$, so that:
\begin{displaymath}
    |\P[T(G_m(U_d)) = 1] - \P[T(U_m) = 1]|> \eps
\end{displaymath}
that is, $T$ distinguishes $G_m(U_d)$ from $U_m$ with advantage greater than
$\eps$, which contradicts the assumption that $G_m$ is a pseudorandom
generator.

\paragraph{4.}

\paragraph{Definition}

I say that $G$ is a $(t, k,\eps)$ black-box construction of hitting set
generators if for every oracle $f:[n]\to\{0,1\}$, $G^f:[D]\to[M]$ is
a deterministic algorithm, and if there exists a randomized oracle algorithm
$Red$ running in time $t$ with the following property: for every
$f:[n]\to\{0,1\}$ and every $T:[M]\to\{0,1\}$ that accepts less than a $1-\eps$
fraction of the elements of $[M]$, if:
\begin{displaymath}
    \forall x\in [D], T(G^f(x)) = 1
\end{displaymath}
then there exists an advice string $z\in [K]$ (where $K = 2^k$) such that:
\begin{displaymath}
    \forall x\in[n],\quad \P[Red^T(x, z) = f(x)] \geq \frac{2}{3}
\end{displaymath}

\paragraph{Comment} Note that, compared with the usual definition of a hitting
set generator, I have reversed the role of $T$ (i.e.\ taken the negation of
the algorithm $T$). This is to make the connection with dispersers clearer:
the definition then simply says that $T$ should reject at least one element in
the image of $G^f$, and if it does not, then we can “compute” the hard
function $f$.

\paragraph{Dispersers} I will show the connection with dispersers via the list
decoding framework. In this framework, the definition of a disperser becomes
the following: $\Gamma:[N]\times [D]\to [M]$ is a $(k, \eps)$
disperser iff:
\begin{displaymath}
    \forall T\subseteq [M],\; |T|< (1-\eps)M\Rightarrow |\List_{\Gamma}(T, 1)|<K
\end{displaymath}

\paragraph{Equivalence} For a black-box construction $G$, define $\Gamma(f, y)
\eqdef G^f(y)$, where on the left-hand side we see $f$ as a string of $n$ bits
(i.e.\ $N = 2^n$). Then $G$ is a $(\infty, k,\eps)$ black-box construction iff
$\Gamma$ is a $(k, \eps)$ disperser.

\begin{proof}

    $\Rightarrow$ Suppose $G$ is a $(\infty, k,\eps)$ black-box
    construction for hitting set generators. Consider $f\in\List_{\Gamma}(T,
    1)$ for some $T\subseteq[M]$ of size smaller than $(1-\eps)M$. Interpreting
    the set $T$ as the set of inputs accepted by an algorithm $T$,
    $f\in\List_{\Gamma}(T, 1)$ iff:
    \begin{displaymath}
        \forall x\in [D], \Gamma(f, x)\in T
    \end{displaymath}
    or in other words $T(G^f(x)) = 1$ for all $x\in[D]$. By the black-box
    definition, this implies the existence of $z\in [K]$ such that
    $Red^T(x, z) = f(x)$ with probability at least $\frac{2}{3}$ for every
    $x$; in particular $f$ is determined by $z$ (e.g.\ $f(x)$ is the majority
    output of $Red^T(x, z)$). Hence the number of different possible $f$'s is
    bounded by $K$, which shows that $\Gamma$ satisfies the list decoding
    property of a disperser.

    $\Leftarrow$ Suppose that $\Gamma$ is a $(k,\eps)$ disperser. Then for any
    $T$ such that $|T|<(1-\eps)M$ we have $|\List_{\Gamma}(T, 1)|< K$. Denoting
    by $f_1,\dots,f_{K'}$ (with $K'< K$) the elements of the list, I define
    $Red^T(x, z) = f_z(x)$ for $z\in [K']$. Then if $G^f(x) = \Gamma(f, x)\in
    T$ for all $x$, it means that $f$ is in the list, in which case the advice
    is simply the index of $f$ in the list and $Red^T(\cdot, z)$ computes $f$
    exactly, showing that $G$ is a $(\infty, k, \eps)$ black-box construction.
\end{proof}
\end{document}