\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{algorithm}
\usepackage{algpseudocode}

\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}
 
\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}
 
\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}

\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\cl}[1]{\text{\textbf{#1}}}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\algrenewcommand\algorithmicensure{\textbf{Output:}}
\algrenewcommand\algorithmicrequire{\textbf{Input:}}

\author{Thibaut Horel}
\title{CS 225 Problem Set 1 -- Solutions}

\begin{document}
\maketitle

Collaborator: Debmalya Mandal

\section*{Problem 2.3}

\paragraph{$\cl{ZPP}\subseteq \cl{RP}\cap\cl{co-RP}$:} Let us consider $L$ in
$\cl{ZPP}$ and an algorithm $A$ which decides $L$ in expected running time
$t(n) = O(n^c)$ for some $c\geq 1$. We construct the following algorithm $A'$:
for input $x$ s.t. $|x| = n$, $A'$ simulates the execution of algorithm $A$
running on input $x$ for at most $2\times t(n)$ steps. If $A$ terminated during
this simulation, $A'$ outputs whatever answer $A$ computed; otherwise it
outputs \emph{Reject}.
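For concreteness, here is the simulation written in pseudocode (a sketch of the
procedure just described, using the clock bound $2\times t(n)$ chosen above):
\begin{algorithm}[h]
    \caption{Algorithm $A'$ (sketch of the truncated simulation)}
    \begin{algorithmic}[1]
        \Require $x$ with $|x| = n$
        \Ensure \emph{Accept} or \emph{Reject}
        \State simulate $A$ on input $x$ for at most $2\times t(n)$ steps
        \If{$A$ halted within the allotted steps}
            \State \Return the answer computed by $A$
        \Else
            \State \Return \emph{Reject}
        \EndIf
    \end{algorithmic}
\end{algorithm}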
\begin{itemize}
    \item If $x\notin L$, it is clear that $A'(x)$ rejects: either
$A$ computed an answer within $2\times t(n)$ steps, in which case, by
definition of $\cl{ZPP}$, the answer is correct, i.e.\ $A$ rejects; or $A$
failed to compute an answer within $2\times t(n)$ steps, in which case $A'$
rejects.
    \item If $x\in L$, $A'$ only rejects when $A$ fails to compute an answer
        within $2\times t(n)$ steps. Since the expected running time of $A$ is
        $t(n)$, Markov's inequality shows that this happens with probability
        at most $\frac{1}{2}$.
\end{itemize}
Since $A'$ obviously runs in polynomial time, this proves that $L$ is in
$\cl{RP}$. The exact same reasoning shows that $L$ is in $\cl{co-RP}$ by
changing the definition of $A'$ to accept when $A$ fails to compute an answer
within $2\times t(n)$ steps.

\paragraph{$\cl{RP}\cap\cl{co-RP}\subseteq \cl{ZPP}$:} Let us consider $L$
belonging to both $\cl{RP}$ and $\cl{co-RP}$. Hence we have algorithms $A$ and
$B$ running in polynomial time such that for $x\in L$, $A$ accepts with
probability at least $\frac{1}{2}$ and $B$ always accepts; for $x\notin L$,
$A$ always rejects and $B$ rejects with probability at least $\frac{1}{2}$. We
define algorithm $C$ as follows: for input $x$, compute $A(x)$. If $A$ accepts,
then $C$ accepts; otherwise, compute $B(x)$. If $B$ rejects, then $C$ rejects;
otherwise, repeat.
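Written in pseudocode, the loop looks as follows (a sketch of the procedure
just described):
\begin{algorithm}[h]
    \caption{Algorithm $C$ (sketch)}
    \begin{algorithmic}[1]
        \Require input $x$
        \Ensure \emph{Accept} or \emph{Reject}
        \Loop
            \If{$A(x)$ accepts}
                \State \Return \emph{Accept}
            \EndIf
            \If{$B(x)$ rejects}
                \State \Return \emph{Reject}
            \EndIf
        \EndLoop
    \end{algorithmic}
\end{algorithm}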

By definition of $A$ and $B$, $C$ is always correct ($A$ is always correct when
accepting, and $B$ is always correct when rejecting). We now need to control
the running time of $C$. For $x\in L$, $A$ rejects with probability at most
$\frac{1}{2}$, and then $B$ accepts, so we have to repeat. For $x\notin L$, $A$
rejects, and $B$ accepts with probability at most $\frac{1}{2}$, in which case
we have to repeat. In both cases $C$ repeats with probability at most
$\frac{1}{2}$. Hence we can upper bound the number of repetitions by a random
variable following a geometric distribution with parameter $\frac{1}{2}$. The
expected number of repetitions is at most $2$. Hence the expected running time
of $C$ is at most $2 (t_A + t_B)$ where $t_A$ and $t_B$ are the running times
of $A$ and $B$ respectively. Since $t_A$ and $t_B$ are polynomial in $n=|x|$,
so is the expected running time of $C$.
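In more detail, writing $R$ for the number of iterations of $C$ (notation
introduced here), each iteration repeats with probability at most
$\frac{1}{2}$, so $\prob{R\geq k}\leq (1/2)^{k-1}$ for all $k\geq 1$ and
\begin{displaymath}
    \ex{R} = \sum_{k\geq 1} \prob{R\geq k}
    \leq \sum_{k\geq 1} \left(\frac{1}{2}\right)^{k-1} = 2.
\end{displaymath}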

\section*{Problem 2.5}

\paragraph{1.} Since $f$ has degree $D$, it has at most $D$ irreducible
factors, and in particular at most $D$ irreducible factors of degree at most
$d$. A sufficient condition for $f(x) \mod g(x)$ to be non-zero is that $g(x)$
is an irreducible polynomial distinct from the at most $D$ irreducible factors
of $f$. Hence, if $g$ is chosen uniformly at random among the polynomials of
degree at most $d$:
\begin{displaymath}
    p\eqdef \prob{f(x)\neq 0 \mod g(x)} \geq 
\frac{i_d - D}{|F|^{d+1}}
\end{displaymath}
where $i_d$ is the number of
irreducible polynomials of degree at most $d$ and $|F|^{d+1}$ is the number of
polynomials of degree at most $d$.

But we know that $i_d \geq \frac{|F|^{d+1}}{2d}$. Hence $p\geq
\frac{1}{2d}- \frac{D}{|F|^{d+1}}$. Since $|F|\geq 2$ and $d = c\log D$, we
have: $p\geq \frac{1}{2c\log D} - \frac{1}{2}\frac{1}{D^{c-1}}$.
We can now lower bound $p\log D\geq \frac{1}{2c}- \frac{\log D}{2 D^{c-1}}\geq
\frac{1}{2c} -\frac{1}{2D^{c-2}}$, where the second inequality used $\log D\leq
D$. Since we can assume $D\geq 2$ (otherwise PIT is trivial), $p\log D \geq
\frac{1}{2c}- \frac{1}{2\cdot 2^{c-2}}$. For $c = 5$, we have $p\log D \geq
\frac{1}{10}-\frac{1}{16} = \frac{3}{80}$, which is positive. So $c=5$ and $c'
= \frac{3}{80}$ are as required, i.e.\ $p\geq \frac{c'}{\log D}$.

\paragraph{}
This suggests the following algorithm for univariate identity testing: pick $g$
uniformly at random among polynomials of degree at most $d= c\log D$ and
compute $f(x)\mod g(x)$. If the answer is $0$ accept, otherwise reject.
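In pseudocode (a sketch; $c$ is the constant from above):
\begin{algorithm}[h]
    \caption{Univariate identity testing (sketch)}
    \begin{algorithmic}[1]
        \Require a circuit computing $f\in F[x]$ of degree at most $D$
        \Ensure \emph{Accept} or \emph{Reject}
        \State pick $g$ uniformly at random among the polynomials of degree at
        most $d = c\log D$
        \State compute $h = f(x) \mod g(x)$ (gate by gate, as explained below)
        \If{$h = 0$}
            \State \Return \emph{Accept}
        \Else
            \State \Return \emph{Reject}
        \EndIf
    \end{algorithmic}
\end{algorithm}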

The computation $\mod g(x)$ can be performed by taking the reduction $\mod
g(x)$ at each gate of the circuit representing $f$. The time to take this
reduction is $O(\text{poly}(\log D))$, so the overall computation time is
polynomial in the size of the circuit representing $f$.
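Concretely, if $h_u$ denotes the polynomial of degree less than $d$ computed at
gate $u$ (notation introduced here for illustration), then for an addition gate
$w$ with children $u,v$ and a multiplication gate $w'$ with children $u',v'$:
\begin{displaymath}
    h_w = (h_u + h_v) \mod g(x), \qquad
    h_{w'} = (h_{u'} \cdot h_{v'}) \mod g(x),
\end{displaymath}
so every intermediate result is a polynomial of degree less than $d = c\log D$.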

It is clear that if $f(x)$ is the zero polynomial the algorithm is correct with
probability one. If $f(x)$ is non-zero, then the algorithm rejects with
probability at least $\frac{c'}{\log D}$. We can get a constant probability of
rejection by repeating the algorithm $O(\log D)$ times, which doesn't break the
polynomial running time.

\paragraph{2.} We reduce the multivariate case to the univariate case using the
following reduction: let $f(x_1,\dots,x_n)$ be the multivariate polynomial of
degree $D$, introduce a symbolic variable $x$ and substitute $x_i$ with
$x^{(D+1)^{i-1}}$ in $f$. Since the map $(i_1,\ldots,i_n)\mapsto i_1
+ i_2 (D+1) + \dots + i_n (D+1)^{n-1}$ is injective on exponent vectors with
$0\leq i_j\leq D$ (decomposition in base $D+1$), each monomial in the
multivariate polynomial becomes a distinct monomial in the resulting univariate
polynomial $\tilde{f}$. Hence $\tilde{f}$ is non-zero iff $f$ is non-zero.
Furthermore, $\tilde{f}$ has degree less than $(D+1)^n$.
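For instance (a toy example, not part of the problem), with $n = 2$ and
$D = 2$ the substitution maps $x_1\mapsto x$ and $x_2\mapsto x^{3}$, so
\begin{displaymath}
    f(x_1,x_2) = x_1 x_2 - x_1^2
    \quad\longmapsto\quad
    \tilde f(x) = x^{1+3} - x^{2} = x^4 - x^2,
\end{displaymath}
and distinct monomials of $f$ indeed map to distinct powers of $x$.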

We then apply the algorithm from question 1.: we pick a random (univariate)
polynomial $g$ of degree at most $c\log\bigl((D+1)^n\bigr) = cn\log (D+1)
= O(n\log D)$, and compute $\tilde{f}(x) \mod g(x)$. This computation takes
time $O(\text{poly} (n\log D))$ (polynomial in the size of the circuit
representing $f$), is always correct when $f$ is zero, and is correct with
probability at least $\frac{c'}{n\log (D+1)}$ when $f$ is non-zero. Repeating
the algorithm $O(n\log D)$ times yields a constant probability of rejection
when $f$ is non-zero; the resulting algorithm still has polynomial running
time.

\section*{Problem 2.6}

\paragraph{1.} We can write $(x+1)^n = x^n + 1 +\sum_{k=1}^{n-1}{n \choose
k}x^k$. So we need to prove that $n$ is prime iff ${n\choose k} \equiv 0
\pmod n$ for all $1\leq k\leq n-1$. We use: ${n\choose k}
= \frac{n(n-1)\dots(n-k+1)}{k!}$.

If $n$ is prime then $n$ is a prime factor of the numerator of ${n\choose k}$,
but does not appear in the prime decomposition of $k!$ since $k<n$. As
${n\choose k}$ is an integer, $n$ divides ${n\choose k}$.

Conversely, assume that $n$ divides ${n\choose k}$ for all $1\leq k\leq n-1$
and that $n$ is not prime. Let $p$ be the smallest prime factor of $n$ and let
us write $n = pn'$. Then we have ${n\choose p}
= \frac{n'(n-1)\ldots(n-p+1)}{(p-1)!}$. Since $n$ divides ${n\choose p}$, $n$
divides the numerator $n'(n-1)\ldots(n-p+1)$, and since $n = pn'$, this implies
that $p$ divides $(n-1)\ldots (n-p+1)$. As $p$ is prime, it must divide one of
the factors. This is a contradiction: since $n$ is a multiple of $p$, none of
the numbers $(n-1)$,\ldots,$(n-p+1)$ are divisible by $p$.
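As a quick sanity check (a small example, not required by the problem), take
$n = 4$: ${4\choose 2} = 6$ is not divisible by $4$, and indeed
\begin{displaymath}
    (x+1)^4 - x^4 - 1 = 4x^3 + 6x^2 + 4x \equiv 2x^2 \pmod 4,
\end{displaymath}
which is non-zero in $\mathbb{Z}_4$, consistent with $4$ being composite.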

\paragraph{2.} We can apply the algorithm from the previous problem: test
whether or not $(x+1)^n - x^n - 1$ is the zero polynomial in $\mathbb{Z}_n$.
When $n$ is prime, $\mathbb{Z}_n$ is a field, and since $(x+1)^n - x^n -1$ is
the zero polynomial in this case, the algorithm from Problem 2.5 accepts with
probability one.
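A sketch of the resulting test follows (the powers are computed by repeated
squaring, keeping all polynomial arithmetic modulo $g$ and all coefficients in
$\mathbb{Z}_n$; as discussed below, the analysis for composite $n$ is
incomplete):
\begin{algorithm}[h]
    \caption{Primality test via identity testing (sketch)}
    \begin{algorithmic}[1]
        \Require an integer $n\geq 2$
        \Ensure \emph{Prime} or \emph{Composite}
        \State pick $g$ uniformly at random among the polynomials of degree at
        most $c\log n$ with coefficients in $\mathbb{Z}_n$
        \State compute $h = (x+1)^n - x^n - 1 \mod g(x)$ by repeated squaring,
        with coefficients in $\mathbb{Z}_n$
        \If{$h = 0$}
            \State \Return \emph{Prime}
        \Else
            \State \Return \emph{Composite}
        \EndIf
    \end{algorithmic}
\end{algorithm}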

When $n$ is composite, $\mathbb{Z}_n$ is no longer a field and the analysis
from Problem 2.5 does not apply. I tried to carry out the analysis in
$\mathbb{Z}_p$ for $p$ a prime factor of $n$, as suggested by Thomas, but even
though $(x+1)^n - x^n - 1$ is non-zero in $\mathbb{Z}_n$, it could be zero in
$\mathbb{Z}_p$. It is not clear what happens in that case, and I'm running out
of time\dots

\section*{Problem 2.7}

\paragraph{1.} We have, for $r\in[0,1/2]$:
\begin{align*}
    \ex{e^{rX}} &= \ex{\prod_{i=1}^t e^{rX_i}} = \prod_{i=1}^t\ex{e^{rX_i}}
                \leq \prod_{i=1}^t \left(1 + r\ex{X_i} + r^2\ex{X_i^2}\right)\\
                &\leq \prod_{i=1}^t \left(1+ r\ex{X_i} + r^2\right) \leq \prod_{i=1}^t
                e^{r\ex{X_i} + r^2} = e^{tr^2 + r\ex{X}}
\end{align*}
where the second equality used the independence of the $X_i$s, the first
inequality used half of the hint and linearity of expectation, the second
inequality used that $X_i^2\leq 1$, and the third inequality used the second
half of the hint.

\paragraph{2.} We have $\prob{X\geq \ex{X} +\eps t} = \prob{e^{rX}\geq
e^{r\ex{X}+r\eps t }}$ for all $r > 0$ since $\exp$ is increasing. Applying
Markov's inequality first, followed by the result of question \textbf{1.}:
\begin{displaymath}
\prob{X\geq \ex{X} +\eps t} \leq \frac{\ex{e^{rX}}}{e^{r\ex{X} + r\eps t}}
\leq e^{tr^2-r\eps t}
\end{displaymath}
Then picking $r = \frac{\eps}{2}$ (which is in $[0,\frac{1}{2}]$ for $\eps\leq
1$) gives the desired Chernoff bound $e^{-\eps^2 t/4}$.
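Indeed, the exponent $tr^2 - r\eps t$ is minimized at $r = \frac{\eps}{2}$:
\begin{displaymath}
    t\left(\frac{\eps}{2}\right)^2 - \frac{\eps}{2}\,\eps t
    = \frac{\eps^2 t}{4} - \frac{\eps^2 t}{2} = -\frac{\eps^2 t}{4},
\end{displaymath}
which matches the bound $e^{-\eps^2 t/4}$ used below.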

For the other direction of the Chernoff bound, we can simply apply the bound we
just obtained to the variables $1-X_1,\ldots,1-X_t$. We obtain:
\begin{displaymath}
    \prob{t-X\geq t-\ex{X} +\eps t}\leq e^{-\eps^2t/4}
\end{displaymath}
The probability on the left-hand side is exactly $\prob{X\leq \ex{X}-\eps t}$.

\paragraph{3.} I used independence of the $X_i$s in question 1.: if the $X_i$s
are independent, so are the $e^{rX_i}$s. This allows me to write the
expectation of their product as the product of their expectations.

\section*{Problem 2.8}

Let us consider a deterministic algorithm for PIT. Since it is deterministic,
the queries it makes to the black-box depend only on the answers to the
previous queries; in particular, when every answer is zero the sequence of
queries is fixed. Let us write it $q_1,\dots, q_p$. When $f$ is the zero
polynomial, $[f(q_1),\dots, f(q_p)]$ is the zero vector, and the algorithm
returns \emph{$f$ is zero} in this case.

Assuming that $p< 2^n$, I am going to construct a \emph{non-zero} multivariate
polynomial $g$ of degree $n$ such that $[g(q_1),\dots, g(q_p)]$ is the zero
vector. Since the algorithm is deterministic, it has to return \emph{$g$ is
zero} because it receives the exact same information as when the polynomial is
the zero polynomial. This contradicts the correctness of the algorithm.

To construct $g$, consider the polynomial:
\begin{displaymath}
    g(x_1,\dots, x_n) = \sum_{(i_1,\dots, i_n)\in \{0,1\}^n} c_{i_1,\dots i_n} x_1^{i_1}\dots
    x_n^{i_n}
\end{displaymath}
with $2^n$ monomials whose coefficients $c_{i_1,\dots, i_n}$ are yet to be
determined. Let us now consider the system of equations $g(q_1) = 0,\dots,
g(q_p) = 0$. This is a linear system in the $2^n$ coefficients of $g$ (which
are elements of the field $\mathbb{F}$). Since this system has $p<
2^n$ equations, standard linear algebra says that the system is
degenerate\footnote{A linear map from $\mathbb{F}^{m}$ to $\mathbb{F}^{m'}$
always has a non-trivial kernel when $m> m'$.}, that is, it has a non-zero
solution. But a non-zero solution exactly defines the coefficients of such
a $g$. Since the solution is non-zero, at least one coefficient of $g$ is
non-zero, hence $g$ is a non-zero polynomial. However, by construction,
$g(q_1) = \dots = g(q_p) = 0$, and we obtain the contradiction mentioned above.
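Explicitly, writing $q_j = (q_{j,1},\dots,q_{j,n})$ for the coordinates of the
$j$-th query (notation introduced here for illustration), the $j$-th equation
of the system reads
\begin{displaymath}
    \sum_{(i_1,\dots, i_n)\in \{0,1\}^n} c_{i_1,\dots, i_n}\,
    q_{j,1}^{i_1}\dots q_{j,n}^{i_n} = 0,
\end{displaymath}
a homogeneous linear equation in the unknowns $c_{i_1,\dots, i_n}$; with $p$
such equations and $2^n > p$ unknowns, a non-trivial solution exists.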


\end{document}