\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{algorithm}
\usepackage{algpseudocode}
\newenvironment{enumerate*}%
{\vspace{-2ex} \begin{enumerate} %
\setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
{\end{enumerate}}
\newenvironment{itemize*}%
{\vspace{-2ex} \begin{itemize} %
\setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
{\end{itemize}}
\newenvironment{description*}%
{\vspace{-2ex} \begin{description} %
\setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
{\end{description}}
\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\cl}[1]{\text{\textbf{#1}}}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\algrenewcommand\algorithmicensure{\textbf{Output:}}
\algrenewcommand\algorithmicrequire{\textbf{Input:}}
\author{Thibaut Horel}
\title{CS 225 Problem Set 1 -- Solutions}
\begin{document}
\maketitle
Collaborator: Debmalya Mandal

\section*{Problem 2.3}
\paragraph{$\cl{ZPP}\subseteq \cl{RP}\cap\cl{co-RP}$:} Let us consider $L$ in $\cl{ZPP}$ and an algorithm $A$ which decides $L$ in expected running time $t(n) = O(n^c)$ for some $c\geq 1$. We construct the following algorithm $A'$: on input $x$ with $|x| = n$, $A'$ simulates the execution of algorithm $A$ on input $x$ for at most $2\times t(n)$ steps. If $A$ terminates during this simulation, $A'$ outputs whichever answer $A$ computed; otherwise it outputs \emph{Reject}.
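Concretely, the truncation step can be sketched as follows. The toy Las Vegas decider below (membership in the even numbers, with each simulated step finishing the computation with probability $\frac{1}{2}$) is purely illustrative; only the budgeted simulation mirrors $A'$.

```python
import random

def las_vegas_decider(x, rng):
    """Toy Las Vegas algorithm: always answers correctly (here: is x even?),
    but its running time is random -- each step finishes the computation with
    probability 1/2, so the expected number of steps is t = 2."""
    steps = 1
    while rng.random() >= 0.5:
        steps += 1
    return (x % 2 == 0), steps

def truncated(x, rng, expected_steps=2):
    """A': simulate the Las Vegas algorithm for at most 2 * t steps and
    reject on timeout, turning zero-sided error into one-sided (RP) error."""
    budget = 2 * expected_steps
    answer, steps = las_vegas_decider(x, rng)
    return answer if steps <= budget else False  # timeout => Reject
```

On inputs outside the language, `truncated` never accepts; on inputs in the language it accepts whenever the simulation finishes within budget, which Markov's inequality guarantees happens with probability at least $\frac{1}{2}$ (here exactly $1-2^{-4}$, since the number of steps is geometric).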
\begin{itemize}
\item If $x\notin L$, it is clear that $A'(x)$ rejects: either $A$ computed an answer in less than $2\times t(n)$ steps, in which case by definition of $\cl{ZPP}$ the answer is correct, i.e., $A$ rejects; or $A$ failed to compute an answer in $2\times t(n)$ steps, in which case $A'$ rejects.
\item If $x\in L$, $A'$ only rejects when $A$ fails to compute an answer in less than $2\times t(n)$ steps. Since the running time of $A$ is a non-negative random variable with expectation $t(n)$, by Markov's inequality this happens with probability at most $\frac{1}{2}$.
\end{itemize}
Since $A'$ obviously runs in polynomial time, this proves that $L$ is in $\cl{RP}$. The exact same reasoning shows that $L$ is in $\cl{co-RP}$ by changing the definition of $A'$ to accept when $A$ fails to compute an answer in less than $2\times t(n)$ steps.

\paragraph{$\cl{RP}\cap\cl{co-RP}\subseteq \cl{ZPP}$:} Let us consider $L$ belonging to both $\cl{RP}$ and $\cl{co-RP}$. Hence we have algorithms $A$ and $B$ running in polynomial time such that for $x\in L$, $A$ accepts with probability at least $\frac{1}{2}$ and $B$ always accepts; for $x\notin L$, $A$ always rejects and $B$ rejects with probability at least $\frac{1}{2}$. We define algorithm $C$ as follows: on input $x$, compute $A(x)$. If $A$ accepts, then $C$ accepts; otherwise, compute $B(x)$. If $B$ rejects then $C$ rejects. Otherwise repeat. By definition of $A$ and $B$, $C$ is always correct ($A$ is always correct when accepting, and $B$ is always correct when rejecting). We now need to control the running time of $C$. For $x\in L$, $C$ repeats only when $A$ rejects (and then $B$ accepts), which happens with probability at most $\frac{1}{2}$. For $x\notin L$, $C$ repeats only when $A$ rejects and $B$ accepts, which happens with probability at most $\frac{1}{2}$. In both cases $C$ repeats with probability at most $\frac{1}{2}$, hence we can upper bound the number of repetitions by a random variable following a geometric distribution with parameter $\frac{1}{2}$. The expected number of repetitions is $2$.
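A minimal sketch of this composition, with toy one-sided testers standing in for $A$ and $B$ (the language, the even numbers, and the testers' internals are illustrative; only their promised error behaviour matters):

```python
import random

def in_L(x):
    """Toy language L: the even numbers."""
    return x % 2 == 0

def A(x, rng):
    """RP-style tester: if x in L, accepts with probability 1/2;
    if x not in L, always rejects. An accept is therefore always correct."""
    return in_L(x) and rng.random() < 0.5

def B(x, rng):
    """co-RP-style tester: if x not in L, rejects with probability 1/2;
    if x in L, always accepts. A reject is therefore always correct."""
    return in_L(x) or rng.random() < 0.5

def C(x, rng):
    """Alternate the two testers until one gives a certified answer."""
    while True:
        if A(x, rng):       # A never accepts wrongly
            return True
        if not B(x, rng):   # B never rejects wrongly
            return False
```

Each iteration runs both testers once and terminates with probability at least $\frac{1}{2}$, so the number of iterations is dominated by the geometric variable above.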
Hence the expected running time of $C$ is at most $2 (t_A + t_B)$ where $t_A$ and $t_B$ are the running times of $A$ and $B$ respectively. Since $t_A$ and $t_B$ are polynomial in $n=|x|$, so is the expected running time of $C$.

\section*{Problem 2.5}
\paragraph{1.} $f$ has at most $D$ irreducible factors of degree at most $d$. A sufficient condition for $f(x) \mod g(x)$ to be non-zero is that $g(x)$ is an irreducible polynomial distinct from the at most $D$ irreducible factors of $f$. Hence, if $g$ is chosen uniformly at random among the polynomials of degree at most $d$:
\begin{displaymath}
p\eqdef \prob{f(x)\neq 0 \mod g(x)} \geq \frac{i_d - D}{|F|^{d+1}}
\end{displaymath}
where $i_d$ is the number of irreducible polynomials of degree at most $d$ and $|F|^{d+1}$ is the number of polynomials of degree at most $d$. But we know that $i_d \geq \frac{|F|^{d+1}}{2d}$. Hence $p\geq \frac{1}{2d}- \frac{D}{|F|^{d+1}}$. Since $|F|\geq 2$ and $d = c\log D$, we have $p\geq \frac{1}{2c\log D} - \frac{1}{2}\frac{1}{D^{c-1}}$. We can now lower bound $p\log D\geq \frac{1}{2c}- \frac{\log D}{2 D^{c-1}}\geq \frac{1}{2c} -\frac{1}{2D^{c-2}}$, where the second inequality used $\log D\leq D$. Since we can assume $D\geq 2$ (otherwise PIT is trivial), $p\log D \geq \frac{1}{2c}- \frac{1}{2\cdot 2^{c-2}}$. For $c = 5$, we have $p\log D \geq \frac{1}{10}-\frac{1}{16} = \frac{3}{80}$, which is positive. So $c=5$ and $c' = \frac{3}{80}$ are as required.
\paragraph{} This suggests the following algorithm for univariate identity testing: pick $g$ uniformly at random among polynomials of degree at most $d= c\log D$ and compute $f(x)\mod g(x)$. If the answer is $0$, accept; otherwise reject. The computation $\mod g(x)$ can be performed by taking the reduction $\mod g(x)$ at each gate of the circuit representing $f$. The time to take this reduction is $\text{poly}(\log D)$, so the overall computation time is polynomial in the size of the circuit representing $f$.
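The reduction step can be sketched as follows. The dense coefficient-list representation and the choice $F = \mathbb{F}_2$ are illustrative only (the algorithm above reduces gate by gate instead of expanding $f$), and the helper names are ours.

```python
import random

def trim(f):
    """Drop trailing zero coefficients (lists are lowest-degree first)."""
    while f and f[-1] == 0:
        f.pop()
    return f

def poly_mod(f, g, p=2):
    """Remainder of f divided by g over F_p, by repeatedly cancelling the
    leading term of f with a shifted multiple of g."""
    f = trim([c % p for c in f])
    g = trim([c % p for c in g])
    inv_lead = pow(g[-1], -1, p)  # inverse of g's leading coefficient mod p
    while len(f) >= len(g):
        shift, q = len(f) - len(g), f[-1] * inv_lead % p
        for i, c in enumerate(g):
            f[shift + i] = (f[shift + i] - q * c) % p
        trim(f)                    # the leading term of f is now zero
    return f

def random_modulus(d, p=2, rng=random):
    """Uniformly random non-zero polynomial of degree at most d."""
    while True:
        g = [rng.randrange(p) for _ in range(d + 1)]
        if any(g):
            return g
```

For instance, over $\mathbb{F}_2$, $g = x^2+x+1$ divides $f = x^4+x$, so `poly_mod([0, 1, 0, 0, 1], [1, 1, 1])` returns the empty list (the zero remainder), and the test would wrongly accept this particular $g$; the analysis above bounds how often a random $g$ can be this unlucky.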
It is clear that if $f(x)$ is the zero polynomial the algorithm is correct with probability one. If $f(x)$ is non-zero, then the algorithm rejects with probability at least $\frac{c'}{\log D}$. We can get a constant probability of rejection by repeating the algorithm $O(\log D)$ times, which doesn't break the polynomial running time.
\paragraph{2.} We reduce the multivariate case to the univariate case using the following reduction: let $f(x_1,\dots,x_n)$ be the multivariate polynomial of degree $D$, introduce a symbolic variable $x$ and substitute $x_i$ with $x^{(D+1)^{i-1}}$ in $f$. Since the map $(i_1,\ldots,i_n)\mapsto i_1 + i_2 (D+1) + \dots + i_n (D+1)^{n-1}$ is injective on $\{0,\dots,D\}^n$ (representation in base $D+1$), each monomial of the multivariate polynomial becomes a distinct monomial in the resulting univariate polynomial $\tilde{f}$. Hence $\tilde{f}$ is non-zero iff $f$ is non-zero. Furthermore, $\tilde{f}$ has degree $O(D^n)$. We then apply the algorithm from question 1.: we pick a random (univariate) polynomial $g$ of degree at most $c\log(D^n) = cn\log D$, and compute $\tilde{f}(x) \mod g(x)$. This computation takes time $\text{poly}(n\log D)$ (polynomial in the size of the circuit representing $f$), is always correct when $f$ is zero, and is correct with probability at least $\frac{c'}{n\log D}$ when $f$ is non-zero. Repeating the algorithm $O(n\log D)$ times yields a constant probability of rejection when $f$ is non-zero, and the resulting algorithm still has polynomial running time.

\section*{Problem 2.6}
\paragraph{1.} We can write $(x+1)^n = x^n + 1 +\sum_{k=1}^{n-1}{n \choose k}x^k$. So we need to prove that $n$ is prime iff ${n\choose k} \equiv 0 \pmod n$ for all $1\leq k\leq n-1$. We use ${n\choose k} = \frac{n(n-1)\dots(n-k+1)}{k!}$. If $n$ is prime, then $n$ is a prime factor of the numerator of ${n\choose k}$ but does not appear in the prime decomposition of $k!$ (all its prime factors are smaller than $n$). Hence, $n$ divides ${n\choose k}$.
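The characterization being proved can be sanity-checked numerically for small $n$ (an illustrative sketch; the helper names are ours):

```python
from math import comb

def binomials_vanish_mod(n):
    """True iff C(n, k) = 0 (mod n) for all 1 <= k <= n - 1, i.e. iff
    (x+1)^n = x^n + 1 holds coefficient by coefficient modulo n."""
    return all(comb(n, k) % n == 0 for k in range(1, n))

def is_prime(n):
    """Trial division, sufficient for a small-range check."""
    return n >= 2 and all(n % d for d in range(2, int(n ** 0.5) + 1))
```

For example, $n=4$ fails because ${4\choose 2} = 6 \not\equiv 0 \pmod 4$, while every prime $n$ passes.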
Conversely, suppose that $n$ divides ${n\choose k}$ for all $1\leq k\leq n-1$, and assume for contradiction that $n$ is not prime. Let $p$ be the smallest prime factor of $n$ and let us write $n = pn'$. Then we have ${n\choose p} = \frac{n'(n-1)\ldots(n-p+1)}{(p-1)!}$. Since $n$ divides ${n\choose p}$, $n$ divides the numerator $n'(n-1)\ldots(n-p+1)$. This implies that $p$ divides $(n-1)\ldots (n-p+1)$. This is a contradiction: since $n$ is a multiple of $p$, none of the numbers $(n-1),\ldots,(n-p+1)$ is divisible by $p$.
\paragraph{2.} We can apply the algorithm from the previous problem: test whether or not $(x+1)^n - x^n - 1$ is the zero polynomial over $\mathbb{Z}_n$. When $n$ is prime, $\mathbb{Z}_n$ is a field, and since $(x+1)^n - x^n -1$ is the zero polynomial in this case, the algorithm from Problem 2.5 accepts with probability one. When $n$ is composite, $\mathbb{Z}_n$ is no longer a field and the analysis from Problem 2.5 does not apply. I tried to carry out the analysis in $\mathbb{Z}_p$ for $p$ a prime factor of $n$, as suggested by Thomas, but even though $(x+1)^n - x^n -1$ is non-zero in $\mathbb{Z}_n$, it could be zero in $\mathbb{Z}_p$. It is not clear what happens then, and I am running out of time\dots

\section*{Problem 2.7}
\paragraph{1.} We have, for $r\in[0,1/2]$:
\begin{align*}
\ex{e^{rX}} &= \ex{\prod_{i=1}^t e^{rX_i}} = \prod_{i=1}^t\ex{e^{rX_i}} \leq \prod_{i=1}^t \left(1 + r\ex{X_i} + r^2\ex{X_i^2}\right)\\
&\leq \prod_{i=1}^t \left(1+ r\ex{X_i} + r^2\right) \leq \prod_{i=1}^t e^{r\ex{X_i} + r^2} = e^{tr^2 + r\ex{X}}
\end{align*}
where the second equality used the independence of the $X_i$s, the first inequality used half of the hint and the linearity of expectation, the second inequality used that $X_i^2\leq 1$, and the third inequality used the second half of the hint.
\paragraph{2.} We have $\prob{X\geq \ex{X} +\eps t} = \prob{e^{rX}\geq e^{r\ex{X}+r\eps t }}$ for all $r \geq 0$ since $\exp$ is increasing.
Applying Markov's inequality first, followed by the result of question \textbf{1.}:
\begin{displaymath}
\prob{X\geq \ex{X} +\eps t} \leq \frac{\ex{e^{rX}}}{e^{r\ex{X} + r\eps t}} \leq e^{tr^2-r\eps t}
\end{displaymath}
Then picking $r = \frac{\eps}{2}$ (which lies in $[0,1/2]$ since $\eps\leq 1$) makes the exponent equal to $\frac{\eps^2 t}{4} - \frac{\eps^2 t}{2} = -\frac{\eps^2 t}{4}$ and gives the desired Chernoff bound. For the other direction of the Chernoff bound, we can simply apply the bound we just obtained to the variables $1-X_1,\ldots,1-X_t$. We obtain:
\begin{displaymath}
\prob{t-X\geq t-\ex{X} +\eps t}\leq e^{-\eps^2t/4}
\end{displaymath}
The probability on the left-hand side is exactly $\prob{X\leq \ex{X}-\eps t}$.
\paragraph{3.} I used independence of the $X_i$s in question 1.: if the $X_i$s are independent, so are the $e^{rX_i}$s. This allows me to write the expectation of their product as the product of their expectations.

\section*{Problem 2.8}
Let us consider a deterministic algorithm for PIT. Since it is deterministic, the queries it makes to the black-box are always the same. Let us write them $q_1,\dots, q_p$. When $f$ is the zero polynomial, $[f(q_1),\dots, f(q_p)]$ is the zero vector, and the algorithm returns \emph{$f$ is zero} in this case. Assuming that $p< 2^n$, I am going to construct a \emph{non-zero} multivariate polynomial $g$ of degree $n$ such that $[g(q_1),\dots, g(q_p)]$ is the zero vector. Since the algorithm is deterministic, it has to return \emph{$g$ is zero} because it receives the exact same information as when the polynomial is the zero polynomial. This contradicts the correctness of the algorithm. To construct $g$, consider the polynomial:
\begin{displaymath}
g(x_1,\dots, x_n) = \sum_{(i_1,\dots, i_n)\in \{0,1\}^n} c_{i_1,\dots, i_n} x_1^{i_1}\dots x_n^{i_n}
\end{displaymath}
with $2^n$ monomials whose coefficients $c_{i_1,\dots, i_n}$ are yet to be determined. Let us now consider the system of equations $g(q_1) = 0,\dots, g(q_p) = 0$. This is a linear system in the $2^n$ coefficients of $g$ (which are elements of the field $\mathbb{F}$).
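On a toy instance, such a $g$ can be exhibited explicitly; the field $\mathbb{F}_2$, the choice $n=3$, and the three query points below are hypothetical choices for illustration.

```python
from itertools import product

n, F = 3, 2                                   # 3 variables over F_2
queries = [(0, 0, 0), (1, 0, 1), (1, 1, 1)]   # p = 3 < 2^n query points

# Exponent vectors of the 2^n multilinear monomials x_1^{i_1} ... x_n^{i_n}.
monomials = list(product((0, 1), repeat=n))

def evaluate(coeffs, point):
    """Evaluate g(point) over F_2, where g is given by its 2^n coefficients."""
    total = 0
    for c, e in zip(coeffs, monomials):
        term = c
        for x, exp in zip(point, e):
            term *= x ** exp
        total += term
    return total % F

# Search for a non-zero coefficient vector vanishing on all queries
# (brute force here; one exists whenever p < 2^n, as argued next).
g = next(c for c in product(range(F), repeat=2 ** n)
         if any(c) and all(evaluate(c, q) == 0 for q in queries))
```

The resulting $g$ has a non-zero coefficient yet evaluates to zero on every query point, so a deterministic tester making only these queries cannot distinguish it from the zero polynomial.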
Since this system has $p< 2^n$ equations, standard linear algebra says that the system is degenerate\footnote{A linear map from $\mathbb{F}^{a}$ to $\mathbb{F}^{b}$ always has a non-trivial kernel when $a> b$.}, that is, it has a non-zero solution. But a non-zero solution exactly defines the coefficients of $g$. Since the solution is non-zero, at least one coefficient of $g$ is non-zero, hence $g$ is a non-zero polynomial. However, by construction $g(q_1) = \dots = g(q_p) = 0$, and we obtain the contradiction mentioned above.
\end{document}