\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{algorithm}
\usepackage{algpseudocode}

\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate}%
    \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}

\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize}%
    \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}

\newenvironment{description*}%
  {\vspace{-2ex} \begin{description}%
    \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}

\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\cl}[1]{\text{\textbf{#1}}}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\algrenewcommand\algorithmicensure{\textbf{Output:}}
\algrenewcommand\algorithmicrequire{\textbf{Input:}}

\author{Thibaut Horel}
\title{CS 225 Problem Set 1 -- Solutions}

\begin{document}
\maketitle

Collaborator: Debmalya Mandal

\section*{Problem 2.3}

\paragraph{$\cl{ZPP}\subseteq \cl{RP}\cap\cl{co-RP}$:} Let us consider $L$ in
$\cl{ZPP}$ and an algorithm $A$ which decides $L$ in expected running time
$t(n) = O(n^c)$ for some $c\geq 1$. We construct the following algorithm $A'$:
for input $x$ s.t. $|x| = n$, $A'$ simulates the execution of algorithm $A$
running on input $x$ for at most $2\times t(n)$ steps.
If $A$ terminated during this simulation, output whichever answer was computed
by $A$; otherwise output \emph{Reject}.
\begin{itemize}
  \item If $x\notin L$, it is clear that $A'(x)$ rejects: either $A$ computed
an answer in less than $2\times t(n)$ steps, in which case by definition of
$\cl{ZPP}$ the answer is correct, i.e., $A$ rejects; or $A$ failed to compute
an answer in $2\times t(n)$ steps, in which case $A'$ rejects.
  \item If $x\in L$, $A'$ only rejects when $A$ fails to compute an answer in
less than $2\times t(n)$ steps. Since $A$ has expected running time $t(n)$, by
Markov's inequality this happens with probability at most $\frac{1}{2}$.
\end{itemize}
Since $A'$ obviously runs in polynomial time, this proves that $L$ is in
$\cl{RP}$. The exact same reasoning shows that $L$ is in $\cl{co-RP}$ by
changing the definition of $A'$ to accept when $A$ fails to compute an answer
in less than $2\times t(n)$ steps.

\paragraph{$\cl{RP}\cap\cl{co-RP}\subseteq \cl{ZPP}$:} Let us consider $L$
belonging to both $\cl{RP}$ and $\cl{co-RP}$. Hence we have algorithms $A$ and
$B$ running in polynomial time such that for $x\in L$, $A$ accepts with
probability at least $\frac{1}{2}$ and $B$ always accepts; for $x\notin L$,
$A$ always rejects and $B$ rejects with probability at least $\frac{1}{2}$. We
define algorithm $C$ as follows: for input $x$, compute $A(x)$. If $A$
accepts, then $C$ accepts; otherwise, compute $B(x)$. If $B$ rejects, then $C$
rejects. Otherwise repeat.

By definition of $A$ and $B$, $C$ is always correct ($A$ is always correct
when accepting, and $B$ is always correct when rejecting). We now need to
control the running time of $C$. For $x\in L$, $A$ rejects with probability at
most $\frac{1}{2}$, and then $B$ accepts and we have to repeat. For $x\notin
L$, $A$ rejects and $B$ accepts with probability at most $\frac{1}{2}$, and we
have to repeat. In both cases $C$ repeats with probability at most
$\frac{1}{2}$.
Hence we can upper bound the number of repetitions by a random variable
following a geometric distribution of parameter $\frac{1}{2}$. The expected
number of repetitions is $2$. Hence the expected running time of $C$ is at
most $2(t_A + t_B)$ where $t_A$ and $t_B$ are the running times of $A$ and $B$
respectively. Since $t_A$ and $t_B$ are polynomial in $n=|x|$, so is the
expected running time of $C$.

\section*{Problem 2.5}

\paragraph{1.} Since $f$ has degree $D$, it has at most $D$ irreducible
factors, and in particular at most $D$ irreducible factors of degree at most
$d$. A sufficient condition for $f(x) \bmod g(x)$ to be non-zero is that
$g(x)$ is an irreducible polynomial distinct from the at most $D$ irreducible
factors of $f$. Hence, if $g$ is chosen uniformly at random among the
polynomials of degree at most $d$:
\begin{displaymath}
  p\eqdef \prob{f(x) \bmod g(x) \neq 0} \geq \frac{i_d - D}{|F|^{d+1}}
\end{displaymath}
where $i_d$ is the number of irreducible polynomials of degree at most $d$ and
$|F|^{d+1}$ is the number of polynomials of degree at most $d$.

But we know that $i_d \geq \frac{|F|^{d+1}}{2d}$. Hence $p\geq
\frac{1}{2d} - \frac{D}{|F|^{d+1}}$. Since $|F|\geq 2$ and $d = c\log D$, we
have $p\geq \frac{1}{2c\log D} - \frac{1}{2}\frac{1}{D^{c-1}}$.
We can now lower bound $p\log D\geq \frac{1}{2c} - \frac{\log D}{2 D^{c-1}}\geq
\frac{1}{2c} - \frac{1}{2D^{c-2}}$, where the second inequality used $\log
D\leq D$. Since we can assume $D\geq 2$ (otherwise PIT is trivial), $p\log D
\geq \frac{1}{2c} - \frac{1}{2\cdot 2^{c-2}}$. For $c = 5$, we have $p\log D
\geq \frac{1}{10} - \frac{1}{16} = \frac{3}{80}$, which is positive. So $c=5$
and $c' = \frac{3}{80}$ are as required.

\paragraph{}
This suggests the following algorithm for univariate identity testing: pick
$g$ uniformly at random among polynomials of degree at most $d = c\log D$ and
compute $f(x) \bmod g(x)$. If the answer is $0$, accept; otherwise reject.
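In pseudocode (a sketch using the \texttt{algpseudocode} macros already loaded
in the preamble; the constant $c$ is the one chosen in question 1., and the
gate-by-gate modular evaluation is left abstract):

```latex
\begin{algorithm}[h]
  \caption{Randomized univariate identity testing (sketch)}
  \begin{algorithmic}[1]
    \Require a circuit computing $f \in F[x]$ of degree at most $D$
    \Ensure \emph{Accept} or \emph{Reject}
    \State pick $g$ uniformly at random among polynomials of degree at most
      $d = c\log D$
    \State compute $f(x) \bmod g(x)$ by reducing modulo $g(x)$ at every gate
      of the circuit
    \If{$f(x) \bmod g(x) = 0$}
      \State \textbf{accept}
    \Else
      \State \textbf{reject}
    \EndIf
  \end{algorithmic}
\end{algorithm}
```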
The computation modulo $g(x)$ can be performed by taking the reduction modulo
$g(x)$ at each gate of the circuit representing $f$. The time to take this
reduction is $O(\text{poly}(\log D))$, so the overall computation time is
polynomial in the size of the circuit representing $f$.

It is clear that if $f(x)$ is the zero polynomial the algorithm is correct
with probability one. If $f(x)$ is non-zero, then the algorithm rejects with
probability at least $\frac{c'}{\log D}$. We can get a constant probability of
rejection by repeating the algorithm $O(\log D)$ times, which doesn't break
the polynomial running time.

\paragraph{2.} We reduce the multivariate case to the univariate case using
the following reduction: let $f(x_1,\dots,x_n)$ be the multivariate polynomial
of degree $D$, introduce a symbolic variable $x$ and substitute $x_i$ with
$x^{(D+1)^{i-1}}$ in $f$. Since the map $(i_1,\ldots,i_n)\mapsto i_1
+ i_2 (D+1) + \dots + i_n (D+1)^{n-1}$ is bijective (decomposition in base
$D+1$), each monomial in the multivariate polynomial becomes a distinct
monomial in the resulting univariate polynomial $\tilde{f}$. Hence $\tilde{f}$
is non-zero iff $f$ is non-zero. Furthermore, $\tilde{f}$ has degree less than
$(D+1)^n$.

We then apply the algorithm from question 1.: we pick a random (univariate)
polynomial $g$ of degree at most $c\log\big((D+1)^n\big) = cn\log(D+1)$, and
compute $\tilde{f}(x) \bmod g(x)$. This computation takes time
$O(\text{poly}(n\log D))$ per reduction (and is therefore polynomial in the
size of the circuit representing $f$), is always correct when $f$ is zero, and
is correct with probability at least $\frac{c'}{n\log(D+1)}$ when $f$ is
non-zero. Repeating the algorithm $O(n\log D)$ times yields a constant
probability of rejection when $f$ is non-zero, and the resulting algorithm
still has polynomial running time.

\section*{Problem 2.6}

\paragraph{1.} We can write $(x+1)^n = x^n + 1 +\sum_{k=1}^{n-1}{n \choose
k}x^k$.
So we need to prove that $n$ is prime iff ${n\choose k} \equiv 0 \pmod n$
for all $1\leq k\leq n-1$. We use ${n\choose k}
= \frac{n(n-1)\dots(n-k+1)}{k!}$.

If $n$ is prime, then $n$ is a prime factor of the numerator of ${n\choose
k}$, but does not appear in the prime decomposition of $k!$. Hence $n$ divides
${n\choose k}$.

Conversely, suppose $n$ divides ${n\choose k}$ for all $1\leq k\leq n-1$, and
assume for contradiction that $n$ is not prime. Let $p$ be the smallest prime
factor of $n$ and write $n = p^a m$ with $a\geq 1$ and $p\nmid m$; note that
$p\leq n-1$ since $n$ is composite. Consider ${n\choose p}
= \frac{n(n-1)\ldots(n-p+1)}{p!}$. Since $n$ is a multiple of $p$, none of the
numbers $(n-1),\ldots,(n-p+1)$ is divisible by $p$, so the $p$-adic valuation
of the numerator is exactly $a$; the $p$-adic valuation of $p!$ is $1$. Hence
the $p$-adic valuation of ${n\choose p}$ is $a-1$, which is strictly smaller
than $a$, the $p$-adic valuation of $n$. So $n$ does not divide
${n\choose p}$, a contradiction.

\paragraph{2.} We can apply the algorithm from the previous problem: test
whether or not $(x+1)^n - x^n - 1$ is the zero polynomial over $\mathbb{Z}_n$.
When $n$ is prime, $\mathbb{Z}_n$ is a field, and since $(x+1)^n - x^n -1$ is
the zero polynomial in this case, the algorithm from Problem 2.5 accepts with
probability one.

When $n$ is composite, $\mathbb{Z}_n$ is no longer a field and the analysis
from Problem 2.5 does not apply. I tried to analyze the situation in
$\mathbb{Z}_p$ for $p$ a prime factor of $n$, as suggested by Thomas, but even
though $(x+1)^n - x^n - 1$ is non-zero in $\mathbb{Z}_n$, it could be zero in
$\mathbb{Z}_p$. It is not clear what happens then, and I'm running out of
time\dots
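As a quick sanity check of the polynomial being tested (this small computation
is mine, not part of the problem statement), take the composite number $n = 4$:
```latex
\begin{displaymath}
  (x+1)^4 - x^4 - 1 = 4x^3 + 6x^2 + 4x \equiv 2x^2 \pmod{4},
\end{displaymath}
```
which is non-zero over $\mathbb{Z}_4$, as expected since ${4\choose 2} = 6
\not\equiv 0 \pmod 4$; for the prime $n = 3$, on the other hand, $(x+1)^3
- x^3 - 1 = 3x^2 + 3x \equiv 0 \pmod 3$.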
\section*{Problem 2.7}

\paragraph{1.} We have, for $r\in[0,1/2]$:
\begin{align*}
  \ex{e^{rX}} &= \ex{\prod_{i=1}^t e^{rX_i}} = \prod_{i=1}^t\ex{e^{rX_i}}
  \leq \prod_{i=1}^t \left(1 + r\ex{X_i} + r^2\ex{X_i^2}\right)\\
  &\leq \prod_{i=1}^t \left(1 + r\ex{X_i} + r^2\right) \leq \prod_{i=1}^t
  e^{r\ex{X_i} + r^2} = e^{tr^2 + r\ex{X}}
\end{align*}
where the second equality used the independence of the $X_i$s, the first
inequality used half of the hint and the linearity of expectation, the second
inequality used that $X_i^2\leq 1$, and the third inequality used the second
half of the hint.

\paragraph{2.} We have $\prob{X\geq \ex{X} +\eps t} = \prob{e^{rX}\geq
e^{r\ex{X}+r\eps t}}$ for all $r \geq 0$ since $\exp$ is increasing. Applying
Markov's inequality first, followed by the result of question \textbf{1.}:
\begin{displaymath}
\prob{X\geq \ex{X} +\eps t} \leq \frac{\ex{e^{rX}}}{e^{r\ex{X} + r\eps t}}
\leq e^{tr^2-r\eps t}
\end{displaymath}
Then picking $r = \frac{\eps}{2}$, which minimizes the exponent $tr^2 - r\eps
t$ and lies in $[0,1/2]$ since $\eps\leq 1$, gives the desired Chernoff bound
$e^{-\eps^2 t/4}$.

For the other direction of the Chernoff bound, we can simply apply the bound
we just obtained to the variables $1-X_1,\ldots,1-X_t$. We obtain:
\begin{displaymath}
  \prob{t-X\geq t-\ex{X} +\eps t}\leq e^{-\eps^2t/4}
\end{displaymath}
The probability on the left-hand side is exactly $\prob{X\leq \ex{X}-\eps t}$.

\paragraph{3.} I used the independence of the $X_i$s in question 1.: if the
$X_i$s are independent, so are the $e^{rX_i}$s. This allows me to write the
expectation of their product as the product of their expectations.

\section*{Problem 2.8}

Let us consider a deterministic algorithm for PIT. Since it is deterministic,
the queries it makes to the black-box when every answer is $0$ are always the
same. Let us write them $q_1,\dots, q_p$. When $f$ is the zero polynomial,
$[f(q_1),\dots, f(q_p)]$ is the zero vector, and the algorithm returns
\emph{$f$ is zero} in this case.
Assuming that $p< 2^n$, I am going to construct a \emph{non-zero} multivariate
polynomial $g$ of degree at most $n$ such that $[g(q_1),\dots, g(q_p)]$ is the
zero vector. Since the algorithm is deterministic, it has to return \emph{$g$
is zero} because it receives the exact same information as when the polynomial
is the zero polynomial. This contradicts the correctness of the algorithm.

To construct $g$, consider the polynomial:
\begin{displaymath}
  g(x_1,\dots, x_n) = \sum_{(i_1,\dots, i_n)\in \{0,1\}^n} c_{i_1,\dots, i_n}
  x_1^{i_1}\dots x_n^{i_n}
\end{displaymath}
with $2^n$ monomials whose coefficients $c_{i_1,\dots, i_n}$ are yet to be
determined. Let us now consider the system of equations $g(q_1) = 0,\dots,
g(q_p) = 0$. This is a linear system in the $2^n$ coefficients of $g$ (which
are elements of the field $\mathbb{F}$). Since this system has $p< 2^n$
equations, standard linear algebra says that the system is
degenerate\footnote{A linear map from $\mathbb{F}^{m}$ to $\mathbb{F}^{k}$
always has a non-trivial kernel when $m> k$.}, that is, it has a non-zero
solution. But a non-zero solution exactly defines the coefficients of $g$.
Since the solution is non-zero, at least one coefficient of $g$ is non-zero,
hence $g$ is a non-zero polynomial. However, by construction $g(q_1) = \dots
= g(q_p) = 0$, and we obtain the contradiction mentioned above.

\end{document}
