1 files changed, 89 insertions, 0 deletions
diff --git a/ps2/main.tex b/ps2/main.tex
index d628770..f292a5a 100644
--- a/ps2/main.tex
+++ b/ps2/main.tex
@@ -400,4 +400,93 @@ for binomial coefficients, the computation of the objective function can be
 made quite efficient: the value of $f_k(e)$ can be computed in $O(1)$
 arithmetic operations from $f_{k-1}(e)$.
 
+\section*{Problem 3.4}
+
+\paragraph{1.}
+Let $g$ be drawn from a pairwise independent family of functions $\mathbb{F}\to
+\mathbb{F}$, and $f_a:\mathbb{F}^{d+1}\to\mathbb{F}$ as in the problem
+statement. Then for $x_1\neq x_2\in \mathbb{F}^{d+1}$ and $y_1, y_2\in
+\mathbb{F}$, writing $h=g\circ f_a$:
+\begin{align*}
+    \P[h(x_1) = y_1\wedge h(x_2) = y_2] = &\P[f_a(x_1)
+    = f_a(x_2)]\P[g(f_a(x_1))=y_1]\mathbf{1}_{\{y_1=y_2\}}\\
+    &+ \P[f_a(x_1)\neq f_a(x_2)]\P[g(f_a(x_1)) = y_1\wedge g(f_a(x_2))
+= y_2\,|\, f_a(x_1)\neq f_a(x_2)]
+\end{align*}
+where we decomposed based on whether or not $f_a(x_1) = f_a(x_2)$.
+
+If $f_a(x_1) = f_a(x_2)$, then $y_1$ have to be equal to $y_2$ and the
+probability is only the probability that $g(f_a(x_1)) = y_1$, which is
+$\frac{1}{|\mathbb{F}|}$ since $g$ is drawn from a pairwise independent family.
+If $f_a(x_2) \neq f_a(x_2)$, then the probability is $\frac{1}{|\mathbb{F}|^2}$
+because $g$ is drawn from a pairwise independent family:
+\begin{align*}
+    \P[h(x_1) = y_1\wedge h(x_2) = y_2] \leq &\P[f_a(x_1)
+= f_a(x_2)]\frac{1}{|\mathbb{F}|}
++ \frac{1}{|\mathbb{F}|^2}
+\end{align*}
+where we upper-bounded $\P[f_a(x_1) \neq f_a(x_2)]$ by $1$ and
+$\mathbf{1}_{\{y_1=y_2\}}$ by $1$.
+
+For fixed $x_1$ and $x_2$ with $x_1\neq x_2$, $f_a(x_1)-f_a(x_2)$ is a non-zero
+univariate polynomial of the variable $a$ of degree at most $d$. Hence, by the
+Schwartz-Zippel lemma:
+\begin{displaymath}
+    \P[f_a(x_1) = f_a(x_2)] \leq \frac{d}{|\mathbb{F}|}
+\end{displaymath}
+Overall we have:
+\begin{displaymath}
+    \P[h(x_1) = y_1\wedge h(x_2) = y_2] \leq \frac{1+d}{|\mathbb{F}|^2}
+\end{displaymath}
+
+Let us now tune the parameters $d$ and $|\mathbb{F}|$ to satisfy the
+requirements. We will choose $|\mathbb{F}| = \gamma M$ for some $\gamma \geq 1$
+(we can simply drop the extra bits to get $\log M = m$ random bits). So we will
+have:
+\begin{displaymath}
+    \P[h(x_1) = y_1\wedge h(x_2) = y_2] \leq \frac{1+\eps}{M^2}
+\end{displaymath}
+as soon as $\frac{1+d}{\gamma^2}\leq 1+\eps$. Choosing $\gamma = \sqrt{d}$,
+this will be satisfied when $d\geq \frac{1}{\eps}$. Then we have
+$|\mathbb{F}|^d = (\sqrt{d}M)^d$ which will be larger than $N$ as soon as
+$d\geq n$.
+
+To summarize, we choose $|\mathbb{F}| = \sqrt{d} M$ and $d = \frac{1}{\eps}
++ n$. The number of required random bits for $g$ and $a$ is $O(\log |F|)
+= O(m+\log n + \log \frac{1}{\eps})$.
+
+\paragraph{2.} We draw $g:[N]\to\{0,1\}$ from an almost pairwise independent
+family.
+
+Consider the classic randomized algorithm for MaxCut using
+$g$: the cut is $(S,\bar{S})$ where $S = \{v\in V\,|\, g(v) = 1\}$. We have:
+\begin{align*}
+    \E[|cut(S)|] &= \sum_{(u,v)\in E} \P[(u,v)\in cut(S)]=
+    \sum_{(u,v)\in E} 1- \P[(u,v)\notin cut(S)]\\
+    &= \sum_{(u,v)\in E} 1- \P[g(u) = g(v) = 1] - \P[g(u) = g(v) - 0]
+    \geq |E|\left(1 - \frac{1+\eps}{4} - \frac{1+\eps}{4}\right)
+    = |E|\left(\frac{1}{2}-\frac{\eps}{2}\right)
+\end{align*}
+where the inequality used the almost pairwise independence of $g$.
+
+Choosing $\eps = \frac{1}{\log N}$, we have that $\E[|cut(S)|]\geq
+|E|(1/2-o(1))$.  Hence there exists a sequence of coin tosses (for $g$) such
+that for this sequence $|cut(S)|\geq 1/2-o(1)$.
+
+One such sequence can be found by iterating over all possible sequences and
+taking the one for which $|cut(S)|$ is maximum. It follows from \textbf{1.}
+that $g$ requires $O(2 + \log\log N +\log\frac{1}{\eps}) = O(\log\log N)$
+random bits. Hence we need $O(\log N)$ iterations to iterate over all possible
+sequences of random bits. For each iteration, we need $O(M\poly\log N)$ time to
+compute the size of the cut (computing $g(x)$ for a given vertex $x$ can be
+done in $\poly\log N$).  Hence the overall running time is $O(M\poly\log N)$
+
+The space requirement is:
+\begin{itemize}
+    \item $O(\log\log N)$ to store the current sequence of random bits as well
+        as the sequence of random bits for the candidate maximum cut
+\item $O(\log M)$ to compute the size of a cut at a given iteration as well as
+    storing the candidate maximum cut size.
+\end{itemize}
+Overall, the space requirement is $O(\log  M)$
 \end{document}