\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{algorithm}
\usepackage{algpseudocode}

\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}
 
\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}
 
\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}


\DeclareMathOperator{\E}{\mathbb{E}}
\DeclareMathOperator{\Enc}{\mathrm{Enc}}
\DeclareMathOperator{\Dec}{\mathrm{Dec}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\DeclareMathOperator*{\argmin}{\arg\min}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\poly}{\text{poly}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\cl}[1]{\text{\textbf{#1}}}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}


\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\algrenewcommand\algorithmicensure{\textbf{Output:}}
\algrenewcommand\algorithmicrequire{\textbf{Input:}}

\author{Thibaut Horel}
\title{CS 225 Problem Set 4 -- Solutions}

\begin{document}
\maketitle

\section*{Problem 5.1} I use a simple counting argument. If $\mathcal{C}$ is
$(\delta, L)$ list-decodable, then for all $r\in \Sigma^{\hat{n}}$ we have
$|B(r,\delta)\cap \mathcal{C}|\leq L$. Hence:
\begin{displaymath}
    \sum_{r\in \Sigma^{\hat{n}}} |B(r,\delta)\cap\mathcal{C}|\leq Lq^{\hat{n}}
\end{displaymath}
Furthermore, each code-word $c$ of $\mathcal{C}$ is counted several times in
the sum: once for each $r\in B(c,\delta)$. But $B(c,\delta)$ is of size
$q^{\hat{n}H_q(\delta, \hat{n})}$, so each $c\in\mathcal{C}$ appears
$q^{\hat{n}H_q(\delta, \hat{n})}$ times in the sum:
\begin{displaymath}
    |\mathcal{C}|q^{\hat{n}H_q(\delta, \hat{n})}=
    \sum_{r\in \Sigma^{\hat{n}}} |B(r,\delta)\cap\mathcal{C}|
\end{displaymath}
Putting the previous two equations together:
$ |\mathcal{C}|q^{\hat{n}H_q(\delta, \hat{n})} \leq Lq^{\hat{n}}$. Taking the
$\log$ both sides and reordering the terms (and using $\rho = \frac{\log
|\mathcal{C}|}{\hat{n}\log q}$) concludes the proof.

\section*{Problem 5.2}

\paragraph{1.} For two different messages $m_1$ and $m_2$, $\Enc_1(m_1)$ and
$\Enc_1(m_2)$ differ on at least $\delta_1 n_1$ letters by definition of the
minimum distance of $\Enc_1$. For each one of those $\delta_1 n_1$ positions
where the letters differ, the images by $\Enc_2$ of the letters at this
position differ on at least $\delta_2 n_2$ letters by definition of the
minimum distance of $\Enc_2$. By summing over all such positions, we obtain
$\delta_1n_1\delta_2n_2$ positions where $\Enc(m_1)$ and $\Enc(m_2)$ differ.
This proves that the minimum distance of $\Enc$ is at least $\delta_1\delta_2$.

\paragraph{2.} \emph{(Thanks Thomas for helping me late at night two days after
the homework was due!)} Let us consider a received word $r$, and a message $m$
such that $d(\Enc(m), r)\leq (1-\eps_1l_2)\delta_2$. We can view $r$ as the
concatenation of $n_1$ blocks, each of size $n_2$. Denoting by $r_i$ the $i$th
block, I first note that the fraction of indicdes $i$ such that $d(r_i,
\Enc_2(\Enc_1(m)_i))> \delta_2$ is at most $(1-\eps_1l_2)$. Otherwise, by summing over $i$
we would have $d(Enc(m), r)> (1-\eps_1l_2)\delta_2$, a contradiction.

Hence, by $l_2$ list-decoding each of the blocks $r_i$, we obtain $n_1$ lists
of $l_2$ characters from $\Sigma_1$, such the indices $i$ such that the $i$th
list does not contain $\Enc_1(m)_i$ are a fraction at most $(1-\eps_1l_2)$.

I now construct $l_2$ words $c_1,\dots, c_{l_2}$ such that $c_j$ is obtained by
taking the $j$th character in each of the $n_1$ lists of length $l_2$ obtained
by $l_2$ list-decoding in the previous paragraph.

I claim that one of those words $c_j$ is such that $d(c_j, Enc_1(m))\leq
1-\eps_1$. Otherwise, by summing over $j$, and denoting by $f$ the fraction of
indices $i$ such that the $i$th list does not contain $\Enc_1(m)_i$:
\begin{align*}
    (1-\eps_1)l_2&< \sum_{j=1}^{l_2} d(c_j, Enc_1(m))
    = \frac{1}{n_1}\sum_{i=1}^{n_1}\sum_{j=1}^{l_2} \left[(c_j)_i\neq
    Enc_1(m)_i\right]\\
    &= fl_2 + (1-f)(l_2-1) = l_2 - 1 + f\leq l_2-1+(1-\eps_1l_2)
    = (1-\eps_1)l_2
\end{align*}
where the second equality was obtained by splitting the sum over $i$ depending
on whether or not $\Enc_1(m)_i$ belongs to the $i$th list, and the last
inequality was obtained by using the bound on $f$ found in the first paragraph
of this answer. Looking at the two extremal terms in the above computation, we
see a contradiction.

Hence, one of the $c_j$ is such that $d(c_j, Enc_1(m))\leq
1-\eps_1$. By $l_1$-decoding all the $c_j$s, we obtain $l_2l_1$ words in
$\{1,\dots, N\}$ such that one of them is $m$ (since $\Enc_1$ is $l_2$
list-decodable at distance $1-\eps_1$). This proves that $\Enc$ is $l_1l_2$
list-decodable at distance $(1-\eps_1\l_2)\delta_2$.

\paragraph{3.} We consider:
\begin{itemize}
    \item $\Enc_1$: the Reed-Solomon code consisting of all polynomials of
        degree $d = \frac{n}{m}-1$ over $\mathbb{F}_{2^m}$.
    \item $\Enc_2$: the Hadamard code of all linear functions from
        $\mathbb{F}_2^m$ to $\mathbb{F}_2$.
\end{itemize}
Since $\mathbb{F}_2^m$ can be identified with $\mathbb{F}_{2^m}$ these two
codes are compatible for concatenation. Both codes have block length $2^m$ so
the resulting block length after concatenation is $2^{2m}$. The bit length of
the code is $(d+1)m = n$ as requested.

If we choose $m$ large enough such that $\frac{n}{m2^m}\leq \eps$, then the
minimum distance of the RS code is $1-\frac{n/m-1}{2^m}\geq
1-\frac{n/m}{2^m}\geq 1-\eps$. Hence, by \textbf{1.}, the minimum distance of
the concatenated code is at least $\frac{1}{2}(1-\eps)$ as requested.
Finally, if $\frac{n}{m2^m}\leq \eps$, the block length of the concatenated
code, $2^{2m}$, is $O(\frac{n^2}{\eps^2})$ as requested.

Since $m = O(\log \frac{n}{\eps})$ and $m(d+1) = n$, it is clear that the code
is fully explicit.

\paragraph{} To get list-decoding, we need a different setting of $m$. We know
that the Hadamard code can be $\frac{1}{2}-\frac{\eps}{2}$ list-decoded for any
$\eps$ with list length $l_2 = \Theta(\frac{1}{\eps^2})$. If we set $m$ such
that:
\begin{displaymath}
2l_2\sqrt{\frac{n}{m2^m}} = \eps
\end{displaymath}
i.e $\frac{n}{m2^m} = \Theta(\eps^6)$,  then applying \textbf{2.} the
concatenated code will have distance
$(1-\eps)(\frac{1}{2}-\frac{\eps}{2}) \geq (\frac{1}{2}
-\eps)$.  The resulting list length will be $\sqrt{\frac{m2^m}{d}}l_2
=\Theta(\frac{1}{\eps^5})$.

The inner Hadamard code as $2^m = \Theta(\frac{n}{\eps^6})$ code-words, so the
list-decoding can be done in polynomial time by brute-force testing all
code-words. Finally, since $m = \Theta(\log \frac{n}{\eps^6})$, The outer
RS code can be list-decoded in polynomial time using the algorithm seen in
class. Note that by \textbf{2.} we only need to apply this algorithm $l_2
= \Theta(\frac{1}{\eps^2})$ times, so the overall resulting running time is
still polynomial.

\section*{Problem 5.6}

\paragraph{1.} I follow the hint by allowing the bivariate polynomial $Q$ in
the proof of Theorem 5.19 to have as many monomials as possible. More
precisely, we allow all monomials of degree $(d_1, d_2)$ such that:
\begin{displaymath}
    d_1 + d\cdot d_2\leq \lfloor\eps q\rfloor \eqdef D
\end{displaymath}
and we will be fine as long as the number of monomials is strictly larger than
$q$ (to make sure we can find such a $Q$). Hence, all we have to do is count
the number of pairs $(d_1,d_2)$ such that $d_1 + d\cdot d_2\leq \eps q$ and
make sure that it is strictly larger than $q$. We see
that $d_2$ can be at most $A\eqdef\lfloor D/d\rfloor$ (in which case
$d_1$ has to be zero). If $d_2 = A - k$ for $k\in\{0,\dots, A\}$, then $d_1$
can be as large as $D -kd$. Hence, the total number of monomials is:
\begin{displaymath}
    N = \sum_{k=0}^A (D-kd + 1) = (D+1)(A+1) - \frac{d}{2} A(A+1)
    = (A+1)\left(D+1-\frac{d}{2}A\right)
\end{displaymath}
For $\eps=\sqrt{2d/q}$, we have $A=\lfloor D/d\rfloor = \lfloor
\sqrt{2q/d}\rfloor$ and $D=\lfloor
\sqrt{2qd}\rfloor$. This leads to:
\begin{displaymath}
    N > \sqrt{2q/d}\left(\sqrt{2qd} - \frac{d}{2}\sqrt{2q/d}\right) = q
\end{displaymath}
as desired.

\section*{Problem 5.7}

\paragraph{1.} Let us denote by $r\in\{0,1\}^{\hat{n}}$ the answers given by
the mean oracle trying to hide the secret from us. First observe that, denoting
by $[A]$ the Iverson bracket of event $A$ and $c_i = \Enc(s_i), i\in\{1,2\}$:
\begin{displaymath}
    \hat{n}d(c_1, c_2) = \sum_{l=1}^{\hat{n}} [(c_1)_i\neq (c_2)_i]
    = \sum_{i=1}^{\hat{n}}[(c_1)_i\neq r_i] + [(c_2)_i\neq r_i]
    = \hat{n} d(c_1, r) + \hat{n}d(c_2, r)
\end{displaymath}
where the second equality follows from the fact that $r_i$ is either equal to
$(c_1)_i$ or $(c_2)_i$  when $(c_1)_i\neq (c_2)_i$. The other equalities simply
use the definition of the relative distance.

I claim that one of the two secrets $s\in\{s_1, s_2\}$ is such that the
relative distance $d(\Enc(s), r)\leq \frac{1}{4} + \eps$. Otherwise, using the
identity proven above:
\begin{displaymath}
    d(c_1, c_2) = d(c_1, r) + d(c_2, r)> \frac{1}{2} + 2\eps
\end{displaymath}
This implies that $\Enc(s_1)$ and $\Enc(s_2)$ agree on less than a $\frac{1}{2}
- 2\eps$ fraction of positions and contradicts the assumption on $\Enc$.

Since one of the two secrets $s\in\{s_1,s_2\}$ is such that
$d(\Enc(s),r)\leq\frac{1}{4}+\eps$, by list-decoding the received answer $r$ at
distance $\frac{1}{4}+\eps$, we are guaranteed that the list $L$ of length $l$
returned by the decoder contains one of the two secrets. By assumption, the
decoding takes polynomial time.

\paragraph{2.} First, we verify that the code-words in the concatenated code of
Problem 5.2.3 have sufficient agreement. Denoting by $a(\cdot,\cdot)$ the
agreement between two words, let us consider two different messages $m_1$ and
$m_2$ of the code of Problem 5.2.3:
\begin{displaymath}
    a\big(\Enc(m_1), \Enc(m_2)\big) = 1- d\big(\Enc(m_1), \Enc(m_2)\big)
    = 1 - d\big(\Enc_1(m_1), \Enc_2(m_2)\big)\frac{1}{2}
\end{displaymath}
where the last equality used the following reasoning (this is basically the
same thing as \textbf{1.} plus using that Hadamard codes have constant
distance):
\begin{itemize}
    \item on the positions where $\Enc_1(m_1)$ and $\Enc_1(m_2)$ coincide, then
        the resulting blocks after applying $\Enc_2$ will be identical and do
        not contribute anything to the $d\big(\Enc(m_1), \Enc(m_2)\big)$.
    \item on the positions where $\Enc_1(m_1)$ and $\Enc_1(m_2)$ differ, the
        resulting blocks after applying $\Enc_2$ will differ on exactly half
        the positions, because for the Hadamard code, the distance between any
        two different codewords is $\frac{1}{2}$.
\end{itemize}
Since the distance is at most 1, we get that the agreement is at least
$\frac{1}{2}$ which is a sufficient condition given \textbf{1.}.

Hence, we can directly apply the last part of Problem 5.2.3 with $\eps
= \frac{1}{4}-\eps'$ for some $\eps'< \frac{1}{4}$ and obtain
a $(\frac{1}{4}+\eps', \mathrm{poly}(\frac{1}{1/4-\eps'}))$ list decodable
code. The number of questions (blocklength) will be $\mathrm{poly}(n)$ for
$\eps'$ a small enough constant. 
\end{document}