\documentclass[10pt]{article} \usepackage{fullpage} \usepackage[utf8x]{inputenc} \usepackage{amsmath,amsfonts,amsthm} \usepackage[english]{babel} \usepackage[capitalize, noabbrev]{cleveref} \usepackage{algorithm} \usepackage{algpseudocode} \newenvironment{enumerate*}% {\vspace{-2ex} \begin{enumerate} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{enumerate}} \newenvironment{itemize*}% {\vspace{-2ex} \begin{itemize} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{itemize}} \newenvironment{description*}% {\vspace{-2ex} \begin{description} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{description}} \DeclareMathOperator{\E}{\mathbb{E}} \DeclareMathOperator{\Enc}{\mathrm{Enc}} \DeclareMathOperator{\Dec}{\mathrm{Dec}} \let\P\relax \DeclareMathOperator{\P}{\mathbb{P}} \DeclareMathOperator*{\argmin}{\arg\min} \newcommand{\ex}[1]{\E\left[#1\right]} \newcommand{\prob}[1]{\P\left[#1\right]} \newcommand{\inprod}[1]{\left\langle #1 \right\rangle} \newcommand{\R}{\mathbf{R}} \newcommand{\poly}{\text{poly}} \newcommand{\N}{\mathbf{N}} \newcommand{\C}{\mathcal{C}} \newcommand{\eps}{\varepsilon} \newcommand{\cl}[1]{\text{\textbf{#1}}} \newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}} \newcommand{\llbracket}{[\![} \newtheorem{theorem}{Theorem} \newtheorem{lemma}{Lemma} \algrenewcommand\algorithmicensure{\textbf{Output:}} \algrenewcommand\algorithmicrequire{\textbf{Input:}} \author{Thibaut Horel} \title{CS 225 Problem Set 4 -- Solutions} \begin{document} \maketitle \section*{Problem 5.1} I use a simple counting argument. If $\mathcal{C}$ is $(\delta, L)$ list-decodable, then for all $r\in \Sigma^{\hat{n}}$ we have $|B(r,\delta)\cap \mathcal{C}|\leq L$. Hence: \begin{displaymath} \sum_{r\in \Sigma^{\hat{n}}} |B(r,\delta)\cap\mathcal{C}|\leq Lq^{\hat{n}} \end{displaymath} Furthermore, each code-word $c$ of $\mathcal{C}$ is counted several times in the sum: once for each $r\in B(c,\delta)$. But $B(c,\delta)$ is of size $q^{\hat{n}H_q(\delta, \hat{n})}$, so each $c\in\mathcal{C}$ appears $q^{\hat{n}H_q(\delta, \hat{n})}$ times in the sum: \begin{displaymath} |\mathcal{C}|q^{\hat{n}H_q(\delta, \hat{n})}= \sum_{r\in \Sigma^{\hat{n}}} |B(r,\delta)\cap\mathcal{C}| \end{displaymath} Putting the previous two equations together: $ |\mathcal{C}|q^{\hat{n}H_q(\delta, \hat{n})} \leq Lq^{\hat{n}}$. Taking the $\log$ both sides and reordering the terms (and using $\rho = \frac{\log |\mathcal{C}|}{\hat{n}\log q}$) concludes the proof. \section*{Problem 5.2} \paragraph{1.} For two different messages $m_1$ and $m_2$, $\Enc_1(m_1)$ and $\Enc_1(m_2)$ differ on at least $\delta_1 n_1$ letters by definition of the minimum distance of $\Enc_1$. For each one of those $\delta_1 n_1$ positions where the letters differ, the images by $\Enc_2$ of the letters at this position differ on at least $\delta_2 n_2$ letters by definition of the minimum distance of $\Enc_2$. By summing over all such positions, we obtain $\delta_1n_1\delta_2n_2$ positions where $\Enc(m_1)$ and $\Enc(m_2)$ differ. This proves that the minimum distance of $\Enc$ is at least $\delta_1\delta_2$. \paragraph{2.} \emph{(Thanks Thomas for helping me late at night two days after the homework was due!)} Let us consider a received word $r$, and a message $m$ such that $d(\Enc(m), r)\leq (1-\eps_1l_2)\delta_2$. We can view $r$ as the concatenation of $n_1$ blocks, each of size $n_2$. Denoting by $r_i$ the $i$th block, I first note that the fraction of indicdes $i$ such that $d(r_i, \Enc_2(\Enc_1(m)_i))> \delta_2$ is at most $(1-\eps_1l_2)$. Otherwise, by summing over $i$ we would have $d(Enc(m), r)> (1-\eps_1l_2)\delta_2$, a contradiction. Hence, by $l_2$ list-decoding each of the blocks $r_i$, we obtain $n_1$ lists of $l_2$ characters from $\Sigma_1$, such the indices $i$ such that the $i$th list does not contain $\Enc_1(m)_i$ are a fraction at most $(1-\eps_1l_2)$. I now construct $l_2$ words $c_1,\dots, c_{l_2}$ such that $c_j$ is obtained by taking the $j$th character in each of the $n_1$ lists of length $l_2$ obtained by $l_2$ list-decoding in the previous paragraph. I claim that one of those words $c_j$ is such that $d(c_j, Enc_1(m))\leq 1-\eps_1$. Otherwise, by summing over $j$, and denoting by $f$ the fraction of indices $i$ such that the $i$th list does not contain $\Enc_1(m)_i$: \begin{align*} (1-\eps_1)l_2&< \sum_{j=1}^{l_2} d(c_j, Enc_1(m)) = \frac{1}{n_1}\sum_{i=1}^{n_1}\sum_{j=1}^{l_2} \left[(c_j)_i\neq Enc_1(m)_i\right]\\ &= fl_2 + (1-f)(l_2-1) = l_2 - 1 + f\leq l_2-1+(1-\eps_1l_2) = (1-\eps_1)l_2 \end{align*} where the second equality was obtained by splitting the sum over $i$ depending on whether or not $\Enc_1(m)_i$ belongs to the $i$th list, and the last inequality was obtained by using the bound on $f$ found in the first paragraph of this answer. Looking at the two extremal terms in the above computation, we see a contradiction. Hence, one of the $c_j$ is such that $d(c_j, Enc_1(m))\leq 1-\eps_1$. By $l_1$-decoding all the $c_j$s, we obtain $l_2l_1$ words in $\{1,\dots, N\}$ such that one of them is $m$ (since $\Enc_1$ is $l_2$ list-decodable at distance $1-\eps_1$). This proves that $\Enc$ is $l_1l_2$ list-decodable at distance $(1-\eps_1\l_2)\delta_2$. \paragraph{3.} We consider: \begin{itemize} \item $\Enc_1$: the Reed-Solomon code consisting of all polynomials of degree $d = \frac{n}{m}-1$ over $\mathbb{F}_{2^m}$. \item $\Enc_2$: the Hadamard code of all linear functions from $\mathbb{F}_2^m$ to $\mathbb{F}_2$. \end{itemize} Since $\mathbb{F}_2^m$ can be identified with $\mathbb{F}_{2^m}$ these two codes are compatible for concatenation. Both codes have block length $2^m$ so the resulting block length after concatenation is $2^{2m}$. The bit length of the code is $(d+1)m = n$ as requested. If we choose $m$ large enough such that $\frac{n}{m2^m}\leq \eps$, then the minimum distance of the RS code is $1-\frac{n/m-1}{2^m}\geq 1-\frac{n/m}{2^m}\geq 1-\eps$. Hence, by \textbf{1.}, the minimum distance of the concatenated code is at least $\frac{1}{2}(1-\eps)$ as requested. Finally, if $\frac{n}{m2^m}\leq \eps$, the block length of the concatenated code, $2^{2m}$, is $O(\frac{n^2}{\eps^2})$ as requested. Since $m = O(\log \frac{n}{\eps})$ and $m(d+1) = n$, it is clear that the code is fully explicit. \paragraph{} To get list-decoding, we need a different setting of $m$. We know that the Hadamard code can be $\frac{1}{2}-\frac{\eps}{2}$ list-decoded for any $\eps$ with list length $l_2 = \Theta(\frac{1}{\eps^2})$. If we set $m$ such that: \begin{displaymath} 2l_2\sqrt{\frac{n}{m2^m}} = \eps \end{displaymath} i.e $\frac{n}{m2^m} = \Theta(\eps^6)$, then applying \textbf{2.} the concatenated code will have distance $(1-\eps)(\frac{1}{2}-\frac{\eps}{2}) \geq (\frac{1}{2} -\eps)$. The resulting list length will be $\sqrt{\frac{m2^m}{d}}l_2 =\Theta(\frac{1}{\eps^5})$. The inner Hadamard code as $2^m = \Theta(\frac{n}{\eps^6})$ code-words, so the list-decoding can be done in polynomial time by brute-force testing all code-words. Finally, since $m = \Theta(\log \frac{n}{\eps^6})$, The outer RS code can be list-decoded in polynomial time using the algorithm seen in class. Note that by \textbf{2.} we only need to apply this algorithm $l_2 = \Theta(\frac{1}{\eps^2})$ times, so the overall resulting running time is still polynomial. \section*{Problem 5.6} \paragraph{1.} I follow the hint by allowing the bivariate polynomial $Q$ in the proof of Theorem 5.19 to have as many monomials as possible. More precisely, we allow all monomials of degree $(d_1, d_2)$ such that: \begin{displaymath} d_1 + d\cdot d_2\leq \lfloor\eps q\rfloor \eqdef D \end{displaymath} and we will be fine as long as the number of monomials is strictly larger than $q$ (to make sure we can find such a $Q$). Hence, all we have to do is count the number of pairs $(d_1,d_2)$ such that $d_1 + d\cdot d_2\leq \eps q$ and make sure that it is strictly larger than $q$. We see that $d_2$ can be at most $A\eqdef\lfloor D/d\rfloor$ (in which case $d_1$ has to be zero). If $d_2 = A - k$ for $k\in\{0,\dots, A\}$, then $d_1$ can be as large as $D -kd$. Hence, the total number of monomials is: \begin{displaymath} N = \sum_{k=0}^A (D-kd + 1) = (D+1)(A+1) - \frac{d}{2} A(A+1) = (A+1)\left(D+1-\frac{d}{2}A\right) \end{displaymath} For $\eps=\sqrt{2d/q}$, we have $A=\lfloor D/d\rfloor = \lfloor \sqrt{2q/d}\rfloor$ and $D=\lfloor \sqrt{2qd}\rfloor$. This leads to: \begin{displaymath} N > \sqrt{2q/d}\left(\sqrt{2qd} - \frac{d}{2}\sqrt{2q/d}\right) = q \end{displaymath} as desired. \section*{Problem 5.7} \paragraph{1.} Let us denote by $r\in\{0,1\}^{\hat{n}}$ the answers given by the mean oracle trying to hide the secret from us. First observe that, denoting by $[A]$ the Iverson bracket of event $A$ and $c_i = \Enc(s_i), i\in\{1,2\}$: \begin{displaymath} \hat{n}d(c_1, c_2) = \sum_{l=1}^{\hat{n}} [(c_1)_i\neq (c_2)_i] = \sum_{i=1}^{\hat{n}}[(c_1)_i\neq r_i] + [(c_2)_i\neq r_i] = \hat{n} d(c_1, r) + \hat{n}d(c_2, r) \end{displaymath} where the second equality follows from the fact that $r_i$ is either equal to $(c_1)_i$ or $(c_2)_i$ when $(c_1)_i\neq (c_2)_i$. The other equalities simply use the definition of the relative distance. I claim that one of the two secrets $s\in\{s_1, s_2\}$ is such that the relative distance $d(\Enc(s), r)\leq \frac{1}{4} + \eps$. Otherwise, using the identity proven above: \begin{displaymath} d(c_1, c_2) = d(c_1, r) + d(c_2, r)> \frac{1}{2} + 2\eps \end{displaymath} This implies that $\Enc(s_1)$ and $\Enc(s_2)$ agree on less than a $\frac{1}{2} - 2\eps$ fraction of positions and contradicts the assumption on $\Enc$. Since one of the two secrets $s\in\{s_1,s_2\}$ is such that $d(\Enc(s),r)\leq\frac{1}{4}+\eps$, by list-decoding the received answer $r$ at distance $\frac{1}{4}+\eps$, we are guaranteed that the list $L$ of length $l$ returned by the decoder contains one of the two secrets. By assumption, the decoding takes polynomial time. \paragraph{2.} First, we verify that the code-words in the concatenated code of Problem 5.2.3 have sufficient agreement. Denoting by $a(\cdot,\cdot)$ the agreement between two words, let us consider two different messages $m_1$ and $m_2$ of the code of Problem 5.2.3: \begin{displaymath} a\big(\Enc(m_1), \Enc(m_2)\big) = 1- d\big(\Enc(m_1), \Enc(m_2)\big) = 1 - d\big(\Enc_1(m_1), \Enc_2(m_2)\big)\frac{1}{2} \end{displaymath} where the last equality used the following reasoning (this is basically the same thing as \textbf{1.} plus using that Hadamard codes have constant distance): \begin{itemize} \item on the positions where $\Enc_1(m_1)$ and $\Enc_1(m_2)$ coincide, then the resulting blocks after applying $\Enc_2$ will be identical and do not contribute anything to the $d\big(\Enc(m_1), \Enc(m_2)\big)$. \item on the positions where $\Enc_1(m_1)$ and $\Enc_1(m_2)$ differ, the resulting blocks after applying $\Enc_2$ will differ on exactly half the positions, because for the Hadamard code, the distance between any two different codewords is $\frac{1}{2}$. \end{itemize} Since the distance is at most 1, we get that the agreement is at least $\frac{1}{2}$ which is a sufficient condition given \textbf{1.}. Hence, we can directly apply the last part of Problem 5.2.3 with $\eps = \frac{1}{4}-\eps'$ for some $\eps'< \frac{1}{4}$ and obtain a $(\frac{1}{4}+\eps', \mathrm{poly}(\frac{1}{1/4-\eps'}))$ list decodable code. The number of questions (blocklength) will be $\mathrm{poly}(n)$ for $\eps'$ a small enough constant. \end{document}