\documentclass[10pt]{article} \usepackage{fullpage} \usepackage[utf8]{inputenc} \usepackage{amsmath,amsfonts,amsthm} \usepackage[english]{babel} \usepackage[capitalize, noabbrev]{cleveref} \usepackage{algorithm} \usepackage{algpseudocode} \newenvironment{enumerate*}% {\vspace{-2ex} \begin{enumerate} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{enumerate}} \newenvironment{itemize*}% {\vspace{-2ex} \begin{itemize} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{itemize}} \newenvironment{description*}% {\vspace{-2ex} \begin{description} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{description}} \DeclareMathOperator{\E}{\mathbb{E}} \DeclareMathOperator{\Enc}{\mathrm{Enc}} \DeclareMathOperator{\Dec}{\mathrm{Dec}} \DeclareMathOperator{\Ext}{\mathrm{Ext}} \let\P\relax \DeclareMathOperator{\P}{\mathbb{P}} \DeclareMathOperator*{\argmin}{\arg\min} \newcommand{\ex}[1]{\E\left[#1\right]} \newcommand{\prob}[1]{\P\left[#1\right]} \newcommand{\inprod}[1]{\left\langle #1 \right\rangle} \newcommand{\R}{\mathbf{R}} \newcommand{\poly}{\text{poly}} \newcommand{\N}{\mathbf{N}} \newcommand{\C}{\mathcal{C}} \newcommand{\eps}{\varepsilon} \newcommand{\cl}[1]{\text{\textbf{#1}}} \newcommand{\eqdef}{\mathbin{\stackrel{\mathrm{def}}{=}}} \newcommand{\llbracket}{[\![} \newtheorem{theorem}{Theorem} \newtheorem{lemma}{Lemma} \algrenewcommand\algorithmicensure{\textbf{Output:}} \algrenewcommand\algorithmicrequire{\textbf{Input:}} \author{Thibaut Horel} \title{CS 225 Problem Set 6 -- Solutions} \begin{document} \maketitle \section*{Problem 7.1} Let us consider the function $f:\{0,1\}^l\to\{0,1\}$ defined by: \begin{displaymath} f(x) = \begin{cases} 1&\text{if }\exists\, y\in\{0,1\}^{d^{-1}(l-1)-l}\text{ such that } x\cdot y\in \text{Im}(G_{d^{-1}(l-1)})\\ 0&\text{otherwise} \end{cases} \end{displaymath} where $\text{Im}$ denotes the image of a function (here the generator). 
In other words, $f(x)$ is 1 iff $x$ is the first $l$ bits of one of the outputs of $G_{d^{-1}(l-1)}$. Note that the output is guaranteed to have at least $l-1 +1 = l$ bits by definition of a pseudorandom generator. $f(x)$ can be computed by iterating over all possible $2^{l-1}$ inputs of $G_{d^{-1}(l-1)}$ and checking whether or not the first $l$ bits of the output are equal to $x$, hence $f\in \cl{E}$. Let us assume that there exists a probabilistic non-uniform algorithm $A$ such that $\prob{A(x) = f(x)}\geq \frac{2}{3}$ for all $x\in\{0,1\}^l$. Using $A$, I define the following test of randomness $T$: \begin{displaymath} \forall z\in \{0,1\}^{d^{-1}(l-1)},\; T(z) = A(z_l) \end{displaymath} where $z_l$ denotes the first $l$ bits of $z$. Then we have: \begin{displaymath} \prob{T(G(U_{l-1})) = 1} = \prob{A(G(U_{l-1})_l) = 1} = \prob{A(G(U_{l-1})_l) = f(G(U_{l-1})_l)} = p_A \end{displaymath} where $U_{l-1}$ is a uniform variable over $\{0,1\}^{l-1}$ and $p_A$ is the probability that $A$ is correct (\emph{i.e.}, $p_A \geq \frac{2}{3}$). Indeed, $f$ is constant equal to $1$ on the first $l$ bits of the image of $G$. We also have: \begin{displaymath} \prob{T(U_d) = 1} = \prob{U_d\in \text{Im}(G)}p_A + (1-p_A)\prob{U_d\notin \text{Im}(G)} \end{displaymath} where $U_d$ is uniform over $\{0,1\}^{d^{-1}(l-1)}$. Hence: \begin{displaymath} |\prob{T(G(U_{l-1})) = 1} - \prob{T(U_d) = 1}| = (2p_A - 1)(1-\prob{U_d\in \text{Im}(G)}) \end{displaymath} but $\text{Im}(G)$ is of size at most $2^{l-1}$ (with equality when $G$ is injective) hence: \begin{displaymath} |\prob{T(G(U_{l-1})) = 1} - \prob{T(U_d) = 1}| \geq \frac{1}{3}\left(1- \frac{2^{l-1}}{2^{d^{-1}(l-1)}}\right)\geq \frac{1}{6} \end{displaymath} Hence $T$ has an advantage of at least $\frac{1}{6}$ and by definition of $G$ this implies that $T$, and hence $A$, has running time at least $d^{-1}(l-1)$, which concludes the proof. 
\section*{Problem 7.4} \paragraph{1.} Given a constant-depth circuit $C$ of size $n$, pick $m$ such that $m\geq n$ and $m\geq \frac{1}{\eps}$, e.g., $m = n + \frac{1}{\eps}$, and consider the pseudorandom generator $G_m$ given by Theorem 7.29. By definition: \begin{displaymath} |\prob{C(G(U_1)) = 1} - \prob{C(U_2) = 1}|\leq \frac{1}{m}\leq \eps \end{displaymath} where $U_1$ is uniformly distributed over $\{0,1\}^{\log^{O(k)}m}$ and $U_2$ is uniformly distributed over $\{0,1\}^m$. Note that the input size of $C$ is at most $n\leq m$, so we might have to drop some bits from $U_2$ or $G(U_1)$ for the above formula to make sense (this is not a problem since the truncation of a uniform variable is still uniform). Now, observe that $\prob{C(U_2) = 1}$ is exactly the quantity we wish to estimate within an additive error of $\eps$. $\prob{C(G(U_1)) = 1}$ can be computed by averaging over all the possible $2^{\log^{O(k)}m} = 2^{\text{poly}(\log n,\log\frac{1}{\eps})}$ values of $U_1$. Since $G_m$ can be computed in time $\text{poly}(m) = \poly(n,\frac{1}{\eps})$, the running time is still $2^{\text{poly}(\log n,\log\frac{1}{\eps})}$. By the inequality above, the computed value of $\prob{C(G(U_1)) = 1}$ is within an additive $\eps$ error of $\prob{C(U_2) = 1}$. \paragraph{2.} Let us consider the circuit $C$ of depth $2$ which computes $\phi$. I denote by $l$ the number of clauses in $C$ and by $v$ the number of literals. As suggested, I assume that each clause contains exactly $r$ literals. Note that we have $rl = v\leq n$. It is easy to see that $C$ can be “extended” into a circuit $C'$ which takes as input $i\in[l]$ and an assignment $\alpha\in\{0,1\}^{v-r}$ (which should be interpreted as an assignment of all the variables which do not appear in clause $i$) and outputs $C'(i,\alpha)$ which is 0 or 1 depending on whether or not $\alpha$ violates all the clauses before the $i$th clause (we do not need to know the assignment of variables in clause $i$ to verify this). 
$C'$ still has constant depth (I was able to draw a circuit of depth 4 for this, maybe 3 is possible?). Now, using the notations of Algorithm 2.35, a uniform sample $(i,\alpha)$ in $[l]\times \{0,1\}^{v-r}$ can be interpreted as a random sample in $B$ (since we do not need to specify the assignment of the variables in clause $i$) and the output of $C'$ tells whether or not the sample is in $A'$. By the analysis of Algorithm 2.35, for uniform $U_2$ in $[l]\times \{0,1\}^{v-r}$ (since all the clauses have the same number of literals, we have the right probability of picking each clause): \begin{displaymath} \prob{C'(U_2) = 1} = |A|/|B| \end{displaymath} Using the same averaging argument as in \textbf{1.} by averaging over all inputs of an appropriate pseudorandom generator, it is possible to approximate $\prob{C'(U_2) = 1} = \frac{|A|}{|B|}$ within an \emph{additive} $\frac{\eps}{l}$ error in time $2^{\text{poly}(\log n,\log\frac{l}{\eps})}= 2^{\text{poly}(\log n,\log\frac{1}{\eps})}$ since $l\leq n$. Now our algorithm simply outputs this average times $|B|$. The same analysis as the one for Algorithm 2.35 shows that this quantity is within a \emph{multiplicative} $1+\eps$ ratio of $|A|$ (since $|B|/l\leq |A|$). Since $|A|$ is the number of satisfying assignments, this concludes the proof. \section*{Problem 7.6} \paragraph{1.} Assume that we are given a received word $r$ which differs from $\Enc(x)$ on a fraction at most $\frac{1}{3q}$ of the symbols, and apply the decoder $\Dec$ of the $q$-query smooth code for some query $i$. By a union bound, the probability $p$ that the $q$ queries made by $\Dec$ are all at positions where $r$ coincides with $\Enc(x)$ is: \begin{displaymath} p\geq 1 - q\cdot\frac{1}{3q} = \frac{2}{3} \end{displaymath} Hence, with probability at least $\frac{2}{3}$, $\Dec$ will only query correct symbols of $r$ and will behave exactly the same as if it had been given $\Enc(x)$; it will thus return $x_i$ in this case. 
In other words, for query $i$, $\Dec$ returns $x_i$ with probability at least $\frac{2}{3}$ given a received word $r$ at distance at most $\frac{1}{3q}$ from $\Enc(x)$. This is the definition of a local $\frac{1}{3q}$-decoder. \paragraph{2.} The scheme is very simple: the server algorithms are all equal and given query $i\in[\hat{n}]$ and database $D$ return $\Enc(D)_i$. Given query $i\in[n]$, the user algorithm runs the decoder $\Dec$ of the $q$-query smooth code on input $i$. The $q$ oracle queries made by $\Dec$ to $\Enc(D)$ are sent to the $q$ different servers. In other words, we simulate the oracle access to $\Enc(D)$ required by $\Dec$ by having the servers answer the oracle queries. Since $\Dec$ makes $q$ queries, we need $q$ servers. By definition of a smooth code, the queries made to the servers are uniformly distributed in $[\hat{n}]$ and hence have the same distribution regardless of the index $i$ being queried; this satisfies the privacy requirement of our PIR scheme. Finally, the communication complexity is $\log \hat{n} + \log|\Sigma|$ for each query (we ask an index $j\in[\hat{n}]$ and receive a symbol in $\Sigma$). The overall number of bits exchanged is $q(\log\hat{n} + \log|\Sigma|)$. \paragraph{3.} I follow an approach very similar to Algorithm 7.43 except that it is simpler since there are no corruptions. I interpret the database $D$ as a function $f:\{0,1\}^l \to \{0,1\}$, \emph{i.e.}, we have $n = 2^l$. A query $i$ to the database can be interpreted as obtaining the value $f(x)$ for some input $x\in\{0,1\}^l$. Using this interpretation the $\text{polylog}(n)$ requirements of the scheme are equivalent to $\text{poly}(l)$ requirements. I choose a finite field $F$ of size at least $2l$ (such a field exists, e.g.\ of prime size by Bertrand's postulate), and consider the multilinear extension $\hat{f}$ of $f$ over $F$. In particular, $\hat{f}:F^{l}\to F$ is an $l$-variate polynomial of total degree at most $l$. 
I define the PIR scheme as follows: \begin{itemize} \item $l+1$ servers with the same algorithm: given query $z\in F^{l}$, the server returns $\hat{f}(z)$. \item the user algorithm: \begin{itemize} \item fix $l+1$ elements in $F$ distinct from $0$ (these are the same regardless of the query). Denote these elements $y_0,\dots, y_l$. \item Given input $x\in\{0,1\}^l$, pick a uniformly random direction $y\in F^{l}$ and query $z_i = x+ y\cdot y_i$ for $0\leq i\leq l$ to the $l+1$ servers (one query per server). \item use the $l+1$ answers from the servers to find the unique univariate polynomial $p$ of degree at most $l$ equal to $\hat{f}|_L$ where $L$ denotes the affine line passing through $x$ with direction $y$. Note that this is feasible since the answers given by the servers are the values of $\hat{f}$ at $l+1$ distinct points of $L$, and $\hat{f}|_L$ is a univariate polynomial (over $F$) of degree at most $l$. \item output $p(0) = \hat{f}|_L(0) = \hat{f}(x) = f(x)$ \end{itemize} \end{itemize} The correctness of this PIR scheme was established while defining it (since there are no corruptions, this follows directly from univariate polynomial interpolation; there is no need to account for corruptions as in the lecture notes). The privacy requirement follows from the fact that for fixed $x$ and fixed $y_i\neq 0$, $z_i = x+y\cdot y_i$ is uniform over $F^{l}$ for $y$ uniform over $F^{l}$. Hence the $i$th query is uniformly distributed (in particular, its distribution does not depend on $x$). Finally, the number of servers is $l+1 = \text{poly}(l)$. The number of bits exchanged is $l\log |F| + \log |F| = O(l\log l)$ per server, for a total of $O(l^2\log l) = \text{poly}(l)$ bits exchanged overall. Note that I had to choose a field of size $|F| \geq 2l$ because I need at least $l+1$ distinct elements different from $0$ in $F$ ($|F| = l+2$ would have been sufficient). 
\section*{Problem 7.13} \textbf{1.} Assume by contradiction that there exists a constant $c$ and an algorithm $A$ running in time $(l+\log\hat{L})^c$ such that: \begin{displaymath} \P_{X,Y}\big[A(f(X), Y) = \Enc(X)_Y\big] > \frac{1}{2} + \frac{1}{(l+\log\hat{L})^c} \end{displaymath} Then by Markov's inequality, for all $d$: \begin{displaymath} \P_{X}\Big[\P_Y\big[A(f(X), Y) = \Enc(X)_Y\big] > \frac{1}{2} + \frac{1}{l^d}\Big]\geq \frac{1}{(l+\log\hat{L})^c} - \frac{1}{l^d} \end{displaymath} Since $\Enc$ can be $(1/2-1/l^d)$ locally list-decoded, we construct the following algorithm $A'$ to “invert” $f'$. Given input $(f(x), y)$: \begin{itemize} \item do local list decoding with oracle $g_x(y') = A(f(x), y')$ and obtain a list of $s$ possible decoded messages $x_1,\dots, x_s$. Here I combined $\Dec_1$ and $\Dec_2$ in one step and decoded all symbols. However, the local-list decoding property is still important for the running time: we don't need to query the oracle for all $y'$ to do the decoding (this is important when $\hat{L}$ is exponential in $l$). \item for each message $x_1,\dots, x_s$ check whether or not $f(x_i) = f(x)$ \item if one $x_i$ was found in the previous step, output $(x_i, y)$ (pick one arbitrarily if there are several of them), if not pick $x_i$ at random and output $(x_i, y)$. \end{itemize} By the inequality above, the probability over $X$ that the oracle $g_X$ agrees with $\Enc(X)$ on at least a $\frac{1}{2}+\frac{1}{l^d}$ fraction of the symbols is at least $\frac{1}{(l+\log\hat{L})^c} - \frac{1}{l^d}$. In those cases, local list decoding is successful, that is, with probability at least $\frac{2}{3}$ the list of messages contains $X$, and in this case $A'$ outputs $(X', Y)$ such that $f(X') = f(X)$. Since $f$, $\Dec_1$ and $\Dec_2$ can be computed in time $\poly(l)$, the running time of $A'$ is $\poly(l)(l+\log\hat{L})^c$. 
Furthermore we have just proven that the probability of “success of inversion” of $A'$ over a random input $(f(X),Y)$ is at least: \begin{displaymath} \frac{2}{3}\cdot\left(\frac{1}{(l+\log\hat{L})^c} - \frac{1}{l^d}\right) \end{displaymath} By choosing $d$ appropriately, this can be made larger than the inverse of the running time of $A'$, yielding a contradiction to the \emph{one-wayness} of $f'$. \paragraph{2.} Using Proposition 7.16, it suffices to show that $G(X)$ is previous-bit unpredictable in the following sense: for every non-uniform algorithm $A$ running in time $m^c$ and all $i\in[m]$: \begin{displaymath} \prob{A(G(X)_{:i}) = G(X)_i}\leq\frac{1}{2}+\frac{1}{m^{c+1}} \end{displaymath} where $G(X)_i$ represents the $i$th last bit of $G(X)$ and $G(X)_{:i}$ represents the last bits of $G(X)$ up to (but excluding) the $i$th last one. Assume by contradiction that there exists $c$ and an algorithm $A$ running in time $m^c$ such that: \begin{displaymath} \P_X[A(G(X)_{:i}) = G(X)_i]>\frac{1}{2}+\frac{1}{m^{c+1}} \end{displaymath} Then I construct the following algorithm $A'$ contradicting the hardcoreness of predicate $b$. Given input $f(X)$: \begin{itemize} \item compute $b(f(X))$, $b(f^{(2)}(X))$, $\dots$, $b(f^{(i-1)}(X))$ \item apply $A$ on the values computed at the previous step and output the result \end{itemize} The running time of $A'$ is at most $m\poly(m) + m^c$ where the $\poly(m)$ factor accounts for the running time of $f$ composed with $b$. Since $f$ is a permutation, $f(X)$ has the same distribution as $f^{(m-i+1)}(X)$ (\emph{i.e.}, it is uniform on $\{0,1\}^l$), hence: \begin{displaymath} \P_X[A'(f(X)) = b(X)] = \P_X[A'(f^{(m-i+1)}(X)) = b(f^{(m-i)}(X))] = \P_X[A(G(X)_{:i}) = G(X)_i] \end{displaymath} by our assumption this probability is at least $\frac{1}{2}+\frac{1}{m^{c+1}}$. Hence, the algorithm $A'$ contradicts the hardcoreness of the predicate $b$. \end{document}