diff options
Diffstat (limited to 'ps6')
| -rw-r--r-- | ps6/main.tex | 338 |
1 files changed, 338 insertions, 0 deletions
diff --git a/ps6/main.tex b/ps6/main.tex new file mode 100644 index 0000000..87c179b --- /dev/null +++ b/ps6/main.tex @@ -0,0 +1,338 @@ +\documentclass[10pt]{article} +\usepackage{fullpage} +\usepackage[utf8x]{inputenc} +\usepackage{amsmath,amsfonts,amsthm} +\usepackage[english]{babel} +\usepackage[capitalize, noabbrev]{cleveref} +\usepackage{algorithm} +\usepackage{algpseudocode} + +\newenvironment{enumerate*}% + {\vspace{-2ex} \begin{enumerate} % + \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% + {\end{enumerate}} + +\newenvironment{itemize*}% + {\vspace{-2ex} \begin{itemize} % + \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% + {\end{itemize}} + +\newenvironment{description*}% + {\vspace{-2ex} \begin{description} % + \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% + {\end{description}} + + +\DeclareMathOperator{\E}{\mathbb{E}} +\DeclareMathOperator{\Enc}{\mathrm{Enc}} +\DeclareMathOperator{\Dec}{\mathrm{Dec}} +\DeclareMathOperator{\Ext}{\mathrm{Ext}} +\let\P\relax +\DeclareMathOperator{\P}{\mathbb{P}} +\DeclareMathOperator*{\argmin}{\arg\min} +\newcommand{\ex}[1]{\E\left[#1\right]} +\newcommand{\prob}[1]{\P\left[#1\right]} +\newcommand{\inprod}[1]{\left\langle #1 \right\rangle} +\newcommand{\R}{\mathbf{R}} +\newcommand{\poly}{\text{poly}} +\newcommand{\N}{\mathbf{N}} +\newcommand{\C}{\mathcal{C}} +\newcommand{\eps}{\varepsilon} +\newcommand{\cl}[1]{\text{\textbf{#1}}} +\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}} +\newcommand{\llbracket}{[\![} + + +\newtheorem{theorem}{Theorem} +\newtheorem{lemma}{Lemma} +\algrenewcommand\algorithmicensure{\textbf{Output:}} +\algrenewcommand\algorithmicrequire{\textbf{Input:}} + +\author{Thibaut Horel} +\title{CS 225 Problem Set 6 -- Solutions} + +\begin{document} +\maketitle + +\section*{Problem 7.1} + +Let us consider the function $f:\{0,1\}^l\to\{0,1\}$ defined by: +\begin{displaymath} + f(x) = \begin{cases} + 1&\text{if }\exists\, y\in\{0,1\}^{d^{-1}(l-1)-l}\text{ st } x\cdot y\in 
\text{Im}(G_{d^{-1}(l-1)})\\ + 0&\text{otherwise} + \end{cases} +\end{displaymath} +where $\text{Im}$ denotes the image of a function (here the generator). In +other words, $f(x)$ is 1 iff $x$ is the first $l$ bits of one of the outputs +of $G_{d^{-1}(l-1)}$. Note that the output is guaranteed to have at least $l-1 ++1 = l$ bits by definition of a pseudorandom generator. + +$f(x)$ can be computed by iterating over all possible $2^{l-1}$ inputs of +$G_{d^{-1}(l-1)}$ and checking whether or not the first $l$ bits of the output +are equal to $x$, hence $f\in \cl{E}$. + +Let us assume that there exists a probabilistic non-uniform algorithm $A$ such +that $\prob{A(x) = f(x)}\geq \frac{2}{3}$ for all $x\in\{0,1\}^l$. Using $A$, +I define the following test of randomness $T$: +\begin{displaymath} + \forall z\in \{0,1\}^{d^{-1}(l-1)},\; T(z) = A(z_l) +\end{displaymath} +where $z_l$ denotes the first $l$ bits of $z$. Then we have: +\begin{displaymath} + \prob{T(G(U_{l-1})) = 1} = \prob{A(G(U_{l-1})_l) = 1} + = \prob{A(G(U_{l-1})_l) = f(G(U_{l-1})_l)} = p_A +\end{displaymath} +where $U_{l-1}$ is a uniform variable over $\{0,1\}^{l-1}$ and $p_A$ is the +probability that $A$ is correct (\emph{i.e.} $p_A \geq \frac{2}{3}$). Indeed, $f$ +is constant equal to $1$ on the first $l$ bits of the image of $G$. We also +have: +\begin{displaymath} + \prob{T(U_d) = 1} = \prob{U_d\in \text{Im}(G)}p_A + (1-p_A)\prob{U_d\notin + \text{Im}(G)} +\end{displaymath} +where $U_d$ is uniform over $\{0,1\}^{d^{-1}(l-1)}$. 
Hence: +\begin{displaymath} + |\prob{T(G(U_{l-1})) = 1} - \prob{T(U_d) = 1}| = (2p_A - 1)(1-\prob{U_d\in + \text{Im}(G)}) +\end{displaymath} +but $\text{Im}(G)$ is of size at most $2^{l-1}$ (with equality when $G$ is injective) hence: +\begin{displaymath} + |\prob{T(G(U_{l-1})) = 1} - \prob{T(U_d) = 1}| + \geq \frac{1}{3}\left(1- \frac{2^{l-1}}{2^{d^{-1}(l-1)}}\right)\geq + \frac{1}{6} +\end{displaymath} +Hence $T$ has an advantage larger than $\frac{1}{6}$ and by definition of $G$ +this implies that $T$, hence $A$ has running time at least $d^{-1}(l-1)$, which +concludes the proof. + +\section*{Problem 7.4} + +\paragraph{1.} Given constant depth circuit $C$ of size $n$, pick $m$ such that +$m\geq n$ and $m\geq \frac{1}{\eps}$, e.g., $m = n + \frac{1}{\eps}$. And +consider the pseudorandom generator $G_m$ given by Theorem 7.29. By definition: +\begin{displaymath} + |\prob{C(G(U_1)) = 1} - \prob{C(U_2) = 1}|\leq \frac{1}{m}\leq \eps +\end{displaymath} +where $U_1$ is uniformly distributed over $\{0,1\}^{\log^{O(k)}m}$ and $U_2$ is +uniformly distributed over $\{0,1\}^m$. Note that the input size of $C$ is at +most $n\leq m$, so we might have to drop some bits from $U_2$ or $G(U_1)$ for +the above formula to make sense (this is not a problem since the truncation of +a uniform variable is still uniform). + +Now, observe that $\prob{C(U_2) = 1}$ is exactly the quantity we wish to +estimate within an additive error of $\eps$. $\prob{C(G(U_1)) = 1}$ can be computed +by averaging over all the possible $2^{\log^{O(k)}m} = 2^{\text{poly}(\log +n,\log\frac{1}{\eps})}$ values of $U_1$. Since $G_m$ can be computed in time +$\text{poly}(m) = \poly(n,\frac{1}{\eps})$, the running time is still $2^{\text{poly}(\log +n,\log\frac{1}{\eps})}$. + +By the inequality above, the computed value of $\prob{C(G(U_1)) = 1}$ is within +an additive $\eps$ error of $\prob{C(U_2) = 1}$. + +\paragraph{2.} Let us consider the circuit $C$ of depth $2$ which computes +$\phi$. 
I denote by $l$ the number of clauses in $C$ and by $v$ the number of +literals. As suggested, I assume that each clause contains exactly $r$ +literals. Note that we have $rl = v\leq n$. + +It is easy to see that $C$ can be “extended” into a circuit $C'$ +which takes as input $i\in[l]$ and assignment $\alpha\in\{0,1\}^{v-r}$ (which +should be interpreted as an assignment of all the variables which do not appear +in clause $i$) and outputs $C'(i,\alpha)$ which is 0 or 1 depending on whether +or not $\alpha$ violates all the clauses before the $i$th clause (we do not +need to know the assignment of variables in clause $i$ to verify this). $C'$ +still has constant depth (I was able to draw a circuit of depth 4 for this, +maybe 3 is possible?). + +Now, using the notations of Algorithm 2.35, a uniform sample $(i,\alpha)$ in +$[l]\times \{0,1\}^{v-r}$ can be interpreted as a random sample in $B$ (since +we do not need to specify the assignment of the variables in clause $i$) and +the output of $C'$ tells whether or not the sample is in $A'$. By the analysis +of Algorithm 2.35, for uniform $U_2$ in $[l]\times \{0,1\}^{v-r}$ (since all +the clauses have the same number of literals, we have the right probability of +picking each clause): +\begin{displaymath} + \prob{C'(U_2) = 1} = |A|/|B| +\end{displaymath} + +Using the same averaging argument as in \textbf{1.} by averaging over all +inputs of an appropriate pseudorandom generator, it is possible to approximate +$\prob{C'(U_2) = 1} = \frac{|A|}{|B|}$ within an \emph{additive} +$\frac{\eps}{l}$ error in time $2^{\text{poly}(\log n,\log\frac{l}{\eps})}= +2^{\text{poly}(\log n,\log\frac{1}{\eps})}$ since $l\leq n$. + +Now our algorithm simply outputs this average times $|B|$. The same analysis as +the one of Algorithm 2.35 shows that this quantity is within +a \emph{multiplicative} $1+\eps$ ratio of $|A|$ (since $|B|/l\leq |A|$). Since +$|A|$ is the number of satisfying assignments, this concludes the proof. 
+ +\section*{Problem 7.6} + +\paragraph{1.} Assume that we are given received word $r$ which differs from +$\Enc(x)$ on a fraction at most $\frac{1}{3q}$ of the symbols, and apply the +decoder $\Dec$ of the $q$-query smooth code for some query $i$. By the union bound, +the probability $p$ that the $q$ queries made by $\Dec$ are all at positions +where $r$ coincides with $\Enc(x)$ is: +\begin{displaymath} + p\geq 1 - q\cdot\frac{1}{3q} = \frac{2}{3} +\end{displaymath} +Hence, with probability at least $\frac{2}{3}$, $\Dec$ will only query correct +symbols of $r$ and will behave exactly the same as if it had been given +$\Enc(x)$; it will thus return $x_i$ in this case. In other words, for query +$i$, $\Dec$ returns $x_i$ with probability at least $\frac{2}{3}$ given +received word $r$ at distance at most $\frac{1}{3q}$ from $\Enc(x)$. This is +the definition of a local $\frac{1}{3q}$-decoder. + +\paragraph{2.} The scheme is very simple: the server algorithms are all equal +and given query $i\in[\hat{n}]$ and database $D$ return $\Enc(D)_i$. Given +query $i\in[n]$, the user algorithm runs the decoder $\Dec$ of the $q$-query +smooth code on input $i$. The $q$ oracle queries made by $\Dec$ to $\Enc(D)$ +are sent to the $q$ different servers. In other words, we simulate the oracle +access to $\Enc(D)$ required by $\Dec$ by having the servers answer the oracle +queries. Since $\Dec$ makes $q$ queries, we need $q$ servers. + +By definition of a smooth code, the queries made to each server are uniformly +distributed in $[\hat{n}]$ and hence have the same distribution regardless of +the index $i$ being queried; this satisfies the privacy requirement of our PIR +scheme. + +Finally, the communication complexity is $\log \hat{n} + \log|\Sigma|$ for each +query (we ask an index $j\in[\hat{n}]$ and receive a symbol in $\Sigma$). The +overall number of bits exchanged is $q(\log\hat{n} + \log|\Sigma|)$. 
+ +\paragraph{3.} I follow an approach very similar to Algorithm 7.43 except that +it is simpler since there are no corruptions. + +I interpret the database $D$ as a function $f:\{0,1\}^l \to \{0,1\}$, +\emph{i.e.}, we have $n = 2^l$. A query $i$ to the database can be interpreted +as obtaining the value $f(x)$ for some input $x\in\{0,1\}^l$. Using this +interpretation, the $\text{polylog}(n)$ requirements of the scheme are +equivalent to $\text{poly}(l)$ requirements. + +I choose a finite field $F$ of size $2l$, and consider the multilinear +extension $\hat{f}$ of $f$ over $F$. In particular, $\hat{f}:F^{l}\to F$ is +an $l$-variate polynomial of total degree at most $l$. + +I define the PIR scheme as follows: +\begin{itemize} + \item $l+1$ servers with the same algorithm: given query $z\in F^{l}$, + the server returns $\hat{f}(z)$. + \item the user algorithm: + \begin{itemize} + \item fix $l+1$ elements in $F$ distinct from $0$ (these are the + same regardless of the query). Denote these elements + $y_0,\dots, y_l$. + \item Given input $x\in\{0,1\}^l$, pick a uniformly random + direction $y\in F^{l}$ and query $z_i = x+ y\cdot y_i$ for + $0\leq i\leq l$ to the $l+1$ servers (one query per server). + \item use the $l+1$ answers from the servers to find the unique + univariate polynomial $p$ of degree at most $l$ equal to $\hat{f}|_L$ + where $L$ denotes the affine line passing through $x$ and + direction $y$. Note that this is feasible since the answers + given by the servers are the values of $\hat{f}$ at $l+1$ + distinct points of $L$, and $\hat{f}|_L$ is a univariate + polynomial (over $F$) of degree at most $l$. + \item output $p(0) = \hat{f}|_L(0) = \hat{f}(x) = f(x)$ + \end{itemize} +\end{itemize} + +The correctness of this PIR scheme was established while defining it (since +there are no corruptions, this follows directly from univariate polynomial +interpolation; there is no need to account for the corruptions as in the +lecture notes). 
+ +The privacy requirement follows from the fact that for fixed $x$ and $y_i$, +$z_i = x+y\cdot y_i$ is uniform over $F^{l}$ for $y$ uniform over $F^{l}$. +Hence query $z_i$ is uniformly distributed (in particular, the distribution does +not depend on $x$). + +Finally, the number of servers is $l+1 = \text{poly}(l)$. The number of bits +exchanged is $l\log |F| + \log |F| = O(l\log l)$ per server, for a total of +$O(l^2\log l) = \text{poly}(l)$ bits exchanged overall. + +Note that I had to choose a field of size $|F| = 2l$ because I need at least +$l+1$ distinct elements different from $0$ in $F$ ($|F| = l+2$ would have been +sufficient). + +\section*{Problem 7.13} + +\paragraph{1.} Assume by contradiction that there exists a constant $c$ and +algorithm $A$ running in time $(l+\log\hat{L})^c$ such that: +\begin{displaymath} + \P_{X,Y}\big[A(f(X), Y) = \Enc(X)_Y\big] > \frac{1}{2} + + \frac{1}{(l+\log\hat{L})^c} +\end{displaymath} +Then by Markov's inequality, for all $d$: +\begin{displaymath} + \P_{X}\Big[\P_Y\big[A(f(X), Y) = \Enc(X)_Y\big] > \frac{1}{2} + + \frac{1}{l^d}\Big]\geq \frac{1}{(l+\log\hat{L})^c} - \frac{1}{l^d} +\end{displaymath} +Since $\Enc$ can be $(1/2-1/l^d)$ local list decoded, we construct the +following algorithm $A'$ to “invert” $f'$. Given input $(f(x), y)$: +\begin{itemize} + \item do local list decoding with oracle $g_x(y') = A(f(x), y')$ and + obtain a list of $s$ possible decoded messages $x_1,\dots, x_s$. Here + I combined $\Dec_1$ and $\Dec_2$ in one step and decoded all symbols. + However, the local-list decoding property is still important for the + running time: we don't need to query the oracle for all $y'$ to do the + decoding (this is important when $\hat{L}$ is exponential in $l$). 
+ \item for each message $x_1,\dots, x_s$ check whether or not $f(x_i) + = f(x)$ + \item if one $x_i$ was found in the previous step, output $(x_i, y)$ + (pick one arbitrarily if there are several of them), if not pick $x_i$ at + random and output $(x_i, y)$. +\end{itemize} + +By the inequality above, the probability over $X$ that the oracle $g_X$ agrees +with $\Enc(X)$ on at least $\frac{1}{2}+\frac{1}{l^d}$ fraction of the symbols +is at least $\frac{1}{(l+\log\hat{L})^c} - \frac{1}{l^d}$. In those cases, local +list decoding is successful, that is, with probability $\frac{2}{3}$ the list +of messages contains $X$ and in this case $A'$ outputs $(X', Y)$ such that +$f(X') = f(X)$. + +Since $f$, $\Dec_1$ and $\Dec_2$ can be computed in time $\poly(l)$, the +running time of $A'$ is $\poly(l)(l+\log\hat{L})^c$. + +Furthermore we have just proven that the probability of “success of inversion” +of $A'$ over random input $(f(X),Y)$ is at least: +\begin{displaymath} + \frac{2}{3}\cdot\left(\frac{1}{(l+\log\hat{L})^c} - \frac{1}{l^d}\right) +\end{displaymath} +By choosing $d$ appropriately, this can be made larger than the inverse of the +running time of $A'$, yielding a contradiction to the \emph{one-wayness} +(unilaterality? unidirectionality?) of $f'$. + +\paragraph{2.} Using Proposition 7.16, it suffices to show that $G(X)$ is +previous-bit unpredictable in the following sense: for all non-uniform +algorithms $A$ running in time $m^c$ and all $i\in[m]$: +\begin{displaymath} + \prob{A(G(X)_{:i}) = G(X)_i}\leq\frac{1}{2}+\frac{1}{m^{c+1}} +\end{displaymath} +where $G(X)_i$ represents the $i$th last bit of $G(X)$ and $G(X)_{:i}$ +represents the last bits of $G(X)$ up to (but excluding) the $i$th last one. 
+ +Assume by contradiction that there exists $c$ and algorithm $A$ running in time +$m^c$ such that: +\begin{displaymath} + \P_X[A(G(X)_{:i}) = G(X)_i]>\frac{1}{2}+\frac{1}{m^{c+1}} +\end{displaymath} +Then I construct the following algorithm $A'$ contradicting the hardcoreness of +predicate $b$. Given input $f(X)$: +\begin{itemize} + \item compute $b(f(X))$, $b(f^{(2)}(X))$, $\dots$, $b(f^{(i-1)}(X))$ + \item apply $A$ on the values computed at the previous step and output the + result +\end{itemize} +The running time of $A'$ is at most $m\poly(m) + m^c$ where the $\poly(m)$ factor +accounts for the running time of $f$ composed with $b$. + +Since $f$ is a permutation, $f(X)$ has the same distribution as +$f^{(m-i+1)}(X)$ (\emph{i.e.} it is uniform on $\{0,1\}^l$), hence: +\begin{displaymath} + \P_X[A'(f(X)) = b(X)] = + \P_X[A'(f^{(m-i+1)}(X)) = b(f^{(m-i)}(X))] = + \P_X[A(G(X)_{:i}) = G(X)_i] +\end{displaymath} +by our assumption this probability is at least $\frac{1}{2}+\frac{1}{m^{c+1}}$. +Hence, the algorithm $A'$ contradicts the hardcoreness of the predicate $b$. +\end{document} |
