Diffstat (limited to 'ps2/main.tex')
| -rw-r--r-- | ps2/main.tex | 313 |
1 files changed, 313 insertions, 0 deletions
diff --git a/ps2/main.tex b/ps2/main.tex
new file mode 100644
index 0000000..45638ab
--- /dev/null
+++ b/ps2/main.tex
@@ -0,0 +1,313 @@
\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}

% these are compressed lists to help fit into a 1-page limit
\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
      \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}

\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
      \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}

\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
      \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}

\DeclareMathOperator*{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbb{P}}

\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}
\newcommand{\eps}{\varepsilon}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}

\author{Thibaut Horel}
\title{CS 229r Problem Set 2 -- Solutions}

\begin{document}

\maketitle
\section*{Problem 1}

\paragraph{(a)} Let us consider the function $f$ defined for $x>-1$ by:
\begin{displaymath}
  \forall x>-1,\quad f(x) = \frac{(1+x)\log(1+x)-x}{x^2}
\end{displaymath}
Using the fact that $\log(1+x) = x -\frac{x^2}{2} + o(x^2)$ as $x\rightarrow
0$, we have:
\begin{displaymath}
  f(x) = \frac{\frac{x^2}{2} + o(x^2)}{x^2}
\end{displaymath}
Hence, $f(x)\rightarrow \frac{1}{2}$ as $x\rightarrow 0$, so $f$ can be
continuously extended at $0$. Since $f$ is continuous elsewhere, it attains
a minimum $C$ on the compact interval $[-\frac{1}{2},\frac{1}{2}]$. Moreover,
$C>0$ because $f$ is positive on this interval: the numerator
$(1+x)\log(1+x)-x$ has derivative $\log(1+x)$, hence is decreasing on
$[-\frac{1}{2},0]$ and increasing on $[0,\frac{1}{2}]$, and vanishes only at
$x=0$, where $f$ was extended by the value $\frac{1}{2}$. Then, for
$0< \eps < \frac{1}{2}$:
\begin{displaymath}
  (1+\eps)\log(1+\eps) - \eps \geq C\eps^2
\end{displaymath}
and
\begin{displaymath}
  (1-\eps)\log(1-\eps) + \eps \geq C\eps^2
\end{displaymath}
Negating these two inequalities and applying the exponential function, we
obtain:
\begin{equation}\label{eq:1}
  \frac{e^\eps}{(1+\eps)^{1+\eps}}\leq e^{-C\eps^2}
  \quad\textrm{and}\quad
  \frac{e^{-\eps}}{(1-\eps)^{1-\eps}}\leq e^{-C\eps^2}
\end{equation}
A union bound and the two Chernoff bounds given in the problem statement give:
\begin{displaymath}
  \Pr\big[|X-\mu|\geq \eps\mu\big] \leq
  \left(\frac{e^\eps}{(1+\eps)^{1+\eps}}\right)^\mu
  + \left(\frac{e^{-\eps}}{(1-\eps)^{1-\eps}}\right)^\mu
\end{displaymath}
Raising both inequalities of \cref{eq:1} to the power $\mu$, the right-hand
side is bounded from above by $2e^{-C\eps^2\mu}$. This concludes the proof of
this question.
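(As an optional numerical sanity check, not part of the proof, the following
small Python snippet evaluates $f$ on a grid of $[-\frac{1}{2},\frac{1}{2}]$
and confirms that its minimum is bounded away from $0$; the grid resolution is
an arbitrary illustrative choice.)
\begin{verbatim}
import math

def f(x):
    # continuous extension at x = 0 of ((1+x)*log(1+x) - x) / x^2
    if x == 0:
        return 0.5
    return ((1 + x) * math.log(1 + x) - x) / (x * x)

# evaluate f on a grid of [-1/2, 1/2] with step 10^-5
grid = [-0.5 + i / 10**5 for i in range(10**5 + 1)]
print(min(f(x) for x in grid))   # ~0.433, attained at x = 1/2, so C > 0
\end{verbatim}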
\paragraph{(b)} Using the symmetrization trick seen in class, we can write:
\begin{displaymath}
  \E\left[(X-\mu)^{2l}\right]
  \leq 2^{2l} \E\left[\left(\sum_i \sigma_i X_i\right)^{2l}\right]
  = 2^{2l}\sum_{1\leq i_1,\ldots, i_{2l}\leq n}\E\left[\sigma_{i_1}\ldots\sigma_{i_{2l}}\right]
  \E\left[X_{i_1}\ldots X_{i_{2l}}\right]
\end{displaymath}
As in class, the only non-zero terms in this sum are the ones where each unique
index in $i_1,\ldots, i_{2l}$ is repeated an even number of times. Furthermore,
if there are $k$ unique such indices, then $\E\left[X_{i_1}\ldots
X_{i_{2l}}\right] = p^k$. By grouping the terms of the sum according to the
number $k$ of unique indices (at most $l$, since each index is repeated at
least twice), we get:
\begin{displaymath}
  \E\left[(X-\mu)^{2l}\right]\leq 2^{2l} \sum_{k=1}^l p^k m_k
\end{displaymath}
where $m_k$ is the number of ways to choose the indices $i_1,\ldots,i_{2l}$ in
$\{1,\ldots,n\}$ so that exactly $k$ distinct indices appear, each repeated an
even number of times. We can bound $m_k$:
\begin{displaymath}
  m_k \leq \binom{n}{k} k^{2l}
\end{displaymath}
Indeed, there are $\binom{n}{k}$ ways of choosing the set of $k$ distinct
indices. Once this set is fixed, each of $i_1,\ldots,i_{2l}$ must map to
one of those $k$ indices, and there are $k^{2l}$ ways of choosing this mapping.
This implies:
\begin{displaymath}
  \E\left[(X-\mu)^{2l}\right]\leq 2^{2l}
  \sum_{k=1}^l p^k\binom{n}{k}k^{2l}
\end{displaymath}
Using the hint, we can bound $\binom{n}{k} \leq \left(\frac{ne}{k}\right)^k$,
leading to:
\begin{displaymath}
  \E\left[(X-\mu)^{2l}\right]\leq 2^{2l}
  \sum_{k=1}^l p^k\left(\frac{ne}{k}\right)^k k^{2l}
  \leq (2l)^{2l} \sum_{k=1}^l\left(\frac{nep}{l}\right)^k
\end{displaymath}
the last inequality coming from $k^{2l-k}\leq l^{2l-k}$. Assuming that
$\frac{nep}{l}\geq 2$ (this will be satisfied later when we choose $l$), we can
bound the geometric series by twice its maximum term:
\begin{displaymath}
  \E\left[(X-\mu)^{2l}\right]\leq 2\cdot(2l)^{2l}
  \left(\frac{nep}{l}\right)^l =
  2 \left(4\mu el\right)^l
\end{displaymath}
Hence, by Markov's inequality applied to the non-negative random variable
$(X-\mu)^{2l}$:
\begin{displaymath}
  \Pr\left[|X-\mu|\geq \eps\mu\right]\leq
  2\cdot\left(\frac{4\mu el}{\eps^2\mu^2}\right)^l =
  2\cdot\left(\frac{4el}{\eps^2\mu}\right)^l
\end{displaymath}
Choosing $l = \left\lceil\frac{\eps^2\mu}{5e}\right\rceil$, we obtain\footnote{Note that
  we could possibly have an issue when $\eps^2\mu\leq 5e$, but the Chernoff
  bound becomes trivial in this case: if $\eps^2\mu$ is bounded by a constant,
  there exists a constant for which the Chernoff bound is true, and we can then
  take the minimum of this constant and the $C'$ that we are constructing to
  obtain a global Chernoff bound.}:
\begin{displaymath}
  \Pr\left[|X-\mu|\geq \eps\mu\right]
  \leq
  2\cdot\left(\frac{4}{5}\right)^{C\mu\eps^2}
\end{displaymath}
where $C=\frac{1}{5e}$. The right-hand side can be bounded from above by
$2e^{-C'\mu\eps^2}$ where $C' = C\log(5/4)>0$. This concludes the proof.
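(Again as an optional sanity check, not part of the proof: since here $X$ is
a sum of $n$ i.i.d.\ Bernoulli($p$) variables, i.e.\ a Binomial($n$,$p$) random
variable, the following Python snippet compares its exact tail probability with
the bound $2e^{-C'\mu\eps^2}$ derived above. The parameters $n$, $p$ and $\eps$
are arbitrary illustrative choices, and the bound is expected to be loose since
the constant $C'$ was not optimized.)
\begin{verbatim}
import math

n, p, eps = 10000, 0.1, 0.3                    # illustrative parameters
mu = n * p
Cprime = (1 / (5 * math.e)) * math.log(5 / 4)  # C' = C*log(5/4), C = 1/(5e)

def log_binom_pmf(k):
    # log of Pr[X = k] for X ~ Binomial(n, p), computed in log-space
    return (math.lgamma(n + 1) - math.lgamma(k + 1) - math.lgamma(n - k + 1)
            + k * math.log(p) + (n - k) * math.log(1 - p))

tail = sum(math.exp(log_binom_pmf(k)) for k in range(n + 1)
           if abs(k - mu) >= eps * mu)

print("exact Pr[|X - mu| >= eps*mu] :", tail)
print("bound 2*exp(-C'*mu*eps^2)    :", 2 * math.exp(-Cprime * mu * eps**2))
\end{verbatim}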
\section*{Problem 2}

\paragraph{(a)} We will use the following key observation: for any fixed $a$,
if $x$ is a uniform random variable, then so is $x\oplus a$. This follows
immediately from the fact that $x\mapsto x\oplus a$ is a one-to-one mapping.

We first prove that $\mathcal{H}$ is two-wise independent. For two distinct
keys $x$ and $y$, there is a position $i$ such that $x_i \neq y_i$. Without
loss of generality, we can assume that this position is $0$. Conditioned on
$h(x)$, $H_0(y_0)$ is still a uniformly random value, since $H_0(y_0)$ is
chosen independently of $H_0(x_0)$ and of the other tables. It then follows
from our key observation that $h(y)$ is uniformly random and independent of
$h(x)$ (since $h(y) = H_0(y_0) \oplus \cdots$).

We can now prove that $\mathcal{H}$ is three-wise independent. Let us consider
three distinct keys $x$, $y$, and $z$. Then one of the three keys has a
position at which its character differs from the characters of the other two
keys at this position.

Indeed, assume that this is not the case; then at each position, the character
of each key is equal to the character of one of the other two keys. For
example, at position $1$ we have $x_1 = y_1$ or $x_1 = z_1$. Assume wlog that
$x_1 = y_1$. But then, since $z_1 = y_1$ or $z_1 = x_1$, we get that
$x_1 = y_1 = z_1$. Repeating this argument for each position, we obtain
$x=y=z$, a contradiction.

Without loss of generality, assume that $z$ has a position at which it differs
from both $x$ and $y$; again wlog, we can assume that this position is $0$.
Then $H_0(z_0)$ is uniformly random given $h(x)$ and $h(y)$, implying that
$h(z)$ is uniformly random given $h(x)$ and $h(y)$ (using our key observation
again). But we already know that $h(x)$ and $h(y)$ behave like two independent
uniform random variables since $\mathcal{H}$ is 2-wise independent. This
implies that $h(x)$, $h(y)$ and $h(z)$ are three independent uniform variables,
which concludes our proof.

\paragraph{(b)} We give a counterexample for $u=4$, $c=2$. In this case we
write the keys in base two. Let us consider the four distinct keys $x= 01$,
$y=10$, $z=11$ and $w=00$. Let us write $a_i = H_0(i)$ and $b_i = H_1(i)$ for
$i\in\{0,1\}$. Then we have $h(x) = b_0\oplus a_1$, $h(y) = b_1\oplus a_0$,
$h(z) = b_1\oplus a_1$ and $h(w) = b_0\oplus a_0$. But then we see that
$h(z) = h(x)\oplus h(y)\oplus h(w)$ (all the terms which appear twice cancel
out). Hence, no matter the random choices made by $H_0$ and $H_1$, $h(z)$ is
fully determined once $h(x)$, $h(y)$ and $h(w)$ have been observed, so the four
hash values cannot be independent.
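(The identity $h(z) = h(x)\oplus h(y)\oplus h(w)$ can also be checked
mechanically. The following Python snippet is only a sanity check of this
algebra, not part of the argument: it enumerates every choice of the two tables
$H_0$ and $H_1$ over a small toy range $\{0,\ldots,3\}$, which is an arbitrary
illustrative choice.)
\begin{verbatim}
from itertools import product

RANGE = range(4)   # toy hash range (2 bits); any range would do

def h(key, H0, H1):
    # key = (key_1, key_0), written as in the text with character 1 first
    k1, k0 = key
    return H0[k0] ^ H1[k1]

x, y, z, w = (0, 1), (1, 0), (1, 1), (0, 0)
count = 0
for H0 in product(RANGE, repeat=2):        # table entries H0[0], H0[1]
    for H1 in product(RANGE, repeat=2):    # table entries H1[0], H1[1]
        assert h(z, H0, H1) == h(x, H0, H1) ^ h(y, H0, H1) ^ h(w, H0, H1)
        count += 1

print("identity holds for all", count, "choices of the tables")
\end{verbatim}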
\paragraph{(c)} We first prove the hint.

\begin{lemma}
  Any set $S$ of size $k\geq 2$ contains a subset $S'$ of size at least $\log
  k$ on which $\mathcal{H}$ behaves completely randomly.
\end{lemma}

\begin{proof}
  We prove this lemma by strong induction on $k\geq 2$.

  For $k=2$, $\log k = 1$, and $\mathcal{H}$ behaves completely randomly on any
  set of size 1.

  Assume that the lemma is true for any set of size smaller than $k$, and let
  us consider a set $S$ of size $k\geq 3$. There exists a position such that
  not all words in $S$ have the same character at this position (otherwise all
  words in $S$ would be equal and $S$ would be of size 1). Wlog, let us assume
  that this position is $0$.

  Let us denote by $l\geq 2$ the number of different characters which appear at
  position $0$ in words of $S$: $l \eqdef |\{x_0: x\in S\}|$. We can now
  cluster the words of $S$ into $l$ different buckets, each bucket containing
  all the words sharing the same character at position $0$. Let us denote by
  $S^*$ the largest of these buckets, so that $|S^*|\geq \frac{k}{l}$. We can
  now construct a subset $S'$ as follows:
  \begin{itemize*}
    \item Apply the induction hypothesis to $S^*$ and obtain a set $T$ on
      which $\mathcal{H}$ behaves completely randomly (when $|S^*|=1$, simply
      take $T=S^*$).
    \item Keep exactly one word in each of the remaining $l-1$ buckets
      other than $S^*$ to create a second set $T'$.
    \item Let $S' = T'\cup T$.
  \end{itemize*}
  We now prove that $S'$ satisfies the requirements of the lemma:
  \begin{itemize*}
    \item By the induction hypothesis, $|T| \geq \log\frac{k}{l} = \log
      k -\log l$. By construction, $|T'| = l-1$ and $T$ and $T'$ are
      disjoint. Hence $|S'| \geq \log k - \log l + l-1 \geq \log k$, since
      $l-1\geq \log l$. Thus, $S'$ has the required size.
    \item By construction, all the elements of $T'$ have pairwise distinct
      characters at position $0$, which also differ from the characters at
      position $0$ of the words in $T$. Hence, given the hash values of the
      words in $T$, $\mathcal{H}$ behaves completely randomly on $T'$, by the
      same observation as the one we used in \textbf{(a)}. But by the
      induction hypothesis, $\mathcal{H}$ behaves completely randomly on
      $T$. As a consequence, $\mathcal{H}$ behaves completely randomly on
      $S'$.
  \end{itemize*}
  This shows that the lemma is true for any set of size $k$ and concludes the
  proof.
\end{proof}

Let us denote by $L$ the longest linked list and by $l$ its length (this is
a random variable):
\begin{align*}
  \Pr[l\geq k] &\leq \Pr[\exists\text{ a bucket to which at least $k$ elements
  map}]\\
  &\leq \Pr[\exists\, S\text{ s.t. } |S|\geq\log k, \text{ $S$ maps to a single
  bucket and $\mathcal{H}$ is completely random over $S$}]
\end{align*}
where the second inequality follows from the lemma. Now applying a union bound:
\begin{align*}
  \Pr[l\geq k] &\leq \sum_{S,\, |S| = \log k}\Pr[S\text{ maps to a single
  bucket and $\mathcal{H}$ is random over $S$}]\\
  &\leq\binom{n}{\log k}\,m\,\frac{1}{m^{\log k}}
  \leq\frac{n^{\log k}}{(\log k)!}\,\frac{1}{m^{\log k -1}}
\end{align*}
Using $m> n^{1.01}$, we get:
\begin{displaymath}
  \Pr[l\geq k] \leq \frac{n^{-0.01\log k + 1.01}}{(\log k)!}
\end{displaymath}
By taking $k$ large enough that the exponent $-0.01\log k + 1.01$ becomes
negative, the numerator becomes smaller than $1$ (regardless of $n$) while the
denominator $(\log k)!$ is at least $3$. Consequently, there exists a universal
constant $K$ such that:
\begin{displaymath}
  \Pr[l\geq K]\leq \frac{1}{3}
\end{displaymath}
The probability that the largest list has length at least $K$ is at most
$\frac{1}{3}$. Hence, with probability at least $\frac{2}{3}$, all the linked
lists have length less than $K = O(1)$.

\section*{Problem 3}

\paragraph{(a)} We define our scheme $Z$ to be the standard unbiased estimator
of $\Pr[h(A) = h(B)]$ for $h$ uniformly random: this is the fraction of the
indices $1\leq i\leq k$ for which $h_i(A)$ agrees with $h_i(B)$. Formally:
\begin{displaymath}
  \boxed{Z \eqdef \frac{1}{k}\sum_{i=1}^k \mathbf{1}\{h_i(A) = h_i(B)\}}
\end{displaymath}
Let us write $X_i = \mathbf{1}\{h_i(A) = h_i(B)\}$. We have $\E[X_i]
= \Pr_{h\sim\mathcal{U}}[h(A) = h(B)] = J(A, B)\eqdef p$. Hence:
\begin{displaymath}
  \Pr[|Z-J(A,B)|\geq \eps]
  = \Pr\left[\left|\sum_{i=1}^k (X_i - p)\right|\geq k\eps\right]
\end{displaymath}
Using Chebyshev's inequality (moment method of order 2):
\begin{displaymath}
  \Pr[|Z-J(A,B)|\geq \eps]
  \leq\frac{1}{(k\eps)^2}\E\left[\left(\sum_{i=1}^k(X_i - p)\right)^2\right]
\end{displaymath}
We have:
\begin{displaymath}
  \E\left[\left(\sum_{i=1}^k(X_i - p)\right)^2\right]
  = \sum_{1\leq i,j\leq k}\E\big[(X_i-p)(X_j-p)\big]
\end{displaymath}
By pairwise independence of $X_1,\ldots, X_k$, and since $X_i-p$ has mean zero,
we have that $\E\big[(X_i-p)(X_j-p)\big] = 0$ whenever $i\neq j$. Hence:
\begin{displaymath}
  \Pr[|Z-J(A,B)|\geq \eps]
  \leq \frac{1}{(k\eps)^2}\sum_{i=1}^k\E[(X_i-p)^2]\leq \frac{1}{4k\eps^2}
\end{displaymath}
where the last inequality uses $\E[(X_i-p)^2] = p(1-p)\leq \frac{1}{4}$. Hence,
choosing $k=\left\lceil\frac{3}{4\eps^2}\right\rceil$ is enough to obtain:
\begin{displaymath}
  \Pr[|Z-J(A,B)|\geq \eps]\leq \frac{1}{3}
\end{displaymath}

\paragraph{(b)}
\begin{itemize*}
  \item In case we want a probability of success of $\delta$,
\end{itemize*}

\section*{Problem 4}

Time spent: Problem 1 (4 hours), Problem 2 (3 hours), Problem 3 (2 hours).
\textbf{Total}: 9 hours.

\end{document}
