\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}

% these are compressed lists to help fit into a 1 page limit
\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}
 
\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}
 
\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}

\DeclareMathOperator*{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbb{P}}

\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}
\newcommand{\eps}{\varepsilon}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}

\author{Thibaut Horel}
\title{CS 229r Problem Set 2 -- Solutions}

\begin{document}

\maketitle
\section*{Problem 1}

\paragraph{(a)} Let us consider the function $f$ defined for $x>-1$, $x\neq 0$, by:
\begin{displaymath}
    f(x) = \frac{(1+x)\log(1+x)-x}{x^2}
\end{displaymath}
Using the fact that $\log(1+x) = x -\frac{x^2}{2} + o(x^2)$ as $x\rightarrow
0$, we have:
\begin{displaymath}
    f(x) = \frac{{\frac{x^2}{2} + o(x^2)}}{x^2}
\end{displaymath}
Hence, $f(x)\rightarrow \frac{1}{2}$ as $x\rightarrow 0$, so $f$ can be
continuously extended at $0$ by setting $f(0)=\frac{1}{2}$. Being continuous,
$f$ attains a minimum $C$ on the compact interval
$[-\frac{1}{2},\frac{1}{2}]$. Moreover, $C>0$: the derivative of the numerator
$(1+x)\log(1+x)-x$ is $\log(1+x)$, so the numerator has a strict minimum of
$0$ at $x=0$ and is positive for $x\neq 0$, hence $f>0$ on the whole interval.
Then, for $0< \eps < \frac{1}{2}$:
\begin{displaymath}
    (1+\eps)\log(1+\eps) - \eps \geq C\eps^2
\end{displaymath}
and, since $f(-\eps)\geq C$ as well,
\begin{displaymath}
    (1-\eps)\log(1-\eps) + \eps \geq C\eps^2
\end{displaymath}
By applying the exponential function, these two inequalities imply:
\begin{equation}\label{eq:1}
    \frac{e^\eps}{(1+\eps)^{1+\eps}}\leq e^{-C\eps^2}
    \quad\textrm{and}\quad
    \frac{e^{-\eps}}{(1-\eps)^{1-\eps}}\leq e^{-C\eps^2}
\end{equation}
A union bound and the two Chernoff bounds given in the problem statement give:
\begin{displaymath}
    \Pr\big[|X-\mu|\geq \eps\mu\big] \leq 
    \left(\frac{e^\eps}{(1+\eps)^{1+\eps}}\right)^\mu + 
    \left(\frac{e^{-\eps}}{(1-\eps)^{1-\eps}}\right)^\mu
\end{displaymath}
Raising both inequalities of \cref{eq:1} to the power $\mu$, the right-hand
side is bounded from above by $2e^{-C\mu\eps^2}$. This concludes the proof of
this question.
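
As a quick numerical illustration (not needed for the proof), the following
sketch evaluates $f$ on a grid over $[-\frac{1}{2},\frac{1}{2}]$; the minimum
stays around $0.43$ (attained at $x=\frac{1}{2}$), consistent with $C>0$.
\begin{verbatim}
import math

def f(x):
    # f(x) = ((1+x)*log(1+x) - x) / x^2, extended by continuity: f(0) = 1/2
    if x == 0.0:
        return 0.5
    return ((1 + x) * math.log(1 + x) - x) / (x * x)

# Evaluate f on a fine grid over [-1/2, 1/2]; prints roughly 0.4328.
grid = [-0.5 + k / 10000 for k in range(10001)]
print(min(f(x) for x in grid))
\end{verbatim}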

\paragraph{(b)} Using the symmetrization trick seen in class, we can write:
\begin{displaymath}
    \E\left[(X-\mu)^{2l}\right]
    \leq 2^{2l} \E\left[\left(\sum_i \sigma_i X_i\right)^{2l}\right]
    = 2^{2l}\sum_{1\leq i_1,\ldots, i_{2l}\leq n}\E\left[\sigma_{i_1}\ldots\sigma_{i_{2l}}\right]
    \E\left[X_{i_1}\ldots X_{i_{2l}}\right]
\end{displaymath}
As in class, the only non-zero terms in this sum are those where each distinct
index among $i_1,\ldots, i_{2l}$ appears an even number of times. Furthermore,
if there are $k$ distinct such indices, then $\E\left[X_{i_1}\ldots
X_{i_{2l}}\right] = p^k$ (powers of an indicator variable are the indicator
itself, and the $X_i$ are independent). Grouping the terms of the sum
according to the number $k$ of distinct indices (at most $l$, since each index
appears at least twice), we get:
\begin{displaymath}
    \E\left[(X-\mu)^{2l}\right]\leq 2^{2l}
    \sum_{k=1}^l p^k m_k
\end{displaymath}
where $m_k$ is the number of ways to choose the indices $i_1,\ldots,i_{2l}$ in
$\{1,\ldots,n\}$ with exactly $k$ distinct values, each appearing an even
number of times. We can bound $m_k$:
\begin{displaymath}
    m_k \leq \binom{n}{k} k^{2l}
\end{displaymath}
Indeed, there are $\binom{n}{k}$ ways of choosing the set of $k$ distinct
indices. Once this set is fixed, each of the positions $i_1,\ldots,i_{2l}$
must map to one of those $k$ indices, and there are at most $k^{2l}$ such
mappings.
This implies:
\begin{displaymath}
    \E\left[(X-\mu)^{2l}\right]\leq 2^{2l}
    \sum_{k=1}^l p^k\binom{n}{k}k^{2l}
\end{displaymath}
Using the hint, we can bound $\binom{n}{k} \leq \left(\frac{ne}{k}\right)^k$
leading to:
\begin{displaymath}
    \E\left[(X-\mu)^{2l}\right]\leq 2^{2l}
    \sum_{k=1}^l p^k\left(\frac{ne}{k}\right)^k k^{2l}
    \leq (2l)^{2l} \sum_{k=1}^l\left(\frac{nep}{l}\right)^k
\end{displaymath}
the last inequality coming from $k^{2l-k}\leq l^{2l-k}$. Assuming that
$\frac{nep}{l}\geq 2$ (this will be satisfied later when we choose $l$), we
can bound the geometric series by twice its largest term:
\begin{displaymath}
    \E\left[(X-\mu)^{2l}\right]\leq 2\cdot(2l)^{2l}
    \left(\frac{nep}{l}\right)^l = 
    2 \left(4\mu el\right)^l
\end{displaymath}
Hence, by Markov's inequality applied to $(X-\mu)^{2l}$:
\begin{displaymath}
    \Pr\left[|X-\mu|\geq \eps\mu\right]\leq 
    2\cdot\left(\frac{4\mu el}{\eps^2\mu^2}\right)^l = 
    2\cdot\left(\frac{4el}{\eps^2\mu}\right)^l 
\end{displaymath}
Choosing $l = \left\lceil\frac{\eps^2\mu}{5e}\right\rceil$, we obtain\footnote{Note that
    there could be an issue when $\eps^2\mu\leq 5e$, but the Chernoff
bound becomes trivial in this case: if $\eps^2\mu$ is bounded by a constant,
there exists a constant for which the Chernoff bound holds; we can then take
the minimum of this constant and the $C'$ that we are constructing to obtain
a global Chernoff bound.}:
\begin{displaymath}
    \Pr\left[|X-\mu|\geq \eps\mu\right]
    \leq
    2\cdot\left(\frac{4}{5}\right)^{C\mu\eps^2}
\end{displaymath}
where $C=\frac{1}{5e}$. The right-hand side can be bounded from above by
$2e^{-C'\mu\eps^2}$ where $C' = C\log(5/4)>0$. This concludes the proof.
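
As an aside, a small Monte Carlo sketch comparing the empirical tail
probability with the bound $2e^{-C'\mu\eps^2}$, $C'=\frac{\log(5/4)}{5e}$; the
parameters $(n, p, \eps)$ below are arbitrary illustration choices.
\begin{verbatim}
import math, random

# Illustration only: empirical tail Pr[|X - mu| >= eps*mu] versus the
# bound 2*exp(-C'*mu*eps^2) with C' = log(5/4)/(5e).
n, p, eps = 4000, 0.1, 0.4
mu = n * p
Cp = math.log(5 / 4) / (5 * math.e)

trials = 2000
hits = sum(abs(sum(random.random() < p for _ in range(n)) - mu) >= eps * mu
           for _ in range(trials))
print(hits / trials)                       # empirical tail, ~0 here
print(2 * math.exp(-Cp * mu * eps * eps))  # bound, ~0.70 here
\end{verbatim}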


\section*{Problem 2}

\paragraph{(a)} We will use the following key observation: for any fixed $a$,
if $x$ is a uniform random variable, then so is $x\oplus a$. This follows
immediately from the fact that $x\mapsto x\oplus a$ is a one-to-one mapping:
for any value $v$, $\Pr[x\oplus a = v] = \Pr[x = v\oplus a]$.

We first prove that $\mathcal{H}$ is 2-wise independent. For two distinct keys
$x$ and $y$, there is a position $i$ such that $x_i \neq y_i$. Without loss of
generality, we can assume that this position is $0$. Given $h(x)$, $H_0(y_0)$
is still a completely random value (since $y_0\neq x_0$, the entry $H_0(y_0)$
is chosen independently of $H_0(x_0)$ and of all the other entries used by
$h(x)$). It then follows from our key observation that $h(y)$ is uniformly
random given $h(x)$ (since $h(y) = H_0(y_0) \oplus \cdots$).

We can now prove that $\mathcal{H}$ is 3-wise independent. Let us consider
three distinct keys $x$, $y$ and $z$. Then one of the three keys has a
position at which its character differs from the characters of the other two
keys.

Indeed, assume that this is not the case: then at each position, the character
of each key is equal to the character of one of the other two keys. For
example, we have $x_1 = y_1$ or $x_1 = z_1$. Assume wlog that $x_1 = y_1$. But
then since $z_1 = y_1$ or $z_1 = x_1$, we get that $x_1 = y_1 = z_1$. We can
repeat this argument at each position and obtain that $x=y=z$,
a contradiction.

Without loss of generality, assume that $z$ has a position at which it differs
from both $x$ and $y$, and (again wlog) that this position is $0$. Then
$H_0(z_0)$ is uniformly random given $h(x)$ and $h(y)$, implying that $h(z)$
is uniformly random given $h(x)$ and $h(y)$ (using our key observation again).
But we already know that $h(x)$ and $h(y)$ behave like two independent uniform
random variables since $\mathcal{H}$ is 2-wise independent. This implies that
$h(x)$, $h(y)$ and $h(z)$ are three independent uniform variables and
concludes our proof.
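
For concreteness, here is a short sketch of the family $\mathcal{H}$ in code
(simple tabulation); the number of characters, alphabet size and output width
below are arbitrary illustration choices.
\begin{verbatim}
import random

# Simple tabulation: a key is c characters; each character is looked up
# in its own table of uniformly random values and the results are XORed.
C_CHARS = 4      # characters per key (illustration choice)
ALPHABET = 256   # characters are bytes (illustration choice)
OUT_BITS = 16    # hash values drawn from [0, 2^16) (illustration choice)

tables = [[random.randrange(2 ** OUT_BITS) for _ in range(ALPHABET)]
          for _ in range(C_CHARS)]

def h(key):
    # h(x) = H_0(x_0) xor ... xor H_{c-1}(x_{c-1}), key given as a char tuple
    out = 0
    for i, ch in enumerate(key):
        out ^= tables[i][ch]
    return out

print(h((1, 2, 3, 4)))
\end{verbatim}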

\paragraph{(b)} We give a counterexample for $u=4$, $c=2$. In this case we
write the keys in base two. Let us consider the four distinct keys $x= 01$,
$y=10$, $z=11$ and $w=00$. Let us write $a_i = H_0(i)$ and $b_i = H_1(i)$ for
$i\in\{0,1\}$. Then we have $h(x) = b_0\oplus a_1$, $h(y) = b_1\oplus a_0$,
$h(z) = b_1\oplus a_1$ and $h(w) = b_0\oplus a_0$. But then we see that $h(z)
= h(x)\oplus h(y)\oplus h(w)$ ($a_0$ and $b_0$ each appear twice and cancel
out). Hence, no matter the random choices made by $H_0$ and $H_1$, $h(z)$ is
fully determined once $h(x)$, $h(y)$ and $h(w)$ have been observed, so
$\mathcal{H}$ is not 4-wise independent.
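
The identity can also be checked mechanically; the sketch below re-draws the
tables many times to stress that it holds for every realization (the table
entry width is an arbitrary illustration choice).
\begin{verbatim}
import random

# Keys from (b) written as (character 0, character 1): x=01, y=10, z=11, w=00.
x, y, z, w = (1, 0), (0, 1), (1, 1), (0, 0)

def tab_hash(H, key):
    return H[0][key[0]] ^ H[1][key[1]]

for _ in range(1000):
    # Fresh uniformly random tables H_0 and H_1 with 16-bit entries.
    H = [[random.randrange(2 ** 16) for _ in range(2)] for _ in range(2)]
    assert tab_hash(H, z) == tab_hash(H, x) ^ tab_hash(H, y) ^ tab_hash(H, w)
print("h(z) = h(x) xor h(y) xor h(w) held in all samples")
\end{verbatim}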

\paragraph{(c)} We first prove the hint.

\begin{lemma}
    Any set $S$ of size $k\geq 2$ contains a subset $S'$ of size at least $\log
    k$ on which $\mathcal{H}$ behaves completely randomly.
\end{lemma}

\begin{proof}
    We prove this lemma by strong induction on $k\geq 2$.

    For $k=2$, $\log k = 1$, and $\mathcal{H}$ behaves completely randomly on
    any set of size 1.

    Assume that the lemma is true for any set of size smaller than $k$, and
    let us consider a set $S$ of size $k\geq 3$. There exists a position such
    that not all words in $S$ have the same character at this position
    (otherwise all words in $S$ would be equal and $S$ would have size 1).
    Wlog, let us assume that this position is $0$.

    Let us denote by $l\geq 2$ the number of different characters which appear
    at position $0$ in words of $S$: $l \eqdef |\{x_0: x\in S\}|$. We can now
    cluster the words of $S$ into $l$ different buckets, each bucket
    containing all the words sharing the same character at position $0$. Let
    us denote by $S^*$ the largest of these buckets. We have $|S^*|\geq
    \frac{k}{l}$. We can now construct a subset $S'$ as follows:
    \begin{itemize*}
        \item Apply the induction hypothesis to $S^*$ and obtain a set $T$ on
            which $\mathcal{H}$ behaves completely randomly.
        \item Keep exactly one word in each of the remaining $l-1$ buckets
            other than $S^*$ to create a second set $T'$.
        \item Let $S' = T'\cup T$.
    \end{itemize*}
    We now prove that $S'$ satisfies the requirements of the lemma:
    \begin{itemize*}
        \item by the induction hypothesis, $|T| \geq \log\frac{k}{l} = \log
            k -\log l$. By construction, $|T'| = l-1$ and $T$ and $T'$ are
            disjoint. Hence $|S'| \geq \log k - \log l + l-1 \geq \log k$, since
            $l-1\geq \log l$. Thus, $S'$ has the required size.
        \item By construction, the elements of $T'$ have pairwise distinct
            characters at position 0, all distinct from the character at
            position 0 shared by the words in $T$. Hence, given the hash
            values of the words in $T$, $\mathcal{H}$ behaves completely
            randomly on $T'$ by the same observation as the one we used in
            \textbf{(a)}. But by the induction hypothesis, $\mathcal{H}$
            behaves completely randomly on $T$. As a consequence,
            $\mathcal{H}$ behaves completely randomly on $S'$.
    \end{itemize*}
    This shows that the lemma is true for any set of size $k$ and concludes the
    proof.
\end{proof}

Let us denote by $L$ the longest linked list and by $l$ its length (this is
a random variable):
\begin{align*}
    \Pr[l\geq k] &\leq \Pr[\exists\, k\text{ elements mapping to $L$}]\\
                 &\leq \Pr[\exists\, S\text{ s.t. } |S|\geq\log k, \text{ $S$ maps to $L$ and
    $\mathcal{H}$ is completely random over $S$}]
\end{align*}
where the second inequality follows from the lemma. Now applying a union bound:
\begin{align*}
    \Pr[l\geq k] &\leq \sum_{S,\, |S| = \log k}\Pr[S\text{ maps to $L$ and
    $\mathcal{H}$ random over $S$}]\\
    &\leq\binom{n}{\log k}m\frac{1}{m^{\log k}}
    \leq\frac{n^{\log k}}{(\log k)!}\frac{1}{m^{\log k -1}}
\end{align*}
Using $m> n^{1.01}$, we get:
\begin{displaymath}
    \Pr[l\geq k] \leq \frac{n^{-0.01\log k + 1.01}}{(\log k)!}
\end{displaymath}
We see that by taking $k$ large enough that $-0.01\log k + 1.01$ becomes
negative, the numerator becomes smaller than 1 (regardless of $n$).
Consequently, there exists a universal constant $K$ such that:
\begin{displaymath}
    \Pr[l\geq K]\leq \frac{1}{3}
\end{displaymath}
The probability that the longest list has length at least $K$ is thus less
than $\frac{1}{3}$. Hence, with probability at least $\frac{2}{3}$, all the
linked lists have length less than $K = O(1)$.
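
Purely as an illustration of this statement (not a proof), one can hash $n$
keys into $m > n^{1.01}$ buckets with simple tabulation and look at the
longest chain; the parameters below are arbitrary.
\begin{verbatim}
import random
from collections import Counter

n = 50000
m = 1 << 17   # 131072 > 50000**1.01 (about 55700)
tables = [[random.randrange(m) for _ in range(256)] for _ in range(4)]

def h(x):
    # Keys are 4-byte integers, split into bytes (characters).
    out = 0
    for i in range(4):
        out ^= tables[i][(x >> (8 * i)) & 0xFF]
    return out

chains = Counter(h(x) for x in range(n))
print(max(chains.values()))   # longest chain; a small constant across runs
\end{verbatim}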

\section*{Problem 3}

\paragraph{(a)} We define our scheme $Z$ to be the standard unbiased estimator
of $\Pr[h(A) = h(B)]$ for $h$ uniformly random: the fraction of indices $i$,
$1\leq i\leq k$, for which $h_i(A)$ agrees with $h_i(B)$. Formally:
\begin{displaymath}
    \boxed{Z \eqdef \frac{1}{k}\sum_{i=1}^k \mathbf{1}\{h_i(A) = h_i(B)\}}
\end{displaymath}
Let $X_i = \mathbf{1}\{h_i(A) = h_i(B)\}$ and $p \eqdef J(A,B)$. We have
$\E[X_i] = \Pr_{h\sim\mathcal{U}}[h(A) = h(B)] = J(A, B) = p$. Hence:
\begin{displaymath}
    \Pr[|Z-J(A,B)|\geq \eps]
    = \Pr\left[\left|\sum_{i=1}^k (X_i - p)\right|\geq k\eps\right]
\end{displaymath}
Using Chebyshev's inequality (moment method of order 2):
\begin{displaymath}
    \Pr[|Z-J(A,B)|\geq \eps]
    \leq\frac{1}{(k\eps)^2}\E\left[\left(\sum_{i=1}^k(X_i - p)\right)^2\right]
\end{displaymath}
We have:
\begin{displaymath}
    \E\left[\left(\sum_{i=1}^k(X_i - p)\right)^2\right]
    = \sum_{1\leq i,j\leq k}\E\big[(X_i-p)(X_j-p)\big]
\end{displaymath}
By pairwise independence of $X_1,\ldots, X_k$, and since $X_i-p$ has mean
zero, we have that $\E\big[(X_i-p)(X_j-p)\big] = 0$ whenever $i\neq j$. Hence:
\begin{displaymath}
    \Pr[|Z-J(A,B)|\geq \eps]
    \leq \frac{1}{(k\eps)^2}\sum_{i=1}^k\E[(X_i-p)^2]\leq \frac{1}{4k\eps^2}
\end{displaymath}
where the last inequality uses $\E[(X_i-p)^2] = p(1-p)\leq \frac{1}{4}$. Hence,
choosing $k=\left\lceil\frac{3}{4\eps^2}\right\rceil$ is enough to obtain:
\begin{displaymath}
    \Pr[|Z-J(A,B)|\geq \eps]\leq \frac{1}{3}
\end{displaymath}
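
A sketch of the estimator in code; min-hashing with random priorities is used
here as one concrete family satisfying $\Pr_{h\sim\mathcal{U}}[h(A)=h(B)]
= J(A,B)$ (the problem's family may be specified differently).
\begin{verbatim}
import random

def jaccard_estimate(A, B, k, universe):
    # Z = (1/k) * #{i : h_i(A) = h_i(B)}, where h_i(S) is the element of S
    # minimizing a fresh random priority (a proxy for a random permutation).
    agree = 0
    for _ in range(k):
        pi = {e: random.random() for e in universe}
        # h(A) = h(B) iff the argmin over the union lies in the intersection,
        # which happens with probability exactly J(A, B).
        if min(A, key=pi.get) == min(B, key=pi.get):
            agree += 1
    return agree / k

universe = range(1000)
A, B = set(range(600)), set(range(300, 900))
# True Jaccard index: 300 / 900 = 1/3.
print(jaccard_estimate(A, B, k=300, universe=universe))
\end{verbatim}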

\paragraph{(b)}
\begin{itemize*}
    \item In case we want a success probability of $1-\delta$ instead of
        $\frac{2}{3}$, we can run $t = O(\log\frac{1}{\delta})$ independent
        copies of the estimator from \textbf{(a)} and output the median of
        the $t$ estimates: each copy is within $\eps$ of $J(A,B)$ with
        probability at least $\frac{2}{3}$, so by a Chernoff bound the median
        is within $\eps$ with probability at least $1-\delta$. The total
        number of hash functions used is $O(\eps^{-2}\log\frac{1}{\delta})$
        (see the sketch after this list).
\end{itemize*}
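
A minimal sketch of this boosting step, reusing \texttt{jaccard\_estimate}
from the sketch in \textbf{(a)}; the constant $18$ in $t$ is an arbitrary
illustration choice.
\begin{verbatim}
import math
import statistics

def boosted_estimate(A, B, eps, delta, universe):
    # Median trick: take the median of t = O(log(1/delta)) independent
    # copies of the (eps, 2/3)-estimator from (a).
    k = int(3 / (4 * eps * eps)) + 1       # repetitions per copy, from (a)
    t = int(18 * math.log(1 / delta)) + 1  # number of independent copies
    return statistics.median(jaccard_estimate(A, B, k, universe)
                             for _ in range(t))
\end{verbatim}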

\section*{Problem 4}

Time spent: Problem 1 (4 hours), Problem 2 (3 hours), Problem 3 (2 hours).
\textbf{Total}: 9 hours.

\end{document}