From 30e1f244e6381a0439a52e326fe7ca700fe19e7b Mon Sep 17 00:00:00 2001 From: Thibaut Horel Date: Thu, 11 Sep 2014 11:59:19 -0400 Subject: Minor typos --- lecture/main.tex | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'lecture') diff --git a/lecture/main.tex b/lecture/main.tex index 8b273ca..0f548ab 100644 --- a/lecture/main.tex +++ b/lecture/main.tex @@ -189,13 +189,15 @@ the following definitions. \begin{definition} A set of hash functions $\H$ is a \emph{$k$-wise independent family} iff the random variables $h(0), h(1),\ldots,h(u-1)$ are $k$-wise independent - and uniformly distributed in $[m]$ when $h\in\H$ is drawn uniformly at - random. + when $h\in\H$ is drawn uniformly at random. \end{definition} \begin{example} The set $\H$ of all functions from $[u]$ to $[m]$ is $k$-wise independent - for all $k$. + for all $k$. Note that this family of functions is impractical for many + applications since you need $O(u\log m)$ bits to store a totally random + function. For a hash table, this is already larger than the space of the + data structure itself, $O(n)$. \end{example} \begin{example}[$u=m=p$, $p$ prime] We define $\H = \big\{h: h(x) @@ -216,7 +218,7 @@ the following definitions. We define $\H = \big\{h: h(x) = \big(\sum_{i=0}^{k-1}a_i x^i \bmod p\big) \bmod m\big\}$. We see that $\Pr(h(x_1) = y_1,\ldots, h(x_k) = y_k)\leq \left(\frac{2}{m}\right)^k$. Note that in this example, - $h(x_1),\ldots,h(x_k)$ does not behave exactly $k$ independent uniformly + $h(x_1),\ldots,h(x_k)$ do not behave exactly like $k$ independent uniformly distributed variables, but this bound is sufficient in most applications. \end{example} @@ -253,7 +255,7 @@ In this problem there is some set $S\subset[u]$ where $|S| = n$, and we would li In real architectures, sequential memory accesses are much faster. A solution to the dictionary problem with better spacial locality is \emph{linear probing}. 
In this solution, the hash table is an array $A$, but unlike the -solution with chaining and linked lists, the values are stored directrly in +solution with chaining and linked lists, the values are stored directly in $A$. More specifically, the value associated with key $k$ is stored at $h(k)$. If this cell is already taken, we start walking to the right, one cell at a time\footnote{We wrap back to the beginning of the array when we reach its @@ -288,7 +290,7 @@ that this expected number is constant. \end{definition} \begin{definition} - We say that $I$ is \emph{full} iff $|L(I)| \geq |I|$. + We say that $I$ is \emph{full} iff $L(I) \geq |I|$. \end{definition} \begin{lemma} @@ -312,18 +314,19 @@ By applying the previous lemma: &\leq \E[\text{number of full intervals containing $h(x)$}]\\ &\leq \sum_{k\geq 1}\sum_{\substack{I, |I|=k\\h(x)\in I}} \Pr(I\text{ - full}) \leq \sum_{k\geq 1}k \max_{I, |I|=k, h(x)\in I}\Pr(I\text{ full}) + full}) \leq \sum_{k\geq 1}k \max_{\substack{I, |I|=k\\ h(x)\in I}}\Pr(I\text{ full}) \end{align*} We need to show that the $\max$ in the previous sum is $O\left(\frac{1}{k^3}\right)$ so that the sum converges. We assume that $m=2n$ where $n$ is the number of different keys active. \begin{displaymath} - \Pr(I\text{ full}) = \Pr(|L(I)\geq |I|)\leq\Pr\big(|L(I)-\E L(I)|\geq \E L(I)\big) + \Pr(I\text{ full}) = \Pr(L(I)\geq |I|) + \leq\Pr\big(|L(I)-\E L(I)|\geq \E L(I)\big) \end{displaymath} -Now we would like to apply Chernoff, but we don't have independence. Instead we -can raise both sides of the inequality to some power and apply Markov's -inequality. This is called the \emph{methods of moments}. We will do this in -the next lecture. +where the inequality follows from $\E L(I) = \frac{|I|}{2}$. Now we would like +to apply Chernoff, but we don't have independence. Instead we can raise both +sides of the inequality to some power and apply Markov's inequality. This is +called the \emph{method of moments}. 
We will do this in the next lecture. \end{proof} \bibliographystyle{alpha} -- cgit v1.2.3-70-g09d2