summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lecture/main.tex27
1 files changed, 15 insertions, 12 deletions
diff --git a/lecture/main.tex b/lecture/main.tex
index 8b273ca..0f548ab 100644
--- a/lecture/main.tex
+++ b/lecture/main.tex
@@ -189,13 +189,15 @@ the following definitions.
\begin{definition}
A set of hash functions $\H$ is a \emph{$k$-wise independent family} iff
the random variables $h(0), h(1),\ldots,h(u-1)$ are $k$-wise independent
- and uniformly distributed in $[m]$ when $h\in\H$ is drawn uniformly at
- random.
+ when $h\in\H$ is drawn uniformly at random.
\end{definition}
\begin{example}
The set $\H$ of all functions from $[u]$ to $[m]$ is $k$-wise independent
- for all $k$.
+ for all $k$. Note that this family of functions is impractical for many
+ applications since you need $O(u\log m)$ bits to store a totally random
+ function. For a hash table, this is already larger than the space of the
+ data structure itself, $O(n)$.
\end{example}
\begin{example}[$u=m=p$, $p$ prime] We define $\H = \big\{h: h(x)
@@ -216,7 +218,7 @@ the following definitions.
We define $\H = \big\{h: h(x) = \big(\sum_{i=0}^{k-1}a_i x^i \bmod p\big)
\bmod m\big\}$. We see that $\Pr(h(x_1) = y_1,\ldots, h(x_k) = y_k)\leq
\left(\frac{2}{m}\right)^k$. Note that in this example,
- $h(x_1),\ldots,h(x_k)$ does not behave exactly $k$ independent uniformly
+ $h(x_1),\ldots,h(x_k)$ does not behave exactly like $k$ independent uniformly
distributed variables, but this bound is sufficient in most applications.
\end{example}
@@ -253,7 +255,7 @@ In this problem there is some set $S\subset[u]$ where $|S| = n$, and we would li
In real architectures, sequential memory accesses are much faster. A solution
to the dictionary problem with better spatial locality is \emph{linear
probing}. In this solution, the hash table is an array $A$, but unlike the
-solution with chaining and linked lists, the values are stored directrly in
+solution with chaining and linked lists, the values are stored directly in
$A$. More specifically, the value associated with key $k$ is stored at $h(k)$.
If this cell is already taken, we start walking to the right, one cell at
a time\footnote{We wrap back to the beginning of the array when we reach its
@@ -288,7 +290,7 @@ that this expected number is constant.
\end{definition}
\begin{definition}
- We say that $I$ is \emph{full} iff $|L(I)| \geq |I|$.
+ We say that $I$ is \emph{full} iff $L(I) \geq |I|$.
\end{definition}
\begin{lemma}
@@ -312,18 +314,19 @@ By applying the previous lemma:
&\leq \E[\text{number of full intervals
containing $h(x)$}]\\
&\leq \sum_{k\geq 1}\sum_{\substack{I, |I|=k\\h(x)\in I}} \Pr(I\text{
- full}) \leq \sum_{k\geq 1}k \max_{I, |I|=k, h(x)\in I}\Pr(I\text{ full})
+ full}) \leq \sum_{k\geq 1}k \max_{\substack{I, |I|=k\\ h(x)\in I}}\Pr(I\text{ full})
\end{align*}
We need to show that the $\max$ in the previous sum is
$O\left(\frac{1}{k^3}\right)$ so that the sum converges. We assume that $m=2n$
where $n$ is the number of different keys active.
\begin{displaymath}
- \Pr(I\text{ full}) = \Pr(|L(I)\geq |I|)\leq\Pr\big(|L(I)-\E L(I)|\geq \E L(I)\big)
+ \Pr(I\text{ full}) = \Pr(L(I)\geq |I|)
+ \leq\Pr\big(|L(I)-\E L(I)|\geq \E L(I)\big)
\end{displaymath}
-Now we would like to apply Chernoff, but we don't have independence. Instead we
-can raise both sides of the inequality to some power and apply Markov's
-inequality. This is called the \emph{methods of moments}. We will do this in
-the next lecture.
+where the inequality follows from $\E L(I) = \frac{|I|}{2}$. Now we would like
+to apply Chernoff, but we don't have independence. Instead we can raise both
+sides of the inequality to some power and apply Markov's inequality. This is
+called the \emph{method of moments}. We will do this in the next lecture.
\end{proof}
\bibliographystyle{alpha}