\documentclass[10pt]{article} \usepackage{fullpage} \usepackage{amsmath,amsfonts,amsthm} \usepackage[english]{babel} \usepackage[capitalize, noabbrev]{cleveref} % these are compressed lists to help fit into a 1 page limit \newenvironment{enumerate*}% {\vspace{-2ex} \begin{enumerate} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{enumerate}} \newenvironment{itemize*}% {\vspace{-2ex} \begin{itemize} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{itemize}} \newenvironment{description*}% {\vspace{-2ex} \begin{description} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{description}} \DeclareMathOperator*{\E}{\mathbb{E}} \let\Pr\relax \DeclareMathOperator*{\Pr}{\mathbb{P}} \newcommand{\inprod}[1]{\left\langle #1 \right\rangle} \newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}} \newcommand{\llbracket}{[\![} \newtheorem{theorem}{Theorem} \newtheorem{lemma}{Lemma} \author{Thibaut Horel} \title{CS 229r Problem Set 1 -- Solutions} \begin{document} \maketitle \section*{Problem 1} \paragraph{(a)} Statement (6) is wrong. The number of min pointers is not necessarily equal to the number of elements. Indeed, when you insert an element, you have to insert it in both its own cluster and (possibly) in the summary, leading to (possibly) more than one min pointer per element. \paragraph{(c)} Let us denote by $N(u)$ the number of min pointers that you have to create when inserting an element in a $u$-vEB tree. We have two cases: \begin{itemize*} \item either a cluster already exists for that element, and then you just have to insert it in the cluster. In this case: $N(u) = N(\sqrt{u})$ \item or you need to create a new cluster for this element (which creates one min pointer) and insert the cluster id in the summary. In this case: $N(u) = 1 + N(\sqrt{u})$. \end{itemize*} In both cases, we have $N(u) \leq 1 + N(\sqrt{u})$. Solving this recursion gives $N(u) = O(\log\log u)$ number of min-pointers per element, which in turns gives a space complexity of $O(n\log\log u)$ for a $u$-vEB tree. \paragraph{(d)} \paragraph{(e)} We can use indirection to achieve linear space complexity as follows: \begin{enumerate*} \item order the $n$ elements and organize them in $\frac{n}{\log u}$ buckets of $\log u$ consecutive elements. \item for each bucket, create a balanced binary search tree (a sorted array is enough in the static case) containing its elements. \item create a $u$-vEB tree containing $\frac{n}{\log u}$ elements: the minimum element of each bucket. Each of them pointing to its associated bucket. \end{enumerate*} We can then answer a predecessor query by: \begin{enumerate*} \item finding the right bucket by doing a predecessor query in the $u$-vEB tree in time $O\left(\log\log\left(\frac{n}{\log u}\right)\right)$. \item finding the predecessor in this bucket (either using the binary search structure, or doing a binary search in the case of a sorted array) in time $O(\log\log u)$ since the bucket only contains $\log u$ elements. \end{enumerate*} Since $n\leq u$, the overall time complexity is $O(\log\log u)$. The space complexity is $O\left(\frac{n}{\log u}\log\log u\right)$ for the $u$-vEB tree and $O(n)$ for the binary search trees, which is $O(n)$ overall. \section*{Problem 2} Let us define: \begin{displaymath} m\eqdef \sum_{j=1}^{\sqrt{w}} 2^{m_j} \quad\text{with}\quad m_j\eqdef w-j\sqrt{w} + j. \end{displaymath} We have: \begin{equation}\label{eq:dec} y\eqdef\left(\sum_{i=1}^{\sqrt{w}} x_i 2^{i\sqrt{w}-1}\right)\times m = \sum_{i=1}^{\sqrt{w}}\sum_{j=1}^{\sqrt{w}} x_i 2^{m_j+i\sqrt{w} -1} \end{equation} \begin{lemma}\label{lemma:oto} The function $(i,j) \mapsto m_j + i\sqrt{w} -1$ defined on $\{1,\ldots,\sqrt{w}\}^2$ is one-to-one. \end{lemma} \begin{proof} Let us assume that $m_j + i\sqrt{w} - 1 = m_l + k\sqrt{w} - 1$ for $(i,j,k,l)\in\{1,\ldots,\sqrt{w}\}^4$. Then using the definiton of $m_j$ and $m_l$, we have: \begin{displaymath} \big[(i-j) - (k-l)\big]\sqrt{w} = l-j. \end{displaymath} Since $1-\sqrt{w}\leq l-j\leq \sqrt{w}-1$, this implies that $l-j = 0$ and $(i-j) - (k-l) = 0$. This in turn implies that $i=k$ and $l=j$. \end{proof} \cref{lemma:oto} allows us to interpret \cref{eq:dec} as the base 2 decomposition of $y$ (since all the exponents are distinct). Furthermore, for $i=j$, $m_i + i\sqrt{w} = w+i-1$, which shows that the bits between position $w$ and $w+\sqrt{w}-1$ will contain the bits of $x$ in consecutive order. Then, we simply shift $y$ by $w$ to the right ($t=w$) and keep the rightmost $\sqrt{w}$ bits (by masking) to obtain what we want. To summarize: \begin{displaymath} \boxed{ m= \sum_{j=1}^{\sqrt{w}} 2^{w-j\sqrt{w}+j} \quad\text{and}\quad t = w} \end{displaymath} \section*{Problem 3} Using Problem 2 and what we did in class, we assume that we have access to the following procedures: \begin{itemize*} \item \texttt{perfect-sketch}: takes a word $x$ of length $w$ whose bits are all $0$ except possibly the ones at position $i\sqrt{w}-1$, $1\leq i\leq\sqrt{w}$ and compress them (preserving their order) to positions $0$ to $\sqrt{w}-1$. This is achieved using Problem 2. \begin{center} \texttt{perfect-sketch(x) := ((x * m) >> w) \& (1 << $\sqrt{\texttt{w}}$ - 1)} \end{center} \item \texttt{count-ones}: takes a word of length $w$ whose bits are all $0$ except possibly the ones at position $i\sqrt{w} -1$, $1\leq i\leq \sqrt{w}$ and counts the number of ones in this word. This is achieved by multiplying $x$ by $a\eqdef\sum_{j=0}^{\sqrt{w}-1}2^j$. The number of ones can be read between positions $w-1$ and $\sqrt{w}+w -2$ which can be extracted by shifting by $w-1$ to the right and masking to keep only the rightmost $\sqrt{w}$ bits. \begin{center} \texttt{count-ones(x) := ((x * a) >> (w - 1)) \& (1 << $\sqrt{\texttt{w}}$ - 1)} \end{center} \item \texttt{non-empty-clusters}: takes a word $x$ of length $w$ seen as the concatenation of $\sqrt{w}$ clusters of length $\sqrt{w}$ and returns a word of length $w$ which is the concatenation of clusters of length $\sqrt{w}$, each cluster being all zero, except for the leading bit indicating whether of not the corresponding cluster in $x$ is non-zero. This is achieved by defining $b\eqdef\sum_{j=1}^{\sqrt{w}} 2^{j\sqrt{w}-1}$ and computing: \begin{center} \texttt{non-empty-clusters(x) := (x \& b) | ( b $\wedge$ ( b \& ( b - ( x $\wedge$ ( x \& b))))) } \end{center} \item \texttt{repeat}: takes a word $x$ of length $\sqrt{w}$ and returns a word of length $w$ obtained by concatenating $x$ to itself $\sqrt{w}$ times: \begin{center} \texttt{repeat(x) := (x * a)} \end{center} \end{itemize*} Finally, we will need one last constant, $c$ which is the concatenation of $\sqrt{w}$ blocks of length $\sqrt{w}$, the $k$-th right most block being equal to $2^k-1$, $c\eqdef \sum_{k=1}^{\sqrt{w}} (2^k-1)2^{(k-1)\sqrt{w}}$. We first define a procedure \texttt{lsb-short} which takes a word $x$ of length $\sqrt{w}$ and computes its least significant bit set to one: \begin{center} \texttt{lsb-short(x) := count-ones(non-empty-clusters(repeat(x) \& c))} \end{center} Then, the least significant bit set to one can be computed by: \begin{verbatim} def lsb(x): z := lsb-short(perfect-sketch(non-empty-clusters(x))) return 1 << z + lsb-short((x >> z * \sqrt{w}) & (1 << \sqrt{w} - 1)) \end{verbatim} In the above code, $z$ computes the position of the rightmost non empty $\sqrt{w}$-block in $x$. The least significant bit set to one can then be obtained by applying \texttt{lsb-short} to this block. \end{document}