ps1/main.tex


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189

\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}

% these are compressed lists to help fit into a 1 page limit
\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}
 
\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}
 
\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}

\DeclareMathOperator*{\E}{\mathbb{E}}
\let\Pr\relax
\DeclareMathOperator*{\Pr}{\mathbb{P}}

\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}

\author{Thibaut Horel}
\title{CS 229r Problem Set 1 -- Solutions}

\begin{document}

\maketitle
\section*{Problem 1}

\paragraph{(a)} Statement (6) is wrong. The number of min pointers is
not necessarily equal to the number of elements. Indeed, when you insert an
element, you have to insert it in both its own cluster and (possibly) in the
summary, leading to (possibly) more than one min pointer per element.

\paragraph{(c)} Let us denote by $N(u)$ the number of min pointers that you
have to create when inserting an element in a $u$-vEB tree. We have two cases:

\begin{itemize*}
    \item either a cluster already exists for that element, and then you just
        have to insert it in the cluster. In this case: $N(u)
        = N(\sqrt{u})$
    \item or you need to create a new cluster for this element (which creates
        one min pointer) and insert the cluster id in the summary. In this
        case: $N(u) = 1 + N(\sqrt{u})$.
\end{itemize*}
In both cases, we have $N(u) \leq 1 + N(\sqrt{u})$. Solving this recursion
gives $N(u) = O(\log\log u)$ number of min-pointers per element, which
in turns gives a space complexity of $O(n\log\log u)$ for a $u$-vEB tree.

\paragraph{(d)}

\paragraph{(e)} We can use indirection to achieve linear space complexity as
follows:
\begin{enumerate*}
    \item order the $n$ elements and organize them in $\frac{n}{\log u}$
        buckets of $\log u$ consecutive elements.
    \item for each bucket, create a balanced binary search tree (a sorted
        array is enough in the static case) containing its elements.
    \item create a $u$-vEB tree containing $\frac{n}{\log u}$ elements: the
        minimum element of each bucket. Each of them pointing to its associated
        bucket.
\end{enumerate*}

We can then answer a predecessor query by:
\begin{enumerate*}
    \item finding the right bucket by doing a predecessor query in the $u$-vEB
        tree in time $O\left(\log\log\left(\frac{n}{\log u}\right)\right)$.
    \item finding the predecessor in this bucket (either using the
        binary search structure, or doing a binary search in the case of
        a sorted array) in time $O(\log\log u)$ since the bucket only contains
        $\log u$ elements.
\end{enumerate*}

Since $n\leq u$, the overall time complexity is $O(\log\log u)$. The space
complexity is $O\left(\frac{n}{\log u}\log\log u\right)$ for the $u$-vEB tree
and $O(n)$ for the binary search trees, which is $O(n)$ overall.

\section*{Problem 2}

Let us define:
\begin{displaymath}
    m\eqdef \sum_{j=1}^{\sqrt{w}} 2^{m_j} \quad\text{with}\quad
    m_j\eqdef w-j\sqrt{w} + j.
\end{displaymath}
We have:
\begin{equation}\label{eq:dec}
    y\eqdef\left(\sum_{i=1}^{\sqrt{w}} x_i 2^{i\sqrt{w}-1}\right)\times m
    = \sum_{i=1}^{\sqrt{w}}\sum_{j=1}^{\sqrt{w}} x_i 2^{m_j+i\sqrt{w} -1}
\end{equation}

\begin{lemma}\label{lemma:oto}
    The function $(i,j) \mapsto m_j + i\sqrt{w} -1$ defined on
    $\{1,\ldots,\sqrt{w}\}^2$ is one-to-one.
\end{lemma}

\begin{proof}
    Let us assume that $m_j + i\sqrt{w} - 1 = m_l + k\sqrt{w} - 1$ for
    $(i,j,k,l)\in\{1,\ldots,\sqrt{w}\}^4$. Then using the definiton of $m_j$
    and $m_l$, we have:
    \begin{displaymath}
        \big[(i-j) - (k-l)\big]\sqrt{w} = l-j.
    \end{displaymath}
    Since $1-\sqrt{w}\leq l-j\leq \sqrt{w}-1$, this implies that $l-j = 0$ and
    $(i-j) - (k-l) = 0$. This in turn implies that $i=k$ and $l=j$.
\end{proof}

\cref{lemma:oto} allows us to interpret \cref{eq:dec} as the base
2 decomposition of $y$ (since all the exponents are distinct). Furthermore, for
  $i=j$, $m_i + i\sqrt{w} = w+i-1$, which shows that the bits between position
  $w$ and $w+\sqrt{w}-1$ will contain the bits of $x$ in consecutive order.
  Then, we simply shift $y$ by $w$ to the right ($t=w$) and keep the rightmost
  $\sqrt{w}$ bits (by masking) to obtain what we want. To summarize:

\begin{displaymath}
    \boxed{
    m= \sum_{j=1}^{\sqrt{w}} 2^{w-j\sqrt{w}+j} \quad\text{and}\quad
t = w}
\end{displaymath}

\section*{Problem 3}

Using Problem 2 and what we did in class, we assume that we have access to the
following procedures:
\begin{itemize*}
    \item \texttt{perfect-sketch}: takes a word $x$ of length $w$ whose bits
        are all $0$ except possibly the ones at position $i\sqrt{w}-1$, $1\leq
        i\leq\sqrt{w}$ and compress them (preserving their order) to positions
        $0$ to $\sqrt{w}-1$. This is achieved using Problem 2.
        \begin{center}
            \texttt{perfect-sketch(x) := ((x * m) >> w) \& (1 << $\sqrt{\texttt{w}}$ - 1)}
        \end{center}
    \item \texttt{count-ones}: takes a word of length $w$ whose bits are all $0$
        except possibly the ones at position $i\sqrt{w} -1$, $1\leq i\leq
        \sqrt{w}$ and counts the number of ones in this word. This is achieved
        by multiplying $x$ by $a\eqdef\sum_{j=0}^{\sqrt{w}-1}2^j$. The number of ones
        can be read between positions $w-1$ and $\sqrt{w}+w -2$ which can be
        extracted by shifting by $w-1$ to the right and masking to keep only
        the rightmost $\sqrt{w}$ bits.
        \begin{center}
            \texttt{count-ones(x) := ((x * a) >> (w - 1)) \& (1 << $\sqrt{\texttt{w}}$ - 1)}
        \end{center}
    \item \texttt{non-empty-clusters}: takes a word $x$ of length $w$ seen as
        the concatenation of $\sqrt{w}$ clusters of length $\sqrt{w}$ and
        returns a word of length $w$ which is the concatenation of clusters of
        length $\sqrt{w}$, each cluster being all zero, except for the leading
        bit indicating whether of not the corresponding cluster in $x$ is
        non-zero. This is achieved by defining $b\eqdef\sum_{j=1}^{\sqrt{w}}
        2^{j\sqrt{w}-1}$ and computing:
        \begin{center}
            \texttt{non-empty-clusters(x) := (x \& b) | ( b $\wedge$ ( b \& ( b - ( x $\wedge$ ( x \& b))))) }
        \end{center}

    \item \texttt{repeat}: takes a word $x$ of length $\sqrt{w}$ and returns a word
        of length $w$ obtained by concatenating $x$ to itself $\sqrt{w}$ times:
        \begin{center}
            \texttt{repeat(x) := (x * a)}
        \end{center}
\end{itemize*}
    
Finally, we will need one last constant, $c$ which is the concatenation of
$\sqrt{w}$ blocks of length $\sqrt{w}$, the $k$-th right most block being equal
to $2^k-1$, $c\eqdef \sum_{k=1}^{\sqrt{w}} (2^k-1)2^{(k-1)\sqrt{w}}$. We first
define a procedure \texttt{lsb-short} which takes a word $x$ of length
$\sqrt{w}$ and computes its least significant bit set to one:
\begin{center}
    \texttt{lsb-short(x) := count-ones(non-empty-clusters(repeat(x) \& c))}
\end{center}
Then, the least significant bit set to one can be computed by:
\begin{verbatim}
def lsb(x):
    z := lsb-short(perfect-sketch(non-empty-clusters(x)))
    return 1 << z + lsb-short((x >> z * \sqrt{w}) & (1 << \sqrt{w} - 1))
\end{verbatim}
In the above code, $z$ computes the position of the rightmost non empty
$\sqrt{w}$-block in $x$. The least significant bit set to one can then be
obtained by applying \texttt{lsb-short} to this block.
\end{document}