\documentclass[10pt]{article} \usepackage{fullpage} \usepackage[utf8x]{inputenc} \usepackage{amsmath,amsfonts,amsthm} \usepackage[english]{babel} \usepackage[capitalize, noabbrev]{cleveref} \newenvironment{enumerate*}% {\vspace{-2ex} \begin{enumerate} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{enumerate}} \newenvironment{itemize*}% {\vspace{-2ex} \begin{itemize} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{itemize}} \newenvironment{description*}% {\vspace{-2ex} \begin{description} % \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% {\end{description}} \DeclareMathOperator{\E}{\mathbb{E}} \let\P\relax \DeclareMathOperator{\P}{\mathbb{P}} \newcommand{\ex}[1]{\E\left[#1\right]} \newcommand{\prob}[1]{\P\left[#1\right]} \newcommand{\inprod}[1]{\left\langle #1 \right\rangle} \newcommand{\R}{\mathbf{R}} \newcommand{\N}{\mathbf{N}} \newcommand{\C}{\mathcal{C}} \newcommand{\eps}{\varepsilon} \newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}} \newcommand{\llbracket}{[\![} \newtheorem{theorem}{Theorem} \newtheorem{lemma}{Lemma} \author{Thibaut Horel} \title{CS 224 Problem Set 3 -- Solutions} \begin{document} \maketitle \section*{Problem 1} \paragraph{(a)} We change the weight function used in class, we now define for $i\in\{1,\ldots, n\}$: \begin{displaymath} w_i = \frac{1}{3^{l_i}} + \frac{1}{n} \end{displaymath} where $l_i$ is the height of $i$ in tree $T$. Let $W$ be the sum of the weights of the items in the splay tree. We have: \begin{displaymath} W \leq \sum_{i=1}^n \frac{1}{3^{l_i}} + \sum_{i=1}^n \frac{1}{n} \leq \sum_{h\geq 0} \left(\frac{2}{3}\right)^h + 1 \leq 3 = O(1) \end{displaymath} and for a given $i$ we have: \begin{displaymath} s(i) \geq w_i \geq \frac{1}{3^{l_i}} \end{displaymath} We know that the amortized cost of $\textsc{splay}(i)$ is $O\left(1+\log\frac{W}{s(i)}\right)$. Using the above two inequalities, this is $O(1+l_i)$. Hence, the amortized cost of $\sigma$ is $O(m + \sum_i m_i l_i) = O\left(m + C_T(\sigma)\right)$. We now want to bound the range of the potential function $\Phi = \sum_{i=1}^n\log(s_i)$. We use: \begin{displaymath} \frac{1}{n}\leq w_i\leq s_i\leq s_{root} = W = O(1) \end{displaymath} To obtain that $\Phi\in[-C'n\log n, Cn]$ for some constants $C$ and $C'$. As a consequence $-\Delta\Phi$, the opposite of the change in potential, is $O(n\log n)$ for any sequence of operations The true cost $S(\sigma)$ is simply the amortized cost minus $\Delta\Phi$. From what precedes: \begin{displaymath} S(\sigma) = O\left(m+C_T(\sigma) + n\log n\right) \end{displaymath} \paragraph{(b)} It suffices to show that $C_T(\sigma) = \sum_{i=1}^n m_il_i = \Omega (n\log n)$. Since $m_i\geq 1$ for all $i$, it suffices to show $\sum_{i=1}^n l_i = \Omega(n\log n)$. Without loss of generality, we can assume that the $l_i$'s are ranked in increasing order: $l_1\leq\ldots\leq l_n$. It is easy to see that $l_i\geq \log i$ (the maximum height of an element in a tree containing $i$ elements is at least $\log i$). Hence: \begin{displaymath} \sum_{i=1}^n l_i \geq \sum_{i=1}^n \log i = \Theta(n\log n) \end{displaymath} The last equality coming from $\sum_{i=1}^n \log i = \log n! \sim n\log n$ using Stirling's approximation. This concludes the question. \section*{Problem 2} Let us consider a Fibonacci heap of rank $k$ and let us call $x_1,\ldots,x_k$ the $k$ children of the root in increasing order of degrees, and let us denote by $d_1\leq\ldots\leq d_k$ these degrees. First, we observe that for $i\geq 2$, $d_i\geq i-2$. Indeed, when the tree rooted at $x_i$ was linked to the root during a consolidation we had $d_i = i-1$ (since we always merge trees of the same rank). Since then $x_i$ has lost at most one node (otherwise it would have been cut). Let us denote by $m_k$ the minimum size of our Fibonacci heap of rank $k$. We have: \begin{displaymath} m_k = 2 + \sum_{i=2}^k m_{d_i} \geq 2 + \sum_{i=2}^k m_{i-2} \end{displaymath} Further more we have $m_1 = 2$ and $m_0 = 1$. We now prove by induction $m_k \geq F_{k+2}$. $k=0$ and $k=1$ are trivial. Let us assume that the result is true for some $k$, then: \begin{displaymath} m_{k+1} = 2 + \sum_{i=2}^{k+1} m_{i-2} \geq 2 + \sum_{i=2}^{k+1} F_i = 1+\sum_{i=0}^{k+1} F_i = F_{k+3} \end{displaymath} The last equality is a common property of Fibonacci numbers which is easily proved by induction. This concludes the proof of this problem. \section*{Problem 3} \paragraph{(a)} The amortized cost of \textsc{decrease-key} is reduced as $k$ increases. Intuitively, cuts occur less frequently. Formally, we consider the modified potential function: \begin{displaymath} \Phi(T) = \#trees + \frac{2}{k-1}\sum_{x\in T} w(x) \end{displaymath} where $w(x)$ is the number of children that $x$ has lost (instead of being a binary mark as in standard Fibonacci heaps). We reset $w(x)$ to 0 when $x$ cuts itself from its parent. The analysis of the amortized cost of \textsc{decrease-key} is now straightforward: \begin{itemize} \item either there is no cut, in which case the actual cost as well as $\Delta\Phi$ are $O(1)$. \item or there is a cascade of $c$ cuts: $x_1,\ldots, x_c$. Then: \begin{displaymath} \Delta\Phi = \Delta_{trees} - \frac{2}{k-1}\sum_{i=1}^{c-1} (k-1) + \frac{2}{k-1} \end{displaymath} since we reset the weights of $c-1$ nodes (we are cutting them so their weight was exactly $k-1$) and increment the weight of one node (the parent of the last node to be cut). $\Delta_{trees}$ is simply $c$, so we get: \begin{displaymath} \Delta\Phi = c - 2(c-1) + \frac{2}{k-1} \end{displaymath} since the actual cost is $c$, we get that in this case the amortized cost is $O(1+\frac{1}{k})$. \end{itemize} Overall the amortized cost is $O(1+\frac{1}{k})$ and decreases as $k$ increases. \paragraph{(b)} The cost of \textsc{delete-min} however increases. Indeed, we proved in class that the amortized cost of this operation is equal to the number of trees $t$ after consolidation (this proof carries). Our Fibonacci trees of a given rank now contain less elements so we have to use more trees to store all the elements. Formally, let $m_r$ be the minimum size of a ($k$-relaxed) Fibonacci heap of rank $r$. We can prove exactly as in Problem 2 that: \begin{displaymath} m_r \geq k + \sum_{i=k}^r m_{i-k} \end{displaymath} which implies: \begin{displaymath} m_r \geq c_k^r \end{displaymath} where $c_k$ is the largest root of the polynomial $x^k - x^{k-1} -1$. This implies that $t\leq \log_{c_k}n$. It is easy to see that $\log c_k = O(\frac{\log k}{k})$, hence the amortized cost of \textsc{delete-min} is $O(\frac{k\log n}{\log k})$. \section*{Problem 4} \paragraph{(a)} \textsc{find} can be implemented in worst-case $O(\log n)$ time. We simply use the standard \textsc{find} procedure in a BST, ignoring the marks of the nodes. The only difference being that we return ``not present'' if the search ends on a marked node. It is easy to see that a tree of $n$ elements has depth $O(\log 2n)$ (the worse which can happen is that it was obtained from $n$ deletions in a balanced binary tree of size $2n$), giving a time complexity $O(\log n)$ in the worst case. \paragraph{} A rebuilding occurs when there are $2m+1 = \frac{n-1}{2} +1$ elements left in the tree. We first note that this rebuilding procedure can be implemented in time $O(n)$: \begin{itemize} \item by doing a left-first depth-first traversal, we obtain the $2m + 1$ remaining elements in the tree in increasing sorted order: $x_1\leq\ldots\leq x_{2m+1}$ \item we reconstruct the balanced binary tree $B(x_1,\ldots,x_{2m+1})$ using a simple divide-and-conquer algorithm, by choosing $x_m$ as the root, and defining the left subtree of $x_m$ to be $B(x_1,\ldots,x_{m-1})$ and the right subtree of $x_m$ to be $B(x_{m+1},\ldots,x_{2m+1})$. \end{itemize} The running time of this procedure is $O(m) = O(n)$. The amortized running time of \textsc{delete} is $O(\log n)$. Intuitively, rebuildings occur every $O(n)$ deletions for a cost $O(n)$: their amortized cost is $O(1)$. Formally, we introduce the potential function $\Phi(T) = |T| - \textsc{size}(\text{root})$, the number of marked nodes in $T$. There are two cases to compute the amortized cost of a deletion: \begin{itemize*} \item either no rebuilding occurs, the actual cost is $O(\log n)$ (we find the node to be deleted and mark it) and $\Delta\Phi = 1$ \item or a rebuilding occurs, in this case the actual cost is $O(n + \log n)$ and $\Delta\Phi = -Cn$ for some $C>0$ (since a rebuilding occurs, $\Phi_{\text{initial}} = O(n)$ and $\Phi_{\text{final}} = 0$. By adapting the constants We see that $\Delta\Phi$ compensates the rebuilding cost and the amortized cost is $O(\log n)$ again in this case. \end{itemize*} \paragraph{(b)} The height of this tree, $h(T)$ always satisfies $h(T) \leq \alpha\log n$ (the balancedness constraint is true for the root in particular). As a consequence \textsc{find} can be implemented in worst-case time $O(\alpha\log n) = O(\log n)$. We now introduce a potential function to analyze the amortized cost of \textsc{insert}. For a node $x$ we define $x_l$ and $x_r$ the left and right subtrees of $x$, and its weight $w(x)$: \begin{displaymath} w(x) \eqdef \max\left(|\textsc{size}(x_l)-\textsc{size}(x_r)| -1, 0\right) \end{displaymath} and the potential function $\Phi(T)$: \begin{displaymath} \Phi(T) = \sum_{x\in T} w(x) \end{displaymath} We first note that if the subtree rooted at $x$ is perfectly balanced, $w(x) = 0$ (the size of $x_l$ and $x_r$ defer by one at most). Second, we show that whenever a rebalancing occurs at $x$, $w(x) \geq C\cdot\textsc{size}(x)$ for some $C>0$. If a rebalancing occurs at $x$, this is because an element was inserted in the subtree rooted at $x$. Without loss of generality, we can assume that this element was inserted in $x_l$, so that: \begin{equation}\label{eq:1} \textsc{height}(x) = 1 + \textsc{height}(x_l) \end{equation} Furthermore, since a rebalancing occurs, we have: \begin{equation}\label{eq:2} \textsc{height}(x) > \alpha \log \textsc{size}(x) \end{equation} Finally, if a rebalancing occurs at $x$ it means that $x_l$ is balanced (rebalancing are done from the inserted node to the root), hence: \begin{equation}\label{eq:3} \textsc{height}(x_l) \leq \alpha \log \textsc{size}(x_l) \end{equation} Combining \cref{eq:1,eq:2,eq:3} we obtain: \begin{displaymath} \alpha \log \textsc{size}(x_l)\geq \textsc{height}(x_l) = \textsc{height}(x) - 1 > \alpha \log \textsc{size}(x) - 1 \end{displaymath} Hence: \begin{equation}\label{eq:4} \textsc{size}(x_l)\geq \frac{1}{2^{1/\alpha}}\textsc{size}(x) \end{equation} Using $\textsc{size}(x) = 1 + \textsc{size}(x_l) + \textsc{size}(x_r)$ and \cref{eq:4} we obtain: \begin{equation}\label{eq:5} \textsc{size}(x_r)\leq \left(1-\frac{1}{2^{1/\alpha}}\right)\textsc{size}(x) - 1 \end{equation} Finally, combining \cref{eq:4,eq:5}: \begin{displaymath} w(x) = \textsc{size}(x_l) - \textsc{right}(x_l) -1 \geq \left(2^{1-1/\alpha}-1\right)\textsc{size}(x) \end{displaymath} We can now analyze the amortized cost of \textsc{insert}. Let us assume that the insertion of an element caused a cascade of rebalancing at nodes $x_1,\ldots, x_k$. Then the actual cost of this insertion is: \begin{displaymath} O(\log n) + \textsc{size}(x_1) + \ldots + \textsc{size}(x_k) \end{displaymath} since we saw in \textbf{(a)} that rebuilding a BST of $n$ elements can be done in $O(n)$ time. Furthermore the difference in potential is: \begin{displaymath} - w(x_1) - \ldots - w(x_k) \end{displaymath} since the weight of a perfectly balanced node is 0 and only the weights of $x_1$ to $x_k$ are changing. Using the fact that $w(x_i) \geq C\textsc{size}(x_i)$ and adjusting the constants, we see that the difference in potential compensates for the cost of rebuilding the substrees. This shows that the amortized cost of \textsc{insert} is $O(\log n)$. \section*{Problem 5} Problem 1: 2h, Problem 2: 45min, Problem 3: 3h, Problem 4: 2h15min. Total: 8h. \end{document}