\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}

\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}
 
\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}
 
\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}

\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}

\author{Thibaut Horel}
\title{CS 224 Problem Set 3 -- Solutions}

\begin{document}
\maketitle

\section*{Problem 1}

\paragraph{(a)} We change the weight function used in class, we now define for
$i\in\{1,\ldots, n\}$:
\begin{displaymath}
    w_i = \frac{1}{3^{l_i}} + \frac{1}{n}
\end{displaymath}
where $l_i$ is the height of $i$ in tree $T$. Let $W$ be the sum of the weights
of the items in the splay tree. We have:
\begin{displaymath}
    W \leq \sum_{i=1}^n \frac{1}{3^{l_i}} + \sum_{i=1}^n \frac{1}{n}
    \leq \sum_{h\geq 0} \left(\frac{2}{3}\right)^h + 1 \leq 3 = O(1)
\end{displaymath}
and for a given $i$ we have:
\begin{displaymath}
    s(i) \geq w_i \geq \frac{1}{3^{l_i}}
\end{displaymath}
We know that the amortized cost of $\textsc{splay}(i)$ is
$O\left(1+\log\frac{W}{s(i)}\right)$. Using the above two inequalities, this is
$O(1+l_i)$. Hence, the amortized cost of $\sigma$ is $O(m + \sum_i m_i l_i)
= O\left(m + C_T(\sigma)\right)$.

We now want to bound the range of the potential function $\Phi
= \sum_{i=1}^n\log(s_i)$. We use:
\begin{displaymath}
    \frac{1}{n}\leq w_i\leq s_i\leq s_{root} = W = O(1)
\end{displaymath}
To obtain that $\Phi\in[-C'n\log n, Cn]$ for some constants $C$ and $C'$.  As
a consequence $-\Delta\Phi$, the opposite of the change in potential, is
$O(n\log n)$ for any sequence of operations

The true cost $S(\sigma)$ is simply the amortized cost minus $\Delta\Phi$. From
what precedes:
\begin{displaymath}
    S(\sigma) = O\left(m+C_T(\sigma) + n\log n\right)
\end{displaymath}

\paragraph{(b)} It suffices to show that $C_T(\sigma) = \sum_{i=1}^n m_il_i
= \Omega (n\log n)$. Since $m_i\geq 1$ for all $i$, it suffices to show $\sum_{i=1}^n
l_i = \Omega(n\log n)$. Without loss of generality, we can assume that the $l_i$'s
are ranked in increasing order: $l_1\leq\ldots\leq l_n$. It is easy to see that
$l_i\geq \log i$ (the maximum height of an element in a tree containing $i$
elements is at least $\log i$). Hence:
\begin{displaymath}
    \sum_{i=1}^n l_i \geq \sum_{i=1}^n \log i = \Theta(n\log n)
\end{displaymath}
The last equality coming from $\sum_{i=1}^n \log i = \log n! \sim n\log n$
using Stirling's approximation. This concludes the question.

\section*{Problem 2}
Let us consider a Fibonacci heap of rank $k$ and let us call $x_1,\ldots,x_k$
the $k$ children of the root in increasing order of degrees, and let us denote
by $d_1\leq\ldots\leq d_k$ these degrees.

First, we observe that for $i\geq 2$, $d_i\geq i-2$. Indeed, when the tree
rooted at $x_i$ was linked to the root during a consolidation we had $d_i
= i-1$ (since we always merge trees of the same rank). Since then $x_i$ has
lost at most one node (otherwise it would have been cut).

Let us denote by $m_k$ the minimum size of our Fibonacci heap of rank $k$. We
have:
\begin{displaymath}
    m_k = 2 + \sum_{i=2}^k m_{d_i} \geq 2 + \sum_{i=2}^k m_{i-2}
\end{displaymath}
Further more we have $m_1 = 2$ and $m_0 = 1$. We now prove by induction $m_k
\geq F_{k+2}$. $k=0$ and $k=1$ are trivial. Let us assume that the result is
true for some $k$, then:
\begin{displaymath}
    m_{k+1} = 2 + \sum_{i=2}^{k+1} m_{i-2} \geq 2 + \sum_{i=2}^{k+1} F_i
    = 1+\sum_{i=0}^{k+1} F_i = F_{k+3}
\end{displaymath}
The last equality is a common property of Fibonacci numbers which is easily
proved by induction. This concludes the proof of this problem.

\section*{Problem 3}

\paragraph{(a)}
The amortized cost of \textsc{decrease-key} is reduced as $k$ increases.
Intuitively, cuts occur less frequently. Formally, we consider the modified
potential function:
\begin{displaymath}
    \Phi(T) = \#trees + \frac{2}{k-1}\sum_{x\in T} w(x)
\end{displaymath}
where $w(x)$ is the number of children that $x$ has lost (instead of being
a binary mark as in standard Fibonacci heaps). We reset $w(x)$ to 0 when $x$
cuts itself from its parent.

The analysis of the amortized cost of \textsc{decrease-key} is now 
straightforward:
\begin{itemize}
    \item either there is no cut, in which case the actual cost as well as
        $\Delta\Phi$ are $O(1)$.
    \item or there is a cascade of $c$ cuts: $x_1,\ldots, x_c$. Then:
        \begin{displaymath}
            \Delta\Phi = \Delta_{trees} - \frac{2}{k-1}\sum_{i=1}^{c-1} (k-1)
            + \frac{2}{k-1}
        \end{displaymath}
        since we reset the weights of $c-1$ nodes (we are cutting them so their
        weight was exactly $k-1$) and increment the weight of one node (the
        parent of the last node to be cut). $\Delta_{trees}$ is simply $c$, so
        we get:
        \begin{displaymath}
            \Delta\Phi = c - 2(c-1) + \frac{2}{k-1}
        \end{displaymath}
        since the actual cost is $c$, we get that in this case the amortized
        cost is $O(1+\frac{1}{k})$.
\end{itemize}
Overall the amortized cost is $O(1+\frac{1}{k})$ and decreases as $k$
increases.

\paragraph{(b)} The cost of \textsc{delete-min} however increases. Indeed, we
proved in class that the amortized cost of this operation is equal to the
number of trees $t$ after consolidation (this proof carries). Our Fibonacci
trees of a given rank now contain less elements so we have to use more trees to
store all the elements.

Formally, let $m_r$ be the minimum size of a ($k$-relaxed) Fibonacci heap of
rank $r$. We can prove exactly as in Problem 2 that:
\begin{displaymath}
    m_r \geq k + \sum_{i=k}^r m_{i-k}
\end{displaymath}
which implies:
\begin{displaymath}
    m_r \geq c_k^r
\end{displaymath}
where $c_k$ is the largest root of the polynomial $x^k - x^{k-1}
-1$. This implies that $t\leq \log_{c_k}n$. It is easy to see that $\log c_k
= O(\frac{\log k}{k})$, hence the amortized cost of \textsc{delete-min} is
$O(\frac{k\log n}{\log k})$.

\section*{Problem 4}

\paragraph{(a)}
\textsc{find} can be implemented in worst-case $O(\log n)$ time. We simply use
the standard \textsc{find} procedure in a BST, ignoring the marks of the nodes.
The only difference being that we return ``not present'' if the search ends on
a marked node. It is easy to see that a tree of $n$ elements has depth $O(\log
2n)$ (the worse which can happen is that it was obtained from $n$ deletions in
a balanced binary tree of size $2n$), giving a time complexity $O(\log n)$ in
the worst case.

\paragraph{}

A rebuilding occurs when there are $2m+1 = \frac{n-1}{2} +1$ elements left in
the tree. We first note that this rebuilding procedure can be implemented in
time $O(n)$:
\begin{itemize}
    \item by doing a left-first depth-first traversal, we obtain the $2m + 1$
        remaining elements in the tree in increasing sorted order:
        $x_1\leq\ldots\leq x_{2m+1}$
    \item we reconstruct the balanced binary tree $B(x_1,\ldots,x_{2m+1})$
        using a simple divide-and-conquer algorithm, by choosing $x_m$ as the
        root, and defining the left subtree of $x_m$ to be
        $B(x_1,\ldots,x_{m-1})$ and the right subtree of $x_m$ to be
        $B(x_{m+1},\ldots,x_{2m+1})$.
\end{itemize}
The running time of this procedure is $O(m) = O(n)$.

The amortized running time of \textsc{delete} is $O(\log n)$. Intuitively,
rebuildings occur every $O(n)$ deletions for a cost $O(n)$: their amortized
cost is $O(1)$. Formally, we introduce the potential function $\Phi(T) = |T|
- \textsc{size}(\text{root})$, the number of marked nodes in $T$.  There are
two cases to compute the amortized cost of a deletion:
\begin{itemize*}
    \item either no rebuilding occurs, the actual cost is $O(\log n)$ (we find
        the node to be deleted and mark it) and $\Delta\Phi = 1$
    \item or a rebuilding occurs, in this case the actual cost is $O(n + \log
        n)$ and $\Delta\Phi = -Cn$ for some $C>0$ (since a rebuilding
        occurs, $\Phi_{\text{initial}} = O(n)$ and $\Phi_{\text{final}} = 0$.
        By adapting the constants We see that  $\Delta\Phi$ compensates the
        rebuilding cost and the amortized cost is $O(\log n)$ again in this
        case.
\end{itemize*}

\paragraph{(b)} The height of this tree, $h(T)$ always satisfies $h(T) \leq
\alpha\log n$ (the balancedness constraint is true for the root in particular).
As a consequence \textsc{find} can be implemented in worst-case time
$O(\alpha\log n) = O(\log n)$.

We now introduce a potential function to analyze the amortized cost of
\textsc{insert}. For a node $x$ we define $x_l$ and $x_r$ the left and right
subtrees of $x$, and its weight $w(x)$:
\begin{displaymath}
    w(x) \eqdef \max\left(|\textsc{size}(x_l)-\textsc{size}(x_r)| -1, 0\right)
\end{displaymath}
and the potential function $\Phi(T)$:
\begin{displaymath}
    \Phi(T) = \sum_{x\in T} w(x)
\end{displaymath}

We first note that if the subtree rooted at $x$ is perfectly balanced, $w(x)
= 0$ (the size of $x_l$ and $x_r$ defer by one at most).

Second, we show that whenever a rebalancing occurs at $x$, $w(x) \geq
C\cdot\textsc{size}(x)$ for some $C>0$. If a rebalancing occurs at $x$, this is
because an element was inserted in the subtree rooted at $x$. Without loss of
generality, we can assume that this element was inserted in $x_l$, so that:
\begin{equation}\label{eq:1}
    \textsc{height}(x) = 1 + \textsc{height}(x_l)
\end{equation}
Furthermore, since a rebalancing occurs, we have:
\begin{equation}\label{eq:2}
    \textsc{height}(x) > \alpha \log \textsc{size}(x)
\end{equation}
Finally, if a rebalancing occurs at $x$ it means that $x_l$ is balanced
(rebalancing are done from the inserted node to the root), hence:
\begin{equation}\label{eq:3}
    \textsc{height}(x_l) \leq \alpha \log \textsc{size}(x_l)
\end{equation}
Combining \cref{eq:1,eq:2,eq:3} we obtain:
\begin{displaymath}
    \alpha \log \textsc{size}(x_l)\geq \textsc{height}(x_l)
    = \textsc{height}(x) - 1 > \alpha \log \textsc{size}(x) - 1
\end{displaymath}
Hence:
\begin{equation}\label{eq:4}
    \textsc{size}(x_l)\geq \frac{1}{2^{1/\alpha}}\textsc{size}(x)
\end{equation}
Using $\textsc{size}(x) = 1 + \textsc{size}(x_l) + \textsc{size}(x_r)$ and
\cref{eq:4} we obtain:
\begin{equation}\label{eq:5}
    \textsc{size}(x_r)\leq \left(1-\frac{1}{2^{1/\alpha}}\right)\textsc{size}(x)
    - 1
\end{equation}
Finally, combining \cref{eq:4,eq:5}:
\begin{displaymath}
    w(x) = \textsc{size}(x_l) - \textsc{right}(x_l) -1
    \geq \left(2^{1-1/\alpha}-1\right)\textsc{size}(x)
\end{displaymath}

We can now analyze the amortized cost of \textsc{insert}. Let us assume that
the insertion of an element caused a cascade of rebalancing at nodes
$x_1,\ldots, x_k$. Then the actual cost of this insertion is:
\begin{displaymath}
    O(\log n) + \textsc{size}(x_1) + \ldots + \textsc{size}(x_k) 
\end{displaymath}
since we saw in \textbf{(a)} that rebuilding a BST of $n$ elements can be done in
$O(n)$ time. Furthermore the difference in potential is:
\begin{displaymath}
    - w(x_1) - \ldots - w(x_k) 
\end{displaymath}
since the weight of a perfectly balanced node is 0 and only the weights of
$x_1$ to $x_k$ are changing. Using the fact that $w(x_i) \geq
C\textsc{size}(x_i)$ and adjusting the constants, we see that the difference in
potential compensates for the cost of rebuilding the substrees. This shows that
the amortized cost of \textsc{insert} is $O(\log n)$.

\section*{Problem 5}

Problem 1: 2h, Problem 2: 45min, Problem 3: 3h, Problem 4: 2h15min. Total: 8h.
\end{document}