\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{algorithm}
\usepackage{algpseudocode}
\newenvironment{enumerate*}%
{\vspace{-2ex} \begin{enumerate} %
\setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
{\end{enumerate}}
\newenvironment{itemize*}%
{\vspace{-2ex} \begin{itemize} %
\setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
{\end{itemize}}
\newenvironment{description*}%
{\vspace{-2ex} \begin{description} %
\setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
{\end{description}}
\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\author{Thibaut Horel}
\title{CS 224 Problem Set 5 -- Solutions}
\date{October 27, 2014}
\begin{document}
\maketitle

\section*{Problem 1}
\paragraph{(a)} We prove that $\text{OPT}\geq p_{max}$ and that $\text{OPT}\geq\frac{1}{m}\sum_{i=1}^n p_i$. The machine to which the job of maximum processing time is allocated in $\text{OPT}$ has completion time at least $p_{max}$, hence $\text{OPT}\geq p_{max}$. Assume by contradiction that $\text{OPT} < \frac{1}{m}\sum_{i=1}^n p_i$. Then every machine has completion time strictly less than $\frac{1}{m}\sum_{i=1}^n p_i$, so the total amount of processing time in this allocation is strictly less than $m\times\frac{1}{m}\sum_{i=1}^n p_i = \sum_{i=1}^n p_i$. This contradicts the fact that all the jobs are processed, and we conclude that $\text{OPT}\geq\frac{1}{m}\sum_{i=1}^n p_i$.

\paragraph{(b)} Consider the machine of maximum completion time in the allocation produced by the greedy algorithm, and consider the last time a job was allocated to this machine. Without loss of generality, we can rename the jobs so that this last job is $p_n$. Right before $p_n$ is allocated, the load of this machine is at most $\frac{1}{m}\sum_{i=1}^{n-1}p_i$: since the greedy algorithm assigns $p_n$ to the least-loaded machine, a larger load would imply that every machine has load strictly greater than $\frac{1}{m}\sum_{i=1}^{n-1}p_i$, so the total processing time already allocated would exceed $\sum_{i=1}^{n-1} p_i$; this is a contradiction since only jobs among $p_1,\ldots,p_{n-1}$ have been allocated so far. Hence, after adding job $p_n$ to this machine, we can bound its load, which is the makespan of the greedy algorithm, by:
\begin{align*}
\textsc{Greedy} &\leq \frac{1}{m}\sum_{i=1}^{n-1}p_i + p_n = \frac{1}{m}\sum_{i=1}^n p_i +p_n\left(1-\frac{1}{m}\right) \leq \frac{1}{m}\sum_{i=1}^n p_i +p_{max}\left(1-\frac{1}{m}\right)\\
&\leq \max\left\{\frac{1}{m}\sum_{i=1}^n p_i, p_{max}\right\}\left(2-\frac{1}{m}\right)\leq\left(2-\frac{1}{m}\right)\text{OPT}
\end{align*}
where the last inequality comes from question \textbf{(a)}. This concludes the proof of the approximation ratio of \textsc{Greedy}.
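For concreteness, here is a minimal Python sketch of this greedy rule (list scheduling: each job is assigned to the currently least-loaded machine), given purely as an illustration of the algorithm analysed above; \texttt{jobs} is the list of processing times and \texttt{m} the number of machines, and a binary heap retrieves the least-loaded machine, so the sketch runs in time $O(n\log m)$.
\begin{verbatim}
import heapq

def greedy_schedule(jobs, m):
    """Assign each job to the currently least-loaded machine.

    Returns the resulting makespan (maximum machine load).
    """
    loads = [(0, i) for i in range(m)]   # (current load, machine index)
    heapq.heapify(loads)
    for p in jobs:
        load, i = heapq.heappop(loads)   # least-loaded machine
        heapq.heappush(loads, (load + p, i))
    return max(load for load, _ in loads)
\end{verbatim}
For instance, \texttt{greedy\_schedule([2, 3, 4, 5], 2)} returns $8$, whereas the optimal makespan on this instance is $7$ (pairing $\{2,5\}$ and $\{3,4\}$), illustrating that the greedy allocation need not be optimal.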
\paragraph{(c)} Consider the allocation obtained after greedily assigning the jobs in $S$ (on top of an allocation of load $L$ for the jobs in $B$). We distinguish two cases:
\begin{itemize*}
\item either the load remains $L$ after allocating the jobs in $S$;
\item or the load is greater than $L$: in this case, the machine of maximum load (greater than $L$) has a job of $S$ assigned to it. Consider the last time a job in $S$ was assigned to it. Without loss of generality we can rename the jobs so that this job is $p_n$. As in \textbf{(b)}, the load of this machine at that time was at most $\frac{1}{m}\sum_{i=1}^{n-1}p_i$ (otherwise we get a contradiction using that it was the least-loaded machine). Hence we can bound the load of this machine after adding this last job of $S$:
\begin{displaymath}
\text{Max Load} \leq \frac{1}{m}\sum_{i=1}^{n-1}p_i + p_n \leq \frac{1}{m}\sum_{i=1}^n p_i + \eps\text{OPT}\leq(1+\eps)\text{OPT}
\end{displaymath}
where the second inequality uses that $p_n\leq \eps\text{OPT}$ since it is a job in $S$, and the last inequality uses the bound of question \textbf{(a)}.
\end{itemize*}
In both cases, the final load is at most $\max\{L, (1+\eps)\text{OPT}\}$.

\paragraph{(d)} An instance of the job scheduling problem with $k$ distinct job processing times can be described by a $k$-tuple $(n_1,\ldots, n_k)$ with $n_1+\ldots+n_k= n$, where $n_i$ is the number of jobs of type $i$ ($1\leq i\leq k$). We observe that this problem reduces to the bin packing problem: there exists an allocation of load at most $T$ if and only if there exists a packing of the jobs into at most $m$ bins, each of capacity $T$. This suggests the following dynamic programming algorithm: denoting by $M(i_1,\ldots,i_k)$ the minimum number of machines required to complete the instance $(i_1,\ldots,i_k)$ in time at most $T$, we have $M(0,\ldots,0)=0$ and the induction formula:
\begin{equation}\label{eq:induction}
M(i_1,\ldots,i_k) = 1 + \min_{(j_1,\ldots,j_k)\in S(i_1,\ldots,i_k)} M(i_1 - j_1,\ldots, i_k-j_k)
\end{equation}
where $S(i_1,\ldots,i_k) = \{(j_1,\ldots,j_k)\in \{0,\ldots, i_1\}\times\ldots\times\{0,\ldots, i_k\}\,|\, M(j_1,\ldots,j_k)=1\}$ is the set of all sub-instances of $(i_1,\ldots,i_k)$ which fit on a single machine. The correctness of this induction formula is easy to establish: for a given instance, we first ``guess'' (i.e.\ try all possible choices of) a sub-instance to be allocated on a single machine, and then find the optimal allocation of the remaining instance. The set $S(i_1,\ldots, i_k)$ is easy to compute since $M(j_1,\ldots,j_k)=1$ if and only if $(j_1,\ldots,j_k)\neq(0,\ldots,0)$ and $\sum_{l=1}^k j_l t_l \leq T$, where $t_l$ is the processing time of jobs of type $l$. Moreover, by computing the function $M$ in lexicographic order of its argument, the induction is well-defined (we never depend on a value which has not been computed yet). Hence, we can compute $M$ with a dynamic programming algorithm: fill an $(n_1+1)\times\ldots\times(n_k+1)$ multi-dimensional array in lexicographic order. When we are done, either $M(n_1,\ldots, n_k)\leq m$, and the allocation computed by the algorithm (recovered by backtracking through the array) has load at most $T$; otherwise we report ``not feasible''. Finally, evaluating \cref{eq:induction} for one entry takes time $O(n^k)$, since $S(i_1,\ldots,i_k)$ can be computed in time $(i_1+1)\times\ldots\times(i_k+1)\leq (n+1)^k$, and there are at most $(n_1+1)\times\ldots\times(n_k+1)\leq (n+1)^k$ entries in the array. Hence the total running time of this algorithm is $O(n^k\times n^k) = O(n^{2k})$.
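The following Python sketch implements this dynamic program as a memoised recursion over the tuples $(i_1,\ldots,i_k)$, an equivalent presentation of the table-filling procedure above, shown only for illustration; \texttt{counts} and \texttt{times} correspond to $(n_1,\ldots,n_k)$ and $(t_1,\ldots,t_k)$.
\begin{verbatim}
from functools import lru_cache
from itertools import product

def min_machines(counts, times, T):
    """M(counts): minimum number of machines completing all jobs within time T.

    counts[l] is the number of jobs of type l, times[l] their processing time.
    """
    @lru_cache(maxsize=None)
    def M(state):
        if not any(state):               # empty instance needs no machine
            return 0
        best = float("inf")
        # guess a non-empty sub-instance that fits on a single machine
        for sub in product(*(range(c + 1) for c in state)):
            if any(sub) and sum(j * t for j, t in zip(sub, times)) <= T:
                rest = tuple(i - j for i, j in zip(state, sub))
                best = min(best, 1 + M(rest))
        return best

    return M(tuple(counts))
\end{verbatim}
For instance, \texttt{min\_machines((3, 2), (5, 7), 12)} evaluates to $3$: two machines each process one job of time $5$ and one of time $7$, and a third machine processes the remaining job of time $5$.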
\paragraph{(e)} If we know $\text{OPT}$, we can compute $B$ and $S$. Since $\text{OPT}$ is known, we may rescale all processing times by $\text{OPT}$ and assume $\text{OPT}=1$; jobs in $B$ then satisfy $\eps < p_i\leq 1$. We round the jobs in $B$: job $p_i$ is rounded to $\tilde{p}_i=\eps(1+\eps)^j$ where $j=\lceil\log_{1+\eps}\frac{p_i}{\eps}\rceil$. By doing so, we have at most $\lceil\log_{1+\eps}\frac{p_{max}}{\eps}\rceil\leq\lceil\log_{1+\eps}\frac{1}{\eps}\rceil$ distinct values for jobs in $B$. We can apply the dynamic programming algorithm of \textbf{(d)} to the rounded jobs in $B$ (taking the smallest target load $T$ for which it reports feasibility with at most $m$ machines) and obtain an allocation of load $L'$. I claim that this leads to an allocation of the (unrounded) jobs in $B$ of load at most $(1+\eps)\text{OPT}$. Indeed, let us denote by $p_1,\ldots,p_k$ the jobs in $B$ on the machine of maximum rounded load when the rounded jobs are allocated as in $\text{OPT}$. Then:
\begin{displaymath}
\text{OPT}\geq \sum_{i=1}^k p_i\geq \frac{1}{1+\eps}\sum_{i=1}^k \tilde{p}_i\geq \frac{1}{1+\eps} L'
\end{displaymath}
where the last inequality comes from the fact that $L'$ is the optimal load for the rounded jobs (given by the dynamic programming algorithm) and thus beats the allocation induced by $\text{OPT}$ on them. Now, note that $L'$ is an upper bound on the load for the unrounded jobs (since $\tilde{p}_i\geq p_i$ for all $i$). Hence we obtain an allocation of the jobs in $B$ of load $L\leq (1+\eps)\text{OPT}$. We then complete the allocation by greedily allocating the jobs in $S$; by question \textbf{(c)} this leads to a load bounded by $\max\{L, (1+\eps)\text{OPT}\} = (1+\eps)\text{OPT}$ since we have just proved that $L\leq(1+\eps)\text{OPT}$. The running time of the DP algorithm is $n^{O(\log_{1+\eps}\frac{1}{\eps})} = n^{O(\frac{1}{\eps}\log\frac{1}{\eps})}$ (for $\eps\leq 1$; for $\eps>1$, we already have a $2$-approximation algorithm from \textbf{(b)}). The greedy allocation of the jobs in $S$ takes time $O(nm) = O(n^2)$ since we can assume $m\leq n$ (if $n < m$, at most $n$ machines are ever used and the extra machines can be ignored). The total running time is therefore $n^{O(\frac{1}{\eps}\log\frac{1}{\eps})}$, which is polynomial in $n$ for every fixed $\eps > 0$.
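To make the rounding step concrete, here is a small Python sketch of the splitting and rounding of part \textbf{(e)}; the helper name is illustrative, and the explicit parameter \texttt{T} plays the role of $\text{OPT}$ (under the normalisation above one would take \texttt{T} $=1$).
\begin{verbatim}
import math

def split_and_round(jobs, eps, T):
    """Split jobs into big (> eps*T) and small ones, rounding the big ones up.

    Each big job p is replaced by eps*T*(1+eps)**j with
    j = ceil(log_{1+eps}(p / (eps*T))), so p <= rounded value < (1+eps)*p.
    """
    big, small = [], []
    for p in jobs:
        if p > eps * T:
            j = math.ceil(math.log(p / (eps * T), 1 + eps))
            big.append(eps * T * (1 + eps) ** j)
        else:
            small.append(p)
    return big, small
\end{verbatim}
Grouping the rounded values in \texttt{big} yields the counts and distinct processing times fed to the dynamic program of \textbf{(d)}, while the jobs in \texttt{small} are handled by the greedy pass of \textbf{(c)}.

\end{document}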