\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{algorithm}
\usepackage{algpseudocode}

\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}
 
\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}
 
\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}

\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}

\author{Thibaut Horel}
\title{CS 224 Problem Set 4 -- Solutions}
\date{October 15, 2014}

\begin{document}
\maketitle

Collaborators: Jean Pouget-Abadie and Eric Balkanski

\section*{Problem 1}

Let $x$ (resp. $y$) be a solution to the primal (resp. the dual) such that $x$
and $y$ satisfy the approximate complementary slackness conditions. Then:
\begin{align*}
    c^T x &= \sum_i x_i c_i \leq \sum_i \alpha x_i (A^T)_i\cdot y\\
          &= \alpha\left(\sum_i x_i (A^T)_i\right)\cdot y
    = \alpha\left(Ax\right)\cdot y = \alpha\sum_j (A_j\cdot x)y_j\\
    &\leq \alpha\sum_j \beta b_jy_j = \alpha\beta b^Ty
\end{align*}
where we used the first approximate complementary slackness condition in the
first line. In the second line, we first used that the dot product is linear in
its first coordinate, then the definition of the matrix-vector product. Finally
we used the second complementary slackness condition in the last line.

\section*{Problem 2}

\paragraph{(a)} We introduce dual variables $\alpha_t$ for the constraints
$-x_{r(t)}^t \geq 0$, $\beta_i^t$ for the constraints $-x_i^t\geq -1$,
$\gamma_i^t$ for the constraints $z_i^t + x_i^t - x_i^{t+1} \geq 0$, and
$\delta_t$ for the constraint $\sum_i x_i^t\geq n-k$.

The dual can now be written:
\begin{align*}
    \max &\quad(n-k)\sum_{t=1}^T \delta_t - \sum_{t=1}^T\sum_{i=1}^n \beta_i^t \\
    \text{s.t.}&\quad\begin{aligned}
    \gamma_i^t & \leq 1&\forall\,1\leq i\leq n,\forall\,1\leq t\leq T\\
    \beta_i^t & \geq \delta_t + \gamma_i^{t+1} - \gamma_i^t&\forall\,1\leq
    t\leq T,\;\forall i\neq r(t)\\
    \beta_{r(t)}^t +\alpha_t& \geq \delta_t + \gamma_{r(t)}^{t+1}
    - \gamma_{r(t)}^t&\forall\,1\leq t\leq T\\
    \alpha_t,\beta_{i,t},\gamma_{i,t},\delta_t&\geq 0
\end{aligned}
\end{align*}
It is not hard to see that the third constraint can be removed without changing
the problem ($\alpha_t$ only appears in this constraint and can always be
chosen large enough so that it is satisfied).

\paragraph{(b)} We will use the dual variable $\gamma_i^t$ in the problem above
to keep track of whether or not page $i$ is marked at time $t$. When request
for page $i$ at time $t$ arrives:
\begin{enumerate*}
    \item if $i$ is in cache, mark it. Set $\gamma_i^t$ to 1 and keep all the
        other variables the same.
    \item if $i$ is not in cache and not all pages are marked. Evict an
        unmarked page, say $j$. We set $\gamma_i^t$, $x_j^t$ and $z_j^t$ to 1,
        and $\gamma_j^t$, $x_i^t$ and $z_i^t$ to 0.
    \item if $i$ is not in cache and all pages are marked. We unmark all
        pages. We set $\gamma_j^t$ to 0 for all $j\neq i$. We set $\delta_t$
        to 1. This forces to set $\beta_j^t$ to 1 for all pages outside the
        cache which are not $i$ to maintain dual feasibility.
\end{enumerate*}
We show that at time $t$, $P(t) \leq k D(t) + \big(\Phi(t) - \Phi(t-1)\big)$ in
all three cases above. $P(t)$ is simply one or zero depending on whether or not there
has been an eviction. $\Delta\Phi$ is the difference in the number of marked
pages:
\begin{enumerate*}
    \item $P(t) = 0$ (no eviction), $D(t) = 0$ and $\Delta\Phi = 1$ (one page
        marked). This is ok. 
    \item $P(t) = 1$ (one eviction), $D(t) = 0$ and $\Delta\Phi = 1$ (one page
        marked). This is still ok.
    \item $P(t) = 1$ (one eviction), $D(t) = (n-k) - (n-k-1) = 1$ (there are
        $n-k+1$ pages outside the cache which are not $i$) and $\Delta\phi = -k
        + 1$. This is again ok.
\end{enumerate*}

If we sum the previous inequality over $t$, we have $\sum_t P(t)$ on the
left-hand side, this is the cost of the $1$-bit LRU algorithm. On the right
hand side, we get $k\sum_t D(t) + \Phi(T) - \Phi(1)$. The difference in
potential is at most $k$, and $\sum_t D(t)$, the value of our dual solution is
less than the value of any feasible solution for the primal (weak duality) and
in particular the optimal integral solution $\text{OPT}$. Hence, we have proved
that the cost attained by $1$-bit LRU is bounded by $k\text{OPT} + k=
k(\text{OPT} +1)$

\section*{Problem 3}

\paragraph{(a)} We introduce a variable $x_v$ for each vertex indicating
whether or not we include it in the solution. The primal has the following
form:
\begin{align*}
    \min_{x\geq 0}&\quad \sum_{v\in V} c_v x_v\\
    \text{s.t.}&\quad x_u + x_v \geq 1,\quad \forall\,(u,v)\in E
\end{align*}
For the dual, we introduce one variable $y_e$ for each edge $e\in E$. The dual
can be written as:
\begin{align*}
    \max_{y\geq 0}&\quad \sum_{e\in E} y_e\\
    \text{s.t.}&\quad \sum_{e: v\in e} y_e\leq c_v,\quad\forall v\in E
\end{align*}

\paragraph{(b)} We propose the following algorithm. We iterate over all edges.
At each iteration we increase the dual variable of that edge until one or both
of the two dual constraints for its end points become tight. Whenever
a constraint becomes tight, we set the primal variable of its associated vertex
to 1.
\begin{algorithm}
    \begin{algorithmic}[1]
        \For{$e=(u,v)\in E$}
        \State $y_e \leftarrow \min(c_u-\sum_{e'\neq e:u\in e'}y_{e'},
            c_v-\sum_{e'\neq e:v\in e'}y_{e'})$
            \If{$c_u = \sum_{e'\neq e:u\in e' }y_{e'}$}
            \State $x_u\leftarrow 1$
            \EndIf
            \If{$c_v = \sum_{e'\neq e:v\in e' }y_{e'}$}
            \State $x_v\leftarrow 1$
            \EndIf
        \EndFor
    \end{algorithmic}
\end{algorithm}

It is easy to see that at the end of the algorithm we have both a primal
feasible and dual feasible solution (we never violate dual constraints, and at
each iteration of the algorithm, we satisfy a new primal constraint).

We can now conclude by observing that:
\begin{displaymath}
    2\sum_e y_e = \sum_{v\in V}\sum_{e:v\in e} y_e
    \geq \sum_v c_v x_v
\end{displaymath}
The equality comes from the fact that the double summation counts each edge
twice. The inequality comes from the fact that $x_v = 1$ implies that that the
associated dual constraint was tight, that is $c_v  = \sum_{e: v\in e} y_e$.

But by weak duality, $\sum_e y_e$ is smaller than any primal feasible solution,
and in particular the optimal integral solution. Hence $\sum_e y_e\leq
\text{OPT}$. This concludes the proof.

\section*{Problem 4}

\paragraph{(a)} We take a graph with three vertices $(s,u,v)$ with $L(s,u)
= 1$, $L(s, v) = 2$ and $L(u,v) = 1$. Let us denote by $x = f_{s,v}$, $y
= f_{s,u}$ and $z = f_{u,v}$. The minimum flow problem can be written as:
\begin{align*}
    \min&\quad 2x + y + z\\
    \text{s.t.}&\quad x +y=2,\; z= y-1,\; x+z=1
\end{align*}
By substitution in the objective function. We see that all feasible flows have
the same value 3. Thus every feasible solution is optimal. As a consequence,
the fractional solution $x =\frac{1}{2}, y = \frac{3}{2}, z =\frac{1}{2}$ is
optimal and has the same value as the integral solution $x = 1, y = 1, z=0$ (the
other integral solution is $x=0, y=1, z=1$).

\paragraph{(b)} We introduce a dual variable $p_v$ for each vertex. The dual
problem can be written:
\begin{align*}
    \max&\quad \sum_{v\neq s} d_v - (n-1) d_s\\
    \text{s.t}&\quad d_v - d_u \leq L(u,v),\quad \forall (u,v)\in E
\end{align*}
Note that the dual variables are unrestricted because of the equality
constraints in the primal.

\paragraph{(c)} We will follow the execution of Dijkstra's algorithm while
maintaining an integral primal and dual solution.  We will maintain the
following two properties throughout the execution:
\begin{itemize*}
    \item $f_{u,v}$ is the flow travelling along the edge ${u,v}$ in the tree
        constructed by Dijkstra's algorithm thus far: the flow constraints
        impose that this is the number of descendants of $v$ (including $v$)
        in the tree.
    \item $d_v$ is the distance from $s$ to $v$ in the tree constructed thus far.
\end{itemize*}

At the beginning, we set $f_{u,v} = 0$ for all $(u,v)\in E$, $d_s = 0$ and $d_v
= +\infty$ for all $v$. Let us consider a step of Dijkstra's algorithm where we
are examining edge $(u,v)$ in $E$. If $p_v> p_u + L(u,v)$ then:
\begin{enumerate}
    \item set $p_v = p_u + L(u,v)$ and set $f_{u,v} = 1$
    \item add $-1$ to $f_e$ for all edges $e$ in the previous path from $s$
        to $v$ in the tree, prior to the addition of edge $(u,v)$.
    \item add +1 to $f_e$ for all edges $e$ in the path from $s$
        to $v$ in the tree after the addition of edge $(u,v)$.
\end{enumerate}

If we denote by $G'$ the graph induced by all the edges examined thus far
(including $(u,v)$), then we have the following properties:
\begin{enumerate}
    \item $\sum_{e\in E(G')} L(e)f_e = \sum_{v\in G'} d_v$. Indeed, on the
        left-hand side, we are counting each edge in the tree constructed so
        far with multiplicity equal to the number of its descendants: for each
        vertex $v$ in the tree, we are adding $1$ for all edges on the path from
        $s$ to $v$. This gives the right-hand side.
    \item the (primal) flow constraints are satisfied for all edges in $E(G')$.
    \item the (dual) constraints are satisfied for all vertices in $V(G')$.
\end{enumerate}
As we can see, these properties are simple consequences of the fact that
Dijkstra's algorithm is maintaining a tree which is optimal for the set of
edges already examined and the fact that we update the primal and dual
variables to reflect the tree constructed by Dijkstra's algorithm.

At the end of the execution of Dijsktra's algorithm, we know that all the edges
have been examined. The three properties above imply:
\begin{enumerate}
    \item the primal and dual solutions are equal
    \item the primal solution is feasible
    \item the dual solution is feasible
\end{enumerate}

By weak duality, this implies that both solutions are optimal and in particular
that Dikstra's algorithm is correct.

\section*{Problem 5}

Problem 1: 20min, Problem 2: 2h, Problem 3: 1h30, Problem 4: 2h.
\textbf{Total:} 6h
\end{document}