ps4/main.tex


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254

\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{algorithm}
\usepackage{algpseudocode}

\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}
 
\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}
 
\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}

\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}

\author{Thibaut Horel}
\title{CS 224 Problem Set 4 -- Solutions}
\date{October 15, 2014}

\begin{document}
\maketitle

Collaborators: Jean Pouget-Abadie and Eric Balkanski

\section*{Problem 1}

Let $x$ (resp. $y$) be a solution to the primal (resp. the dual) such that $x$
and $y$ satisfy the approximate complementary slackness conditions. Then:
\begin{align*}
    c^T x &= \sum_i x_i c_i \leq \sum_i \alpha x_i (A^T)_i\cdot y\\
          &= \alpha\left(\sum_i x_i (A^T)_i\right)\cdot y
    = \alpha\left(Ax\right)\cdot y = \alpha\sum_j (A_j\cdot x)y_j\\
    &\leq \alpha\sum_j \beta b_jy_j = \alpha\beta b^Ty
\end{align*}
where we used the first approximate complementary slackness condition in the
first line. In the second line, we first used that the dot product is linear in
its first coordinate, then the definition of the matrix-product vector. Finally
we used the second complementary slackness condition in the last line.

\section*{Problem 2}

\paragraph{(a)} We introduce dual variables $\alpha_t$ for the constraints
$-x_{r(t)}^t \geq 0$, $\beta_i^t$ for the constraints $-x_i^t\geq -1$,
$\gamma_i^t$ for the constraints $z_i^t + x_i^t - x_i^{t+1} \geq 0$, and
$\delta_t$ for the constraint $\sum_i x_i^t\geq n-k$.

The dual can now be written:
\begin{align*}
    \max &\quad(n-k)\sum_{t=1}^T \delta_t - \sum_{t=1}^T\sum_{i=1}^n \beta_i^t \\
    \text{s.t.}&\quad\begin{aligned}
    \gamma_i^t & \leq 1&\forall\,1\leq i\leq n,\forall\,1\leq t\leq T\\
    \beta_i^t & \geq \delta_t + \gamma_i^{t+1} - \gamma_i^t&\forall\,1\leq
    t\leq T,\;\forall i\neq r(t)\\
    \beta_{r(t)}^t +\alpha_t& \geq \delta_t + \gamma_{r(t)}^{t+1}
    - \gamma_{r(t)}^t&\forall\,1\leq t\leq T\\
    \alpha_t,\beta_{i,t},\gamma_{i,t},\delta_t&\geq 0
\end{aligned}
\end{align*}
It is not hard to see that the third constraint can be removed without changing
the problem ($\alpha_t$ only appears in this constraint and can always be
chosen large enough so that it is satisfied).

\paragraph{(b)} We will use the dual variable $\gamma_i^t$ in the problem above
to keep track of whether or not page $i$ is marked at time $t$. When request
for page $i$ at time $t$ arrives:
\begin{enumerate*}
    \item if $i$ is in cache, mark it. Set $\gamma_i^t$ to 1 and keep all the
        other variables the same.
    \item if $i$ is not in cache and not all pages are marked. Evict an
        unmarked page, say $j$. We set $\gamma_i^t$, $x_j^t$ and $z_j^t$ to 1,
        and $\gamma_j^t$, $x_i^t$ and $z_i^t$ to 0.
    \item if $i$ is not in cache and all pages are marked. We unmark all
        pages. We set $\gamma_j^t$ to 0 for all $j\neq i$. We set $\delta_t$
        to 1. This forces to set $\beta_j^t$ to 1 for all pages outside the
        cache which are not $i$ to maintain dual feasibility.
\end{enumerate*}
We show that at time $t$, $P(t) \leq k D(t) + \big(\Phi(t) - \Phi(t-1)\big)$ in
all three cases. $P(t)$ is simply one or zero depending on whether or not ther
has been an eviction. $\Delta\Phi$ is the difference in the number of marked
pages:
\begin{enumerate*}
    \item $P(t) = 0$ (no eviction), $D(t) = 0$ and $\Delta\Phi = 1$ (one page
        marked). This is ok. 
    \item $P(t) = 1$ (one eviction), $D(t) = 0$ and $\Delta\Phi = 1$ (one page
        marked). This is still ok.
    \item $P(t) = 1$ (one eviction), $D(t) = (n-k) - (n-k-1)$ (there are
        $n-k+1$ pages outside the cache which are not $i$) and $\Delta\phi = -k
        + 1$. This is still ok.
\end{enumerate*}

If we some the previous inequality over $t$, we have $\sum_t P(t)$ on the
left-hand side, this is the cost of the $1$-bit LRU algorithm. On the right
hand side, we get $k\sum_t D(t) + \Phi(T) - \Phi(1)$. The difference in
potential is at most $k$, and $\sum_t D(t)$, the value of our dual solution is
less than the value of any feasible solution for the primal (weak duality) and
in particual the optimal integral solution $\text{OPT}$. Hence, we have proved
that the cost attained by $1$-bit LRU is bounded by $k\text{OPT} + k=
k(\text{OPT} +1)$

\section*{Problem 3}

\paragraph{(a)} We introduce a variable $x_v$ for each vertex indicating
whether or not we include it in the solution. The primal has the following
form:
\begin{align*}
    \min_{x\geq 0}&\quad \sum_{v\in V} c_v x_v\\
    \text{s.t.}&\quad x_u + x_v \geq 1,\quad \forall\,(u,v)\in E
\end{align*}
For the dual, we introduce one variable $y_e$ for each edge $e\in E$. The dual
can be written as:
\begin{align*}
    \max_{y\geq 0}&\quad \sum_{e\in E} y_e\\
    \text{s.t.}&\quad \sum_{e: v\in e} y_e\leq c_v,\quad\forall v\in E
\end{align*}

\paragraph{(b)} We propose the following algorithm. We iterate over all edges.
At each iteration we increase the dual variable of that edge until one or both
of the two dual constraints for its edge points become tight. Whenever
a constraint becomes tight, we set the primal variable its vertex to 1.
\begin{algorithm}
    \begin{algorithmic}[1]
        \For{$e=(u,v)\in E$}
        \State $y_e \leftarrow \min(c_u-\sum_{e'\neq e:u\in e'}y_{e'},
            c_v-\sum_{e'\neq e:v\in e'}y_{e'})$
            \If{$c_u = \sum_{e'\neq e:u\in e' }y_{e'}$}
            \State $x_u\leftarrow 1$
            \EndIf
            \If{$c_v = \sum_{e'\neq e:v\in e' }y_{e'}$}
            \State $x_v\leftarrow 1$
            \EndIf
        \EndFor
    \end{algorithmic}
\end{algorithm}

It is easy to see that at the end of the algorithm we have both a primal
feasible and dual feasible solution (we never violate dual constraints, and at
each iteration of the algorithm, we satisfy a new primal constraint).

We can now conclude by observing that:
\begin{displaymath}
    2\sum_e y_e = \sum_{v\in V}\sum_{e:v\in e} y_e
    \geq \sum_v c_v x_v
\end{displaymath}
The equality comes from the fact that the double summation counts each
edge twice. The inequality comes from the fact that when $x_v = 1$ it means
that the dual constraint was tight, that is $c_v  = \sum_{e: v\in e} y_e$.

But by weak duality, $\sum_e y_e$ is smaller than any primal feasible solution,
and in particular the optimal integral solution. Hence $\sum_e y_e\leq
\text{OPT}$. This concludes the proof.

\section*{Problem 4}

\paragraph{(a)} We take a graph with three vertices $(s,u,v)$ with $L(s,u)
= 1$, $L(s, v) = 2$ and $L(u,v) = 1$. Let us denote by $x = f_{s,v}$, $y
= f_{s,u}$ and $z = f_{u,v}$. The minimum flow problem can be written as:
\begin{align*}
    \min&\quad 2x + y + z\\
    \text{s.t.}&\quad x +y=2,\; z= y-1,\; x+z=1
\end{align*}
By substitution in the objective function. We see that all feasible flows have
the same value 3. Thus every feasible solution is optimal. As a consequence,
the fractional solution $x =\frac{1}{2}, y = \frac{3}{2}, z =\frac{1}{2}$ is
optimal and has the same value as the integral solution $x = 1, y = 1$ (the
other integral solution is $y=1, z=1$).

\paragraph{(b)} We introduce a dual variable $p_v$ for each vertex. The dual
problem can be written:
\begin{align*}
    \max&\quad \sum_{v\neq s} d_v - (n-1) d_s\\
    \text{s.t}&\quad d_v - d_u \leq L(u,v),\quad \forall (u,v)\in E
\end{align*}
Note that the dual variables are unrestricted because of the equality
constraints in the primal.

\paragraph{(c)} We will follow the execution of Dijkstra's algorithm while
maintaining an integral primal and dual solution.  We will maintain the
following two properties throughout the execution:
\begin{itemize*}
    \item $f_{u,v}$ is the flow travelling along the edge ${u,v}$ in the tree
        constructed by Dijkstra's algorithm thus far: the flow constraints
        impose that this is the number of descendants of $v$ (including $v$)
        in the tree.
    \item $d_v$ is the distance from $s$ to $v$ in the tree constructed thus far.
\end{itemize*}

At the beginning, we set $f_{u,v} = 0$ for all $(u,v)\in E$, $d_s = 0$ and $d_v
= +\infty$ for all $v$. Let us consider a step of Dijkstra's algorithm where we are examining edge
$(u,v)$ in $E$. If $p_v> p_u + L(u,v)$ then:
\begin{enumerate}
    \item set $p_v = p_u + L(u,v)$ and set $f_{u,v} = 1$
    \item add $-1$ to $f_e$ for all edges $e$ in the previous path from $s$
        to $v$ in the tree, prior to the addition of edge $(u,v)$.
    \item add +1 to $f_e$ for all edges $e$ in the path from $s$
        to $v$ in the tree after the addition of edge $(u,v)$.
\end{enumerate}

If we denote by $G'$ the graph induced by all the edges examined thus far
(including $(u,v)$), then we have the following properties:
\begin{enumerate}
    \item $\sum_{e\in E(G')} L(e)f_e = \sum_{v\in G'} d_v$. Indeed, on the
        left-hand side, we are counting each edge in the tree constructed so
        far with multiplicity equal to the number of its descendants: for each
        vertex $v$ in the tree, we are adding $1$ for all edges on the path from
        $s$ to $v$. This gives the right-hand side.
    \item the (primal) flow constraints are satisfied for all edges in $E(G')$.
    \item the (dual) constraints are satisfied for all vertices in $V(G')$.
\end{enumerate}
As we can see, these properties are simple consequences of the fact that
Dijkstra's algorithm is maintaining a tree which is optimal for the set of
edges already examined and the fact that we update the primal and dual
variables to reflect the tree constructed by Dijkstra's algorithm.

At the end of the execution of Dijsktra's algorithm, we know that all the edges
have been examined. The three properties above imply:
\begin{enumerate}
    \item the primal and dual solutions are equal
    \item the primal solution is feasible
    \item the dual solution is feasible
\end{enumerate}

By weak duality, this implies that both solutions are optimal and in particular
that Dikstra's algorithm is correct.

\section*{Problem 5}

Problem 1: 20min, Problem 2: 2h, Problem 3: 1h30, Problem 4: 2h.
\textbf{Total:} 6h
\end{document}