ps3/main.tex


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291

\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amsfonts,amsthm}
\usepackage[english]{babel}
\usepackage[capitalize, noabbrev]{cleveref}
\usepackage{algorithm}
\usepackage{algpseudocode}

\newenvironment{enumerate*}%
  {\vspace{-2ex} \begin{enumerate} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{enumerate}}
 
\newenvironment{itemize*}%
  {\vspace{-2ex} \begin{itemize} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{itemize}}
 
\newenvironment{description*}%
  {\vspace{-2ex} \begin{description} %
     \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}%
  {\end{description}}


\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
\DeclareMathOperator*{\argmin}{\arg\min}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\inprod}[1]{\left\langle #1 \right\rangle}
\newcommand{\R}{\mathbf{R}}
\newcommand{\poly}{\text{poly}}
\newcommand{\N}{\mathbf{N}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\eps}{\varepsilon}
\newcommand{\cl}[1]{\text{\textbf{#1}}}
\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}}
\newcommand{\llbracket}{[\![}


\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\algrenewcommand\algorithmicensure{\textbf{Output:}}
\algrenewcommand\algorithmicrequire{\textbf{Input:}}

\author{Thibaut Horel}
\title{CS 225 Problem Set 3 -- Solutions}

\begin{document}
\maketitle

\section*{Problem 4.2}

\paragraph{1.} Let $S$ be an independent set of $G$. I apply the first
inequality of Lemma 4.15 to the sets $S$ and $S$ and obtain:
\begin{displaymath}
    \left(\frac{|S|}{N}\right)^2
    \leq \lambda \frac{|S|}{N}\left(1-\frac{|S|}{N}\right)
\end{displaymath}
since $e(S, S) = 0$ by definition of an independent set. Reordering the terms:
\begin{displaymath}
    |S|\leq \lambda (N-|S|)
\end{displaymath}
or equivalently $|S| \leq \lambda N/(1+\lambda)$. Taking the maximum over all
independent sets yields the required bound on $\alpha(G)$.

\paragraph{2.}Let us consider a coloring of $G$ with $\chi(G)$ colors. One
color must contain at least $N/\chi(G)$ vertices. This set of $N/\chi(G)$
vertices form an independent set of $G$ by definition of a coloring. Applying
the result from \textbf{1.}:
\begin{displaymath}
    \frac{N}{\chi(G)}\leq \frac{\lambda N}{1+\lambda}
\end{displaymath}
which immediately yields $\chi(G)\geq (1+\lambda)/\lambda$ as required.

\paragraph{3.} Let $D$ be the diameter of $G$ and let us consider two vertices
$i$ and $j$ distant from $D$ in $G$. Let us now consider a random walk on $G$
starting from $i$ and denote by $p_j^t$ the probability of being at vertex
$j$ after $t$ steps. Denoting by $M$ the random walk matrix of $G$ and by
$\pi_i$ the indicator vector of $\{i\}$, we have:
\begin{displaymath}
    p_j^t = (\pi_i M^t)_j
\end{displaymath}
by Lemma 2.51:
\begin{displaymath}
    \left|p_j^t-\frac{1}{n}\right| = \left|(\pi_i M^t)_j-\frac{1}{n}\right|
    \leq \|\pi_iM^t - u\|\leq \lambda^t
\end{displaymath}
For $t= \lceil D/2\rceil$, we have $p_j^t = 0$ (we cannot reach $j$ in less
than $D$ steps). This implies:
\begin{displaymath}
    \lambda^{\lceil D/2\rceil}\geq \frac{1}{n}
\end{displaymath}
solving for $D$:
\begin{displaymath}
    D\leq 2\log_{1/\lambda} N = O(\log_{1/\lambda}N)
\end{displaymath}

\section*{Problem 4.5}

For a given $\eps$ and $\delta$, we construct the following items:
\begin{enumerate}
    \item an expander $G$ of constant expansion $1-\lambda$ on $N$ vertices,
        with $\lambda = \frac{1}{8}$. By Theorem 4.22, we have the guarantee
        that for any function $g:[N]\to[0,1]$, for a random walk
        $x_1,\dots,x_t$ in $G$ starting from a uniformly random $x_1\in [N]$:
        \begin{displaymath}
            \P\left[\frac{1}{t}\sum_t g(x_t) \geq \mu(g) + \lambda
            + \frac{1}{4}\right]\leq e^{-\Omega(t)}
        \end{displaymath}
    \item a pairwise independent sampler $\text{Samp}$ which given $x\in[N]$
        outputs $\ell$ samples in $\{0,1\}^m$. The guarantee we have by
        Proposition 3.28 is that:
        \begin{displaymath}
            \P_{x\gets U_{[N]}}\left[\left|\frac{1}{\ell}\sum_{j=1}^{\ell}
            f\big(\text{Samp}(x)_j\big)- \mu(f)\right|\geq \eps\right]\leq
            \frac{1}{\ell\eps^2}
        \end{displaymath}
        Such a sampler can be obtained by having $\text{Samp}(x)$ be a function
        drawn from a pairwise independent family using the coin tosses in $x$.
        We will have the above guarantee as long as $x$ provides enough random
        bits.  This will be the case with, for example $\log N = 2(\log \ell
        + m)$.
\end{enumerate}

For a vertex $x\in G$, we denote by $\mu_x(f)$ the quantity
$\frac{1}{\ell}\sum_{j=1}^{\ell} f\big(\text{Samp}(x)_j\big)$. That is, the
estimate of $\mu(f)$ computed using the samples provided by $\text{Samp}(x)$.
We denote by $g(x)$ the indicator function of the event
$\left\{|\mu_x(f)-\mu(f)|\geq\eps\right\}$. Another way of reformulating the
guarantee on $\text{Samp}$ is to say that the average of $g$
satisfies $\mu(g)\leq \frac{1}{\ell\eps^2}$.

We construct our $(\delta,\eps)$ sampler as follows:
\begin{enumerate}
    \item we pick $\ell = \frac{8}{\eps^2}$ and $\log N = 2(\log\ell + m)$.
    \item we construct a random walk $x_1,\dots,x_t$ on $G$ starting from
        a uniformly random $x_1\in [N]$
    \item we output the median $M$ of $\big\{\mu_{x_1}(f),\dots,\mu_{x_t}(f)\big\}$.
\end{enumerate}

Let us now consider the quantity $\P[|M-\mu(f)|\geq \eps]$. If the median is
more than $\eps$ away from $\mu(f)$, this implies that more than half of the
numbers $\mu_{x_1}(f),\dots,\mu_{x_t}(f)$ are $\eps$ away from $\mu(f)$. That
is, more than half of the variables $g(x_1),\dots,g(x_t)$ are equal to $1$:
\begin{displaymath}
    \frac{1}{t}\sum_t g(x_t) \geq \frac{1}{2}
\end{displaymath}
But because of our choice of $\ell$, we have $\mu(g)\leq \frac{1}{8}$. Since
$\lambda = \frac{1}{8}$, we have $\mu(g)+\lambda + \frac{1}{4} \leq
\frac{1}{2}$. The Chernoff bound for our expander then implies that this event
(the mean of the $g(x_i)$ being larger than $1/2$) is less than
$e^{-\Omega(t)}$. To summarize:
\begin{displaymath}
\P[|M-\mu(f)|\geq \eps]
\leq 
\P\left[\frac{1}{t}\sum_t g(x_t) \geq \frac{1}{2}\right]\leq
\P\left[\frac{1}{t}\sum_t g(x_t) \geq \mu(g)+\lambda+\frac{1}{4}\right]\leq e^{-\Omega(t)}
\end{displaymath}
Hence, choosing $t = O(\log\frac{1}{\delta})$ gives the correct success
probability for our estimator $M$. Furthermore:
\begin{itemize}
    \item the number of random bits needed is $\log N = 2(\log\ell + m)
        = O(\log\frac{1}{\eps} + m)$ for the initial vertex $x_1$ and
        $O(\log\frac{1}{\delta})$ for the $t-1$ remaining steps. This is
        $O(\log\frac{1}{\eps} + m +\log\frac{1}{\delta})$ overall.
    \item the number of queries to $f$ is $t\cdot
        l = O(\frac{1}{\eps^2}\log\frac{1}{\delta})$.
\end{itemize}
which satisfies all the requirements.

\section*{Problem 4.8}

For lack of Latex skills, I will denote by $G_1\circledR G_2$ the replacement
product and by $G_1\circ G_2$ the zig-zag product.

\paragraph{1.} Following the hint, it is easy to see that $G_1\circ G_2$ is
a subgraph of ($G_1\circledR G_2)^3$. Indeed, one step from $(u,i)$ to $(v, j)$
in $G_1\circ G_2$ can be seen as the following three steps in $G_1\circledR
G_2$:
\begin{enumerate}
        \item first a step from $(u,i)$ to $(u,i')$
        \item then a step from $(u, i')$ to $(v, j')$
        \item then a step from $(v, j')$ to $(v, j)$
\end{enumerate}
with the notation that $v$ is the $i'$th neighbor of $u$ and $u$ is the $j'$th
neighbor of $u$.

Denoting by $Z$ the random walk matrix of $G_1\circ G_2$ and $M$ the random
walk matrix of $G_1\circledR G_2$ and since $G_1\circ G_2$ is a $D_2^2$
regular subgraph of $(G_1\circledR G_2)^3$ which is $(D_2+1)^3$ regular, we
have:
\begin{displaymath}
M^3 = \frac{D_2^2}{(D_2+1)^3}Z + \left(1-\frac{D_2^2}{(D_2+1)^3}\right) E
\end{displaymath}
where $E$ is the “remainder” and corresponds to random steps in $(G_1\circledR
G_2)^3$ which are not random steps in $G_1\circ G_2$. Since $Z$ is regular, so
is $E$, in particular, $\|E\|\leq 1$.

This implies that:
\begin{displaymath}
    \lambda(M^3)\leq \frac{D_2^2}{(D_2+1)^3}(1-\gamma_1\gamma_2^2)
    +\left(1-\frac{D_2^2}{(D_2+1)^3}\right)
\end{displaymath}
where we used that the expansion of $G_1\circ G_2$ is at least
$\gamma_1\gamma_2^2$ using Theorem 4.35. This in turn implies that $G_1\circledR
G_2$ has expansion at least:
\begin{displaymath}
    \left(\gamma_1\gamma_2^2\frac{D_2^2}{(D_2+1)^3}\right)^{1/3}
\end{displaymath}

\paragraph{2.} Let us consider $G$ a $(N, D, \gamma)$ expander. First if the
degree of $G$ is even, we can make it odd by adding a new vertex connected to
all the previous vertices. $N$ increases by one, $D$ increases by one.

Once we have a graph with odd degree $D$, we can take the replacement product
with the cycle $C$ on $D$ vertices. The cycle is $2$-regular and nonbipartite
(since $D$ is odd). Hence $G\circledR C$ will be a $(ND, 3, \gamma')$ expander.
The cycle being nonbipartite, it will have non zero expansion. Part \textbf{1.}
implies that $\gamma'>0$.

\paragraph{4.} Without loss of generality, assume that $N_1$ is even. Let us
consider the set $S$ of $N_1D_1/2$ vertices in $G_1\circledR G_2$ where for
half the clouds, we include all their vertices in $S$. The number of edges
$e(S, S^C)$ is at most $N_1D_1/2$ since the edges in the cut are the edges from
the “included” clouds to the non-included ones and each vertex in an included
cloud is connected to exactly one vertex in an other cloud. Hence $S$ does not
have edge expansion $\eps$ for $\eps>1/(D_2+1)$. This implies that
$h(\eps_1,\eps_2, D_2)$ depends on $D_2$. This implies that $g(\gamma_1,
\gamma_2, D_2)$ also depends on $D_2$ by Theorem 4.14.

\section*{Problem 4.10}

\paragraph{1.} We use a simple counting argument. A set $S$ of at most $K$
vertices is incident to exactly $D|S|$ edges. We can count these edges by
summing over all vertices in $N(S)$, the number of parents they have in $S$. If
$U$ is the set of unique neighbors (they have a single parent) and if we
denote by $p_S(u)$ the number of parents of $u$ in $S$, we have:
\begin{displaymath}
    D|S| = \sum_{u\in N(S)} p_S(u) = |U| + \sum_{u\in N(S)\setminus U} p_S(u) 
\end{displaymath}
using that $p_S(u)\geq 2$ for $u\in N(S)\setminus U$ and $|N(S)|\geq
(1-\eps)D|S|$ by expansion:
\begin{displaymath}
    D|S| \geq |U| + 2\big((1-\eps)D|S|-|U|\big)
\end{displaymath}
solving for $|U|$ gives $|U|\geq (1-2\eps)D|S|$ as required.

\paragraph{2.} We prove this by contradiction. Assume the result is false, then
there is a set $S$ of size $\leq K/2$ and $T$ disjoint from $S$ of size
$|S|/2$ such that the vertices in $T$ have $\delta D$ neighbors in $N(S)$. Then
the set $S\cup T$ has size $3|S|/2\leq 3/4K$ and thus has expansion
$(1-\eps)D$:
\begin{displaymath}
    |N(S\cup T)| \geq (1-\eps)\frac{3}{2}D|S|
\end{displaymath}
on the other hand, we can upper bound $|N(S\cup T)|$:
\begin{displaymath}
    |N(S\cup T)|\leq D|S| + (1-\delta)D|T|
    = D\frac{3}{2}|S|-\frac{\delta}{2} D |S|
\end{displaymath}
combining the above two inequalities, we obtain:
\begin{displaymath}
    \delta D|S|\leq  3\eps D|S|
\end{displaymath}
which is a contradiction, for example when $\delta = 4\eps$.

\paragraph{3.} The algorithm to test membership is very simple: for query $x\in
N$, pick a random neighbor of $x$ in $G$ and output the bit assigned to this
neighbor (“1” means $x\in S$, “0” means $x\notin S$). The property $\Pi$ immediately
implies that the error probability is at most $\delta$.

\paragraph{4.} I follow the hint: if I assign $1$ to $N(S)$, and $0$ elsewhere,
then using \textbf{2.}, the set $T$ of vertices for which the error probability
of the query algorithm described in \textbf{3.} is larger than $\delta$ (they
have at least $\delta D$ neighbors in $N(S)$) is of size at most $|S|/2$.

For each vertex $u\in T$, we can fix it by simply assigning $0$ to $N(u)\cap
N(S)$. Now this breaks the vertices in $S$ which have at least $\delta D$
vertices in $N(T)$. But by \textbf{2.} there are at most $|T|/2\leq |S|/4$ such
vertices, we can fix them by assigning $1$ to the neighbors of these vertices.

By repeating this process, we show that the number of mistakes to fix (number
of vertices for which the probability of failure is greater than $\delta$) is
halved each time. Repeating this process at most $\log K/2$ times will lead to
no mistakes and a valid assignment.


\end{document}