From 896f459898c8ba2eed53fcab1fbd60472d7938f4 Mon Sep 17 00:00:00 2001 From: Thibaut Horel Date: Sun, 15 Mar 2015 14:36:04 -0400 Subject: [ps3] problems 1,2, 4 --- ps3/main.tex | 213 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 ps3/main.tex (limited to 'ps3/main.tex') diff --git a/ps3/main.tex b/ps3/main.tex new file mode 100644 index 0000000..2cb4a1f --- /dev/null +++ b/ps3/main.tex @@ -0,0 +1,213 @@ +\documentclass[10pt]{article} +\usepackage{fullpage} +\usepackage[utf8x]{inputenc} +\usepackage{amsmath,amsfonts,amsthm} +\usepackage[english]{babel} +\usepackage[capitalize, noabbrev]{cleveref} +\usepackage{algorithm} +\usepackage{algpseudocode} + +\newenvironment{enumerate*}% + {\vspace{-2ex} \begin{enumerate} % + \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% + {\end{enumerate}} + +\newenvironment{itemize*}% + {\vspace{-2ex} \begin{itemize} % + \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% + {\end{itemize}} + +\newenvironment{description*}% + {\vspace{-2ex} \begin{description} % + \setlength{\itemsep}{-1ex} \setlength{\parsep}{0pt}}% + {\end{description}} + +\DeclareMathOperator{\E}{\mathbb{E}} +\let\P\relax +\DeclareMathOperator{\P}{\mathbb{P}} +\DeclareMathOperator*{\argmin}{\arg\min} +\newcommand{\ex}[1]{\E\left[#1\right]} +\newcommand{\prob}[1]{\P\left[#1\right]} +\newcommand{\inprod}[1]{\left\langle #1 \right\rangle} +\newcommand{\R}{\mathbf{R}} +\newcommand{\poly}{\text{poly}} +\newcommand{\N}{\mathbf{N}} +\newcommand{\C}{\mathcal{C}} +\newcommand{\eps}{\varepsilon} +\newcommand{\cl}[1]{\text{\textbf{#1}}} +\newcommand{\eqdef}{\mathbin{\stackrel{\rm def}{=}}} +\newcommand{\llbracket}{[\![} + +\newtheorem{theorem}{Theorem} +\newtheorem{lemma}{Lemma} +\algrenewcommand\algorithmicensure{\textbf{Output:}} +\algrenewcommand\algorithmicrequire{\textbf{Input:}} + +\author{Thibaut Horel} +\title{CS 225 Problem Set 3 -- Solutions} + +\begin{document} +\maketitle + +\section*{Problem 4.2} + +\paragraph{1.} Let $S$ be an independent set of $G$. I apply the first +inequality of Lemma 4.15 to the sets $S$ and $S$ and obtain: +\begin{displaymath} + \left(\frac{|S|}{N}\right)^2 + \leq \lambda \frac{|S|}{N}\left(1-\frac{|S|}{N}\right) +\end{displaymath} +since $e(S, S) = 0$ by definition of an independent set. Reordering the terms: +\begin{displaymath} + |S|\leq \lambda (N-|S|) +\end{displaymath} +or equivalently $|S| \leq \lambda N/(1+\lambda)$. Taking the maximum over all +independent sets yields the required bound on $\alpha(G)$. + +\paragraph{2.}Let us consider a coloring of $G$ with $\chi(G)$ colors. One +color must contain at least $N/\chi(G)$ vertices. This set of $N/\chi(G)$ +vertices form an independent set of $G$ by definition of a coloring. Applying +the result from \textbf{1.}: +\begin{displaymath} + \frac{N}{\chi(G)}\leq \frac{\lambda N}{1+\lambda} +\end{displaymath} +which immediately yields $\chi(G)\geq (1+\lambda)/\lambda$ as required. + +\paragraph{3.} Let $D$ be the diameter of $G$ and let us consider two vertices +$i$ and $j$ distant from $D$ in $G$. Let us now consider a random walk on $G$ +starting from $i$ and denote by $p_j^t$ the probability of being at vertex +$j$ after $t$ steps. Denoting by $M$ the random walk matrix of $G$ and by +$\pi_i$ the indicator vector of $\{i\}$, we have: +\begin{displaymath} + p_j^t = (\pi_i M^t)_j +\end{displaymath} +by Lemma 2.51: +\begin{displaymath} + \left|p_j^t-\frac{1}{n}\right| = \left|(\pi_i M^t)_j-\frac{1}{n}\right| + \leq \|\pi_iM^t - u\|\leq \lambda^t +\end{displaymath} +For $t= \lceil D/2\rceil$, we have $p_j^t = 0$ (we cannot reach $j$ in less +than $D$ steps). This implies: +\begin{displaymath} + \lambda^{\lceil D/2\rceil}\geq \frac{1}{n} +\end{displaymath} +solving for $D$: +\begin{displaymath} + D\leq 2\log_{1/\lambda} N = O(\log_{1/\lambda}N) +\end{displaymath} + +\section*{Problem 4.5} + +For a given $\eps$ and $\delta$, we construct the following items: +\begin{enumerate} + \item an expander $G$ of constant expansion $1-\lambda$ on $N$ vertices, + with $\lambda = \frac{1}{8}$. By Theorem 4.22, we have the guarantee + that for any function $g:[N]\to[0,1]$, for a random walk + $x_1,\dots,x_t$ in $G$ starting from a uniformly random $x_1\in [N]$: + \begin{displaymath} + \P\left[\frac{1}{t}\sum_t g(x_t) \geq \mu(g) + \lambda + + \frac{1}{4}\right]\leq e^{-\Omega(t)} + \end{displaymath} + \item a pairwise independent sampler $\text{Samp}$ which given $x\in[N]$ + outputs $\ell$ samples in $\{0,1\}^m$. The guarantee we have by + Proposition 3.28 is that: + \begin{displaymath} + \P_{x\gets U_{[N]}}\left[\left|\frac{1}{\ell}\sum_{j=1}^{\ell} + f\big(\text{Samp}(x)_j\big)- \mu(f)\right|\geq \eps\right]\leq + \frac{1}{\ell\eps^2} + \end{displaymath} + Such a sampler can be obtained by having $\text{Samp}(x)$ be a function + drawn from a pairwise independent family using the coin tosses in $x$. + We will have the above guarantee as long as $x$ provides enough random + bits. This will be the case with, for example $\log N = 2(\log \ell + + m)$. +\end{enumerate} + +For a vertex $x\in G$, we denote by $\mu_x(f)$ the quantity +$\frac{1}{\ell}\sum_{j=1}^{\ell} f\big(\text{Samp}(x)_j\big)$. That is, the +estimate of $\mu(f)$ computed using the samples provided by $\text{Samp}(x)$. +We denote by $g(x)$ the indicator function of the event +$\left\{|\mu_x(f)-\mu(f)|\geq\eps\right\}$. Another way of reformulating the +guarantee on $\text{Samp}$ is to say that the average of $g$ +satisfies $\mu(g)\leq \frac{1}{\ell\eps^2}$. + +We construct our $(\delta,\eps)$ sampler as follows: +\begin{enumerate} + \item we pick $\ell = \frac{8}{\eps^2}$ and $\log N = 2(\log\ell + m)$. + \item we construct a random walk $x_1,\dots,x_t$ on $G$ starting from + a uniformly random $x_1\in [N]$ + \item we output the median $M$ of $\big\{\mu_{x_1}(f),\dots,\mu_{x_t}(f)\big\}$. +\end{enumerate} + +Let us now consider the quantity $\P[|M-\mu(f)|\geq \eps]$. If the median is +more than $\eps$ away from $\mu(f)$, this implies that more than half of the +numbers $\mu_{x_1}(f),\dots,\mu_{x_t}(f)$ are $\eps$ away from $\mu(f)$. That +is, more than half of the variables $g(x_1),\dots,g(x_t)$ are equal to $1$: +\begin{displaymath} + \frac{1}{t}\sum_t g(x_t) \geq \frac{1}{2} +\end{displaymath} +But because of our choice of $\ell$, we have $\mu(g)\leq \frac{1}{8}$. Since +$\lambda = \frac{1}{8}$, we have $\mu(g)+\lambda + \frac{1}{4} \leq +\frac{1}{2}$. The Chernoff bound for our expander then implies that this event +(the mean of the $g(x_i)$ being larger than $1/2$) is less than +$e^{-\Omega(t)}$. To summarize: +\begin{displaymath} +\P[|M-\mu(f)|\geq \eps] +\leq +\P\left[\frac{1}{t}\sum_t g(x_t) \geq \frac{1}{2}\left]\leq +\P\left[\frac{1}{t}\sum_t g(x_t) \geq \mu(g)+\lambda+\frac{1}{4}\left]\leq e^{-\Omega(t)} +\end{displaymath} +Hence, choosing $t = O(\log\frac{1}{\delta})$ gives the correct success +probability for our estimator $M$. Furthermore: +\begin{itemize} + \item the number of random bits needed is $\log N = 2(\log\ell + m) + = O(\log\frac{1}{\eps} + m)$ for the initial vertex $x_1$ and + $O(\log\frac{1}{\delta})$ for the $t-1$ remaining steps. This is + $O(\log\frac{1}{\eps} + m +\log\frac{1}{\delta})$ overall. + \item the number of queries to $f$ is $t\cdot + l = O(\frac{1}{\eps^2}\log\frac{1}{\delta})$. +\end{itemize} +which satisfies all the requirements. + +\section*{Problem 4.10} + +\paragraph{1.} We use a simple counting argument. A set $S$ of at most $K$ +vertices is incident to exactly $D|S|$ edges. We can count these edges by +summing over all vertices in $N(S)$, the number of parents they have in $S$. If +$U$ is the set of unique neighbors (they have a single parent) and if we +denote by $p_S(u)$ the number of parents of $u$ in $S$, we have: +\begin{displaymath} + D|S| = \sum_{u\in N(S)} p_S(u) = |U| + \sum_{u\in N(S)\setminus U} p_S(u) +\end{displaymath} +using that $p_S(u)\geq 2$ for $u\in N(S)\setminus U$ and $|N(S)|\geq +(1-\eps)D|S|$ by expansion: +\begin{displaymath} + D|S| \geq |U| + 2\big((1-\eps)D|S|-|U|\big) +\end{displaymath} +solving for $|U|$ gives $|U|\geq (1-2\eps)D|S|$ as required. + +\paragraph{2.} We prove this by contradiction. Assume the result is false, then +there is a set $S$ of size $\leq K/2$ and $T$ disjoint from $S$ of size +$|S|/2$ such that the vertices in $T$ have $\delta D$ neighbors in $N(S)$. Then +the set $S\cup T$ has size $3|S|/2\leq 3/4K$ and thus has expansion +$(1-\eps)D$: +\begin{displaymath} + |N(S\cup T)| \geq (1-\eps)\frac{3}{2}D|S| +\end{displaymath} +on the other hand, we can upper bound $|N(S\cup T)|$: +\begin{displaymath} + |N(S\cup T)|\leq |N(S)| + |N(T)|\leq D|S| + (1-\delta)D|T| + = D\frac{3}{2}|S|-\frac{\delta}{2} D |S| +\end{displaymath} +combining the above two inequalities, we obtain: +\begin{displaymath} + \delta D|S|\leq 3\eps D|S| +\end{displaymath} +which is a contradiction, for example when $\delta = 4\eps$. + +\paragraph{3.} The algorithm to test membership is very simple: for query $x\in +N$, pick a random neighbor of $x$ in $G$ and output the bit assigned to this +neighbor (“1” means $x\in S$, “0” means $x\notin S$). The property $\Pi$ immediately +implies that the error probability is at most $\delta$. + +\end{document} -- cgit v1.2.3-70-g09d2