summaryrefslogtreecommitdiffstats
path: root/ps2/main.tex
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2014-09-25 00:01:56 -0400
committerThibaut Horel <thibaut.horel@gmail.com>2014-09-25 00:01:56 -0400
commitb89c767264b3ea4d16c3a0e660fcc25013d1e195 (patch)
tree75f03f24ca2fba0dd97aab4b18646be68e8a7e39 /ps2/main.tex
parent19cb1a6d624db0b5ab903d983916a8116b3c0a9f (diff)
downloadcs224-b89c767264b3ea4d16c3a0e660fcc25013d1e195.tar.gz
[ps2] end. Just in time!
Diffstat (limited to 'ps2/main.tex')
-rw-r--r--ps2/main.tex42
1 files changed, 41 insertions, 1 deletions
diff --git a/ps2/main.tex b/ps2/main.tex
index 45638ab..c4a4ad3 100644
--- a/ps2/main.tex
+++ b/ps2/main.tex
@@ -39,6 +39,7 @@
\begin{document}
\maketitle
+Collaborators: Eric Balkanski and Jean Pouget-Abadie
\section*{Problem 1}
\paragraph{(a)} Let us consider the function $f$ defined for $x>-1$ by:
@@ -141,6 +142,14 @@ a global Chernoff bound.}:
where $C=\frac{1}{5e}$. The right-hand side can be bounded from above by
$2e^{-C'\mu\eps^2}$ where $C' = C\log(5/4)>0$. This concludes the proof.
+\paragraph{(c)} The case were the $p_i$ are not all the same can be reduced to
+the previous case by considering the random variables $X_i' = X - p_i$. These
+random variables have mean zero and variance $p_i(1-p_i)\leq p_i$. The previous
+analysis applies almost verbatim. The only difference being that
+$\E[X_{i_1}\ldots X_{i_{2l}}]$ can now be bounded from above by $p_{j_1}\ldots
+p_{j_k}$ where $j_1,\ldots j_k$ are the unique indices (they are repeated an
+even number of times so the moments of even orders are bounded by the
+variance).
\section*{Problem 2}
@@ -302,7 +311,38 @@ choosing $k=\left\lceil\frac{3}{4\eps^2}\right\rceil$ is enough to obtain:
\paragraph{(b)}
\begin{itemize*}
- \item In case we want a probability of success of $\delta$,
+ \item In case we want a probability of success of $\delta$, the previous
+ analysis shows that we can choose $k=\frac{1}{4\eps^2\delta}$.
+ \item If the $s_i$ were fully independent, then we would be able to apply
+ the Chernoff bound and obtain:
+ \begin{displaymath}
+ \Pr[|Z-J(A,B)|\geq \eps]\leq 2e^{-C\eps^2\frac{k}{p}}\leq2e^{-C\eps^2k}
+ \end{displaymath}
+ Hence, choosing
+ $k=\left\lceil\frac{1}{C\eps^2}\log\frac{2}{\delta}\right\rceil$ will
+ yield a probability of failure of at most $\delta$.
+ \item We note that if the median of $Z_1\ldots Z_t$ (denoted by $Z$) is at
+ distance more than $\eps$ of $J(A,B)$, this implies that at least half
+ of the $Z_i$'s are at distance at least $\eps$ of $J(A,B)$. This
+ implies that there is a subset of size $\frac{t}{2}$ of the $Z_i$'s at
+ distance at most $\eps$ of $J(A,B)$. Since there are $\binom{t}{t/2}$
+ such subsets, applying a union bound:
+ \begin{displaymath}
+ \Pr[|Z-J(A,B)|\geq \eps]\leq\binom{t}{\frac{t}{2}}\frac{1}{3^t}
+ \end{displaymath}
+ where $\frac{1}{3^t}$ is the probability that all the $Z_i$'s in a set
+ of size $\frac{t}{2}$ are at distance at least $\eps$ of $J(A,B)$ (the
+ $Z_i's$ are all independent). Using the standard bound on binomial
+ coefficients:
+ \begin{displaymath}
+ \Pr[|Z-J(A,B)|\geq \eps]\leq\left(\frac{\sqrt{2e}}{3}\right)^t
+ \end{displaymath}
+ This implies that choosing:
+ \begin{displaymath}
+ t = \left\lceil\frac{\log\frac{1}{\delta}}{\log\frac{\sqrt{2e}}{3}}\right\rceil
+ = O\left(\log\frac{1}{\delta}\right)
+ \end{displaymath}
+ will give us a probability of failure of at most $\delta$.
\end{itemize*}
\section*{Problem 4}