Diffstat (limited to 'results.tex')
| -rw-r--r-- | results.tex | 58 |
1 files changed, 34 insertions, 24 deletions
diff --git a/results.tex b/results.tex
index 6a2e684..d5fc5cb 100644
--- a/results.tex
+++ b/results.tex
@@ -223,7 +223,15 @@ fraction of the non-zero sets is a big problem
 
 \subsection{Average weight method}
 
-\section{Passive Optimization can be easier than Passive Learning}
+\section{Passive Optimization vs. Passive Learning}
+
+\subsection{Returning max of observations}
+
+This doesn't work. Give examples as to why! Remember that there are strong
+concentration results for submodular functions -> look at expected value of
+observed sets
+
+\subsection{Passive Optimization can be easier than Passive Learning}
 
 Recall the matroid construction from~\cite{balcan2011learning}:
 \begin{theorem}
@@ -235,39 +243,41 @@ Recall the matroid construction from~\cite{balcan2011learning}:
 
   \item For every $\cal{B} \subseteq\cal{A}$ and every $A \in \cal{A}$, we
   have:
   \begin{align*}
-    \text{rank}_{M_{\cal{B}}}(A) & = 8 \log k \ if A\in\cal{B} \\
-                                 & = |A| \ if A\in \cal{A}\backslash\cal{B}
+    \text{rank}_{M_{\cal{B}}}(A) & = 8 \log k \quad \text{if } A\in{\cal B} \\
+                                 & = |A| \quad \text{if } A\in {\cal A}\backslash{\cal B}
   \end{align*}
 \end{itemize}
 \end{theorem}
 
-Consider the following subset of the above family of matroids: ${\cal M}^1 =
-\{M_{\cal B} : {\cal B} \subseteq{\cal A} \wedge |{\cal A}\backslash{\cal B}|
-\geq \frac{1}{n}{\cal A}\}$ for $k = 2^{n^{1/6}}$. Let $D_u$ be the uniform
-distribution over all sets of ${\cal A}$. Consider an \emph{unknown} function
-$f$, corresponding to a rank function of one of the matroids from ${\cal M}^1$.
-After $n$ observations $O \equiv (S_i, f(S_i))_i$ for $S_i \sim D_u$, we will
+Consider the following subset of the above family of matroids:
+${\cal M}^{\epsilon} = \{M_{\cal B} : {\cal B} \subseteq{\cal A} \wedge
+|{\cal A}\backslash{\cal B}| \geq \epsilon|{\cal A}|\}$ for $k = 2^{n^{1/6}}$.
+Now fix an \emph{unknown} function $f$, corresponding to the rank function of
+one of the matroids $M_{\cal B}$ from ${\cal M}^{\epsilon}$. Note that as long
+as we observe \emph{one} set from ${\cal A} \backslash {\cal B}$, we can
+optimize $f$ exactly! Indeed, under cardinality constraint $n^{1/3}$ we have
+$f(A) \leq |A| \leq n^{1/3}$, and such a set attains this bound: $f(A) = |A| = n^{1/3}$.
+
+One example of a distribution under which this occurs with probability at least
+a constant is $D_u$, the uniform distribution over all sets of ${\cal A}$.
+For $c>0$, after $n^c$ observations $O \equiv (S_i, f(S_i))_i$ for $S_i \sim D_u$, we will
 observe at least one element from $\cal{A}\backslash\cal{B}$ with constant
 probability:
-\begin{equation}
-  \nonumber \mathbb{P}(\bigcup_{S_i} S_i \in {\cal A}\backslash{\cal B}) \geq 1 -
-  (1 - \frac{1}{n})^{n} \geq 1 - \frac{1}{e}
+\begin{equation} \nonumber \mathbb{P}(\exists i : S_i \in {\cal
+A}\backslash{\cal B}) \geq 1 - (1 - \epsilon)^{n^c} \geq 1 - e^{-\epsilon n^c}
 \end{equation}
 
-Note that as long as we observe \emph{one} set from ${\cal A} \backslash {\cal
-B}$, we can optimize $f$ exactly! However, it follows from the analysis
-of~\cite{balcan2011learning} that we cannot learn $f$ under any distribution,
-even with active value queries! Indeed, for any polynomially-sized set of
-observations, there exists a super-polynomially number of functions in ${\cal
-M^1}$ which coincide on this set of observations, but which take very different
-values outside of this set of observations: $8n^{1/6}$ for $A \in {\cal B}$ and
-$n^{1/3}$ for $A \in {\cal A}\backslash {\cal B}$.
+However, it follows from the analysis of~\cite{balcan2011learning} that we
+cannot learn $f$ under any distribution, even with active value queries!
+Indeed, for any polynomially-sized set of observations, there is a
+super-polynomial number of functions in ${\cal M}^{\epsilon}$ which coincide
+on this set of observations but take very different values outside of it:
+$8n^{1/6}$ for $A \in {\cal B}$ and $n^{1/3}$ for
+$A \in {\cal A}\backslash{\cal B}$.
+
+{\bf TODO:} A cleaner simpler example would be nice.
 
-{\bf TODO:} Notice how the distribution we observe to optimize doesn't cover
-every possible set of size $k$. This is \emph{okay} because we are evaluating
-the learning procedure for this distribution. But a cleaner simpler example
-would be nice.
 
 \bibliography{optimize}
 \bibliographystyle{apalike}
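A short worked version of the probability bound used in the new equation above; this is a minimal sketch assuming the $m = n^c$ observations are drawn i.i.d. from $D_u$ and that $|{\cal A}\backslash{\cal B}| \geq \epsilon|{\cal A}|$, as in the definition of ${\cal M}^{\epsilon}$:

\begin{align*}
  \mathbb{P}(\exists i \leq m : S_i \in {\cal A}\backslash{\cal B})
    &= 1 - \mathbb{P}(\forall i \leq m : S_i \in {\cal B}) \\
    &\geq 1 - (1 - \epsilon)^{m} \geq 1 - e^{-\epsilon m},
\end{align*}

which is at least $1 - 1/e$ as soon as $m \geq 1/\epsilon$; in particular, for constant $\epsilon$ (or even $\epsilon \geq n^{-c}$), the $m = n^c$ uniform observations succeed with constant probability, matching the claim in the text.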

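A minimal sketch, in Python, of the passive optimization step described above; the function name and data layout are illustrative, not from the paper. It scans the observations and returns a set whose value meets the trivial upper bound $f(S) \leq |S| \leq$ budget, which is exactly what a single observed set from ${\cal A}\backslash{\cal B}$ provides.

# Illustrative sketch (names are made up): given passive observations
# (S_i, f(S_i)) and a cardinality budget, return a set that is certified
# optimal whenever some observation attains the bound f(S) <= |S| <= budget.
def best_observed_set(observations, budget):
    best = None
    for s, value in observations:
        if len(s) > budget:
            continue
        if best is None or value > best[1]:
            best = (s, value)
        if value == budget:  # f(s) = |s| = budget: provably optimal
            return s, value
    return best  # otherwise, fall back on the best value seen so far

The early return is the point of the construction: optimality is certified by the observed value alone, without ever identifying ${\cal B}$ (i.e., without learning $f$).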