aboutsummaryrefslogtreecommitdiffstats
path: root/paper
diff options
context:
space:
mode:
Diffstat (limited to 'paper')
-rw-r--r--paper/main.tex21
-rw-r--r--paper/sections/negative.tex73
2 files changed, 63 insertions, 31 deletions
diff --git a/paper/main.tex b/paper/main.tex
index 0b33bdd..8c4f70b 100644
--- a/paper/main.tex
+++ b/paper/main.tex
@@ -1,27 +1,34 @@
\documentclass[10pt]{article}
\usepackage{nips15submit_e}
-\usepackage{amsmath, amssymb, amsthm, bbm, microtype}
\usepackage[utf8x]{inputenc}
-\usepackage[capitalize, noabbrev]{cleveref}
\usepackage[pagebackref=false,breaklinks=true,colorlinks=true]{hyperref}
+\usepackage[capitalize, noabbrev]{cleveref}
+\usepackage{amsmath, amssymb, amsthm, bbm, microtype}
\usepackage{natbib}
\usepackage{url}
\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
+\DeclareMathOperator*{\argmax}{arg\,max}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\prob}[1]{\P\left[#1\right]}
\newcommand{\reals}{\mathbb{R}}
\newcommand{\ints}{\mathbb{N}}
\renewcommand{\O}{\mathcal{O}}
+\newcommand{\eps}{\varepsilon}
-\newtheorem{proposition}{Proposition}
-\newtheorem{corollary}{Corollary}
-\newtheorem{problem}{Problem}
\newtheorem{theorem}{Theorem}
-\newtheorem{claim}{Claim}
-\newtheorem{remark}{Remark}
+\newtheorem{proposition}[theorem]{Proposition}
+\newtheorem{corollary}[theorem]{Corollary}
+
+\theoremstyle{definition}
+\newtheorem{definition}[theorem]{Definition}
+
+\theoremstyle{remark}
+\newtheorem*{claim}{Claim}
+\newtheorem*{remark}{Remark}
+\newtheorem*{problem}{Problem}
\title{Optimizing Submodular Functions in the Face of Uncertainty}
\author{}
diff --git a/paper/sections/negative.tex b/paper/sections/negative.tex
index 555aa1d..51ea1a3 100644
--- a/paper/sections/negative.tex
+++ b/paper/sections/negative.tex
@@ -1,32 +1,57 @@
\subsection{Model}
-Let $\Omega$ be the universe of elements and $f$ a function defined on subsets
-of $\Omega$: $f : S \in 2^{[\Omega]} \mapsto f(S) \in \mathbb{R}$. Let $K$ be a
-collection of sets of $2^{[\Omega]}$, which we call \emph{constraints}. Let
-$S^*_K$ be any solution to $\max_{S \in K} f(S)$, which we will also denote by
-$S^*$ when there is no ambiguity. Let $L$ be the problem size, which is often
-(but not always) equal to $|\Omega|$.
+We consider a submodular function $f$ defined over a ground set $N$ of size
+$n$. We assume that $f$ is non-negative and normalized:
+\begin{displaymath}
+ f:2^N\to \reals^+,\quad f(\emptyset) = 0
+\end{displaymath}
+
+Given a subset $K$ of $2^N$ representing our constraints (e.g.\ $K$ is an
+independent system), we wish to approximate $S^*$, a solution to the following
+optimization problem:
+\begin{displaymath}
+ S^*\in\argmax_{S\in K} f(S)
+\end{displaymath}
+
+Unlike the standard submodular optimization framework, we do not assume access
+to a value query oracle for $f$. Denoting by $U_K$ the uniform distribution
+over $K$, we assume that we are given access to polynomially many independent
+samples drawn from $U_K$; these samples are all that we observe about $f$.
+The goal is to understand whether or not it is possible to optimize $f$ given
+these observations. This leads to the following definition.
-In general, we say we can efficiently optimize a function $f$ under constraint
-$K$ when we have a polynomial-time algorithm making adaptive value queries to
-$f$,which returns a set $S$ such that $S \in K$ and $f(S) \geq \alpha f(S^*)$
-with high probability and $\alpha$ an absolute constant.
+\begin{definition}
+ Let $\mathcal{F}$ be a family of functions $2^N\to\reals^+$. For
+ $\alpha:\reals^+\to\reals^+_*$ and constraints $K$, we say that
+ $\mathcal{F}$ is $(\alpha, K)$-optimizable iff for any $f\in\mathcal{F}$
+ and $\eps>0$, there exists $\ell = \mathrm{poly}(n,\frac{1}{\eps})$ and an
+ algorithm $A$ running in time $\mathrm{poly}(\ell)$ such that given random
+ inputs $\mathcal{S} = \{(S_i, f(S_i))\}_{1\leq i\leq \ell}$, where the
+ variables $S_1,\dots, S_\ell$ are drawn independently from $U_K$, $A$
+ outputs a set $S$ such that:
+ \begin{displaymath}
+ \P_\mathcal{S}\big[f(S)\geq \alpha(\eps) f(S^*)\big]\geq 1-\eps
+ \end{displaymath}
+\end{definition}
-Here, we consider the scenario where we cannot make adaptive value queries, and
-in fact, where we cannot make queries at all! Instead, we suppose that we
-observe a polynomial number of set-value pairs $(S, f(S))$ where $S$ is taken
-from a known distribution $D$. We say we can efficiently \emph{passively
-optimize} $f$ under distribution $D$ or $D-$optimize $f$ under constraints $K$
-when, after observing ${\cal O}(L^c)$ set-value pairs from $D$ where $c > 0$ is
-an absolute constant, we can return a set $S$ such that $S \in K$ and $f(S)
-\geq \alpha f(S^*)$ with high probability and $\alpha$ an absolute constant.
+One of the goals of the present work is to compare this notion of $(\alpha,
+K)$-optimizability to the following notion of learnability.
-In the case of \emph{passive} observations of set-value pairs under a
-distribution $D$ for a function $f$, recent research has focused on whether we
-can efficiently and approximately \emph{learn} $f$. This was formalized in the
-PMAC model from \cite{balcan2011learning}. When thinking about passive
-optimization, it is necessary to understand the link between being able to
- $D-PMAC$ learn $f$ and being able to $D-$optimize $f$.
+\begin{definition}
+ \label{def:learnable}
+ Let $\mathcal{F}$ be a family of functions $2^N\to\reals^+$. For
+ $\alpha:\reals^+\to\reals^+_*$ and constraints $K$, we say that
+ $\mathcal{F}$ is $(\alpha, K)$-learnable iff for any $f\in\mathcal{F}$,
+ $\eps>0$ and $\delta>0$, there exists $\ell
+ = \mathrm{poly}(n,\frac{1}{\eps},\frac{1}{\delta})$ and an
+ algorithm $A$ such that given random inputs $\mathcal{S} = \{(S_i,
+ f(S_i))\}_{1\leq i\leq \ell}$, where the variables $S_1,\dots, S_\ell$ are
+ drawn independently from $U_K$, $A$ outputs a function $\tilde{f}$ such that:
+ \begin{displaymath}
+ \P_\mathcal{S}\left[\P_{S\sim U_K} [f(S)\geq \tilde{f}(S)\geq
+ \alpha(\eps) f(S)]\geq 1-\eps\right]\geq 1-\delta
+ \end{displaymath}
+\end{definition}
\subsection{Learnable $\nRightarrow$ Optimizable}