From 22e254972aaf4cbb08d4925695d344c2835d6aae Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Tue, 14 Apr 2015 18:40:10 -0400
Subject: Model: definitions of optimizable and learnable

---
 paper/main.tex              | 21 ++++++++----
 paper/sections/negative.tex | 79 +++++++++++++++++++++++++++++----------------
 2 files changed, 66 insertions(+), 34 deletions(-)

diff --git a/paper/main.tex b/paper/main.tex
index 0b33bdd..8c4f70b 100644
--- a/paper/main.tex
+++ b/paper/main.tex
@@ -1,27 +1,34 @@
 \documentclass[10pt]{article}
 \usepackage{nips15submit_e}
-\usepackage{amsmath, amssymb, amsthm, bbm, microtype}
 \usepackage[utf8x]{inputenc}
-\usepackage[capitalize, noabbrev]{cleveref}
 \usepackage[pagebackref=false,breaklinks=true,colorlinks=true]{hyperref}
+\usepackage[capitalize, noabbrev]{cleveref}
+\usepackage{amsmath, amssymb, amsthm, bbm, microtype}
 \usepackage{natbib}
 \usepackage{url}
 
 \DeclareMathOperator{\E}{\mathbb{E}}
 \let\P\relax
 \DeclareMathOperator{\P}{\mathbb{P}}
+\DeclareMathOperator*{\argmax}{arg\,max}
 \newcommand{\ex}[1]{\E\left[#1\right]}
 \newcommand{\prob}[1]{\P\left[#1\right]}
 \newcommand{\reals}{\mathbb{R}}
 \newcommand{\ints}{\mathbb{N}}
 \renewcommand{\O}{\mathcal{O}}
+\newcommand{\eps}{\varepsilon}
 
-\newtheorem{proposition}{Proposition}
-\newtheorem{corollary}{Corollary}
-\newtheorem{problem}{Problem}
 \newtheorem{theorem}{Theorem}
-\newtheorem{claim}{Claim}
-\newtheorem{remark}{Remark}
+\newtheorem{proposition}[theorem]{Proposition}
+\newtheorem{corollary}[theorem]{Corollary}
+
+\theoremstyle{definition}
+\newtheorem{definition}[theorem]{Definition}
+
+\theoremstyle{remark}
+\newtheorem*{claim}{Claim}
+\newtheorem*{remark}{Remark}
+\newtheorem*{problem}{Problem}
 
 \title{Optimizing Submodular Functions in the Face of Uncertainty}
 \author{}
diff --git a/paper/sections/negative.tex b/paper/sections/negative.tex
index 555aa1d..51ea1a3 100644
--- a/paper/sections/negative.tex
+++ b/paper/sections/negative.tex
@@ -1,32 +1,57 @@
 \subsection{Model}
 
-Let $\Omega$ be the universe of elements and $f$ a function defined on subsets
-of $\Omega$: $f : S \in 2^{[\Omega]} \mapsto f(S) \in \mathbb{R}$. Let $K$ be a
-collection of sets of $2^{[\Omega]}$, which we call \emph{constraints}. Let
-$S^*_K$ be any solution to $\max_{S \in K} f(S)$, which we will also denote by
-$S^*$ when there is no ambiguity. Let $L$ be the problem size, which is often
-(but not always) equal to $|\Omega|$.
-
-In general, we say we can efficiently optimize a function $f$ under constraint
-$K$ when we have a polynomial-time algorithm making adaptive value queries to
-$f$,which returns a set $S$ such that $S \in K$ and $f(S) \geq \alpha f(S^*)$
-with high probability and $\alpha$ an absolute constant.
-
-Here, we consider the scenario where we cannot make adaptive value queries, and
-in fact, where we cannot make queries at all! Instead, we suppose that we
-observe a polynomial number of set-value pairs $(S, f(S))$ where $S$ is taken
-from a known distribution $D$. We say we can efficiently \emph{passively
-optimize} $f$ under distribution $D$ or $D-$optimize $f$ under constraints $K$
-when, after observing ${\cal O}(L^c)$ set-value pairs from $D$ where $c > 0$ is
-an absolute constant, we can return a set $S$ such that $S \in K$ and $f(S)
-\geq \alpha f(S^*)$ with high probability and $\alpha$ an absolute constant.
-
-In the case of \emph{passive} observations of set-value pairs under a
-distribution $D$ for a function $f$, recent research has focused on whether we
-can efficiently and approximately \emph{learn} $f$. This was formalized in the
-PMAC model from \cite{balcan2011learning}. When thinking about passive
-optimization, it is necessary to understand the link between being able to
- $D-PMAC$ learn $f$ and being able to $D-$optimize $f$.
+We consider a submodular function $f$ defined over a ground set $N$ of size
+$n$. We assume that $f$ is non-negative and normalized:
+\begin{displaymath}
+    f:2^N\to \reals^+,\quad f(\emptyset) = 0
+\end{displaymath}
+
+Given a subset $K$ of $2^N$ representing our constraints (e.g., $K$ is an
+independence system), we aim to approximate a solution $S^*$ to the following
+optimization problem:
+\begin{displaymath}
+    S^*\in\argmax_{S\in K} f(S)
+\end{displaymath}
+
+Unlike the standard submodular optimization framework, we do not assume access
+to a value query oracle for $f$. Denoting by $U_K$ the uniform distribution
+over $K$, we assume that we observe polynomially many independent set-value
+pairs $(S, f(S))$ with $S$ drawn from $U_K$; these pairs are all we observe
+about $f$. The goal is to understand whether it is possible to optimize $f$
+given these observations. This leads to the following definition.
+
+\begin{definition}
+    Let $\mathcal{F}$ be a family of functions $2^N\to\reals^+$. For
+    $\alpha:\reals^+\to\reals^+_*$ and constraints $K$, we say that
+    $\mathcal{F}$ is $(\alpha, K)$-optimizable iff for any $f\in\mathcal{F}$
+    and $\eps>0$, there exists $\ell = \mathrm{poly}(n,\frac{1}{\eps})$ and an
+    algorithm $A$ running in time $\mathrm{poly}(\ell)$ such that given random
+    inputs $\mathcal{S} = \{(S_i, f(S_i))\}_{1\leq i\leq \ell}$, where the
+    variables $S_1,\dots, S_\ell$ are drawn independently from $U_K$, $A$
+    outputs a set $S$ such that:
+    \begin{displaymath}
+        \P_\mathcal{S}\big[f(S)\geq \alpha(\eps) f(S^*)\big]\geq 1-\eps
+    \end{displaymath}
+\end{definition}
+
+One of the goals of the present work is to compare this notion of $(\alpha,
+K)$-optimizability to the following notion of learnability.
+
+\begin{definition}
+    \label{def:learnable}
+    Let $\mathcal{F}$ be a family of functions $2^N\to\reals^+$. For
+    $\alpha:\reals^+\to\reals^+_*$ and constraints $K$, we say that
+    $\mathcal{F}$ is $(\alpha, K)$-learnable iff for any $f\in\mathcal{F}$,
+    $\eps>0$ and $\delta>0$, there exists $\ell
+    = \mathrm{poly}(n,\frac{1}{\eps},\frac{1}{\delta})$ and an
+    algorithm $A$ such that given random inputs $\mathcal{S} = \{(S_i,
+    f(S_i))\}_{1\leq i\leq \ell}$, where the variables $S_1,\dots, S_\ell$ are
+    drawn independently from $U_K$, $A$ outputs a function $\tilde{f}$ such that:
+    \begin{displaymath}
+        \P_\mathcal{S}\left[\P_{S\sim U_K} [f(S)\geq \tilde{f}(S)\geq
+        \alpha(\eps) f(S)]\geq 1-\eps\right]\geq 1-\delta
+    \end{displaymath}
+\end{definition}
 
 \subsection{Learnable $\nRightarrow$ Optimizable}
 
-- 
cgit v1.2.3-70-g09d2
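
As a concrete illustration of the passive model this patch defines, the
following minimal Python sketch draws set-value pairs (S, f(S)) with S uniform
over a small constraint set K and outputs the best feasible sample. The
coverage instance, the cardinality constraint, and the best-sample baseline
are illustrative assumptions only, not the paper's method.

    import itertools
    import random

    # Toy instance of the passive model: we only observe pairs (S, f(S)) with
    # S ~ U_K, and must output a feasible set without ever querying f.
    # The instance (a small coverage function under a cardinality constraint)
    # and the best-sample baseline below are illustrative assumptions.

    def coverage(cover, S):
        """A non-negative, normalized, submodular coverage function."""
        return len(set().union(*(cover[i] for i in S)))

    def best_sample(samples):
        """Naive baseline: return the highest-valued set seen in the samples."""
        return max(samples, key=lambda pair: pair[1])[0]

    cover = {0: {1, 2}, 1: {2, 3}, 2: {4}, 3: {1, 4}, 4: {5}}  # ground set N = {0,...,4}
    k = 2  # constraint K = {S : |S| <= k}
    K = [S for r in range(k + 1)
         for S in itertools.combinations(range(len(cover)), r)]

    samples = [(S, coverage(cover, S)) for S in random.choices(K, k=50)]
    S_hat = best_sample(samples)
    print(S_hat, coverage(cover, S_hat))  # a feasible set and its value

On an instance this small the best observed sample is likely near-optimal;
whether such passive access suffices in general is exactly the question the
definitions above formalize and the next subsection answers negatively.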