From f5a0b1da9a7ff3346ac3af4ad9c1dd0fd9e71f46 Mon Sep 17 00:00:00 2001 From: Jean Pouget-Abadie Date: Wed, 8 Apr 2015 19:49:02 -0400 Subject: added remark about l1-condition for additive functions --- results.tex | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 8 deletions(-) (limited to 'results.tex') diff --git a/results.tex b/results.tex index 4968e4b..31801fc 100644 --- a/results.tex +++ b/results.tex @@ -280,10 +280,11 @@ adopt the notation $w \equiv f(\Omega)$. \subsubsection{Inverting the system}\label{subsec:invert_the_system} Suppose we have observed $n^c$ set-value pairs ${(S_j, f(S_j))}_{j \in N}$ with -$S_j \sim D$ where $n \equiv |\Omega|$. Consider the $n^c \times n$ matrix $A$ where for all $i$, the row -$A_i = \chi_{S_i}$, the indicator vector of set $S_i$. Let $B$ be the $n^c -\times 1$ vector such that $\forall i, b_j \equiv f(S_j)$. It is easy to see -that if $w$ is the $n \times 1$ corresponding weight vector for $f$ then: +$S_j \sim D$ where $n \equiv |\Omega|$. Consider the $n^c \times n$ matrix $A$ +where for all $i$, the row $A_i = \chi_{S_i}$, the indicator vector of set +$S_i$. Let $B$ be the $n^c \times 1$ vector such that $\forall i, b_j \equiv +f(S_j)$. It is easy to see that if $w$ is the $n \times 1$ corresponding weight +vector for $f$ then: \begin{displaymath} A w = B \end{displaymath} @@ -310,7 +311,9 @@ g(S^*) \geq \alpha f(S^*) \geq \alpha f(OPT) \geq where $OPT \in \arg \max_{S \in K} g(S)$. This can be taken one step further by considering a function $g$ such that there exists $\alpha, \beta >0$, an additive function $f$ and a bijective univariate function $\phi$, such that -$\forall S, \alpha \phi(f(S)) \leq g(S) \leq \beta \phi(f(S))$. In this case, we solve the system $A w = \phi^{-1}(B)$ and obtain once again an $\alpha/\beta$-approximation to the optimum of $g$. +$\forall S, \alpha \phi(f(S)) \leq g(S) \leq \beta \phi(f(S))$. 
In this case, we +solve the system $A w = \phi^{-1}(B)$ and obtain once again an +$\alpha/\beta$-approximation to the optimum of $g$. \begin{remark} Note that here $D-$optimizing $f$ is easy because $D-$learning $f$ is easy. We @@ -343,11 +346,12 @@ If $D$ is a product distribution such that $ \exists c > 0, \forall i, \mathbb{P}(i) \geq c$, it is easy to show that $\forall i \in \Omega,\ \hat W_i \rightarrow_{|O| \rightarrow +\infty} w_i$ -By using standard concentration bounds, we can hope to obtain within $poly(n, \frac{1}{\epsilon})$ observations: +By using standard concentration bounds, we can hope to obtain within $poly(n, +\frac{1}{\epsilon})$ observations: -%For every node -%$i$, we compute the \emph{average weight of every set containing element $i$}. Let $w_i$ be +%For every node $i$, we compute the \emph{average weight of every set containing +%element $i$}. Let $w_i$ be %the weight of element $i$, $w \equiv f(\Omega) = \sum_{i \in \Omega} w_i$ and %$p \equiv \frac{k}{n}$, then $$\forall i \in \Omega, \mathbb{E}_{S \sim %D_p}\left[f(S)| i \in S\right] = pw + (1 -p)w_i$$ @@ -569,6 +573,42 @@ guarantees are sufficient for optimization. First look at learning weights in a cover function. Maybe facility location? Sums of concave over modular are probably too hard because of the connection to neural networks. +\section{Which learning guarantees imply optimization?} + +Here, we consider the following question: which learning guarantees imply +optimization? For example, \cite{TODO} provides the following guarantee for +cover functions: + +\begin{theorem} +There exists an algorithm such that for any $\epsilon>0$ given random and +uniform examples of a coverage function $c$ outputs a hypothesis, which is also +a coverage function $h$, such that with probability $2/3$: $\mathbb{E}_{\cal +U}[|h(x) - c(x)|] \leq \epsilon$. 
The algorithm runs in time $\tilde {\cal O}(n)
+\cdot \text{poly}(s/\epsilon)$ and uses $\log(n)\cdot \text{poly}(s/\epsilon)$
+examples, where $s = \min\{size(c), (1/\epsilon)^{\log(1/\epsilon)}\}$.
+\end{theorem}
+
+We would like to understand to what extent this $\ell_1$-bound allows us to
+optimize the coverage function $c$ under cardinality constraints using the
+hypothesis $h$. Let us consider the simpler case of an additive function, and
+suppose we have a similar guarantee: $\mathbb{E}_{x \sim {\cal U}}[|h(x) -
+c(x)|] \leq \epsilon$ where $\forall x, c(x) \equiv \sum_i w_i x_i$ and $h(x)
+\equiv \sum_i \hat w_i x_i$. Can we find a bound on $\|w - \hat w\|_{\infty}$?
+
+As it turns out, yes. Let $\forall i, w_i - \hat w_i \equiv \alpha_i$ and let
+$v(x) \equiv |h(x) - c(x)| = |\sum_{i} \alpha_i x_i |$. Consider the collection
+of good sets ${\cal G} \equiv \{S : v(S) < 4\epsilon\}$. We claim that $|{\cal
+G}| \geq \frac{3}{4}\cdot 2^n$. Suppose the contrary: there is at least a
+quarter of the sets which have value $v(S) > 4\epsilon$, so that $\mathbb{E}_{x
+\sim {\cal U}}[|v(x)|] \geq \frac{1}{2^n}\sum_{S \in {\cal G}^c} |v(S)| >
+\frac{1}{4}\cdot4\epsilon = \epsilon$ which is a contradiction. Consider element
+$i$ such that $|\alpha_i| \equiv \|\alpha\|_{\infty}$. Consider the collection
+of sets which contain $i$: ${\cal S}_i \equiv \{S : i \in S\}$. Notice that
+$|{\cal S}_i| = |{\cal S}_i^c| = 2^{n-1}$. Therefore, $|{\cal G} \cap {\cal
+S}_i^c| \geq \frac{1}{4}\cdot 2^n$. For all sets $S$ in ${\cal G} \cap {\cal
+S}_i^c$, $v(S\cup\{i\}) \geq |\alpha_i| - 4\epsilon$. It follows that
+$\mathbb{E}_{x \sim {\cal U}}[|v(x)|] \geq \frac{1}{4}(|\alpha_i| - 4\epsilon)$
+and, since this expectation is at most $\epsilon$, $\|w - \hat w\|_{\infty} \leq 8 \epsilon$.
 \bibliography{optimize}
 \bibliographystyle{apalike}
--
cgit v1.2.3-70-g09d2