From f5a0b1da9a7ff3346ac3af4ad9c1dd0fd9e71f46 Mon Sep 17 00:00:00 2001 From: Jean Pouget-Abadie Date: Wed, 8 Apr 2015 19:49:02 -0400 Subject: added remark about l1-condition for additive functions --- results.tex | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 8 deletions(-) (limited to 'results.tex') diff --git a/results.tex b/results.tex index 4968e4b..31801fc 100644 --- a/results.tex +++ b/results.tex @@ -280,10 +280,11 @@ adopt the notation $w \equiv f(\Omega)$. \subsubsection{Inverting the system}\label{subsec:invert_the_system} Suppose we have observed $n^c$ set-value pairs ${(S_j, f(S_j))}_{j \in N}$ with -$S_j \sim D$ where $n \equiv |\Omega|$. Consider the $n^c \times n$ matrix $A$ where for all $i$, the row -$A_i = \chi_{S_i}$, the indicator vector of set $S_i$. Let $B$ be the $n^c -\times 1$ vector such that $\forall i, b_j \equiv f(S_j)$. It is easy to see -that if $w$ is the $n \times 1$ corresponding weight vector for $f$ then: +$S_j \sim D$ where $n \equiv |\Omega|$. Consider the $n^c \times n$ matrix $A$ +where for all $i$, the row $A_i = \chi_{S_i}$, the indicator vector of set +$S_i$. Let $B$ be the $n^c \times 1$ vector such that $\forall i, b_j \equiv +f(S_j)$. It is easy to see that if $w$ is the $n \times 1$ corresponding weight +vector for $f$ then: \begin{displaymath} A w = B \end{displaymath} @@ -310,7 +311,9 @@ g(S^*) \geq \alpha f(S^*) \geq \alpha f(OPT) \geq where $OPT \in \arg \max_{S \in K} g(S)$. This can be taken one step further by considering a function $g$ such that there exists $\alpha, \beta >0$, an additive function $f$ and a bijective univariate function $\phi$, such that -$\forall S, \alpha \phi(f(S)) \leq g(S) \leq \beta \phi(f(S))$. In this case, we solve the system $A w = \phi^{-1}(B)$ and obtain once again an $\alpha/\beta$-approximation to the optimum of $g$. +$\forall S, \alpha \phi(f(S)) \leq g(S) \leq \beta \phi(f(S))$. 
In this case, we +solve the system $A w = \phi^{-1}(B)$ and obtain once again an +$\alpha/\beta$-approximation to the optimum of $g$. \begin{remark} Note that here $D-$optimizing $f$ is easy because $D-$learning $f$ is easy. We @@ -343,11 +346,12 @@ If $D$ is a product distribution such that $ \exists c > 0, \forall i, \mathbb{P}(i) \geq c$, it is easy to show that $\forall i \in \Omega,\ \hat W_i \rightarrow_{|O| \rightarrow +\infty} w_i$ -By using standard concentration bounds, we can hope to obtain within $poly(n, \frac{1}{\epsilon})$ observations: +By using standard concentration bounds, we can hope to obtain within $poly(n, +\frac{1}{\epsilon})$ observations: -%For every node -%$i$, we compute the \emph{average weight of every set containing element $i$}. Let $w_i$ be +%For every node $i$, we compute the \emph{average weight of every set containing +%element $i$}. Let $w_i$ be %the weight of element $i$, $w \equiv f(\Omega) = \sum_{i \in \Omega} w_i$ and %$p \equiv \frac{k}{n}$, then $$\forall i \in \Omega, \mathbb{E}_{S \sim %D_p}\left[f(S)| i \in S\right] = pw + (1 -p)w_i$$ @@ -569,6 +573,42 @@ guarantees are sufficient for optimization. First look at learning weights in a cover function. Maybe facility location? Sums of concave over modular are probably too hard because of the connection to neural networks. +\section{Which learning guarantees imply optimization?} + +Here, we consider the following question: which learning guarantees imply +optimization? For example, \cite{TODO} provides the following guarantee for +cover functions: + +\begin{theorem} +There exists an algorithm such that for any $\epsilon>0$ given random and +uniform examples of a coverage function $c$ outputs a hypothesis, which is also +a coverage function $h$, such that with probability $2/3$: $\mathbb{E}_{\cal +U}[|h(x) - c(x)|] \leq \epsilon$. 
The algorithm runs in time $\tilde {\cal O}(n)
+\cdot \text{poly}(s/\epsilon)$ and uses $\log(n)\cdot \text{poly}(s/\epsilon)$
+examples, where $s = \min\{size(c), (1/\epsilon)^{\log(1/\epsilon)}\}$.
+\end{theorem}
+
+We would like to understand to what extent this $\ell_1$-bound allows us to
+optimize the coverage function $c$ under cardinality constraints using the
+hypothesis $h$. Let us consider the simpler case of an additive function, and
+suppose we have a similar guarantee: $\mathbb{E}_{x \sim {\cal U}}[|h(x) -
+c(x)|] \leq \epsilon$ where $\forall x, c(x) \equiv \sum_i w_i x_i$ and $h(x)
+\equiv \sum_i \hat w_i x_i$. Can we find a bound on $\|w - \hat w\|_{\infty}$?
+
+As it turns out, yes. Let $\forall i, w_i - \hat w_i \equiv \alpha_i$ and let
+$v(x) \equiv |h(x) - c(x)| = |\sum_{i} \alpha_i x_i |$. Consider the collection
+of good sets ${\cal G} \equiv \{S : v(S) < 4\epsilon\}$. We claim that $|{\cal
+G}| \geq \frac{3}{4}\cdot 2^n$. Suppose the contrary: there is at least a
+quarter of the sets which have value $v(S) > 4\epsilon$, so that $\mathbb{E}_{x
+\sim {\cal U}}[|v(x)|] \geq \frac{1}{2^n}\sum_{S \in {\cal G}^c} |v(S)| >
+\frac{1}{4}\cdot4\epsilon = \epsilon$ which is a contradiction. Consider element
+$i$ such that $|\alpha_i| \equiv \|\alpha\|_{\infty}$. Consider the collection
+of sets which contain $i$: ${\cal S}_i \equiv \{S : i \in S\}$. Notice that
+$|{\cal S}_i| = |{\cal S}_i^c| = 2^{n-1}$. Therefore, $|{\cal G} \cap {\cal
+S}_i^c| \geq \frac{1}{4}\cdot 2^n$. For all sets $S$ in ${\cal G} \cap {\cal
+S}_i^c$, $v(S\cup\{i\}) \geq |\alpha_i| - 4\epsilon$. It follows that
+$\mathbb{E}_{x \sim {\cal U}}[|v(x)|] \geq \frac{1}{4}(|\alpha_i| - 4\epsilon)$
+and, since this expectation is at most $\epsilon$, $\|w - \hat w\|_{\infty} \leq 8 \epsilon$.
 \bibliography{optimize}
 \bibliographystyle{apalike}
--
cgit v1.2.3-70-g09d2