author    jeanpouget-abadie <jean.pougetabadie@gmail.com>  2015-03-17 13:15:03 -0400
committer jeanpouget-abadie <jean.pougetabadie@gmail.com>  2015-03-17 13:15:03 -0400
commit    26b8d4e2c8239875a6dd0991ecd01cb8defd858f (patch)
tree      e46455c3c7afb6b275396f695fcd348e101e6861
parent    bb86707991d89cb49c2dfcc2c100d3df0d3620c9 (diff)
download  learn-optimize-26b8d4e2c8239875a6dd0991ecd01cb8defd858f.tar.gz
average weight method sort of added
-rw-r--r--  results.tex  45
1 file changed, 43 insertions(+), 2 deletions(-)
diff --git a/results.tex b/results.tex
index 648f8f6..d46a771 100644
--- a/results.tex
+++ b/results.tex
@@ -49,7 +49,7 @@ It follows that for any vector $x \in \mathbb{R}^n$, $|\frac{1}{N}\|A x\|^2_2 -\
Note that this is clearly not optimal. For example, for $k=1$, a coupon-collector argument shows that only ${\cal O}(n\log n)$ rows are needed before the matrix $A$ has rank $n$ with high probability, and not ${\cal O}(n^2)$ as stated by Corollary~\ref{cor:number_of_rows_needed}.
-\section{Optimizing Additive Functions}
+\section{Passive Optimization of Additive Functions}
Let $\Omega$ be a universe of elements of size $n$. For $k \in [n]$, let $D_k$ be the uniform probability distribution over all sets of size $k$, and let $D_p$ be the product probability distribution over sets such that $\forall j \in \Omega, \mathbb{P}_{S \sim D_p}(j \in S) = p$. Note that $D_{k}$ and $D_{p}$ for $p \equiv \frac{k}{n}$ are very similar, and will be considered almost interchangeably throughout the following notes. Consider the following problem:
@@ -57,7 +57,48 @@ Let $\Omega$ be a universe of elements of size $n$. For $k \in [n]$, let $D_k$ b
Let $f = (w_i)_{i \in \Omega}$ be an unknown additive function: $f(S) \equiv \sum_{u \in S} w_u$. How many (set, value) pairs $(S, f(S))$ where $S \sim D_k$ must be observed until we can optimize $f$ exactly or to a constant approximation factor under cardinality constraint $k$?
\end{problem}
-Note that for observations $(S_i, f(S_i))_{i \in N}$ we can write the system $A w = b$, where each row of $A_i$ corresponds to the indicator vector of the set $S_i$ and $b_i \equiv f(S_i)$. From Corollary~\ref{cor:number_of_rows_needed}, it is easy to see that $\Omega(n^2)$ rows are sufficient for the system $Aw =b$ to be non-degenerate. Therefore, with $\Omega(n^2)$ observations from $D_{p}$ where $p \equiv \frac{k}{n}$, we can optimize any additive function exactly.
+\subsection{Inverting the system}
+
+Note that for observations $(S_j, f(S_j))_{j \in [N]}$ we can write the system $A w = b$, where row $A_j$ of $A$ is the indicator vector of the set $S_j$ and $b_j \equiv f(S_j)$. From Corollary~\ref{cor:number_of_rows_needed}, $(C_k+1)^2 \frac{n^3}{k(n - k)}$ rows are sufficient for $A$ to have rank $n$ with probability $1-e^{-c_k n}$. If $A$ has rank $n$, the system can be solved in polynomial time, which both learns $f$ exactly and optimizes it under any cardinality constraint.
+
+\begin{proposition}
+Assume that $N$ (set, value) pairs $(S_j, f(S_j))_{j \in [N]}$ are observed, where $S_j \sim D_p$ and $p \equiv \frac{k}{n}$. If $N > (C_k +1)^2 \frac{n}{\sigma^2}$, where $\sigma^2 \equiv p(1-p)$, then we can learn $f$ exactly, and therefore optimize it under any cardinality constraint, with probability $1-e^{-cn}$.
+\end{proposition}
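+
+As a purely illustrative sanity check (not part of the argument above), the following sketch simulates observations from $D_p$, builds the indicator matrix $A$, and recovers $w$ by least squares. \texttt{numpy}, the universe size, and the number of observations are arbitrary choices made for the example.
+
+\begin{verbatim}
+# Illustrative sketch: recover additive weights from random (set, value)
+# pairs by solving the linear system A w = b. Parameters are arbitrary.
+import numpy as np
+
+rng = np.random.default_rng(0)
+n, k = 20, 5
+p = k / n
+w_true = rng.uniform(0.0, 1.0, size=n)      # unknown additive weights
+
+N = 4 * n                                   # number of observed pairs
+A = (rng.random((N, n)) < p).astype(float)  # row j = indicator vector of S_j
+b = A @ w_true                              # b_j = f(S_j)
+
+# If A has rank n, least squares recovers w exactly (up to numerical error);
+# otherwise more observations are needed.
+w_hat, *_ = np.linalg.lstsq(A, b, rcond=None)
+
+# Exact optimization under cardinality constraint k: keep the k largest weights.
+best_k = np.argsort(w_hat)[-k:]
+\end{verbatim}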
+
+\subsection{Average weight method}
+
+Another method is to compute, for every element $i$, the \emph{average value of the observed sets containing $i$}. Let $w_i$ be the weight of element $i$, $w \equiv f(\Omega) = \sum_{i \in \Omega} w_i$ and $p \equiv \frac{k}{n}$; then
+$$\forall i \in \Omega, \quad \mathbb{E}_{S \sim D_p}\left[f(S) \mid i \in S\right] = pw + (1 -p)w_i.$$
+Indeed, conditioned on $i \in S$, each element $j \neq i$ belongs to $S$ independently with probability $p$, so the conditional expectation equals $w_i + p(w - w_i) = pw + (1-p)w_i$.
+
+Note that this conditional expectation is increasing in $w_i$ (since $1-p > 0$), so it preserves the ranking of the elements' weights. For observations $(S_j, f(S_j))_{j \in [N]}$ and $N_i \equiv |\{j \in [N] : i \in S_j\}|$, we define the following estimator:
+
+\begin{equation}
+\forall i \in \Omega, \quad w_i^{N_i} \equiv \frac{1}{N_i}\sum_{j : i \in S_j} \frac{f(S_j) - pw}{1-p}
+\end{equation}
+
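+A minimal simulation of this estimator (again illustrative only; it takes $w = f(\Omega)$ as known, matching the definition above, and all parameter values are arbitrary):
+
+\begin{verbatim}
+# Illustrative sketch of the average weight estimator w_i^{N_i}.
+import numpy as np
+
+rng = np.random.default_rng(1)
+n, k = 20, 5
+p = k / n
+w_true = rng.uniform(0.0, 1.0, size=n)
+w_total = w_true.sum()                      # w = f(Omega), assumed known here
+
+N = 5000
+A = (rng.random((N, n)) < p).astype(float)  # A[j, i] = 1 iff i in S_j
+f_vals = A @ w_true                         # f(S_j)
+
+# For each i, average (f(S_j) - p*w) / (1 - p) over the sets S_j containing i.
+N_i = A.sum(axis=0)                         # number of observed sets containing i
+w_est = (A.T @ ((f_vals - p * w_total) / (1 - p))) / N_i
+
+top_k = np.argsort(w_est)[-k:]              # k largest estimated weights
+\end{verbatim}
+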
+By the identity above, $w_i^{N_i} \rightarrow w_i$ as $N_i \rightarrow +\infty$. We can obtain a concentration bound of $w_i^{N_i}$ around $w_i$ using Hoeffding's inequality:
+
+\begin{equation}
+\mathbb{P}\left(\left|w_i^{N_i} - w_i\right| \geq \epsilon w_i \right) \leq 2e^{-2(1-p)N_i\frac{\epsilon^2 w_i^2}{w^2}}
+\end{equation}
+
+Suppose that $\forall i \in \Omega, (1-\epsilon) w_i \leq w_i^{N_i} \leq (1 + \epsilon) w_i$, which by the bound above holds with high probability once every $N_i$ is sufficiently large. Under this assumption, if we choose the $k$ elements with largest estimated weight $w_i^{N_i}$, we obtain a $\frac{1-\epsilon}{1+\epsilon}$-approximation to OPT, where OPT is the value of the maximum-weight set of $k$ elements for the function $f$ (the inequality chain is spelled out below). To ensure that $N_i$ is sufficiently large for each element, note that $\mathbb{E}(N_i) = pN$ and apply a multiplicative Chernoff bound (with deviation $\frac{1}{2}$) coupled with a classic union bound:
+
+\begin{equation}
+\mathbb{P}\left(\bigcup_{i \in \Omega} \left[N_i \leq \frac{pN}{2}\right]\right) \leq \sum_{i\in \Omega} \mathbb{P}\left(N_i \leq \frac{pN}{2}\right) \leq n e^{-\frac{pN}{8}}
+\end{equation}
+
+As such, for any $C > 0$ and $N \geq (C+1)\frac{8}{p}\log n$, we have $\forall i \in \Omega, N_i \geq \frac{pN}{2}$ with probability at least $1-\frac{1}{n^C}$.
+
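+For completeness, here is the inequality chain behind the $\frac{1-\epsilon}{1+\epsilon}$ guarantee claimed above, under the multiplicative accuracy assumption on the $w_i^{N_i}$ and assuming the weights are nonnegative. Writing $\hat{S}$ for the set of the $k$ elements with largest estimated weights and $S^*$ for an optimal set of size $k$,
+
+\begin{equation}
+f(\hat{S}) = \sum_{i \in \hat{S}} w_i \geq \frac{1}{1+\epsilon}\sum_{i \in \hat{S}} w_i^{N_i} \geq \frac{1}{1+\epsilon}\sum_{i \in S^*} w_i^{N_i} \geq \frac{1-\epsilon}{1+\epsilon}\sum_{i \in S^*} w_i = \frac{1-\epsilon}{1+\epsilon}\,\mathrm{OPT}
+\end{equation}
+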
+\begin{proposition}
+Assume that $N$ (set, value) pairs $(S_j, f(S_j))_{j \in [N]}$ are observed, where $S_j \sim D_p$ and $p \equiv \frac{k}{n}$. If $N > (C_k +1)^2 \frac{n}{\sigma^2}$, then we can $\epsilon$-learn $f$ and optimize it to a $\frac{1+\epsilon}{1-\epsilon}$ factor under any cardinality constraint with probability $XXX$.
+\end{proposition}
+
+\section{Passive Optimization of Submodular Functions}
+
+\subsection{Inverting the system}
+
+\subsection{Average weight method}
\bibliography{optimize}
\bibliographystyle{plain}