summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStratis Ioannidis <stratis@stratis-Latitude-E6320.(none)>2012-11-05 00:33:09 -0800
committerStratis Ioannidis <stratis@stratis-Latitude-E6320.(none)>2012-11-05 00:33:09 -0800
commit69117425b86e1b407f093daf2282155c9f4e0f8d (patch)
tree0241a806bccc16fa3f32d4a6884cc3ff3eac7319
parentcec9b298f548ce44fe5eaecf71c04d65c95de993 (diff)
downloadrecommendation-69117425b86e1b407f093daf2282155c9f4e0f8d.tar.gz
proof
-rw-r--r--main.tex58
1 files changed, 38 insertions, 20 deletions
diff --git a/main.tex b/main.tex
index 7faec7e..269bc7a 100644
--- a/main.tex
+++ b/main.tex
@@ -454,15 +454,15 @@ For all $\lambda\in[0,1]^{n},$
\end{lemma}
\begin{proof}
The bound $F_{\mathcal{N}}(\lambda)\leq L_{\mathcal{N}(\lambda)}$ follows by the concavity of the $\log\det$ function.
- We will prove that $\frac{1}{2}$ is a lower bound of the ratio $\partial_i
+ To show the lower bound,
+ we first prove that $\frac{1}{2}$ is a lower bound of the ratio $\partial_i
F(\lambda)/\partial_i L(\lambda)$, where
$\partial_i\, \cdot$ denotes the partial derivative with respect to the
$i$-th variable.
Let us start by computing the derivatives of $F$ and
$L$ with respect to the $i$-th component.
- For $F$, it suffices to look at the derivative of
- $P_\mathcal{N}^\lambda(S)$:
+ Observe that:
\begin{displaymath}
\partial_i P_\mathcal{N}^\lambda(S) = \left\{
\begin{aligned}
@@ -484,13 +484,21 @@ For all $\lambda\in[0,1]^{n},$
\begin{multline*}
\partial_i F =
\sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S\cup\{i\})
- - \sum_{\substack{S\subseteq\mathcal{N}\\ i\in \mathcal{N}\setminus S}}
- P_{\mathcal{N}\setminus\{i\}}^\lambda(S)V(S)
+ P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\big(V(S\cup\{i\})
+ - V(S)\big)
\end{multline*}
- By the Sherman-Morisson formula, the marginal contribution of $i$ to
- $S$ is
- $ V(S\cup\{i\}) - V(S) = \frac{1}{2}\log\left(1 + \T{x_i} A(S)^{-1}x_i\right)$.
+ The marginal contribution of $i$ to
+ $S$ can be written as
+\begin{align*}
+V(S\cup \{i\}) - V(S)& = \frac{1}{2}\log\det(I_d
+ + \T{X_S}X_S + x_i\T{x_i})\\
+ & - \frac{1}{2}\log\det(I_d + \T{X_S}X_S)\\
+ & = \frac{1}{2}\log\det(I_d + x_i\T{x_i}(I_d +
+\T{X_S}X_S)^{-1})\\
+& = \frac{1}{2}\log(1 + \T{x_i}A(S)^{-1}x_i)
+\end{align*}
+where $A(S) =I_d+ \T{X_S}X_S$.
+% $ V(S\cup\{i\}) - V(S) = \frac{1}{2}\log\left(1 + \T{x_i} A(S)^{-1}x_i\right)$.
Using this,
\begin{displaymath}
\partial_i F(\lambda) = \frac{1}{2}
@@ -498,22 +506,32 @@ Using this,
P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
\log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)
\end{displaymath}
- where $A(S) =I_d+ \T{X_S}X_S$. The computation of the derivative of $L$ uses standard matrix
+ The computation of the derivative of $L$ uses standard matrix
calculus and gives:
\begin{displaymath}
\partial_i L(\lambda)
=\frac{1}{2} \T{x_i}\tilde{A}(\lambda)^{-1}x_i
\end{displaymath}
- where $\tilde{A}(\lambda) = I_d+\sum_{i\in \mathcal{N}}\lambda_ix_i\T{x_i}$. Using the following inequalities (where $A\geq B$ implies $A-B$ is positive semi-definite):
+ where $\tilde{A}(\lambda) = I_d+\sum_{i\in \mathcal{N}}\lambda_ix_i\T{x_i}$.
+
+For two symmetric matrices $A$ and $B$, we write $A\succ B$ ($A\succeq B$) if $A-B$ is positive definite (positive semi-definite).
+This order allows us to define the notion of a \emph{decreasing} as well as \emph{convex}
+matrix function, similarly to their real counterparts. In particular,
+ matrix inversion is decreasing and convex over symmetric
+positive definite matrices.
+In particular,
+\begin{gather*}
+ \forall S\subseteq\mathcal{N},\quad A(S)^{-1} \succeq A(S\cup\{i\})^{-1}
+\end{gather*}
+ Observe that:
\begin{gather*}
\forall S\subseteq\mathcal{N}\setminus\{i\},\quad
P_{\mathcal{N}\setminus\{i\}}^\lambda(S)\geq
P_{\mathcal{N}\setminus\{i\}}^\lambda(S\cup\{i\})\\
\forall S\subseteq\mathcal{N},\quad P_{\mathcal{N}\setminus\{i\}}^\lambda(S)
- \geq P_\mathcal{N}^\lambda(S)\\
- \forall S\subseteq\mathcal{N},\quad A(S)^{-1} \geq A(S\cup\{i\})^{-1}
+ \geq P_\mathcal{N}^\lambda(S)
\end{gather*}
- we get:
+ Hence:
\begin{align*}
\partial_i F(\lambda)
% & = \sum_{\substack{S\subseteq\mathcal{N}\\ i\in\mathcal{N}\setminus S}} P_\mathcal{N}^\lambda(S) \log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)\\
@@ -530,9 +548,9 @@ Using this,
P_\mathcal{N}^\lambda(S)
\log\Big(1 + \T{x_i}A(S)^{-1}x_i\Big)
\end{align*}
- Using that $A(S)\geq I_d$ we get that:
+ Using that $A(S)\succeq I_d$ we get that:
\begin{displaymath}
- \T{x_i}A(S)^{-1}x_i \leq \norm{x_i}_2^2 = 1
+ \T{x_i}A(S)^{-1}x_i \leq \norm{x_i}_2^2 \leq 1
\end{displaymath}
Moreover:
\begin{displaymath}
@@ -553,10 +571,10 @@ Using this,
\geq \frac{1}{2}
\partial_i L(\lambda) \end{align*}
-This will be enough to conclude, by observing the following cases.
+Having bound the ratio between the partial derivatives, we now bound the ratio $F(\lambda)/L(\lambda)$ from below. Consider the following cases.
First, if the minimum of the ratio
$F(\lambda)/L(\lambda)$ is attained at a point interior to the hypercube, then it is
- a critical point; such a critical point is
+ a critical point, \emph{i.e.}, $\partial_i \big(F(\lambda)/L(\lambda)\big)=0$ for all $i\in \mathcal{N}$; hence, at such a critical point is
characterized by:
\begin{equation}\label{eq:lhopital}
\frac{F(\lambda)}{L(\lambda)}
@@ -578,9 +596,9 @@ This will be enough to conclude, by observing the following cases.
0 or 1), without loss of generality, we can assume that the minimum is
attained on the face where the $n$-th variable has been fixed
to 0 or 1. Then, either the minimum is attained at a point interior to the
- face or on a boundary of the face. In the first case, relation
+ face or on a boundary of the face. In the first sub-case, relation
\eqref{eq:lhopital} still characterizes the minimum for $i< n$.
- In the second case, by repeating the argument again by induction, we see
+ In the second sub-case, by repeating the argument again by induction, we see
that all is left to do is to show that the bound holds for the vertices of
the cube (the faces of dimension 1). The vertices are exactly the binary
points, for which we know that both relaxations are equal to the value