-rw-r--r--  notes.bib        19
-rw-r--r--  notes.tex        67
-rw-r--r--  papers/lse.pdf   bin 0 -> 1966251 bytes
3 files changed, 78 insertions, 8 deletions
diff --git a/notes.bib b/notes.bib
index 785cf1e..db41d54 100644
--- a/notes.bib
+++ b/notes.bib
@@ -149,3 +149,22 @@
year={1950},
publisher={JSTOR}
}
+
+@article{lse,
+  title = {Least Squares and Grouping Method Estimators in the Errors in Variables Model},
+  author = {Richardson, David H. and Wu, De-Min},
+  journal = {Journal of the American Statistical Association},
+  volume = {65},
+  number = {330},
+  pages = {724--748},
+  url = {http://www.jstor.org/stable/2284583},
+  issn = {0162-1459},
+  abstract = {The probability density function of the least squares estimator of the slope coefficient in the errors in variables model is presented. It is shown how the bias and mean-square error of the least squares estimator b depend on the parameters of the model. In particular, for a given sample size, b converges to the true parameter as one of the distribution parameters is increased indefinitely. The analysis is supplemented with numerical computations of the relative bias and mean-square error. The distribution function of the grouping method estimator b̄ has the same form as that of b. The biases and mean-square errors of b and b̄ are compared. For the case of zero within-group variance, the use of b̄ always reduces the magnitude of the relative bias and generally reduces the mean-square error. For large values of the within-group variance, use of b̄ may result in an increase in mean-square error.},
+  language = {English},
+  year = {1970},
+  publisher = {American Statistical Association},
+}
diff --git a/notes.tex b/notes.tex
index 25ca121..c84f989 100644
--- a/notes.tex
+++ b/notes.tex
@@ -1,6 +1,7 @@
\documentclass{article}
\usepackage[utf8]{inputenc}
\usepackage{amsmath,amsthm,amsfonts}
+\usepackage{comment}
\newtheorem{lemma}{Lemma}
\newcommand{\var}{\mathop{\mathrm{Var}}}
\newcommand{\condexp}[2]{\mathop{\mathbb{E}}\left[#1|#2\right]}
@@ -9,6 +10,7 @@
\newcommand{\tr}[1]{#1^*}
\newcommand{\ip}[2]{\langle #1, #2 \rangle}
\newcommand{\mse}{\mathop{\mathrm{MSE}}}
+\newcommand{\trace}{\mathop{\mathrm{tr}}}
\begin{document}
\section{Understanding the recommender system}
@@ -16,7 +18,7 @@
\subsection{General problem}
We already have a database $D_n$ of $n$ users. For each user $i$ we
-have a set of explanatory variables (features), this is a vector
+have a set of $k$ explanatory variables (features), collected in a vector
$x_i$.
The problem is the following: we are about to start an experiment where for
@@ -202,7 +204,62 @@ inequality becomes:
which is trivially true (a more direct proof for the one-dimensional
case is of course possible).
-\subparagraph{Useless attempt}
+In order to understand more precisely under which assumptions the
+above inequality might hold, it is convenient to look at it
+from the quadratic form perspective. Indeed, this inequality can be
+rewritten as:
+
+\begin{equation}\label{eq-inequality}
+\tr x B x \geq 0
+\end{equation}
+
+with:
+\begin{align*}
+ B = &\, \left(1+\norm{x_0}^2\right)\left(1+\norm{z}^2\right)\ip{x_0}{z}
+ (x_0\tr z+z\tr x_0)\\
+& -\ip{x_0}{z}^2\Big( \left(1+\norm{x_0}^2\right)z\tr z + \left(1+\norm{z}^2\right)x_0\tr x_0\Big)
+\end{align*}
+
+This quadratic form is degenerate: its kernel is $x_0^{\bot}\cap
+z^\bot$, which has dimension $k-2$.
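+
+As a quick check, every term of $B$ ends in $\tr z$ or $\tr{x_0}$, so
+for any $x$ orthogonal to both $x_0$ and $z$ we get:
+\begin{displaymath}
+  Bx = \left(1+\norm{x_0}^2\right)\left(1+\norm{z}^2\right)\ip{x_0}{z}
+  \left(x_0(\tr z x)+z(\tr{x_0}x)\right)
+  -\ip{x_0}{z}^2\Big(\left(1+\norm{x_0}^2\right)z(\tr z x)
+  + \left(1+\norm{z}^2\right)x_0(\tr{x_0}x)\Big) = 0
+\end{displaymath}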
+
+\paragraph{Case when $\norm{x_0}=\norm{z}=1$} In this case $B = 2B'$,
+so it suffices to study the quadratic form given by the matrix $B'$ with:
+\begin{displaymath}
+  B' = 2\ip{x_0}{z}(x_0\tr z+z\tr x_0) -\ip{x_0}{z}^2(z\tr z + x_0\tr x_0)
+\end{displaymath}
+
+
+Writing $a = \ip{x_0}{z}$, the two non-zero eigenvalues are:
+\begin{align*}
+  \lambda_1 & = -a^3 + a^2 + 2a = -a(a+1)(a-2)\\
+  \lambda_2 & = a^3 + a^2 - 2a = a(a-1)(a+2)
+\end{align*}
+
+which are respectively associated with the eigenvectors:
+\begin{align*}
+ x_1 & = x_0+z\\
+ x_2 & = x_0 - z
+\end{align*}
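+
+Indeed, using $\tr z x_1 = a+1$ and $\tr{x_0} x_1 = 1+a$:
+\begin{displaymath}
+  B'x_1 = 2a(a+1)(x_0+z) - a^2(a+1)(z+x_0) = a(a+1)(2-a)\,x_1 = \lambda_1 x_1
+\end{displaymath}
+and symmetrically $B'x_2 = a(a-1)(a+2)\,x_2 = \lambda_2 x_2$.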
+
+By the Cauchy-Schwarz inequality, $a\in(-1,1)$ (assuming $x_0\neq\pm z$),
+and for $a\neq 0$ the two eigenvalues have opposite signs. Thus inequality
+\eqref{eq-inequality} does not hold for all $x$.
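+
+For a concrete counterexample, take $a = 1/2$ and $x = x_2 = x_0 - z$,
+so that $\norm{x_2}^2 = 2-2a = 1$ and:
+\begin{displaymath}
+  \tr x B' x = \lambda_2\norm{x_2}^2
+  = \tfrac{1}{2}\cdot\left(-\tfrac{1}{2}\right)\cdot\tfrac{5}{2}
+  = -\tfrac{5}{8} < 0
+\end{displaymath}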
+
+\paragraph{In expectation?} If we assume prior knowledge of the
+distribution of $x$, writing $\Sigma$ for the variance-covariance matrix
+of $x$ and $\mu$ for its mean vector, then taking the expectation of
+\eqref{eq-inequality} we get:
+\begin{displaymath}
+  \expt{\tr x B x} = \trace(B\Sigma) + \tr\mu B\mu
+\end{displaymath}
+so the inequality holds in expectation if and only if this quantity is
+non-negative.
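+
+This is the standard identity for the expectation of a quadratic form,
+obtained from $\expt{x\tr x} = \Sigma + \mu\tr\mu$ and the cyclic
+property of the trace:
+\begin{displaymath}
+  \expt{\tr x B x} = \expt{\trace\left(Bx\tr x\right)}
+  = \trace\left(B(\Sigma + \mu\tr\mu)\right)
+  = \trace(B\Sigma) + \tr\mu B\mu
+\end{displaymath}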
+
+\nocite{shapley,inverse,recommendation,cook,shapleyor,subsetselection11,lse}
+\bibliographystyle{plain}
+\bibliography{notes}
+
+\section*{Appendix}
+
+\paragraph{Previous attempt at taming the submodularity}
The inequality only depends on the projection of $x$ on the plane
spanned by $x_0$ and $z$. Writing
@@ -237,11 +294,5 @@ This inequality will be true for all $\lambda$ and $\mu$ if and only
if the quadratic form is positive semidefinite. As its trace is
positive, this is equivalent to the non-negativity of its determinant.
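+
+To spell this out: a symmetric $2\times 2$ matrix $M$ has eigenvalues
+\begin{displaymath}
+  \lambda_{\pm} = \frac{\trace M \pm \sqrt{(\trace M)^2 - 4\det M}}{2}
+\end{displaymath}
+which are both non-negative exactly when $\trace M\geq 0$ and $\det M\geq 0$.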
-\subparagraph{General derivation}
-
-
-\nocite{shapley,inverse,recommendation,cook,shapleyor,subsetselection11}
-\bibliographystyle{plain}
-\bibliography{notes.bib}
\end{document}
\ No newline at end of file
diff --git a/papers/lse.pdf b/papers/lse.pdf
new file mode 100644
index 0000000..ad7385a
--- /dev/null
+++ b/papers/lse.pdf
Binary files differ