From 424a6e62941f77c0633beb46c1314679de69f366 Mon Sep 17 00:00:00 2001
From: Thibaut Horel <thibaut.horel@gmail.com>
Date: Mon, 16 Jan 2012 18:32:54 -0800
Subject: More details added to the notes

---
 notes.tex | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'notes.tex')

diff --git a/notes.tex b/notes.tex
index f75d2b7..b4460f3 100644
--- a/notes.tex
+++ b/notes.tex
@@ -25,19 +25,21 @@ vector of explanatory variables $x$.
 
 The cost of the regression error will be measured by the MSE:
 \begin{displaymath}
-  \mathrm{MSE}(f_n) = \expt{\big(f_n(x)-y\big)^2}
+  \mse(f_n) = \expt{\big(f_n(x)-y\big)^2}
 \end{displaymath}
 
 The general goal is to understand how the size of the database impacts
 the MSE of the derived regression function.
 
 \subsection{From the bivariate normal case to linear regression}
-If $(X,Y)$ is drawn from a bivariate normal distribution. Then, one can
+If $(X,Y)$ is drawn from a bivariate normal distribution with mean
+vector $\mu$ and covariance matrix $\Sigma$, then one can
 write:
 \begin{displaymath}
   Y = \condexp{Y}{X} + \big(Y-\condexp{Y}{X}\big)
 \end{displaymath}
-
+In this particular case, $\condexp{Y}{X}$ is a linear function of $X$.
+Writing $\varepsilon = Y-\condexp{Y}{X}$, it is easy to see that $\expt{X\varepsilon}=0$.
 \subsection{Linear regression}
 
 We assume a linear model:
-- 
cgit v1.2.3-70-g09d2