From fb90afefbf3e6d10164a4c1a496e71a688e600f0 Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Wed, 23 Sep 2015 10:28:42 -0400
Subject: [hw1] Reporting results

---
 hw1/main.tex | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
(limited to 'hw1/main.tex')

diff --git a/hw1/main.tex b/hw1/main.tex
index 3aea165..ee9b915 100644
--- a/hw1/main.tex
+++ b/hw1/main.tex
@@ -20,6 +20,7 @@
 \usepackage{amsfonts}
 \usepackage{listings}
 \usepackage{bm}
+\usepackage{graphicx}
 
 % Some useful macros.
 \newcommand{\prob}{\mathbb{P}}
@@ -398,6 +399,31 @@ training data using the QR decomposition (see Section 7.5.2 in Murphy's book).
 \end{itemize}
 \end{problem}
 
+\paragraph{Solution} See file \textsf{main.py} for the code. I obtained:
+\begin{align*}
+  \hat{\mathbf{w}} = [&5.55782079, 2.25190765, 1.07880135, -5.91177796, -1.73480336,\\
+  &-1.63875478, -0.26610556, 0.81781409, -0.65913397, 7.74153395]
+\end{align*}
+and an RMSE of 5.20880460.
+
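+A minimal sketch of what the QR-based fit might look like (the actual code is
+in \textsf{main.py}, which is not part of this diff; the design matrix
+\texttt{X} and the targets \texttt{y} are assumed to be NumPy arrays):
+\begin{lstlisting}[language=Python]
+import numpy as np
+from scipy.linalg import solve_triangular
+
+def fit_qr(X, y):
+    # Thin QR factorization X = QR (R upper triangular) turns the
+    # normal equations X'X w = X'y into the triangular system R w = Q'y.
+    Q, R = np.linalg.qr(X)
+    return solve_triangular(R, Q.T @ y)
+
+def rmse(X, y, w):
+    # Root mean squared error of the predictions X @ w.
+    return np.sqrt(np.mean((X @ w - y) ** 2))
+\end{lstlisting}
+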
 \begin{problem}[14pts]\label{prob:numerical_linear_model}
 Write code that evaluates the log-posterior probability (up to an additive
 constant) of $\mathbf{w}$ according to the previous Bayesian linear model and the normalized training data.
@@ -427,6 +453,40 @@ log-posterior probability to transform the maximization problem into a minimizat
 \end{itemize}
 \end{problem}
 
+\paragraph{Solution} See file \textsf{main.py} for the code. I obtained:
+\begin{align*}
+  \hat{\mathbf{w}} = [&5.55780742, 2.2516761, 1.078904, -5.9118024, -1.73438477,\\
+  &-1.63887755, -0.26611567, 0.81785313, -0.65901996, 7.7415115]
+\end{align*}
+and an RMSE of 5.20880242.
+
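+A sketch of one way to set this up with SciPy; it is illustrative only (the
+actual code is in \textsf{main.py}, and \texttt{sigma2} and \texttt{tau2}
+stand in for the noise and prior variances fixed earlier in the assignment):
+\begin{lstlisting}[language=Python]
+import numpy as np
+from scipy.optimize import minimize
+
+def neg_log_posterior(w, X, y, sigma2=1.0, tau2=1.0):
+    # Negative log-posterior up to an additive constant: Gaussian
+    # likelihood (noise variance sigma2) plus a zero-mean Gaussian
+    # prior on w (variance tau2).
+    r = y - X @ w
+    return r @ r / (2 * sigma2) + w @ w / (2 * tau2)
+
+def grad(w, X, y, sigma2=1.0, tau2=1.0):
+    # Gradient of the negative log-posterior.
+    return X.T @ (X @ w - y) / sigma2 + w / tau2
+
+def fit_lbfgs(X, y):
+    w0 = np.zeros(X.shape[1])
+    res = minimize(neg_log_posterior, w0, args=(X, y), jac=grad,
+                   method="L-BFGS-B")
+    return res.x
+\end{lstlisting}
+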
+\vspace{2em}
+
 Linear regression can be extended to model non-linear relationships by
 replacing the original features $\mathbf{x}$ with some non-linear functions of
 the original features $\bm \phi(\mathbf{x})$. We can automatically generate one
@@ -482,4 +542,36 @@ by the method L-BFGS for each value of $d$. Answer the following questions:
 \end{itemize}
 \end{problem}
 
+\paragraph{Solution} See file \textsf{main.py} for the code. I obtained the
+following plot:
+\begin{center}
+\includegraphics[width=0.5\textwidth]{plot.pdf}
+\end{center}
+
+We see that the non-linear transformations help reduce the prediction error:
+the data is probably not very well modeled by a linear relationship, and
+allowing more complex relationships (the larger $d$ is, the more complex the
+model) yields a better fit.
+
+QR is faster than L-BFGS when the number of features is not too large ($d\leq
+400$); for $d=600$, L-BFGS is faster. The asymptotic complexity of QR is
+$O(nd^2 + d^3)$, whereas that of L-BFGS is $O(nd)$ for a fixed number of
+iterations. Asymptotically, L-BFGS is therefore faster, as we observe for
+$d=600$; for smaller values of $d$, the constants hidden in the $O(\cdot)$
+notation prevent a formal comparison.
+
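+This timing comparison can be reproduced with a small harness along the lines
+of the sketch below (\texttt{fit\_qr} and \texttt{fit\_lbfgs} refer to the
+illustrative functions sketched above):
+\begin{lstlisting}[language=Python]
+import time
+
+def timed_fit(fit, X, y):
+    # Wall-clock time of a single call to fit(X, y), e.g. fit_qr or
+    # fit_lbfgs, for the QR vs. L-BFGS timing comparison.
+    start = time.perf_counter()
+    w = fit(X, y)
+    return w, time.perf_counter() - start
+\end{lstlisting}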
+
 \end{document}
-- 
cgit v1.2.3-70-g09d2