import time
from math import sqrt, pi

import numpy as np
import scipy.linalg
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import seaborn

seaborn.set_style("white")

# CASP dataset: first column is the target, the remaining columns are features.
data = np.loadtxt('CASP.csv', delimiter=',', skiprows=1)
y = data[:, 0]
X = data[:, 1:]


def split_train_test(X, y, fraction_train=9.0 / 10.0):
    """Split the data into training and test sets (no shuffling)."""
    end_train = int(round(X.shape[0] * fraction_train))
    X_train = X[:end_train, :]
    y_train = y[:end_train]
    X_test = X[end_train:, :]
    y_test = y[end_train:]
    return X_train, y_train, X_test, y_test


def normalize_features(X_train, X_test):
    """Standardize both sets using the training mean and standard deviation."""
    mean_X_train = np.mean(X_train, 0)
    std_X_train = np.std(X_train, 0)
    std_X_train[std_X_train == 0] = 1  # avoid division by zero for constant features
    X_train_normalized = (X_train - mean_X_train) / std_X_train
    X_test_normalized = (X_test - mean_X_train) / std_X_train
    return X_train_normalized, X_test_normalized


X_train, y_train, X_test, y_test = split_train_test(X, y)
X_train, X_test = normalize_features(X_train, X_test)
# Append a constant column so the weight vector includes a bias term.
X_train = np.concatenate((X_train, np.ones((X_train.shape[0], 1))), 1)
X_test = np.concatenate((X_test, np.ones((X_test.shape[0], 1))), 1)


def map_estimate(X, y, sigma=1.0, tau=10.0):
    """MAP weights via QR on the augmented least-squares system.

    Minimizes 1/(2*sigma) ||y - Xw||^2 + 1/(2*tau) ||w||^2 (the same objective
    as log_posterior below), which is ordinary least squares on the augmented
    design [X/sqrt(sigma); I/sqrt(tau)] with targets [y/sqrt(sigma); 0].
    """
    lamb = np.identity(X.shape[1]) / sqrt(tau)
    X_tilde = np.concatenate((X / sqrt(sigma), lamb))
    y_tilde = np.concatenate((y / sqrt(sigma), np.zeros(X.shape[1])))
    q, r = np.linalg.qr(X_tilde)
    return scipy.linalg.solve_triangular(r, np.dot(q.T, y_tilde), check_finite=False)


def train_qr(X, y):
    return map_estimate(X, y)


def log_posterior(w, X, y, sigma=1.0, tau=10.0):
    """Negative log posterior (up to a constant) for a Gaussian likelihood and prior."""
    return (1 / (2 * sigma) * np.linalg.norm(y - np.dot(X, w)) ** 2
            + 1 / (2 * tau) * np.linalg.norm(w) ** 2)


def gradient(w, X, y, sigma=1.0, tau=10.0):
    """Gradient of log_posterior with respect to w."""
    return -1 / sigma * np.dot(X.T, y - np.dot(X, w)) + 1 / tau * w


def verify_gradient(eps=1e-10):
    """Compare the analytic gradient against central finite differences."""
    w = np.ones(X_train.shape[1])
    for i in range(X_train.shape[1]):
        epsilon = np.zeros(X_train.shape[1])
        epsilon[i] = eps
        print((log_posterior(w + epsilon, X_train, y_train)
               - log_posterior(w - epsilon, X_train, y_train)) / (2 * eps))
        print(gradient(w, X_train, y_train)[i])


def rmse(X, y, w):
    """Root-mean-square error of the linear predictor Xw."""
    predictions = np.dot(X, w)
    return np.linalg.norm(predictions - y) / sqrt(len(y))


def train_lbfgs(X, y):
    """MAP weights by minimizing log_posterior with L-BFGS-B."""
    x0 = np.zeros(X.shape[1])

    def f(w):
        return log_posterior(w, X, y)

    def g(w):
        return gradient(w, X, y)

    return minimize(f, x0, jac=g, method='L-BFGS-B', options={'maxiter': 100}).x


def map_features(X, A, b):
    """Random cosine feature map phi(x) = cos(Ax + b).

    A and b must be drawn once and shared between the training and test sets
    so that both are passed through the same random projection.
    """
    return np.cos(np.dot(A, X.T).T + b)


if __name__ == "__main__":
    # problem 4: MAP estimate via QR decomposition
    wqr = train_qr(X_train, y_train)
    print(wqr)
    print(rmse(X_test, y_test, wqr))

    # problem 5: MAP estimate via L-BFGS
    wlbfgs = train_lbfgs(X_train, y_train)
    print(wlbfgs)
    print(rmse(X_test, y_test, wlbfgs))

    # problem 6: random nonlinear features of increasing dimension,
    # comparing training time and test RMSE for the two solvers
    lqr = []
    llbfgs = []
    for d in [100, 200, 400, 600]:
        # Draw one random projection per d and reuse it for train and test.
        A = np.random.normal(0, 1, size=(d, X_train.shape[1]))
        b = np.random.uniform(0, 2 * pi, d)
        Phi_train = map_features(X_train, A, b)
        Phi_test = map_features(X_test, A, b)

        t_start = time.time()
        wqr = train_qr(Phi_train, y_train)
        rqr = time.time() - t_start

        t_start = time.time()
        wlbfgs = train_lbfgs(Phi_train, y_train)
        rlbfgs = time.time() - t_start

        lqr.append((rqr, rmse(Phi_test, y_test, wqr)))
        llbfgs.append((rlbfgs, rmse(Phi_test, y_test, wlbfgs)))

    plt.figure(figsize=(8, 8))
    times_qr, rmse_qr = zip(*lqr)
    plt.plot(times_qr, rmse_qr, "-ro", label="QR")
    times_lbfgs, rmse_lbfgs = zip(*llbfgs)
    plt.plot(times_lbfgs, rmse_lbfgs, "-bo", label="L-BFGS")
    plt.xlabel("Time (s)")
    plt.ylabel("RMSE")
    plt.legend()
    plt.savefig("plot.pdf", bbox_inches="tight")
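

# ---------------------------------------------------------------------------
# Optional sanity-check sketch (not part of the original script; the helper
# name below is hypothetical). The QR-based MAP estimate should agree with the
# closed-form ridge solution of the same objective,
#     w = (X^T X / sigma + I / tau)^{-1} X^T y / sigma.
def map_estimate_closed_form(X, y, sigma=1.0, tau=10.0):
    A = np.dot(X.T, X) / sigma + np.identity(X.shape[1]) / tau
    b = np.dot(X.T, y) / sigma
    return np.linalg.solve(A, b)

# Example usage (assumes X_train / y_train defined above):
#     assert np.allclose(map_estimate_closed_form(X_train, y_train),
#                        map_estimate(X_train, y_train), atol=1e-6)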