% scriptSkeleton.m
%
% Trains a pooled-covariance Gaussian classifier (naive-Bayes-style: one mean
% per class, a single covariance Sigma shared across classes) on rows of
% 'mean.csv' and evaluates precision/recall with 10-fold cross-validation over
% contiguous "session" segments.  Two inference modes are implemented:
%   'NB'  - per-row maximum a posteriori classification
%   'SHT' - sequential hypothesis testing: log-likelihoods are accumulated
%           across consecutive rows of the same session, resetting at each
%           session boundary
% Results are plotted as a precision/recall curve over 50 confidence thresholds.
%
% Provenance: extracted from commit 199c8fdee50243b59083bdd15ef3e411f158a17d
% ("reducing noise by half", Jon Whiteaker, 2012-02-29), which created
% data/combined/scriptSkeleton.m.

% Earlier input variants kept for reference (separate train/test files, or
% file names taken from command-line arguments via Octave's argv()):
% trainD = dlmread('train_limbs.csv', ',', 1, 0);
% testD = dlmread('test_limbs.csv', ',', 1, 0);
% trainD = dlmread(argv(){1}, ',', 1, 0);
% testD = dlmread(argv(){2}, ',', 1, 0);
% D = [trainD; testD];
% D = dlmread(argv(){1}, ',', 1, 0);

% Load the data matrix, skipping one header row.
D = dlmread('mean.csv', ',', 1, 0);
X = D(:, 6 : end);   % feature columns start at column 6
y = D(:, 2);         % class label
% NOTE(review): session is read from the SAME column as y (column 2), so the
% SHT mode resets whenever the label changes.  Confirm against the CSV schema
% whether a dedicated session-id column was intended here.
session = D(:, 2);
% z is some per-row scalar used only for range filtering below (rows are kept
% when 2 < z < 3) -- presumably a distance/depth measurement; TODO confirm.
z = D(:, 5);

% remove missing values and outliers
% -1 is the missing-value marker: drop any row with a missing feature,
% then keep only rows whose z falls in the open interval (2, 3).
active = all(X ~= -1, 2);
active = active & (z > 2) & (z < 3);

% Keep only the mostFreqClasses most frequent labels; deactivate the rest.
mostFreqClasses = 100;
tab = tabulate(y);
[nil, delc] = sort(tab(:, 2), 'descend');
delc = delc(mostFreqClasses + 1 : end);
for c = delc'
    active(y == c) = false;
end

% Apply the row filter and cache dimensions.
X = X(active, :);
y = y(active);
session = session(active);
z = z(active);
numClasses = max(y);   % labels are assumed to be positive integers
N = size(X, 1);
K = size(X, 2);

% normalization
X = zscore(X);

% Cross-validation setup: splits holds the first row index of every session
% segment (plus the sentinel N + 1), so sessions are never cut across folds.
numEx = 10;
splits = [1; find(session(1 : end - 1) ~= session(2 : end)) + 1; N + 1];
ns = length(splits) - 1;

prec = [];
recall = [];
for ex = 1 : numEx
    % Fold ex tests on a contiguous 1/numEx slice of the sessions and trains
    % on everything else.
    test = splits(round(ns * (ex - 1) / numEx) + 1) : ...
        splits(round(ns * ex / numEx) + 1) - 1;
    train = setdiff(1 : N, test);
% Earlier "train on the past, test on the next slice" variant, kept disabled:
% train = 1 : splits(round(ns * ex / numEx) + 1) - 1;
% test = splits(round(ns * ex / numEx) + 1) : ...
%     splits(round(ns * (ex + 1) / numEx) + 1) - 1;

    % NB classifier with multivariate Gaussians
    % Py(c)  = empirical class prior; Pxy(c, :) = class mean;
    % Sigma  = prior-weighted pooled covariance shared by all classes.
    Py = zeros(numClasses, 1);
    Pxy = zeros(numClasses, K);
    Sigma = zeros(K, K);
    for c = 1 : numClasses
        sub = train(y(train) == c);
        if (~isempty(sub))
            Py(c) = length(sub) / length(train);
            Pxy(c, :) = mean(X(sub, :), 1);
            Sigma = Sigma + Py(c) * cov(X(sub, :));
        end
    end
    % Diagonal-covariance variant, kept disabled:
    % Sigma = diag(diag(Sigma));

    solver = 'SHT';
    switch (solver)
        case 'NB'
            % NB inference
            % Seed every column with log(Py)' so classes absent from the
            % training fold keep log-prior -Inf and can never win the max.
            logp = repmat(log(Py)', N, 1);
            for c = 1 : numClasses
                if (Py(c) > 0)
                    logp(:, c) = log(Py(c)) + log(mvnpdf(X, Pxy(c, :), Sigma));
                end
            end

        case 'SHT'
            % sequential hypothesis testing
            % First compute per-row log-likelihoods (no prior yet) ...
            logp = zeros(N, numClasses);
            for c = 1 : numClasses
                if (Py(c) > 0)
                    logp(:, c) = log(mvnpdf(X, Pxy(c, :), Sigma));
                end
            end

            % ... then accumulate them along each session.  nhyp(i) counts the
            % log-terms summed into row i: at a session start the row holds
            % prior + one likelihood (2 terms); each later row adds one more.
            nhyp = zeros(N, 1);
            for i = 1 : N
                if ((i == 1) || (session(i - 1) ~= session(i)))
                    logp(i, :) = logp(i, :) + log(Py');
                    nhyp(i) = 2;
                else
                    logp(i, :) = logp(i, :) + logp(i - 1, :);
                    nhyp(i) = nhyp(i - 1) + 1;
                end
            end
            % Divide by the term count so scores are per-term averages and
            % remain comparable across rows with different history lengths.
            logp = logp ./ repmat(nhyp, 1, numClasses);
    end

    % prediction
    % yp is the argmax class; conf is its softmax-normalized posterior.
    % NOTE(review): exp() of raw log-scores may underflow to 0/0 for very
    % unlikely rows -- such rows yield NaN confidence; confirm acceptable.
    [conf, yp] = max(logp, [], 2);
    conf = exp(conf) ./ sum(exp(logp), 2);

    % evaluation
    % Sweep 50 confidence thresholds th = 1 - 1.33^(-i), approaching 1, and
    % record precision (accuracy among accepted rows) and recall (fraction of
    % test rows accepted) at each.
    for i = 1 : 50
        th = 1 - 1 / (1.33 ^ i);
        sub = test(conf(test) > th);
        prec(ex, i) = mean(y(sub) == yp(sub));
        recall(ex, i) = length(sub) / length(test);
    end
end
% mean([]) is NaN: when a threshold accepted no rows, define precision as 1
% (nothing was predicted, so nothing was predicted wrongly).
prec(isnan(prec)) = 1;

% Plot the fold-averaged precision/recall curve (percentages).
hold on;
plot(100 * mean(recall), 100 * mean(prec), '-ro', ...
    'LineWidth', 1, 'MarkerSize', 4, 'MarkerFaceColor', 'w')
xlabel('Recall [%]');
ylabel('Precision [%]');
hold off;
pause

% Residual-histogram diagnostic (per-feature deviations from the class mean),
% kept disabled:
% A = X - Pxy(y, :);
% for k = 1 : 9
%     subplot(3, 3, k);
%     hist(A(:, k), -5 : 0.1 : 5);
%     h = findobj(gca, 'Type', 'patch');
%     set(h, 'FaceColor', [0.5, 1, 0.5], 'LineStyle', 'none')
%     axis([-5, 5, 0, Inf]);
%     title(sprintf('X_%i', k));
% end