1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
% trainD = dlmread('train_limbs.csv', ',', 1, 0);
% testD = dlmread('test_limbs.csv', ',', 1, 0);
% trainD = dlmread(argv(){1}, ',', 1, 0);
% testD = dlmread(argv(){2}, ',', 1, 0);
% D = [trainD; testD];
% D = dlmread(argv(){1}, ',', 1, 0);
D = dlmread('mean.csv', ',', 1, 0);
X = D(:, 6 : end);
y = D(:, 2);
session = D(:, 2);
z = D(:, 5);
% remove missing values and outliers
active = all(X ~= -1, 2);
active = active & (z > 2) & (z < 3);
mostFreqClasses = 100;
tab = tabulate(y);
[nil, delc] = sort(tab(:, 2), 'descend');
delc = delc(mostFreqClasses + 1 : end);
for c = delc'
active(y == c) = false;
end
X = X(active, :);
y = y(active);
session = session(active);
z = z(active);
numClasses = max(y);
N = size(X, 1);
K = size(X, 2);
% normalization
X = zscore(X);
numEx = 10;
splits = [1; find(session(1 : end - 1) ~= session(2 : end)) + 1; N + 1];
ns = length(splits) - 1;
prec = [];
recall = [];
for ex = 1 : numEx
test = splits(round(ns * (ex - 1) / numEx) + 1) : ...
splits(round(ns * ex / numEx) + 1) - 1;
train = setdiff(1 : N, test);
% train = 1 : splits(round(ns * ex / numEx) + 1) - 1;
% test = splits(round(ns * ex / numEx) + 1) : ...
% splits(round(ns * (ex + 1) / numEx) + 1) - 1;
% NB classifier with multivariate Gaussians
Py = zeros(numClasses, 1);
Pxy = zeros(numClasses, K);
Sigma = zeros(K, K);
for c = 1 : numClasses
sub = train(y(train) == c);
if (~isempty(sub))
Py(c) = length(sub) / length(train);
Pxy(c, :) = mean(X(sub, :), 1);
Sigma = Sigma + Py(c) * cov(X(sub, :));
end
end
% Sigma = diag(diag(Sigma));
solver = 'SHT';
switch (solver)
case 'NB'
% NB inference
logp = repmat(log(Py)', N, 1);
for c = 1 : numClasses
if (Py(c) > 0)
logp(:, c) = log(Py(c)) + log(mvnpdf(X, Pxy(c, :), Sigma));
end
end
case 'SHT'
% sequential hypothesis testing
logp = zeros(N, numClasses);
for c = 1 : numClasses
if (Py(c) > 0)
logp(:, c) = log(mvnpdf(X, Pxy(c, :), Sigma));
end
end
nhyp = zeros(N, 1);
for i = 1 : N
if ((i == 1) || (session(i - 1) ~= session(i)))
logp(i, :) = logp(i, :) + log(Py');
nhyp(i) = 2;
else
logp(i, :) = logp(i, :) + logp(i - 1, :);
nhyp(i) = nhyp(i - 1) + 1;
end
end
logp = logp ./ repmat(nhyp, 1, numClasses);
end
% prediction
[conf, yp] = max(logp, [], 2);
conf = exp(conf) ./ sum(exp(logp), 2);
% evaluation
for i = 1 : 50
th = 1 - 1 / (1.33 ^ i);
sub = test(conf(test) > th);
prec(ex, i) = mean(y(sub) == yp(sub));
recall(ex, i) = length(sub) / length(test);
end
end
prec(isnan(prec)) = 1;
hold on;
plot(100 * mean(recall), 100 * mean(prec), '-ro', ...
'LineWidth', 1, 'MarkerSize', 4, 'MarkerFaceColor', 'w')
xlabel('Recall [%]');
ylabel('Precision [%]');
hold off;
pause
% A = X - Pxy(y, :);
% for k = 1 : 9
% subplot(3, 3, k);
% hist(A(:, k), -5 : 0.1 : 5);
% h = findobj(gca, 'Type', 'patch');
% set(h, 'FaceColor', [0.5, 1, 0.5], 'LineStyle', 'none')
% axis([-5, 5, 0, Inf]);
% title(sprintf('X_%i', k));
% end
|