summaryrefslogtreecommitdiffstats
path: root/data/combined/scriptSkeleton.m
blob: 9752c48b097b405d7adfb3610989f390761e1350 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
% Load the data matrix (one header row skipped). The commented variants read
% separate train/test CSVs, or take file names from argv() under Octave.
% trainD = dlmread('train_limbs.csv', ',', 1, 0);
% testD = dlmread('test_limbs.csv', ',', 1, 0);
% trainD = dlmread(argv(){1}, ',', 1, 0);
% testD = dlmread(argv(){2}, ',', 1, 0);
% D = [trainD; testD];
% D = dlmread(argv(){1}, ',', 1, 0);
D = dlmread('mean.csv', ',', 1, 0);
X = D(:, 6 : end);  % feature columns start at column 6
y = D(:, 2);        % class labels
% NOTE(review): session is read from the SAME column as y, so session ids
% coincide with class labels. Confirm column 2 really is the session id and
% not e.g. column 1 or 3 — this drives both CV splitting and SHT resets.
session = D(:, 2);
z = D(:, 5);        % filtered to the range (2, 3) below — presumably a
                    % depth/distance measurement; confirm against the CSV schema

% remove missing values and outliers:
% drop rows with any missing feature (coded as -1) or z outside (2, 3)
active = all(X ~= -1, 2);
active = active & (z > 2) & (z < 3);

% keep only the mostFreqClasses most frequent class labels
mostFreqClasses = 100;
tab = tabulate(y);                         % rows: [value, count, percent]
[~, order] = sort(tab(:, 2), 'descend');   % tab rows by descending frequency
rareRows = order(mostFreqClasses + 1 : end);
% Map rows back to class values via tab(:, 1) instead of assuming the row
% index equals the label, and deactivate all rare-class samples at once.
active(ismember(y, tab(rareRows, 1))) = false;

X = X(active, :);
y = y(active);
session = session(active);
z = z(active);
numClasses = max(y);
N = size(X, 1);
K = size(X, 2);

% normalization: zero mean, unit variance per feature column
X = zscore(X);

numEx = 10;  % number of cross-validation folds (blocks of whole sessions)

% Session boundaries: splits(k) is the index where the k-th session starts,
% with sentinels at both ends, so splits(k) : splits(k + 1) - 1 spans it.
splits = [1; find(session(1 : end - 1) ~= session(2 : end)) + 1; N + 1];
ns = length(splits) - 1;

% Preallocate the fold-by-threshold result matrices (the evaluation loop
% fills one row per fold over 50 confidence thresholds); growing them
% inside the loop would reallocate on every assignment.
prec = zeros(numEx, 50);
recall = zeros(numEx, 50);
% Cross-validation over sessions: fold ex holds out a contiguous block of
% roughly ns / numEx whole sessions as the test set, trains on the rest.
for ex = 1 : numEx
  test = splits(round(ns * (ex - 1) / numEx) + 1) : ...
    splits(round(ns * ex / numEx) + 1) - 1;
  train = setdiff(1 : N, test);
%   train = 1 : splits(round(ns * ex / numEx) + 1) - 1;
%   test = splits(round(ns * ex / numEx) + 1) : ...
%     splits(round(ns * (ex + 1) / numEx) + 1) - 1;
  
  % NB classifier with multivariate Gaussians:
  % class priors Py, per-class feature means Pxy, and a single covariance
  % Sigma pooled over classes weighted by the priors (LDA-style shared
  % covariance). Classes absent from the training fold keep Py(c) = 0.
  Py = zeros(numClasses, 1);
  Pxy = zeros(numClasses, K);
  Sigma = zeros(K, K);
  for c = 1 : numClasses
    sub = train(y(train) == c);
    if (~isempty(sub))
      Py(c) = length(sub) / length(train);
      Pxy(c, :) = mean(X(sub, :), 1);
      Sigma = Sigma + Py(c) * cov(X(sub, :));
    end
  end
  % Sigma = diag(diag(Sigma));

  solver = 'SHT';
  switch (solver)
    case 'NB'
      % NB inference: per-sample log joint log P(y=c) + log p(x | y=c).
      % Untrained classes keep log(0) = -Inf from the prior initialization.
      logp = repmat(log(Py)', N, 1);
      for c = 1 : numClasses
        if (Py(c) > 0)
          logp(:, c) = log(Py(c)) + log(mvnpdf(X, Pxy(c, :), Sigma));
        end
      end

    case 'SHT'
      % sequential hypothesis testing: per-class log-likelihoods are
      % accumulated over consecutive samples of the same session.
      logp = zeros(N, numClasses);
      for c = 1 : numClasses
        if (Py(c) > 0)
          logp(:, c) = log(mvnpdf(X, Pxy(c, :), Sigma));
        end
      end

      % Running sums: at each session start the sum is reset and the log
      % prior added; nhyp counts the log terms in each running sum so the
      % scores can be length-normalized below.
      nhyp = zeros(N, 1);
      for i = 1 : N
        if ((i == 1) || (session(i - 1) ~= session(i)))
          logp(i, :) = logp(i, :) + log(Py');
          nhyp(i) = 2;
        else
          logp(i, :) = logp(i, :) + logp(i - 1, :);
          nhyp(i) = nhyp(i - 1) + 1;
        end
      end
      % Divide by the term count (geometric mean in probability space) so
      % scores are comparable across different accumulation lengths.
      logp = logp ./ repmat(nhyp, 1, numClasses);
  end
  
  % prediction: MAP class per sample, confidence = softmax-normalized
  % posterior of the winning class
  [conf, yp] = max(logp, [], 2);
  conf = exp(conf) ./ sum(exp(logp), 2);
  
  % evaluation on the held-out samples over 50 confidence thresholds
  % th = 1 - 1.33^-i, approaching 1 geometrically; prec is NaN when no
  % test sample clears the threshold (handled after the loop).
  for i = 1 : 50
    th = 1 - 1 / (1.33 ^ i);
    sub = test(conf(test) > th);
    prec(ex, i) = mean(y(sub) == yp(sub));
    recall(ex, i) = length(sub) / length(test);
  end
end
% Undefined precision (no test sample above the threshold, i.e. zero
% recall) is conventionally counted as perfect.
prec(isnan(prec)) = 1;

% Precision-recall curve averaged over folds, in percent.
avgRecall = 100 * mean(recall);
avgPrec = 100 * mean(prec);
hold on;
plot(avgRecall, avgPrec, '-ro', 'LineWidth', 1, ...
  'MarkerSize', 4, 'MarkerFaceColor', 'w')
xlabel('Recall [%]');
ylabel('Precision [%]');
hold off;
pause

% A = X - Pxy(y, :);
% for k = 1 : 9
%   subplot(3, 3, k);
%   hist(A(:, k), -5 : 0.1 : 5);
%   h = findobj(gca, 'Type', 'patch');
%   set(h, 'FaceColor', [0.5, 1, 0.5], 'LineStyle', 'none')
%   axis([-5, 5, 0, Inf]);
%   title(sprintf('X_%i', k));
% end