From 028e3beb9d014b8daa0284ee3ed8c534bfa0e947 Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Fri, 2 Mar 2012 09:50:35 -0800
Subject: Results section

---
 experimental.tex          | 158 +++++++++++++++++++++++++++++++++++++++++++++-
 graphics/10fold-naive.pdf | Bin 0 -> 18441 bytes
 graphics/back.pdf         | Bin 0 -> 20427 bytes
 graphics/online-sht.pdf   | Bin 0 -> 37620 bytes
 graphics/var.pdf          | Bin 0 -> 22068 bytes
 references.bib            |   4 +-
 uniqueness.tex            |   2 +-
 7 files changed, 159 insertions(+), 5 deletions(-)
 create mode 100644 graphics/10fold-naive.pdf
 create mode 100644 graphics/back.pdf
 create mode 100644 graphics/online-sht.pdf
 create mode 100644 graphics/var.pdf

diff --git a/experimental.tex b/experimental.tex
index 982fcf7..388e628 100644
--- a/experimental.tex
+++ b/experimental.tex
@@ -55,6 +55,7 @@ are the runs where the average distance from the skeleton joints to
 the camera is increasing.
 
 \subsection{Experiment design}
+\label{sec:experiment-design}
 
 Several reductions are then applied to the data set to extract
 \emph{features} from the raw data. First, the lengths of 15 body parts
 are computed from the
@@ -82,7 +83,7 @@ with the same skeleton ID.
 \caption{Data set statistics. The right part of the table shows the
   average numbers for different intervals of $k$, the rank of a
   person in the ordering given by the number of frames}
-
+\label{tab:dataset}
 \begin{tabular}{|l|r||r|r|r|}
 \hline
 Number of people & 25 & $k\leq 5$ & $5\leq k\leq 20$ & $k\geq 20$\\
@@ -93,12 +94,165 @@ Number of runs & 244 & 18 & 8 & 4\\
 \hline
 \end{tabular}
 \end{center}
-\label{tab:dataset}
 \end{table}
 
 \subsection{Results}
 
+\paragraph{Offline setting.}
+
+The mixture of Gaussians model is evaluated on the whole data set
+using 10-fold cross-validation: the data set is partitioned into 10
+subsamples of equal size. For a given recall threshold, the algorithm
+is trained on 9 subsamples and tested on the remaining one. This is
+repeated for each of the 10 possible testing subsamples. Averaging
+the prediction rate over these 10 training-testing experiments yields
+the prediction rate for the chosen threshold.
+
+Figure~\ref{fig:mixture} shows the precision-recall plot as the
+threshold varies. Several curves are obtained for different group
+sizes: people are ordered by their number of frames, and all the
+frames belonging to someone beyond a given rank in this ordering are
+removed from the data set. The decrease in performance as the number
+of people in the data set grows can be explained by the overlap
+between skeleton profiles caused by the noise, as discussed in
+Section~\ref{sec:uniqueness}, but also by the very small number of
+runs available for the least frequently observed people, as seen in
+Table~\ref{tab:dataset}, which prevents proper training of the
+algorithm.
+
+\begin{figure}
+  \begin{center}
+    \includegraphics[width=0.80\textwidth]{graphics/10fold-naive.pdf}
+  \end{center}
+  \caption{Precision-Recall curve for the mixture of Gaussians model
+    with 10-fold cross-validation. The data set is restricted to the
+    $n$ most frequently observed people}
+  \label{fig:mixture}
+\end{figure}
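
To make the evaluation protocol above concrete, the following minimal sketch
shows one plausible implementation: a single Gaussian is fitted per person on
the training folds, each test frame is assigned to the most likely person, and
frames whose best log-likelihood falls below the threshold are rejected. The
array names, the scipy/scikit-learn helpers and the exact precision/recall
definitions are illustrative assumptions, not the code used for the paper.

    import numpy as np
    from scipy.stats import multivariate_normal
    from sklearn.model_selection import KFold

    def evaluate(X, y, threshold, n_folds=10, seed=0):
        """10-fold CV of a per-person Gaussian classifier with rejection.

        X: (n_frames, 9) feature array, y: person label of each frame.
        Assumes every person has several frames in each training fold.
        Returns (precision, recall) averaged over the folds."""
        precisions, recalls = [], []
        for train, test in KFold(n_folds, shuffle=True, random_state=seed).split(X):
            # Fit one multivariate Gaussian per person on the training frames.
            models = {p: multivariate_normal(X[train][y[train] == p].mean(axis=0),
                                             np.cov(X[train][y[train] == p], rowvar=False),
                                             allow_singular=True)
                      for p in np.unique(y[train])}
            people = np.array(list(models))
            # Log-likelihood of every test frame under every person's model.
            ll = np.array([models[p].logpdf(X[test]) for p in people])
            predicted = people[ll.argmax(axis=0)]
            accepted = ll.max(axis=0) > threshold       # reject uncertain frames
            correct = (predicted == y[test]) & accepted
            precisions.append(correct.sum() / max(accepted.sum(), 1))
            recalls.append(accepted.mean())             # fraction of frames classified
        return np.mean(precisions), np.mean(recalls)

Sweeping the threshold over a range of values would then trace out a curve
analogous to those of Figure~\ref{fig:mixture}.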
+
+\paragraph{Online setting.}
+
+Even though the previous evaluation is standard, it does not properly
+reflect reality. A real-life setting could be the following: the
+camera is placed at the entrance of a building. When a person enters
+the building, their identity is determined from the electronic key
+system and a new labeled run is added to the data set. The
+identification algorithm is then retrained on the augmented data set,
+and the newly obtained classifier can be deployed in the building.
+
+In this setting, the Sequential Hypothesis Testing (SHT) algorithm is
+more suitable than the algorithm used in the previous paragraph,
+because it accounts for the fact that a person's identity does not
+change across a run. The analysis is therefore performed by
+partitioning the data set into 10 subsamples of equal size. For a
+given threshold, the algorithm is trained and tested incrementally:
+trained on the first $k$ subsamples (in chronological order) and
+tested on the $(k+1)$-th subsample. Figure~\ref{fig:sequential} shows
+the precision-recall curve obtained by averaging the prediction rates
+of these incremental experiments.
+
+\begin{figure}
+  \begin{center}
+    \includegraphics[width=0.80\textwidth]{graphics/online-sht.pdf}
+  \end{center}
+  \caption{Precision-Recall curve for the sequential hypothesis
+    testing algorithm in the online setting. $n$ is the group size,
+    as in Figure~\ref{fig:mixture}}
+  \label{fig:sequential}
+\end{figure}
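
One way to read the sequential, run-level decision is sketched below:
per-person log-likelihoods are accumulated frame by frame and the test stops
as soon as the leading hypothesis is sufficiently ahead of the runner-up. The
stopping rule, the `models` mapping (one fitted Gaussian per person, as in the
previous sketch) and the treatment of undecided runs are assumptions rather
than the paper's implementation.

    import numpy as np

    def classify_run(run_frames, models, margin):
        """Sequential decision over one run: accumulate per-person
        log-likelihoods frame by frame and decide as soon as the best
        hypothesis leads the runner-up by `margin`; return None if the
        run ends undecided."""
        people = list(models)
        cumulative = np.zeros(len(people))
        for frame in run_frames:                   # frames in chronological order
            cumulative += [models[p].logpdf(frame) for p in people]
            ranked = np.sort(cumulative)
            if ranked[-1] - ranked[-2] > margin:   # enough evidence to stop early
                return people[int(np.argmax(cumulative))]
        return None                                # undecided run: no identity claimed

With such a rule, a larger margin trades recall (more undecided runs) for
precision, which is one way the trade-off shown in
Figure~\ref{fig:sequential} could arise.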
+
+\paragraph{Face recognition.}
+We then compare the performance of skeleton recognition with the
+performance of face recognition as provided by \textsf{face.com}
+\todo{REFERENCE NEEDED}. At the time of writing, this is the
+best-performing face recognition algorithm on the LFW data set
+\cite{face-com}.
+
+We use the publicly available REST API of \textsf{face.com} to perform
+face recognition on our data set: the training is done on half of the
+data and the testing on the remaining half. For comparison, the
+Gaussian mixture algorithm is run with the same training-testing
+partitioning of the data set. In this setting, the Sequential
+Hypothesis Testing algorithm is not relevant for the comparison,
+because \textsf{face.com} does not allow marking a sequence of frames
+as belonging to the same run. This additional information would be
+used by the SHT algorithm and would thus bias the results in favor of
+skeleton recognition.
+
+\todo{PLOT MISSING}
+
+\paragraph{People walking away from the camera.}
+
+Face recognition and skeleton recognition perform comparably in the
+previous setting \todo{is that really true?}. However, there are many
+cases where only skeleton recognition is possible. The most obvious
+one is when people are walking away from the camera. Returning to the
+raw data collected during the experiment, we manually label the runs
+of people walking away from the camera. In this case, it is harder to
+obtain the ground-truth labels, and some runs are dropped because the
+person cannot be recognized. Apart from that, the data set reduction
+is performed exactly as described in
+Section~\ref{sec:experiment-design}.
+
+\begin{figure}
+  \begin{center}
+    \includegraphics[width=0.80\textwidth]{graphics/back.pdf}
+  \end{center}
+  \caption{Precision-Recall curve for the sequential hypothesis
+    testing algorithm in the online setting with people walking away
+    from and toward the camera. All the people are included}
+  \label{fig:back}
+\end{figure}
+
+Figure~\ref{fig:back} compares the curve obtained in the online
+setting for people walking toward the camera with the curve obtained
+by running the same experiment on the data set of runs of people
+walking away from the camera. The two curves are essentially the
+same. However, one could argue that, since the two data sets are
+completely disjoint, the SHT algorithm is not learning the same
+profile for a person walking toward the camera and for a person
+walking away from the camera. Figure~\ref{fig:back2} shows the
+precision-recall curve when training on runs toward the camera and
+testing on runs away from the camera.
+
+\todo{PLOT NEEDED}
+
+\paragraph{Reducing the noise.} Potential improvements of the
+prediction rate of our algorithm are easy to anticipate. The algorithm
+relies on only 9 features, and Section~\ref{sec:uniqueness} shows that
+6 of these features alone are sufficient to perfectly distinguish two
+different skeletons at a low noise level. Therefore, the only source
+of classification error in our algorithm is the dispersion of the
+observed limb lengths away from the exact measurements.
+
+To simulate a possible reduction of the noise level, the data set is
+modified as follows: all the observations for a given person are
+homothetically contracted towards their average so as to divide their
+empirical variance by 2. Formally, if $x$ is an observation in the
+9-dimensional feature space for person $i$, and if $\bar{x}$ is the
+average of all the observations available for this person in the data
+set, then $x$ is replaced by $x'$ defined by:
+\begin{equation}
+  x' = \bar{x} + \frac{x-\bar{x}}{\sqrt{2}}
+\end{equation}
+We believe that reducing the noise variance by a factor of 2 is
+realistic given the relatively low resolution of the Kinect's infrared
+camera.
+
+Figure~\ref{fig:var} compares the Precision-Recall curve of
+Figure~\ref{fig:sequential} to the curve obtained by running the same
+experiment on the modified data set.
+
+\begin{figure}
+  \begin{center}
+    \includegraphics[width=0.80\textwidth]{graphics/var.pdf}
+  \end{center}
+  \caption{Precision-Recall curve for the sequential hypothesis
+    testing algorithm in the online setting for all the people with
+    and without halving the variance of the noise}
+  \label{fig:var}
+\end{figure}
 
 %%% Local Variables:
 %%% mode: latex
diff --git a/graphics/10fold-naive.pdf b/graphics/10fold-naive.pdf
new file mode 100644
index 0000000..50dc6f8
Binary files /dev/null and b/graphics/10fold-naive.pdf differ
diff --git a/graphics/back.pdf b/graphics/back.pdf
new file mode 100644
index 0000000..9d9c9e6
Binary files /dev/null and b/graphics/back.pdf differ
diff --git a/graphics/online-sht.pdf b/graphics/online-sht.pdf
new file mode 100644
index 0000000..be41e6d
Binary files /dev/null and b/graphics/online-sht.pdf differ
diff --git a/graphics/var.pdf b/graphics/var.pdf
new file mode 100644
index 0000000..e5ea58c
Binary files /dev/null and b/graphics/var.pdf differ
diff --git a/references.bib b/references.bib
index 07472fe..e90c173 100644
--- a/references.bib
+++ b/references.bib
@@ -1,4 +1,4 @@
-
+
 #@string{PROC = "Proc. "}
 @string{PROC = "Proceedings of the "}
@@ -271,7 +271,7 @@ month =	 {October}
 }
 
-@article{face.com,
+@article{face-com,
   author = 	 {Yaniv Taigman and Lior Wolf},
   title = 	 {Leveraging Billions of Faces to Overcome
                   Performance Barriers
diff --git a/uniqueness.tex b/uniqueness.tex
index 90ac40a..fc53bab 100644
--- a/uniqueness.tex
+++ b/uniqueness.tex
@@ -73,7 +73,7 @@ defined as:
 \caption{Receiver operating characteristic (true positive rate
   vs. false positive rate) for several standard deviations of the
   noise and for the state-of-the-art \emph{Associate-Predict} face
-  detection algorithm.}
+  detection algorithm}
 \label{fig:roc}
 \end{figure}
-- 
cgit v1.2.3-70-g09d2