From 4777da8bcee956f100dcce83c8d0be565fc693cc Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Mon, 5 Mar 2012 02:17:18 -0800
Subject: Some corrections.

Address some of Brano's comments.
Plots are now square.
Proper separation of captions.
---
 abstract.tex                  |   6 +--
 algorithm.tex                 |   5 +-
 conclusion.tex                |   4 +-
 data/combined/graphs/plots.py |   9 ++++-
 data/pair-matching/roc.py     |   4 +-
 experimental.tex              | 108 ++++++++++++++++++++++-----------------
 graphics/back.pdf             | Bin 16456 -> 15441 bytes
 graphics/face.pdf             | Bin 15009 -> 13860 bytes
 graphics/frames.pdf           | Bin 10989 -> 10913 bytes
 graphics/limbs.pdf            | Bin 48019 -> 50499 bytes
 graphics/offline-nb.pdf       | Bin 17230 -> 17049 bytes
 graphics/offline-sht.pdf      | Bin 20555 -> 18640 bytes
 graphics/online-nb.pdf        | Bin 18002 -> 17585 bytes
 graphics/online-sht.pdf       | Bin 24737 -> 20768 bytes
 graphics/roc.pdf              | Bin 19394 -> 18167 bytes
 graphics/var.pdf              | Bin 16137 -> 14690 bytes
 uniqueness.tex                |   2 +-
 17 files changed, 80 insertions(+), 58 deletions(-)

diff --git a/abstract.tex b/abstract.tex
index 7aadce0..9386d11 100644
--- a/abstract.tex
+++ b/abstract.tex
@@ -1,11 +1,11 @@
 \begin{abstract}
   This paper explores a novel approach for person recognition based
   on skeletal measurements. After showing that exact measurements allow
-  for exact recognition, we study two algorithmic approaches for
+  for accurate recognition, we study two algorithmic approaches for
   identification in case of approximate measurements. A real-life
   experiment with 25 people and measurements obtained from the Kinect
   range camera gives us promising results and comparison with state of
-  the art facial recognition and validates the viability of
-  skeleton-base identification.
+  the art facial recognition validates the viability of
+  skeleton-based identification.
 \end{abstract}

diff --git a/algorithm.tex b/algorithm.tex
index 38e0c39..b3e7648 100644
--- a/algorithm.tex
+++ b/algorithm.tex
@@ -28,7 +28,10 @@ In this work, we use the mixture of Gaussians model for skeleton recognition. Sk
 \begin{figure}[t]
   \centering
   \includegraphics{graphics/limbs.pdf}
-  \caption{Histograms of differences between 9 skeleton measurements $x_k$ (Section~\ref{sec:experiment}) and their expectation given the class $y$.}
+  \caption{Histograms of differences between 9 skeleton measurements
+    $x_k$ (Section~\ref{sec:experiment}) and their expectation given the
+    class $y$. In red, the p.d.f. of a normal distribution with mean and
+    variance equal to the empirical mean and variance of the measurement.}
   \label{fig:error marginals}
 \end{figure}
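The amended caption describes each histogram in graphics/limbs.pdf as overlaid, in red, with the normal p.d.f. whose mean and variance match the empirical ones. For reference, a minimal matplotlib sketch of that overlay (not part of the patch; the dev array is a hypothetical stand-in for one limb's deviations from its per-person mean, and density= assumes a reasonably recent matplotlib):

import numpy as np
import matplotlib.pyplot as plt

dev = np.random.normal(0.0, 0.01, size=2000)  # placeholder deviations

mu, sigma = dev.mean(), dev.std()
plt.hist(dev, bins=50, density=True, color="0.75")  # empirical histogram

# Normal p.d.f. with the empirical mean and variance, drawn in red.
xs = np.linspace(dev.min(), dev.max(), 200)
pdf = np.exp(-0.5 * ((xs - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
plt.plot(xs, pdf, color="red")

plt.savefig("limbs-sketch.pdf", bbox_inches="tight", pad_inches=0.05)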
diff --git a/conclusion.tex b/conclusion.tex
index 7a3e9ed..c54bc9a 100644
--- a/conclusion.tex
+++ b/conclusion.tex
@@ -6,8 +6,8 @@ measurements are unique enough to distinguish individuals using a dataset of
 real skeletons. We present a probabilistic model for recognition, and extend
 it to take advantage of consecutive frames. Finally we test our model by
 collecting data for a week in a real-world setting. Our results show that
-skeleton recognition performs close to face recognition, and it can be used in
-many more scenarios.
+skeleton recognition performs close to face recognition, and it can be
+used in other scenarios.
 
 However, the Kinect SDK does have some limitations. First of all, the Kinect
 SDK can only fit two skeletons at a time. Therefore, when a group of people

diff --git a/data/combined/graphs/plots.py b/data/combined/graphs/plots.py
index 31e9638..fb2628c 100755
--- a/data/combined/graphs/plots.py
+++ b/data/combined/graphs/plots.py
@@ -16,7 +16,7 @@ mpl.rcParams['axes.linewidth'] = 0.5
 mpl.rcParams['figure.subplot.hspace'] = 0.4
 mpl.rcParams['figure.subplot.wspace'] = 0.4
 legend_width = 0.2
-#mpl.rcParams.update(params)
+
 out_dir = sys.argv[1]
 
 #limbs distribution
@@ -64,6 +64,7 @@ plt.ylabel("Precision [%]")
 leg =plt.legend(loc="best")
 leg.get_frame().set_linewidth(legend_width)
 plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
 plt.savefig(os.path.join(out_dir,"offline-nb.pdf"),bbox_inches="tight",pad_inches=0.05)
 
 #10-fold, SHT
@@ -76,6 +77,7 @@ plt.ylabel("Precision [%]")
 leg = plt.legend(loc="lower left")
 leg.get_frame().set_linewidth(legend_width)
 plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
 plt.savefig(os.path.join(out_dir,"offline-sht.pdf"),bbox_inches="tight",pad_inches=0.05)
 
 #online,NB
@@ -88,6 +90,7 @@ plt.ylabel("Precision [%]")
 leg = plt.legend(loc="best")
 leg.get_frame().set_linewidth(legend_width)
 plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
 plt.savefig(os.path.join(out_dir,"online-nb.pdf"),bbox_inches="tight",pad_inches=0.05)
 
 #online,SHT
@@ -100,6 +103,7 @@ plt.ylabel("Precision [%]")
 leg = plt.legend(loc="best")
 leg.get_frame().set_linewidth(legend_width)
 plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
 plt.savefig(os.path.join(out_dir,"online-sht.pdf"),bbox_inches="tight",pad_inches=0.05)
 
 #face
@@ -113,6 +117,7 @@ plt.ylabel("Precision [%]")
 leg = plt.legend(loc="best")
 leg.get_frame().set_linewidth(legend_width)
 plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
 plt.savefig(os.path.join(out_dir,"face.pdf"),bbox_inches="tight",pad_inches=0.05)
 
 #back
@@ -128,6 +133,7 @@ plt.ylabel("Precision [%]")
 leg = plt.legend(loc="best")
 leg.get_frame().set_linewidth(legend_width)
 plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
 plt.savefig(os.path.join(out_dir,"back.pdf"),bbox_inches="tight",pad_inches=0.05)
 
 #variance-reduction
@@ -141,4 +147,5 @@ plt.ylabel("Precision [%]")
 leg = plt.legend(loc="best")
 leg.get_frame().set_linewidth(legend_width)
 plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
 plt.savefig(os.path.join(out_dir,"var.pdf"),bbox_inches="tight",pad_inches=0.05)

diff --git a/data/pair-matching/roc.py b/data/pair-matching/roc.py
index 7396ab4..c0d7d83 100755
--- a/data/pair-matching/roc.py
+++ b/data/pair-matching/roc.py
@@ -51,7 +51,7 @@ if __name__ == "__main__":
     indices = [i for i in range(ap.shape[0]) if ap[i,1]<0.1]
     ap_false = ap[:,1][indices]
     ap_true = ap[:,0][indices]
-    plt.plot(ap_false,ap_true,label="Face recognition")
+    plt.plot(100*ap_false,100*ap_true,label="Face recognition")
     plt.xlabel("False positive rate [%]")
     plt.ylabel("True positive rate [%]")
     np.random.seed()
@@ -77,7 +77,7 @@ if __name__ == "__main__":
         false_pos = false_pos[indices]
         true_pos = np.array(true_pos)
         true_pos = true_pos[indices]
-        plt.plot(false_pos,true_pos,label="$\sigma$ = "+str(s))
+        plt.plot(100*false_pos,100*true_pos,label=r"$\sigma$ = "+str(s))
     leg = plt.legend(loc="lower right")
     leg.get_frame().set_linewidth(legend_width)
     plt.savefig("roc.pdf",bbox_inches="tight",pad_inches=0.05)
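The two plotting changes above are small but easy to misread. The factors of 100 convert rates in [0, 1] to percentages so the curves match the "[%]" axis labels, and set_aspect(2) is what makes the plots square: matplotlib's aspect is measured in y-data-units per x-data-unit, and these axes span 100 units in x but only 50 in y. A standalone sketch (not part of the patch; the rates are placeholder data):

import matplotlib.pyplot as plt

# Placeholder rates in [0, 1]; the real curves come from the experiments.
false_pos = [0.0, 0.05, 0.10, 1.0]
true_pos = [0.0, 0.80, 0.95, 1.0]

fig, ax = plt.subplots()
ax.plot([100 * f for f in false_pos], [100 * t for t in true_pos])  # rates -> %
ax.axis([0, 100, 50, 100])  # x spans 100 data units, y spans 50
ax.set_aspect(2)  # box height/width = 2 * (50 / 100) = 1, i.e. a square box
fig.savefig("aspect-demo.pdf", bbox_inches="tight", pad_inches=0.05)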
diff --git a/experimental.tex b/experimental.tex
index a0c98ae..d46270a 100644
--- a/experimental.tex
+++ b/experimental.tex
@@ -100,13 +100,14 @@ the same ID, it means that the skeleton-fitting algorithm was able to detect
 the skeleton in a contiguous way.
 This allows us to define the concept of a \emph{run}: a sequence of
 frames with the same skeleton ID.
 
-We perform five experiments. First, we test the performance of skeleton
-recognition using traditional 10-fold cross validation, to represent an offline
-setting. Second, we run our algorithms in an online setting by training and
-testing the data over time. Third, we pit skeleton recognition against the
-state-of-the-art in face recognition. Next, we test how our solution performs
-when people are walking away from the camera. Finally, we study what happens
-if the noise from the Kinect is reduced.
+We perform five experiments. First, we test the performance of
+skeleton recognition using traditional 10-fold cross validation, to
+represent an offline learning setting. Second, we run our algorithms
+in an online learning setting by training and testing on the data over
+time. Third, we pit skeleton recognition against the state-of-the-art
+in face recognition. Next, we test how our solution performs when
+people are walking away from the camera. Finally, we study what
+happens if the noise from the Kinect is reduced.
 
 %\begin{table}
 %\begin{center}
@@ -131,7 +132,7 @@ if the noise from the Kinect is reduced.
 \begin{figure}[t]
 \begin{center}
 \includegraphics[]{graphics/frames.pdf}
 \end{center}
 \vspace{-1.5\baselineskip}
-\caption{Distribution of the frame ratio of each individual in the
+\caption{Distribution of the frequency of each individual in the
 data set}
 \label{fig:frames}
 \end{figure}
@@ -139,22 +140,23 @@ if the noise from the Kinect is reduced.
 \subsection{Offline learning setting}
 \label{sec:experiment:offline}
 
-In the first experiment, we study the accuracy of skeleton recognition using
-10-fold cross validation. The data set is partitioned into 10 continuous time
-sequences of equal size. For a given recall threshold, the algorithm is trained
-on 9 continuous time sequences and trained on the last one. This is repeated
-for the 10 possible testing subsamples. Averaging the prediction rate over
-these 10 training-testing experiments yields the prediction rate for the chosen
-threshold. We test the mixture of Gaussians (MoG) and sequential hypothesis
-testing (SHT) models, and find that SHT generally performs better than MoG, and
-that accuracy increases as group size decreases.
+In the first experiment, we study the accuracy of skeleton recognition
+using 10-fold cross validation. The data set is partitioned into 10
+continuous time sequences of equal size. For a given recall threshold,
+the algorithm is trained on 9 sequences and tested on the last
+one. This is repeated for all 10 possible testing sequences. Averaging
+the prediction rate over these 10 training-testing experiments yields
+the prediction rate for the chosen threshold. We test the mixture of
+Gaussians (MoG) and sequential hypothesis testing (SHT) models, and
+find that SHT generally performs better than MoG, and that accuracy
+increases as group size decreases.
 
 \fref{fig:offline} shows the precision-recall plot as the threshold varies.
-Both algrithms perform better than three times the majority class baseline of
+Both algorithms perform three times better than the majority class baseline of
 15\% with a recall of 100\% on all people. Several curves are obtained for
 different group sizes: people are ordered based on their frequency of
-appearance (\fref{fig:frames}, and all the frames belonging to people beyond a
+appearance (\fref{fig:frames}), and all the frames belonging to people beyond a
 given rank in this ordering are removed.
 The decrease of performance when increasing the number of people in the
 data set can be explained by the overlaps between skeleton profiles due
 to the noise, as discussed in
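For concreteness, here is a sketch of the 10-fold protocol described in the hunk above (not part of the patch): the folds are contiguous time sequences rather than random subsets, since consecutive frames are strongly correlated. frames, labels, fit, and predict are hypothetical stand-ins for the paper's data and classifier.

import numpy as np

def ten_fold_prediction_rate(frames, labels, fit, predict, n_folds=10):
    # Split the chronologically ordered frames into contiguous blocks.
    folds = np.array_split(np.arange(len(frames)), n_folds)
    rates = []
    for k in range(n_folds):
        test = folds[k]
        train = np.concatenate([folds[j] for j in range(n_folds) if j != k])
        model = fit(frames[train], labels[train])
        rates.append(np.mean(predict(model, frames[test]) == labels[test]))
    return np.mean(rates)  # average prediction rate over the 10 runs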
@@ -168,7 +170,7 @@ permit a proper training of the algorithm.
 \subfloat[Naive Bayes]{
 \includegraphics[]{graphics/offline-nb.pdf}
 \label{fig:offline:nb}
 }
-\subfloat[Sequential Hypothesis Learning]{
+\subfloat[Sequential Hypothesis Testing]{
 \includegraphics[]{graphics/offline-sht.pdf}
 \label{fig:offline:sht}
 }
@@ -200,13 +202,13 @@ augmented data set, and the newly obtained classifier can be deployed in the
 building.
 
 In this setting, the sequential hypothesis testing (SHT) algorithm is more
-suitable than the algorithm used in the previous paragraph, because it
+suitable than the algorithm used in Section~\ref{sec:experiment:offline}, because it
 accounts for the fact that a person identity does not change
 across a run. The analysis is therefore performed by partitioning the dataset
-into 10 subsamples of equal size. For a given threshold, the algorithm
+into 10 time sequences of equal size. For a given threshold, the algorithm
 is trained and tested incrementally: trained on the first $k$
-subsamples (in the chronological order) and tested on the $(k+1)$-th
-subsample. \fref{fig:online} shows the prediction-recall
+sequences (in the chronological order) and tested on the $(k+1)$-th
+sequence. \fref{fig:online} shows the precision-recall
 curve when averaging the prediction rate of the 10 incremental
 experiments.
@@ -220,17 +222,22 @@ experiments.
 \begin{center}
 \includegraphics[width=0.49\textwidth]{graphics/online-sht.pdf}
 \end{center}
-  \label{fig:online:sht}
-  \vspace{-1.5\baselineskip}
-  \caption{Results for the online setting, where $n_p$ is the size of
-    the group as in Figure~\ref{fig:offline}}
-  \label{fig:online}
 }
 \parbox[t]{0.49\linewidth}{
 \begin{center}
 \includegraphics[width=0.49\textwidth]{graphics/face.pdf}
 \end{center}
-  \vspace{-1.5\baselineskip}
+}
+\end{figure}
+\begin{figure}
+\vspace{-1.5\baselineskip}
+\parbox[t]{0.48\linewidth}{
+  \caption{Results for the online setting, where $n_p$ is the size of
+    the group as in Figure~\ref{fig:offline}}
+  \label{fig:online}
+}
+\hspace{0.02\linewidth}
+\parbox[t]{0.48\linewidth}{
 \caption{Results for face recognition versus skeleton recognition}
 \label{fig:face}
 }
 \end{figure}
@@ -259,21 +266,27 @@ we discuss in the next experiment.
 
 \begin{figure}[t]
 \parbox[t]{0.49\linewidth}{
-  \begin{center}
-    \includegraphics[width=0.49\textwidth]{graphics/back.pdf}
-  \end{center}
-  \vspace{-1.5\baselineskip}
-  \caption{Results with people walking away from and toward the camera}
-  \label{fig:back}
+\begin{center}
+  \includegraphics[width=0.49\textwidth]{graphics/back.pdf}
+\end{center}
 }
 \parbox[t]{0.49\linewidth}{
-  \begin{center}
-    \includegraphics[width=0.49\textwidth]{graphics/var.pdf}
-  \end{center}
-  \vspace{-1.5\baselineskip}
-  \caption{Results with and without halving the variance of the noise}
-  \label{fig:var}
-  }
+\begin{center}
+  \includegraphics[width=0.49\textwidth]{graphics/var.pdf}
+\end{center}
+}
+\end{figure}
+\begin{figure}
+\vspace{-1.5\baselineskip}
+\parbox[t]{0.48\linewidth}{
+\caption{Results with people walking away from and toward the camera}
+\label{fig:back}
+}
+\hspace{0.02\linewidth}
+\parbox[t]{0.48\linewidth}{
+\caption{Results with and without halving the variance of the noise}
+\label{fig:var}
+}
 \end{figure}
 
 \subsection{Walking away}
@@ -323,12 +336,11 @@ the Kinect.
 %the observed limbs' lengths away from the exact measurements.
 To simulate a reduction of the noise level, the data set is modified as
 follows: we compute the average profile of each person, and for each frame we
-divide the empirical variance from the average by 2. Formally, if $\bx$ is an
-observation in the 9-dimensional feature space for the person $i$, and if
-$\bar{\bx}$ is the average of all the observations available for this person in
-the data set, then $\bx$ is replaced by $\bx'$ defined by:
+halve the variance of the deviation from the average. Formally, using
+the same notation as in Section~\ref{sec:mixture of Gaussians}, each
+observation $\bx_i$ is replaced by $\bx_i'$ defined by:
 \begin{equation}
-  \bx' = \bar{\bx} + \frac{\bx-\bar{\bx}}{\sqrt{2}}
+  \bx_i' = \bar{\bx}_{y_i} + \frac{\bx_i-\bar{\bx}_{y_i}}{\sqrt{2}}
 \end{equation}
 We believe that a reducing factor of 2 for the noise's variance is
 realistic given the relative low resolution of the Kinect's infrared
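The amended hunk above shrinks each observation toward its person's average profile; dividing the deviation by sqrt(2) is what halves its variance, consistent with the surrounding text and the caption of fig:var. A sketch of the transformation (not part of the patch; frames and labels are hypothetical stand-ins for the per-frame 9-dimensional measurements and the person identities):

import numpy as np

def halve_noise_variance(frames, labels):
    # x' = xbar + (x - xbar) / sqrt(2), per person, so that the variance
    # of the deviation from the average profile is divided by 2.
    reduced = frames.astype(float)
    for person in np.unique(labels):
        mask = labels == person
        xbar = frames[mask].mean(axis=0)  # average profile of this person
        reduced[mask] = xbar + (frames[mask] - xbar) / np.sqrt(2)
    return reduced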
diff --git a/graphics/back.pdf b/graphics/back.pdf
index 7768660..848522d 100644
Binary files a/graphics/back.pdf and b/graphics/back.pdf differ
diff --git a/graphics/face.pdf b/graphics/face.pdf
index 58516c2..8dde124 100644
Binary files a/graphics/face.pdf and b/graphics/face.pdf differ
diff --git a/graphics/frames.pdf b/graphics/frames.pdf
index b096475..ffbd79f 100644
Binary files a/graphics/frames.pdf and b/graphics/frames.pdf differ
diff --git a/graphics/limbs.pdf b/graphics/limbs.pdf
index c6cd9e6..7f98107 100644
Binary files a/graphics/limbs.pdf and b/graphics/limbs.pdf differ
diff --git a/graphics/offline-nb.pdf b/graphics/offline-nb.pdf
index 89422cb..08431d3 100644
Binary files a/graphics/offline-nb.pdf and b/graphics/offline-nb.pdf differ
diff --git a/graphics/offline-sht.pdf b/graphics/offline-sht.pdf
index b51ccc5..dcdce69 100644
Binary files a/graphics/offline-sht.pdf and b/graphics/offline-sht.pdf differ
diff --git a/graphics/online-nb.pdf b/graphics/online-nb.pdf
index 95c1d7f..7a0dd4a 100644
Binary files a/graphics/online-nb.pdf and b/graphics/online-nb.pdf differ
diff --git a/graphics/online-sht.pdf b/graphics/online-sht.pdf
index 3c42c10..72b0a71 100644
Binary files a/graphics/online-sht.pdf and b/graphics/online-sht.pdf differ
diff --git a/graphics/roc.pdf b/graphics/roc.pdf
index 8e25ab7..ae0c809 100644
Binary files a/graphics/roc.pdf and b/graphics/roc.pdf differ
diff --git a/graphics/var.pdf b/graphics/var.pdf
index 6fdb0ae..a79992f 100644
Binary files a/graphics/var.pdf and b/graphics/var.pdf differ
diff --git a/uniqueness.tex b/uniqueness.tex
index 35f9a37..d3b4822 100644
--- a/uniqueness.tex
+++ b/uniqueness.tex
@@ -24,7 +24,7 @@ on these sets of pairs. The average of the false-positive rates and
 the true-positive rates across the 10 experiments for a given
 threshold gives one operating point on the receiver operating
 characteristic (ROC) curve (Figure~\ref{fig:roc}). Note that in this
-benchmark the identity information of the individuals appearing in the
+benchmark, the identity information of the individuals appearing in the
 pairs is not available, which means that the algorithms cannot form
 additional image pairs from the input data. This is referred to as the
 \emph{image-restricted} setting in the LFW benchmark.
--
cgit v1.2.3-70-g09d2