author     Thibaut Horel <thibaut.horel@gmail.com>   2012-03-05 02:17:18 -0800
committer  Thibaut Horel <thibaut.horel@gmail.com>   2012-03-05 02:17:18 -0800
commit     4777da8bcee956f100dcce83c8d0be565fc693cc (patch)
tree       2fc99a72b74bd5d91fb54986f5b1c5ea8f160f4a
parent     c7b48885def36f3965be91c76ec1f54a5d9bf27a (diff)
download   kinect-4777da8bcee956f100dcce83c8d0be565fc693cc.tar.gz
Some corrections. Address some of Brano's comments.
Plots are now square. Proper separation of captions.
-rw-r--r--  abstract.tex                   |   6
-rw-r--r--  algorithm.tex                  |   5
-rw-r--r--  conclusion.tex                 |   4
-rwxr-xr-x  data/combined/graphs/plots.py  |   9
-rwxr-xr-x  data/pair-matching/roc.py      |   4
-rw-r--r--  experimental.tex               | 108
-rw-r--r--  graphics/back.pdf              | bin 16456 -> 15441 bytes
-rw-r--r--  graphics/face.pdf              | bin 15009 -> 13860 bytes
-rw-r--r--  graphics/frames.pdf            | bin 10989 -> 10913 bytes
-rw-r--r--  graphics/limbs.pdf             | bin 48019 -> 50499 bytes
-rw-r--r--  graphics/offline-nb.pdf        | bin 17230 -> 17049 bytes
-rw-r--r--  graphics/offline-sht.pdf       | bin 20555 -> 18640 bytes
-rw-r--r--  graphics/online-nb.pdf         | bin 18002 -> 17585 bytes
-rw-r--r--  graphics/online-sht.pdf        | bin 24737 -> 20768 bytes
-rw-r--r--  graphics/roc.pdf               | bin 19394 -> 18167 bytes
-rw-r--r--  graphics/var.pdf               | bin 16137 -> 14690 bytes
-rw-r--r--  uniqueness.tex                 |   2
17 files changed, 80 insertions, 58 deletions
diff --git a/abstract.tex b/abstract.tex
index 7aadce0..9386d11 100644
--- a/abstract.tex
+++ b/abstract.tex
@@ -1,11 +1,11 @@
\begin{abstract}
This paper explores a novel approach for person recognition based on
skeletal measurements. After showing that exact measurements allow
- for exact recognition, we study two algorithmic approaches for
+ for accurate recognition, we study two algorithmic approaches for
identification in case of approximate measurements. A real-life
experiment with 25 people and measurements obtained from the Kinect
range camera gives us promising results and comparison with state of
- the art facial recognition and validates the viability of
- skeleton-base identification.
+ the art facial recognition validates the viability of
+ skeleton-based identification.
\end{abstract}
diff --git a/algorithm.tex b/algorithm.tex
index 38e0c39..b3e7648 100644
--- a/algorithm.tex
+++ b/algorithm.tex
@@ -28,7 +28,10 @@ In this work, we use the mixture of Gaussians model for skeleton recognition. Sk
\begin{figure}[t]
\centering
\includegraphics{graphics/limbs.pdf}
- \caption{Histograms of differences between 9 skeleton measurements $x_k$ (Section~\ref{sec:experiment}) and their expectation given the class $y$.}
+ \caption{Histograms of differences between 9 skeleton measurements
+ $x_k$ (Section~\ref{sec:experiment}) and their expectation given the
+ class $y$. In red, the p.d.f. of a normal distribution with mean and
+ variance equal to the empirical mean and variance of the measurement.}
\label{fig:error marginals}
\end{figure}
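
The rewritten caption adds a red normal p.d.f., parameterized by the empirical mean and variance, on top of each histogram. As a hedged sketch (none of this is the repository's plotting code; `deviations` is a hypothetical 1-D array of $x_k$ minus its class-conditional expectation), such an overlay can be produced with:

# Hedged sketch, not code from this repository: overlay the normal p.d.f.
# described in the caption on a histogram of measurement deviations.
# `deviations` is a hypothetical array of x_k - E[x_k | y] values.
import numpy as np
import matplotlib.pyplot as plt

deviations = np.random.normal(0.0, 0.02, size=1000)  # placeholder data

mu, sigma = deviations.mean(), deviations.std()
xs = np.linspace(deviations.min(), deviations.max(), 200)
pdf = np.exp(-0.5 * ((xs - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))

plt.hist(deviations, bins=30, density=True)
plt.plot(xs, pdf, color="red")  # red curve: N(mu, sigma^2) p.d.f.
plt.savefig("limbs-sketch.pdf", bbox_inches="tight", pad_inches=0.05)
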
diff --git a/conclusion.tex b/conclusion.tex
index 7a3e9ed..c54bc9a 100644
--- a/conclusion.tex
+++ b/conclusion.tex
@@ -6,8 +6,8 @@ measurements are unique enough to distinguish individuals using a dataset of
real skeletons. We present a probabilistic model for recognition, and extend
it to take advantage of consecutive frames. Finally we test our model by
collecting data for a week in a real-world setting. Our results show that
-skeleton recognition performs close to face recognition, and it can be used in
-many more scenarios.
+skeleton recognition performs close to face recognition, and it can be
+used in other scenarios.
However, the Kinect SDK does have some limitations. First of all, the Kinect
SDK can only fit two skeletons at a time. Therefore, when a group of people
diff --git a/data/combined/graphs/plots.py b/data/combined/graphs/plots.py
index 31e9638..fb2628c 100755
--- a/data/combined/graphs/plots.py
+++ b/data/combined/graphs/plots.py
@@ -16,7 +16,7 @@ mpl.rcParams['axes.linewidth'] = 0.5
mpl.rcParams['figure.subplot.hspace'] = 0.4
mpl.rcParams['figure.subplot.wspace'] = 0.4
legend_width = 0.2
-#mpl.rcParams.update(params)
+
out_dir = sys.argv[1]
#limbs distribution
@@ -64,6 +64,7 @@ plt.ylabel("Precision [%]")
leg =plt.legend(loc="best")
leg.get_frame().set_linewidth(legend_width)
plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
plt.savefig(os.path.join(out_dir,"offline-nb.pdf"),bbox_inches="tight",pad_inches=0.05)
#10-fold, SHT
@@ -76,6 +77,7 @@ plt.ylabel("Precision [%]")
leg = plt.legend(loc="lower left")
leg.get_frame().set_linewidth(legend_width)
plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
plt.savefig(os.path.join(out_dir,"offline-sht.pdf"),bbox_inches="tight",pad_inches=0.05)
#online,NB
@@ -88,6 +90,7 @@ plt.ylabel("Precision [%]")
leg = plt.legend(loc="best")
leg.get_frame().set_linewidth(legend_width)
plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
plt.savefig(os.path.join(out_dir,"online-nb.pdf"),bbox_inches="tight",pad_inches=0.05)
#online,SHT
@@ -100,6 +103,7 @@ plt.ylabel("Precision [%]")
leg = plt.legend(loc="best")
leg.get_frame().set_linewidth(legend_width)
plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
plt.savefig(os.path.join(out_dir,"online-sht.pdf"),bbox_inches="tight",pad_inches=0.05)
#face
@@ -113,6 +117,7 @@ plt.ylabel("Precision [%]")
leg = plt.legend(loc="best")
leg.get_frame().set_linewidth(legend_width)
plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
plt.savefig(os.path.join(out_dir,"face.pdf"),bbox_inches="tight",pad_inches=0.05)
#back
@@ -128,6 +133,7 @@ plt.ylabel("Precision [%]")
leg = plt.legend(loc="best")
leg.get_frame().set_linewidth(legend_width)
plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
plt.savefig(os.path.join(out_dir,"back.pdf"),bbox_inches="tight",pad_inches=0.05)
#variance-reduction
@@ -141,4 +147,5 @@ plt.ylabel("Precision [%]")
leg = plt.legend(loc="best")
leg.get_frame().set_linewidth(legend_width)
plt.axis([0,100,50,100])
+plt.gca().set_aspect(2)
plt.savefig(os.path.join(out_dir,"var.pdf"),bbox_inches="tight",pad_inches=0.05)
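
Every plot gains `plt.gca().set_aspect(2)`, which is what makes the plots square (per the commit message): with `plt.axis([0,100,50,100])` the x-axis spans 100 data units while the y-axis spans only 50, so rendering each y unit twice as long as an x unit yields a square axes box. A minimal standalone illustration with placeholder data:

# Why set_aspect(2) squares these axes: the x-range (0..100) covers twice
# as many data units as the y-range (50..100), so drawing each y unit at
# twice the length of an x unit gives a square axes box.
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0, 100], [50, 100])   # placeholder curve
ax.axis([0, 100, 50, 100])
ax.set_aspect(2)               # y-unit length = 2 * x-unit length
plt.savefig("square-sketch.pdf", bbox_inches="tight", pad_inches=0.05)
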
diff --git a/data/pair-matching/roc.py b/data/pair-matching/roc.py
index 7396ab4..c0d7d83 100755
--- a/data/pair-matching/roc.py
+++ b/data/pair-matching/roc.py
@@ -51,7 +51,7 @@ if __name__ == "__main__":
indices = [i for i in range(ap.shape[0]) if ap[i,1]<0.1]
ap_false = ap[:,1][indices]
ap_true = ap[:,0][indices]
- plt.plot(ap_false,ap_true,label="Face recognition")
+ plt.plot(100*ap_false,100*ap_true,label="Face recognition")
plt.xlabel("False positive rate [%]")
plt.ylabel("True positive rate [%]")
np.random.seed()
@@ -77,7 +77,7 @@ if __name__ == "__main__":
false_pos = false_pos[indices]
true_pos = np.array(true_pos)
true_pos = true_pos[indices]
- plt.plot(false_pos,true_pos,label="$\sigma$ = "+str(s))
+ plt.plot(100*false_pos,100*true_pos,label=r"$\sigma$ = "+str(s))
leg = plt.legend(loc="lower right")
leg.get_frame().set_linewidth(legend_width)
plt.savefig("roc.pdf",bbox_inches="tight",pad_inches=0.05)
diff --git a/experimental.tex b/experimental.tex
index a0c98ae..d46270a 100644
--- a/experimental.tex
+++ b/experimental.tex
@@ -100,13 +100,14 @@ the same ID, it means that the skeleton-fitting algorithm was able to detect
the skeleton in a contiguous way. This allows us to define the concept of a
\emph{run}: a sequence of frames with the same skeleton ID.
-We perform five experiments. First, we test the performance of skeleton
-recognition using traditional 10-fold cross validation, to represent an offline
-setting. Second, we run our algorithms in an online setting by training and
-testing the data over time. Third, we pit skeleton recognition against the
-state-of-the-art in face recognition. Next, we test how our solution performs
-when people are walking away from the camera. Finally, we study what happens
-if the noise from the Kinect is reduced.
+We perform five experiments. First, we test the performance of
+skeleton recognition using traditional 10-fold cross validation, to
+represent an offline learning setting. Second, we run our algorithms
+in an online learning setting by training and testing the data over
+time. Third, we pit skeleton recognition against the state-of-the-art
+in face recognition. Next, we test how our solution performs when
+people are walking away from the camera. Finally, we study what
+happens if the noise from the Kinect is reduced.
%\begin{table}
%\begin{center}
@@ -131,7 +132,7 @@ if the noise from the Kinect is reduced.
\includegraphics[]{graphics/frames.pdf}
\end{center}
\vspace{-1.5\baselineskip}
- \caption{Distribution of the frame ratio of each individual in the
+ \caption{Distribution of the frequency of each individual in the
data set}
\label{fig:frames}
\end{figure}
@@ -139,22 +140,23 @@ if the noise from the Kinect is reduced.
\subsection{Offline learning setting}
\label{sec:experiment:offline}
-In the first experiment, we study the accuracy of skeleton recognition using
-10-fold cross validation. The data set is partitioned into 10 continuous time
-sequences of equal size. For a given recall threshold, the algorithm is trained
-on 9 continuous time sequences and trained on the last one. This is repeated
-for the 10 possible testing subsamples. Averaging the prediction rate over
-these 10 training-testing experiments yields the prediction rate for the chosen
-threshold. We test the mixture of Gaussians (MoG) and sequential hypothesis
-testing (SHT) models, and find that SHT generally performs better than MoG, and
-that accuracy increases as group size decreases.
+In the first experiment, we study the accuracy of skeleton recognition
+using 10-fold cross validation. The data set is partitioned into 10
+continuous time sequences of equal size. For a given recall threshold,
+the algorithm is trained on 9 sequences and tested on the last
+one. This is repeated for all 10 possible testing sequences. Averaging
+the prediction rate over these 10 training-testing experiments yields
+the prediction rate for the chosen threshold. We test the mixture of
+Gaussians (MoG) and sequential hypothesis testing (SHT) models, and
+find that SHT generally performs better than MoG, and that accuracy
+increases as group size decreases.
\fref{fig:offline} shows the precision-recall plot as the threshold varies.
-Both algrithms perform better than three times the majority class baseline of
+Both algorithms perform three times better than the majority class baseline of
15\% with a recall of 100\% on all people. Several curves are obtained for
different group sizes: people are ordered based on their frequency of
-appearance (\fref{fig:frames}, and all the frames belonging to people beyond a
+appearance (\fref{fig:frames}), and all the frames belonging to people beyond a
given rank in this ordering are removed. The decrease of performance when
increasing the number of people in the data set can be explained by the
overlaps between skeleton profiles due to the noise, as discussed in
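
The offline evaluation described in this hunk is a blocked 10-fold cross validation over contiguous time sequences. A hedged sketch of the loop, where `fit` and `predict_rate` are placeholders for the MoG or SHT classifier (neither appears in this diff):

# Hedged sketch of the contiguous 10-fold protocol: split the
# chronologically ordered frames into 10 contiguous blocks, train on 9,
# test on the held-out block, and average the prediction rates.
import numpy as np

def contiguous_10fold(X, y, fit, predict_rate, k=10):
    blocks = np.array_split(np.arange(len(X)), k)  # contiguous time sequences
    rates = []
    for i in range(k):
        test = blocks[i]
        train = np.concatenate([b for j, b in enumerate(blocks) if j != i])
        model = fit(X[train], y[train])
        rates.append(predict_rate(model, X[test], y[test]))
    return np.mean(rates)  # prediction rate for the chosen threshold
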
@@ -168,7 +170,7 @@ permit a proper training of the algorithm.
\includegraphics[]{graphics/offline-nb.pdf}
\label{fig:offline:nb}
}
-\subfloat[Sequential Hypothesis Learning]{
+\subfloat[Sequential Hypothesis Testing]{
\includegraphics[]{graphics/offline-sht.pdf}
\label{fig:offline:sht}
}
@@ -200,13 +202,13 @@ augmented data set, and the newly obtained classifier can be deployed in the
building.
In this setting, the sequential hypothesis testing (SHT) algorithm is more
-suitable than the algorithm used in the previous paragraph, because it
+suitable than the algorithm used in Section~\ref{sec:experiment:offline}, because it
accounts for the fact that a person identity does not change across a
run. The analysis is therefore performed by partitioning the dataset
-into 10 subsamples of equal size. For a given threshold, the algorithm
+into 10 time sequences of equal size. For a given threshold, the algorithm
is trained and tested incrementally: trained on the first $k$
-subsamples (in the chronological order) and tested on the $(k+1)$-th
-subsample. \fref{fig:online} shows the prediction-recall
+sequences (in the chronological order) and tested on the $(k+1)$-th
+sequence. \fref{fig:online} shows the precision-recall
curve when averaging the prediction rate of the 10 incremental
experiments.
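
The online evaluation instead trains on the first $k$ chronological sequences and tests on the $(k+1)$-th. A hedged sketch with the same placeholder `fit` and `predict_rate`:

# Hedged sketch of the incremental protocol: train on the first k
# chronological sequences, test on the (k+1)-th, and average the
# resulting prediction rates.
import numpy as np

def incremental_eval(X, y, fit, predict_rate, k=10):
    blocks = np.array_split(np.arange(len(X)), k)
    rates = []
    for i in range(1, k):
        train = np.concatenate(blocks[:i])  # everything seen so far
        test = blocks[i]                    # the next time sequence
        model = fit(X[train], y[train])
        rates.append(predict_rate(model, X[test], y[test]))
    return np.mean(rates)
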
@@ -220,17 +222,22 @@ experiments.
\begin{center}
\includegraphics[width=0.49\textwidth]{graphics/online-sht.pdf}
\end{center}
- \label{fig:online:sht}
- \vspace{-1.5\baselineskip}
- \caption{Results for the online setting, where $n_p$ is the size of
- the group as in Figure~\ref{fig:offline}}
- \label{fig:online}
}
\parbox[t]{0.49\linewidth}{
\begin{center}
\includegraphics[width=0.49\textwidth]{graphics/face.pdf}
\end{center}
- \vspace{-1.5\baselineskip}
+}
+\end{figure}
+\begin{figure}
+\vspace{-1.5\baselineskip}
+\parbox[t]{0.48\linewidth}{
+ \caption{Results for the online setting, where $n_p$ is the size of
+ the group as in Figure~\ref{fig:offline}}
+ \label{fig:online}
+}
+\hspace{0.02\linewidth}
+\parbox[t]{0.48\linewidth}{
\caption{Results for face recognition versus skeleton recognition}
\label{fig:face}
}
@@ -259,21 +266,27 @@ we discuss in the next experiment.
\begin{figure}[t]
\parbox[t]{0.49\linewidth}{
- \begin{center}
- \includegraphics[width=0.49\textwidth]{graphics/back.pdf}
- \end{center}
- \vspace{-1.5\baselineskip}
- \caption{Results with people walking away from and toward the camera}
- \label{fig:back}
+\begin{center}
+ \includegraphics[width=0.49\textwidth]{graphics/back.pdf}
+\end{center}
}
\parbox[t]{0.49\linewidth}{
- \begin{center}
- \includegraphics[width=0.49\textwidth]{graphics/var.pdf}
- \end{center}
- \vspace{-1.5\baselineskip}
- \caption{Results with and without halving the variance of the noise}
- \label{fig:var}
- }
+\begin{center}
+ \includegraphics[width=0.49\textwidth]{graphics/var.pdf}
+\end{center}
+}
+\end{figure}
+\begin{figure}
+\vspace{-1.5\baselineskip}
+\parbox[t]{0.48\linewidth}{
+\caption{Results with people walking away from and toward the camera}
+\label{fig:back}
+}
+\hspace{0.02\linewidth}
+\parbox[t]{0.48\linewidth}{
+\caption{Results with and without halving the variance of the noise}
+\label{fig:var}
+}
\end{figure}
\subsection{Walking away}
@@ -323,12 +336,11 @@ the Kinect.
%the observed limbs' lengths away from the exact measurements.
To simulate a reduction of the noise level, the data set is modified as
follows: we compute the average profile of each person, and for each frame we
-divide the empirical variance from the average by 2. Formally, if $\bx$ is an
-observation in the 9-dimensional feature space for the person $i$, and if
-$\bar{\bx}$ is the average of all the observations available for this person in
-the data set, then $\bx$ is replaced by $\bx'$ defined by:
+divide the empirical variance from the average by 2. Formally, using
+the same notation as in Section~\ref{sec:mixture of Gaussians}, each
+observation $\bx_i$ is replaced by $\bx_i'$ defined by:
\begin{equation}
- \bx' = \bar{\bx} + \frac{\bx-\bar{\bx}}{\sqrt{2}}
+ \bx_i' = \bar{\bx}_{y_i} + \frac{\bx_i-\bar{\bx}_{y_i}}{\sqrt{2}}
\end{equation}
We believe that a reduction factor of 2 for the noise's variance is
realistic given the relatively low resolution of the Kinect's infrared
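
The displayed transformation shrinks each observation toward its class mean; shrinking deviations by a factor $c$ divides the empirical variance by $c^2$, so $c = \sqrt{2}$ halves it, as the surrounding prose states. A hedged numpy sketch with assumed array shapes:

# Hedged sketch with assumed shapes: shrink each observation's deviation
# from its class mean by a factor c, which divides the empirical variance
# by c**2; c = sqrt(2) halves it, matching the prose above.
import numpy as np

def reduce_noise(X, y, c=np.sqrt(2)):
    """X: (n, 9) skeleton profiles; y: (n,) person labels."""
    Xp = X.copy().astype(float)
    for label in np.unique(y):
        mask = y == label
        mean = X[mask].mean(axis=0)  # average profile of this person
        Xp[mask] = mean + (X[mask] - mean) / c
    return Xp
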
diff --git a/graphics/back.pdf b/graphics/back.pdf
index 7768660..848522d 100644
--- a/graphics/back.pdf
+++ b/graphics/back.pdf
Binary files differ
diff --git a/graphics/face.pdf b/graphics/face.pdf
index 58516c2..8dde124 100644
--- a/graphics/face.pdf
+++ b/graphics/face.pdf
Binary files differ
diff --git a/graphics/frames.pdf b/graphics/frames.pdf
index b096475..ffbd79f 100644
--- a/graphics/frames.pdf
+++ b/graphics/frames.pdf
Binary files differ
diff --git a/graphics/limbs.pdf b/graphics/limbs.pdf
index c6cd9e6..7f98107 100644
--- a/graphics/limbs.pdf
+++ b/graphics/limbs.pdf
Binary files differ
diff --git a/graphics/offline-nb.pdf b/graphics/offline-nb.pdf
index 89422cb..08431d3 100644
--- a/graphics/offline-nb.pdf
+++ b/graphics/offline-nb.pdf
Binary files differ
diff --git a/graphics/offline-sht.pdf b/graphics/offline-sht.pdf
index b51ccc5..dcdce69 100644
--- a/graphics/offline-sht.pdf
+++ b/graphics/offline-sht.pdf
Binary files differ
diff --git a/graphics/online-nb.pdf b/graphics/online-nb.pdf
index 95c1d7f..7a0dd4a 100644
--- a/graphics/online-nb.pdf
+++ b/graphics/online-nb.pdf
Binary files differ
diff --git a/graphics/online-sht.pdf b/graphics/online-sht.pdf
index 3c42c10..72b0a71 100644
--- a/graphics/online-sht.pdf
+++ b/graphics/online-sht.pdf
Binary files differ
diff --git a/graphics/roc.pdf b/graphics/roc.pdf
index 8e25ab7..ae0c809 100644
--- a/graphics/roc.pdf
+++ b/graphics/roc.pdf
Binary files differ
diff --git a/graphics/var.pdf b/graphics/var.pdf
index 6fdb0ae..a79992f 100644
--- a/graphics/var.pdf
+++ b/graphics/var.pdf
Binary files differ
diff --git a/uniqueness.tex b/uniqueness.tex
index 35f9a37..d3b4822 100644
--- a/uniqueness.tex
+++ b/uniqueness.tex
@@ -24,7 +24,7 @@ on these sets of pairs. The average of the false-positive rates and
the true-positive rates across the 10 experiments for a given
threshold gives one operating point on the receiver operating
characteristic (ROC) curve (Figure~\ref{fig:roc}). Note that in this
-benchmark the identity information of the individuals appearing in the
+benchmark, the identity information of the individuals appearing in the
pairs is not available, which means that the algorithms cannot form
additional image pairs from the input data. This is referred to as the
\emph{image-restricted} setting in the LFW benchmark.
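
For reference, one operating point of the ROC curve described here reduces to thresholding pair scores. A hedged sketch, assuming a distance-like score where smaller means "same person" (the actual pair-scoring function is not part of this diff); averaging the (FPR, TPR) pairs across the 10 experiments for a fixed threshold gives one point on the curve:

# Hedged sketch of one ROC operating point: `match_scores` and
# `mismatch_scores` are hypothetical arrays of scores for same-person and
# different-person pairs, with smaller meaning more similar.
import numpy as np

def operating_point(match_scores, mismatch_scores, threshold):
    tpr = np.mean(match_scores < threshold)     # true-positive rate
    fpr = np.mean(mismatch_scores < threshold)  # false-positive rate
    return fpr, tpr
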