From d7b0798050f72f08bcb3995c465efeb9bf9f516d Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Sun, 4 Mar 2012 13:01:08 -0800
Subject: Set font size, figsize, and bounding box of plots

Started taking Brano's comments into account (section 3)
---
 data/combined/graphs/plots.py | 87 ++++++++++++++++++++++--------------------
 data/pair-matching/roc.py     |  9 ++++-
 graphics/back.pdf             | Bin 17955 -> 17795 bytes
 graphics/face.pdf             | Bin 14824 -> 14783 bytes
 graphics/frames.pdf           | Bin 10850 -> 10867 bytes
 graphics/limbs.pdf            | Bin 0 -> 45439 bytes
 graphics/offline-nb.pdf       | Bin 17143 -> 17130 bytes
 graphics/offline-sht.pdf      | Bin 20594 -> 20304 bytes
 graphics/online-nb.pdf        | Bin 17929 -> 17894 bytes
 graphics/online-sht.pdf       | Bin 24868 -> 24070 bytes
 graphics/roc.pdf              | Bin 20447 -> 20430 bytes
 graphics/var.pdf              | Bin 16186 -> 16101 bytes
 uniqueness.tex                | 51 +++++++++++++------------
 13 files changed, 81 insertions(+), 66 deletions(-)
 create mode 100644 graphics/limbs.pdf

diff --git a/data/combined/graphs/plots.py b/data/combined/graphs/plots.py
index 98b9bc5..6f37b27 100755
--- a/data/combined/graphs/plots.py
+++ b/data/combined/graphs/plots.py
@@ -6,12 +6,17 @@ import matplotlib.mlab as mlab
 import sys
 import os
 import scipy
+import matplotlib as mpl
+mpl.rcParams['font.size'] = 8
+mpl.rcParams['lines.linewidth'] = 0.5
+mpl.rcParams['figure.figsize'] = 6,5
+mpl.rcParams['legend.fontsize'] = 8
+mpl.rcParams['axes.linewidth'] = 0.8
 
 out_dir = sys.argv[1]
-
 #limbs distribution
-plt.cla()
+plt.figure()
 data = np.loadtxt("../limbs-avg-zdiff/data.csv",delimiter=",")
 data = data[#(data[:,1] == 25)
             ((data != -1).all(1))
@@ -21,13 +26,13 @@ data = data[:,7:]*100
 mean = data.mean(0)
 var = data.std(0)
 for i in range(len(mean)):
-    plt.subplot(3,3,i+1)
-    n,b,p = plt.hist(data[:,i],bins=50,normed=1)
+    ax = plt.subplot(2,5,i+1)
+    n,b,p = plt.hist(data[:,i],bins=100,normed=1,linewidth=0)
     plt.plot(b,mlab.normpdf(b,mean[i],var[i]))
-plt.savefig(os.path.join(out_dir,"limbs.pdf"))
+plt.savefig(os.path.join(out_dir,"limbs.pdf"),bbox_inches="tight",pad_inches=0.05)
 
 #frames distribution
-plt.cla()
+fig = plt.figure(figsize=(6,4))
 x = np.loadtxt("frames.txt",usecols=(0,))
 y = range(1,len(x)+1)
 width=0.8
@@ -38,88 +43,88 @@ plt.xlabel("Individual")
 plt.ylabel("Frame ratio [%]")
 plt.ylim(0,17)
 ax = plt.gca()
-plt.savefig(os.path.join(out_dir,"frames.pdf"))
+plt.savefig(os.path.join(out_dir,"frames.pdf"),bbox_inches="tight",pad_inches=0.05)
 
 l = ["3","5","10","all"]
 
 #10-fold, naive
-plt.cla()
-#ax = plt.subplot(121)
-plt.axis([0,100,50,100])
-#ax.set_aspect(2)
+plt.figure()
 for i in l:
     x,y = np.loadtxt(i+"_nb_off.mat",unpack=True)
-    plt.plot(100*x,100*y,label="$n_p=$ "+i,linewidth=0.8)
+    plt.plot(100*x,100*y,label="$n_p=$ "+i)
 plt.xlabel("Recall [%]")
 plt.ylabel("Precision [%]")
 plt.legend(loc="best")
-plt.savefig(os.path.join(out_dir,"offline-nb.pdf"))
+plt.axis([0,100,50,100])
+plt.savefig(os.path.join(out_dir,"offline-nb.pdf"),bbox_inches="tight",pad_inches=0.05)
+
 #10-fold, SHT
-#ax = plt.subplot(122)
-#plt.axis([0,100,50,100])
-#ax.set_aspect(2)
-plt.cla()
+plt.figure()
 for i in l:
     x,y = np.loadtxt(i+"_sht_off.mat",unpack=True)
-    plt.plot(100*x,100*y,label="$n_p=$ "+i,linewidth=0.8)
+    plt.plot(100*x,100*y,label="$n_p=$ "+i)
 plt.xlabel("Recall [%]")
 plt.ylabel("Precision [%]")
-    plt.legend(loc="best")
-
+    plt.legend(loc="lower left")
 plt.axis([0,100,50,100])
-plt.savefig(os.path.join(out_dir,"offline-sht.pdf"))
+plt.savefig(os.path.join(out_dir,"offline-sht.pdf"),bbox_inches="tight",pad_inches=0.05)
 
 #online,NB
-plt.cla()
+plt.figure()
 for i in l:
     x,y = np.loadtxt(i+"_nb_on.mat",unpack=True)
-    plt.plot(100*x,100*y,label="$n_p=$ "+i,linewidth=0.8,markersize=4)
+    plt.plot(100*x,100*y,label="$n_p=$ "+i)
 plt.xlabel("Recall [%]")
 plt.ylabel("Precision [%]")
 plt.legend(loc="best")
-plt.savefig(os.path.join(out_dir,"online-nb.pdf"))
+plt.axis([0,100,50,100])
+plt.savefig(os.path.join(out_dir,"online-nb.pdf"),bbox_inches="tight",pad_inches=0.05)
+
 #online,SHT
-plt.cla()
+plt.figure()
 for i in l:
     x,y = np.loadtxt(i+"_sht_on.mat",unpack=True)
-    plt.plot(100*x,100*y,label="$n_p=$ "+i,linewidth=0.8,markersize=4)
+    plt.plot(100*x,100*y,label="$n_p=$ "+i)
 plt.xlabel("Recall [%]")
 plt.ylabel("Precision [%]")
 plt.legend(loc="best")
-plt.savefig(os.path.join(out_dir,"online-sht.pdf"))
-
+plt.axis([0,100,50,100])
+plt.savefig(os.path.join(out_dir,"online-sht.pdf"),bbox_inches="tight",pad_inches=0.05)
 
 #face
-plt.cla()
+plt.figure()
 x,y = np.loadtxt("all_nb_off.mat",unpack=True)
 a,b = np.loadtxt("face.csv",delimiter=",", unpack=True)
-plt.plot(100*x,100*y,linewidth=0.8,label="Skeleton")
-plt.plot(100*a,100*b,linewidth=0.8,label="Face")
+plt.plot(100*x,100*y,label="Skeleton")
+plt.plot(100*a,100*b,label="Face")
 plt.xlabel("Recall [%]")
 plt.ylabel("Precision [%]")
 plt.legend(loc="best")
-plt.savefig(os.path.join(out_dir,"face.pdf"))
+plt.axis([0,100,50,100])
+plt.savefig(os.path.join(out_dir,"face.pdf"),bbox_inches="tight",pad_inches=0.05)
 
 #back
-plt.cla()
+plt.figure()
 x,y = np.loadtxt("back_all_sht_on.mat",unpack=True)
 a,b = np.loadtxt("all_sht_on.mat",unpack=True)
 c,d = np.loadtxt("front_back_all_sht.mat",unpack=True)
-plt.plot(100*a,100*b,linewidth=0.8,label="Train/test toward")
-plt.plot(100*x,100*y,linewidth=0.8,label="Train/test away")
-plt.plot(100*c,100*d,linewidth=0.8,label="Train toward test away")
+plt.plot(100*a,100*b,label="Train/test toward")
+plt.plot(100*x,100*y,label="Train/test away")
+plt.plot(100*c,100*d,label="Train toward test away")
 plt.xlabel("Recall [%]")
 plt.ylabel("Precision [%]")
 plt.legend(loc="best")
-plt.savefig(os.path.join(out_dir,"back.pdf"))
+plt.axis([0,100,50,100])
+plt.savefig(os.path.join(out_dir,"back.pdf"),bbox_inches="tight",pad_inches=0.05)
 
 #variance-reduction
-plt.cla()
+plt.figure()
 x,y = np.loadtxt("half-var-all_sht_on.mat",unpack=True)
 a,b = np.loadtxt("all_sht_on.mat",unpack=True)
-plt.plot(100*x,100*y,linewidth=0.8,label="Reduced noise")
-plt.plot(100*a,100*b,linewidth=0.8,label="Original noise")
+plt.plot(100*x,100*y,label="Reduced noise")
+plt.plot(100*a,100*b,label="Original noise")
 plt.xlabel("Recall [%]")
 plt.ylabel("Precision [%]")
 plt.legend(loc="best")
-plt.savefig(os.path.join(out_dir,"var.pdf"))
+plt.axis([0,100,50,100])
+plt.savefig(os.path.join(out_dir,"var.pdf"),bbox_inches="tight",pad_inches=0.05)
diff --git a/data/pair-matching/roc.py b/data/pair-matching/roc.py
index c121e97..19a1acf 100755
--- a/data/pair-matching/roc.py
+++ b/data/pair-matching/roc.py
@@ -2,9 +2,16 @@ import sys
 import numpy as np
 import matplotlib.pyplot as plt
+import matplotlib as mpl
 import math
 from sets import ImmutableSet
 
+mpl.rcParams['font.size'] = 8
+mpl.rcParams['lines.linewidth'] = 0.5
+mpl.rcParams['figure.figsize'] = 6,5
+mpl.rcParams['legend.fontsize'] = 8
+mpl.rcParams['axes.linewidth'] = 0.8
+
 def distance(a,b):
     return math.sqrt(np.square(a-b).sum())
 
@@ -68,7 +75,7 @@ if __name__ == "__main__":
         true_pos = true_pos[indices]
         plt.plot(false_pos,true_pos,label="$\sigma$ = "+str(s))
     plt.legend(loc="lower right")
-    plt.savefig("roc.pdf")
+    plt.savefig("roc.pdf",bbox_inches="tight",pad_inches=0.05)
     plt.show()
diff --git a/graphics/back.pdf b/graphics/back.pdf
index 86ff2f2..ebda274 100644
Binary files a/graphics/back.pdf and b/graphics/back.pdf differ
diff --git a/graphics/face.pdf b/graphics/face.pdf
index 3f87ef6..c92b8c2 100644
Binary files a/graphics/face.pdf and b/graphics/face.pdf differ
diff --git a/graphics/frames.pdf b/graphics/frames.pdf
index cca65bf..819e23b 100644
Binary files a/graphics/frames.pdf and b/graphics/frames.pdf differ
diff --git a/graphics/limbs.pdf b/graphics/limbs.pdf
new file mode 100644
index 0000000..fe92ce2
Binary files /dev/null and b/graphics/limbs.pdf differ
diff --git a/graphics/offline-nb.pdf b/graphics/offline-nb.pdf
index 8c468bd..cf1a43b 100644
Binary files a/graphics/offline-nb.pdf and b/graphics/offline-nb.pdf differ
diff --git a/graphics/offline-sht.pdf b/graphics/offline-sht.pdf
index 637ea99..9ef1a03 100644
Binary files a/graphics/offline-sht.pdf and b/graphics/offline-sht.pdf differ
diff --git a/graphics/online-nb.pdf b/graphics/online-nb.pdf
index 03c561e..3cdc3cb 100644
Binary files a/graphics/online-nb.pdf and b/graphics/online-nb.pdf differ
diff --git a/graphics/online-sht.pdf b/graphics/online-sht.pdf
index 46d4b6c..067fbd2 100644
Binary files a/graphics/online-sht.pdf and b/graphics/online-sht.pdf differ
diff --git a/graphics/roc.pdf b/graphics/roc.pdf
index d5a0c79..2d4a42b 100644
Binary files a/graphics/roc.pdf and b/graphics/roc.pdf differ
diff --git a/graphics/var.pdf b/graphics/var.pdf
index 97fb5a1..d51105c 100644
Binary files a/graphics/var.pdf and b/graphics/var.pdf differ
diff --git a/uniqueness.tex b/uniqueness.tex
index 76b7461..2ed0f93 100644
--- a/uniqueness.tex
+++ b/uniqueness.tex
@@ -6,6 +6,7 @@ recognizable biometric is their uniqueness. Are skeletons consistently and
 sufficiently distinct to use them for person recognition?
 
 \subsection{Face recognition benchmark}
+\label{sec:frb}
 
 A good way to understand the uniqueness of a metric is to look at how
 well an algorithm based on it performs in the \emph{pair-matching
@@ -25,27 +26,30 @@ curve, which plots the true-positive rate against the false-positive
 rate as the threshold of the algorithm varies. Note that in this benchmark
 the identity information of the individuals appearing in the pairs is not
 available, which means that the algorithms cannot form additional image
 pairs from the input
-data. This is referred to as the \emph{Image-restricted} setting in the LFW
+data. This is referred to as the \emph{image-restricted} setting in the LFW
 benchmark.
 
 \subsection{Experiment design}
 
-In order to run an experiment similar to the one used in the face pair-matching
-problem, we use the Goldman Osteological Dataset \cite{deadbodies}. This
-dataset consists of skeletal measurements of 1538 skeletons uncovered around
-the world and dating from throughout the last several thousand years. Given the
-way these data were collected, only a partial view of the skeleton is
-available, we keep six measurements: the lengths of four bones (radius,
-humerus, femur, and tibia) and the breadth and height of the pelvis. Because
-of missing values, this reduces the size of the dataset to 1191.
+In order to run an experiment similar to the one used in the face
+pair-matching problem (Section~\ref{sec:frb}), we use the Goldman
+Osteological Dataset \cite{deadbodies}. This dataset consists of
+skeletal measurements of 1538 skeletons uncovered around the world and
+dating from throughout the last several thousand years. Given the way
+these data were collected, only a partial view of the skeleton is
+available, so we keep six measurements: the lengths of four bones
+(radius, humerus, femur, and tibia) and the breadth and height of the
+pelvis. Because of missing values, this reduces the size of the
+dataset to 1191.
 
-From this dataset, 1191 matched pairs and 1191 unmatched pairs are generated.
-In practice, the exact measurements of the bones of living subjects are not
-directly accessible. Therefore, measurements are likely to have an error rate,
-whose variance depends on the method of collection (\eg measuring limbs over
-clothing versus on bare skin). Since there is only one sample per skeleton, we
-simulate this error by adding independent random Gaussian noise to each
-measurement of the pairs.
+From this dataset, 1191 matched pairs and 1191 unmatched pairs are
+generated. In practice, the exact measurements of the bones of living
+subjects are not directly accessible. Therefore, measurements are
+likely to have an error whose variance depends on the method of
+collection (\eg measuring limbs over clothing versus on bare
+skin). Since there is only one sample per skeleton, we simulate this
+error by adding independent random Gaussian noise to each measurement
+of the pairs.
 
 \subsection{Results}
@@ -53,26 +57,25 @@ We evaluate the performance of the pair-matching problem on the dataset
 by using threshold algorithm: for a given threshold, a pair will be classified
 as \emph{matched} if the Euclidean distance between the two skeletons is lower
 than the threshold, and \emph{unmatched} otherwise. Formally, let
-$(s_1,s_2)$ be an input pair of the algorithm
-($s_i\in\mathbf{R}_+^{6}$, these are the six bone measurements),
+$(\bs_1,\bs_2)$ be an input pair of the algorithm
+($\bs_i\in\mathbf{R}_+^{6}$, these are the six bone measurements),
 the output of the algorithm for the threshold $\delta$ is defined as:
 \begin{displaymath}
-  A_\delta(s_1,s_2) = \begin{cases}
-    1 & \text{if $d(s_1,s_2) < \delta$}\\
+  A_\delta(\bs_1,\bs_2) = \begin{cases}
+    1 & \text{if $d(\bs_1,\bs_2) < \delta$}\\
     0 & \text{otherwise}
   \end{cases}
 \end{displaymath}
 
 \begin{figure}[t]
   \begin{center}
-    \includegraphics[width=10cm]{graphics/roc.pdf}
+    \includegraphics[width=0.6\columnwidth]{graphics/roc.pdf}
   \end{center}
   \vspace{-1.5\baselineskip}
-  \caption{ROC curve (true positive rate
-    vs. false positive rate) for several standard deviations of the
+  \caption{ROC curve for several standard deviations of the
     noise and for the state-of-the-art \emph{Associate-Predict} face
-    detection algorithm}
+    detection algorithm. The standard deviation $\sigma$ is shown in millimeters}
   \label{fig:roc}
 \end{figure}
--
cgit v1.2.3-70-g09d2
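The plotting changes above all follow one pattern: set matplotlib's global rcParams once at import time, start each plot with plt.figure() instead of reusing an axis via plt.cla(), and save with a tight bounding box so the PDFs drop into the LaTeX column without excess margins. A minimal, self-contained sketch of that pattern (the plotted data and the output path are placeholders, not files from this repository):

    import os
    import numpy as np
    import matplotlib as mpl
    import matplotlib.pyplot as plt

    # Global styling applied once, before any figure is created (values from this patch).
    mpl.rcParams['font.size'] = 8
    mpl.rcParams['lines.linewidth'] = 0.5
    mpl.rcParams['figure.figsize'] = 6, 5
    mpl.rcParams['legend.fontsize'] = 8
    mpl.rcParams['axes.linewidth'] = 0.8

    out_dir = "."  # the real scripts read this from sys.argv[1]

    plt.figure()                    # fresh figure rather than plt.cla()
    r = np.linspace(0, 1, 100)      # stand-in for a precision-recall curve
    plt.plot(100 * r, 100 * (0.5 + 0.5 * (1 - r) ** 2), label="example")
    plt.xlabel("Recall [%]")
    plt.ylabel("Precision [%]")
    plt.legend(loc="best")
    plt.axis([0, 100, 50, 100])
    # Trim the whitespace around the axes so the PDF fits the column width.
    plt.savefig(os.path.join(out_dir, "example.pdf"),
                bbox_inches="tight", pad_inches=0.05)

The edited text in uniqueness.tex describes the pair-matching experiment itself: matched and unmatched pairs of skeletons are perturbed with independent Gaussian noise, a pair is declared matched when the Euclidean distance between its two measurement vectors falls below a threshold delta, and sweeping delta traces out the ROC curve. A small sketch of that procedure, using synthetic stand-in data rather than the actual Goldman measurements:

    import numpy as np

    rng = np.random.default_rng(0)

    # Hypothetical stand-in for the dataset: 1191 skeletons x 6 measurements
    # (four bone lengths plus pelvis breadth and height), in millimeters.
    skeletons = rng.uniform(200.0, 500.0, size=(1191, 6))
    sigma = 10.0  # standard deviation of the simulated measurement noise

    def noisy(x):
        """Add independent Gaussian noise to every measurement."""
        return x + rng.normal(0.0, sigma, size=x.shape)

    def euclidean(pair):
        a, b = pair
        return np.linalg.norm(a - b)

    # Matched pairs: two noisy observations of the same skeleton.
    matched = [(noisy(s), noisy(s)) for s in skeletons]
    # Unmatched pairs: noisy observations of two different skeletons.
    perm = rng.permutation(len(skeletons))
    unmatched = [(noisy(skeletons[i]), noisy(skeletons[j]))
                 for i, j in enumerate(perm) if i != j]

    d_pos = np.array([euclidean(p) for p in matched])
    d_neg = np.array([euclidean(p) for p in unmatched])

    # Threshold classifier A_delta: predict "matched" when the distance is below delta.
    # Sweeping delta gives the true-positive and false-positive rates of the ROC curve.
    for delta in np.linspace(0.0, d_neg.max(), 20):
        tpr = (d_pos < delta).mean()
        fpr = (d_neg < delta).mean()
        print(f"delta = {delta:7.1f}   TPR = {tpr:.2f}   FPR = {fpr:.2f}")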