path: root/data/class.py
author    Thibaut Horel <thibaut.horel@gmail.com>    2012-02-22 22:42:26 -0800
committer Thibaut Horel <thibaut.horel@gmail.com>    2012-02-22 22:42:26 -0800
commit    09c9c63b7f72faf8eb7cdf162998f719cfd3e330 (patch)
tree      065a7385bc9702da21b956d980fae56708712e43 /data/class.py
parent    0f71468c400e40ca9180a177b68f895aae0be90e (diff)
download  kinect-09c9c63b7f72faf8eb7cdf162998f719cfd3e330.tar.gz
Some cleaning in the svm code
Diffstat (limited to 'data/class.py')
-rwxr-xr-x  data/class.py  54
1 file changed, 31 insertions(+), 23 deletions(-)
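
The heart of the cleanup below is a new normalize(a, weights) helper that replaces the separate means and std dictionaries with a single weights dict of (mean, std) pairs, so the column statistics fitted on the training file can be reapplied unchanged to the test file. A minimal sketch of that idea, written in Python 3 style with `is None` in place of the patch's `== None` comparisons:

    import numpy as np

    def normalize(a, weights=None):
        # weights maps a column index to a (mean, std) pair; entries left
        # as None are computed from `a` itself, which lets the pairs
        # learned on the training set be reused verbatim on the test set.
        if weights is None:
            weights = {i: None for i in range(a.shape[1])}
        for i in weights:
            column = a[:, i]
            if weights[i] is None:
                weights[i] = (np.mean(column), np.std(column))
            a[:, i] = (column - weights[i][0]) / weights[i][1]
        return a, weights
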
diff --git a/data/class.py b/data/class.py
index 2cdfa40..404f2e9 100755
--- a/data/class.py
+++ b/data/class.py
@@ -5,35 +5,43 @@ from svmutil import *
import numpy as np
import matplotlib.pyplot as plt
-def read_normalize(filename,means=None,std=None) :
- a = np.loadtxt(filename,comments="#",delimiter=",",usecols=(1,6,7,8,9,10,11,12,13,14))
- distance,variance = np.loadtxt(filename,comments="#",delimiter=",",usecols=((4,5)),unpack=True)
+def normalize(a,weights=None):
+ if weights == None:
+ weights= {}
+ cols = a.shape[1]
+ for i in range(cols):
+ weights[i] = None
+
+ for i in weights.keys():
+ column = a[:,i]
+ if weights[i] == None:
+ weights[i] = np.mean(column), np.std(column)
+ a[:,i] = (column-weights[i][0])/weights[i][1]
+ return a,weights
+
+def read_normalize(filename,weights=None) :
+ a = np.loadtxt(filename,comments="#",delimiter=",",usecols=(1,4,5,6,7,8,9,10,11,12,13,14,15))
+
+ #remove rows with missing values, filter data
a = np.ma.masked_equal(a,-1)
a = np.ma.mask_rows(a)
- mask = a.mask[:,0]
a = np.ma.compress_rows(a)
- distance = distance[mask]
- variance = variance[mask]
-# plt.plot(range(len(variance)),variance,range(len(distance)),distance)
-# plt.show()
- a = a[np.logical_and(variance>0.005,variance<0.05)]
-
rows,cols = a.shape
- if means==None:
- means = {}
- if std==None:
- std = {}
- for col in xrange(1,cols):
- if col not in means:
- means[col] = np.mean(a[:,col])
- if col not in std:
- std[col] = np.std(a[:,col])
- a[:,col] = (a[:,col]-means[col])/(std[col])
- return list(a[:,0]),[dict(zip(range(1,cols+1),r)) for r in a[:,1:]],means,std
+ distance = a[:,1]
+ #variance = a[:,2]
+ diff = a[:,3]
+ a = a[np.logical_and(np.logical_and(distance>2,distance<3.2),diff<0.5)]
+
+ #normalize data
+ if weights==None:
+ weights = dict(zip(range(4,13),[None for i in range(9)]))
+ a,weights = normalize(a,weights)
+ return list(a[:,0]),[dict(zip(range(1,11),r)) for r in a[:,4:]],weights
train_filename = sys.argv[1]
test_filename = sys.argv[2]
-y1,x1,means,std = read_normalize(train_filename)
+
+y1,x1,weights = read_normalize(train_filename)
model = svm_train(y1,x1)
-y2,x2,means,std = read_normalize(test_filename,means=means,std=std)
+y2,x2,weights = read_normalize(test_filename,weights=weights)
p_labels,p_acc,p_vals = svm_predict(y2,x2,model)
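
For context on the unchanged driver at the bottom of the file: libsvm's svmutil expects the labels as a plain list and the samples as a list of {feature_index: value} dicts with 1-based keys, which is exactly the shape read_normalize returns. A self-contained toy run with made-up data (assumes the libsvm Python bindings are installed; the feature values here are hypothetical, not taken from the repository's data):

    from svmutil import svm_train, svm_predict

    # Three training samples with two features each; keys are 1-based
    # libsvm feature indices, matching read_normalize's output format.
    y_train = [1, -1, 1]
    x_train = [{1: 0.2, 2: -1.3},
               {1: 0.9, 2: 0.4},
               {1: -0.5, 2: 1.1}]

    model = svm_train(y_train, x_train, '-t 2 -c 1')   # RBF kernel, C = 1

    y_test = [1, -1]
    x_test = [{1: 0.1, 2: -1.0}, {1: 0.8, 2: 0.5}]
    p_labels, p_acc, p_vals = svm_predict(y_test, x_test, model)
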