path: root/data/class.py
author    Thibaut Horel <thibaut.horel@gmail.com>    2012-02-22 22:42:26 -0800
committer Thibaut Horel <thibaut.horel@gmail.com>    2012-02-22 22:42:26 -0800
commit    09c9c63b7f72faf8eb7cdf162998f719cfd3e330 (patch)
tree      065a7385bc9702da21b956d980fae56708712e43 /data/class.py
parent    0f71468c400e40ca9180a177b68f895aae0be90e (diff)
download  kinect-09c9c63b7f72faf8eb7cdf162998f719cfd3e330.tar.gz
Some cleaning in the svm code
Diffstat (limited to 'data/class.py')
-rwxr-xr-x  data/class.py  54
1 file changed, 31 insertions(+), 23 deletions(-)
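
The heart of the cleanup below is a new normalize(a, weights) helper that replaces the separate means and std dictionaries with a single weights dict of (mean, std) pairs, so the column statistics fitted on the training file can be reapplied unchanged to the test file. A minimal sketch of that idea, written in Python 3 style with `is None` in place of the patch's `== None` comparisons:

    import numpy as np

    def normalize(a, weights=None):
        # weights maps a column index to a (mean, std) pair; entries left
        # as None are computed from `a` itself, which lets the pairs
        # learned on the training set be reused verbatim on the test set.
        if weights is None:
            weights = {i: None for i in range(a.shape[1])}
        for i in weights:
            column = a[:, i]
            if weights[i] is None:
                weights[i] = (np.mean(column), np.std(column))
            a[:, i] = (column - weights[i][0]) / weights[i][1]
        return a, weights
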
diff --git a/data/class.py b/data/class.py
index 2cdfa40..404f2e9 100755
--- a/data/class.py
+++ b/data/class.py
@@ -5,35 +5,43 @@ from svmutil import *
import numpy as np
import matplotlib.pyplot as plt
-def read_normalize(filename,means=None,std=None) :
- a = np.loadtxt(filename,comments="#",delimiter=",",usecols=(1,6,7,8,9,10,11,12,13,14))
- distance,variance = np.loadtxt(filename,comments="#",delimiter=",",usecols=((4,5)),unpack=True)
+def normalize(a,weights=None):
+ if weights == None:
+ weights= {}
+ cols = a.shape[1]
+ for i in range(cols):
+ weights[i] = None
+
+ for i in weights.keys():
+ column = a[:,i]
+ if weights[i] == None:
+ weights[i] = np.mean(column), np.std(column)
+ a[:,i] = (column-weights[i][0])/weights[i][1]
+ return a,weights
+
+def read_normalize(filename,weights=None) :
+ a = np.loadtxt(filename,comments="#",delimiter=",",usecols=(1,4,5,6,7,8,9,10,11,12,13,14,15))
+
+ #remove rows with missing values, filter data
a = np.ma.masked_equal(a,-1)
a = np.ma.mask_rows(a)
- mask = a.mask[:,0]
a = np.ma.compress_rows(a)
- distance = distance[mask]
- variance = variance[mask]
-# plt.plot(range(len(variance)),variance,range(len(distance)),distance)
-# plt.show()
- a = a[np.logical_and(variance>0.005,variance<0.05)]
-
rows,cols = a.shape
- if means==None:
- means = {}
- if std==None:
- std = {}
- for col in xrange(1,cols):
- if col not in means:
- means[col] = np.mean(a[:,col])
- if col not in std:
- std[col] = np.std(a[:,col])
- a[:,col] = (a[:,col]-means[col])/(std[col])
- return list(a[:,0]),[dict(zip(range(1,cols+1),r)) for r in a[:,1:]],means,std
+ distance = a[:,1]
+ #variance = a[:,2]
+ diff = a[:,3]
+ a = a[np.logical_and(np.logical_and(distance>2,distance<3.2),diff<0.5)]
+
+ #normalize data
+ if weights==None:
+ weights = dict(zip(range(4,13),[None for i in range(9)]))
+ a,weights = normalize(a,weights)
+ return list(a[:,0]),[dict(zip(range(1,11),r)) for r in a[:,4:]],weights
train_filename = sys.argv[1]
test_filename = sys.argv[2]
-y1,x1,means,std = read_normalize(train_filename)
+
+y1,x1,weights = read_normalize(train_filename)
model = svm_train(y1,x1)
-y2,x2,means,std = read_normalize(test_filename,means=means,std=std)
+y2,x2,weights = read_normalize(test_filename,weights=weights)
p_labels,p_acc,p_vals = svm_predict(y2,x2,model)
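
For context on the unchanged driver at the bottom of the file: libsvm's svmutil expects the labels as a plain list and the samples as a list of {feature_index: value} dicts with 1-based keys, which is exactly the shape read_normalize returns. A self-contained toy run with made-up data (assumes the libsvm Python bindings are installed; the feature values here are hypothetical, not taken from the repository's data):

    from svmutil import svm_train, svm_predict

    # Three training samples with two features each; keys are 1-based
    # libsvm feature indices, matching read_normalize's output format.
    y_train = [1, -1, 1]
    x_train = [{1: 0.2, 2: -1.3},
               {1: 0.9, 2: 0.4},
               {1: -0.5, 2: 1.1}]

    model = svm_train(y_train, x_train, '-t 2 -c 1')   # RBF kernel, C = 1

    y_test = [1, -1]
    x_test = [{1: 0.1, 2: -1.0}, {1: 0.8, 2: 0.5}]
    p_labels, p_acc, p_vals = svm_predict(y_test, x_test, model)
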