diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2012-02-22 22:42:26 -0800 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2012-02-22 22:42:26 -0800 |
| commit | 09c9c63b7f72faf8eb7cdf162998f719cfd3e330 (patch) | |
| tree | 065a7385bc9702da21b956d980fae56708712e43 | |
| parent | 0f71468c400e40ca9180a177b68f895aae0be90e (diff) | |
| download | kinect-09c9c63b7f72faf8eb7cdf162998f719cfd3e330.tar.gz | |
Some cleaning in the svm code
| -rwxr-xr-x | data/class.py | 54 |
1 file changed, 31 insertions, 23 deletions
```diff
diff --git a/data/class.py b/data/class.py
index 2cdfa40..404f2e9 100755
--- a/data/class.py
+++ b/data/class.py
@@ -5,35 +5,43 @@ from svmutil import *
 import numpy as np
 import matplotlib.pyplot as plt
 
-def read_normalize(filename,means=None,std=None) :
-    a = np.loadtxt(filename,comments="#",delimiter=",",usecols=(1,6,7,8,9,10,11,12,13,14))
-    distance,variance = np.loadtxt(filename,comments="#",delimiter=",",usecols=((4,5)),unpack=True)
+def normalize(a,weights=None):
+    if weights == None:
+        weights= {}
+        cols = a.shape[1]
+        for i in range(cols):
+            weights[i] = None
+
+    for i in weights.keys():
+        column = a[:,i]
+        if weights[i] == None:
+            weights[i] = np.mean(column), np.std(column)
+        a[:,i] = (column-weights[i][0])/weights[i][1]
+    return a,weights
+
+def read_normalize(filename,weights=None) :
+    a = np.loadtxt(filename,comments="#",delimiter=",",usecols=(1,4,5,6,7,8,9,10,11,12,13,14,15))
+
+    #remove rows with missing values, filter data
     a = np.ma.masked_equal(a,-1)
     a = np.ma.mask_rows(a)
-    mask = a.mask[:,0]
     a = np.ma.compress_rows(a)
-    distance = distance[mask]
-    variance = variance[mask]
-# plt.plot(range(len(variance)),variance,range(len(distance)),distance)
-# plt.show()
-    a = a[np.logical_and(variance>0.005,variance<0.05)]
-    rows,cols = a.shape
-    if means==None:
-        means = {}
-    if std==None:
-        std = {}
-    for col in xrange(1,cols):
-        if col not in means:
-            means[col] = np.mean(a[:,col])
-        if col not in std:
-            std[col] = np.std(a[:,col])
-        a[:,col] = (a[:,col]-means[col])/(std[col])
-    return list(a[:,0]),[dict(zip(range(1,cols+1),r)) for r in a[:,1:]],means,std
+    distance = a[:,1]
+    #variance = a[:,2]
+    diff = a[:,3]
+    a = a[np.logical_and(np.logical_and(distance>2,distance<3.2),diff<0.5)]
+
+    #normalize data
+    if weights==None:
+        weights = dict(zip(range(4,13),[None for i in range(9)]))
+    a,weights = normalize(a,weights)
+    return list(a[:,0]),[dict(zip(range(1,11),r)) for r in a[:,4:]],weights
 
 train_filename = sys.argv[1]
 test_filename = sys.argv[2]
 
-y1,x1,means,std = read_normalize(train_filename)
+
+y1,x1,weights = read_normalize(train_filename)
 model = svm_train(y1,x1)
-y2,x2,means,std = read_normalize(test_filename,means=means,std=std)
+y2,x2,weights = read_normalize(test_filename,weights=weights)
 p_labels,p_acc,p_vals = svm_predict(y2,x2,model)
```
