summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Thibaut Horel <thibaut.horel@gmail.com> 2012-02-22 20:42:52 -0800
committer: Thibaut Horel <thibaut.horel@gmail.com> 2012-02-22 20:42:52 -0800
commit: 8f600aa7382e86ba78167af7d4ed417ddf05ba49 (patch)
tree: b6f7543bd1617f57086a58c64a0788cfea16e620
parent: 80fa7033e12b896f314757ccd2067268d08dd2d5 (diff)
download: kinect-8f600aa7382e86ba78167af7d4ed417ddf05ba49.tar.gz
Improvements to the libsvm code
proper renormalization of the data.
-rwxr-xr-x data/class.py 95
1 files changed, 32 insertions, 63 deletions
diff --git a/data/class.py b/data/class.py
index e7975f9..2cdfa40 100755
--- a/data/class.py
+++ b/data/class.py
@@ -2,69 +2,38 @@
import copy
import sys
from svmutil import *
+import numpy as np
+import matplotlib.pyplot as plt
-lower = 0.1
-upper = 10
-
-def normalize_instances(instances, ranges = None) :
- normalized_instances = copy.deepcopy(instances)
- if ranges == None :
- ranges_dict = dict()
- for attribute in normalized_instances[0].keys() : # we iterate on the attributes
- column = [instance[attribute] for instance in normalized_instances]
- if ranges != None :
- minimum = ranges[attribute][0]
- maximum = ranges[attribute][1]
- else :
- minimum = min(column)
- maximum = max(column)
- ranges_dict[attribute] = [minimum, maximum]
- for i in range(len(column)) :
- if column[i] == minimum :
- column[i] = lower
- elif column[i] == maximum :
- column[i] = upper
- else :
- column[i] = lower + (upper-lower) * (column[i] - minimum) / (maximum - minimum)
- # Copying normalized values in memory
-
- for elem, instance in zip(column, normalized_instances):
- instance[attribute] = elem
-
- if ranges == None :
- return normalized_instances, ranges_dict
- else :
- return normalized_instances
-
-
-def read_file(filename) :
- y = []
- x = []
- for line in filename:
- values = line.rstrip().split(',')
- if values[0] != "# dir":
- dict = {}
- for i in range(9):
- if float(values[i+5])!=-1.:
- dict[i+1] = float(values[i+5])
- if len(dict)==9:
- y += [int(values[1])]
- x += [dict]
- print line.rstrip()
- #for a,b in zip(y,x):
- # result = str(a)
- # for i in range(9):
- # result += " "+str(i+1)+":"+str(b[i+1])
- # print result
- #return (y,x)
def read_normalize(filename, means=None, std=None):
    """Load labelled samples from a CSV source, filter them and standardize.

    The source is comma-separated with ``#`` comment lines.  Column 1 is
    used as the label, column 5 as the per-sample variance and columns
    6-14 as the nine features.

    A row is kept only if
      * neither its label nor any of its features equals ``-1`` (treated
        here as the "missing value" marker), and
      * its variance lies strictly between 0.005 and 0.05.

    Each feature column is then shifted by its mean and divided by its
    standard deviation.  Statistics already present in ``means`` / ``std``
    (keyed by 1-based feature index) are reused, so the values returned
    for a training set can be passed back in to normalize a test set the
    same way.  Missing entries are computed from the kept rows and stored
    into the given dictionaries in place.

    Args:
        filename: anything ``numpy.loadtxt`` accepts (path, open file,
            iterable of lines).  It is read exactly once.
        means: optional dict ``{feature_index: mean}``.
        std: optional dict ``{feature_index: standard deviation}``.

    Returns:
        ``(labels, features, means, std)`` where ``labels`` is a list of
        label values, ``features`` is a list of ``{feature_index: value}``
        dicts (the sparse format taken by libsvm), and ``means`` / ``std``
        are the statistics actually used.

    Raises:
        ValueError: if a statistic has to be estimated but no row survived
            the filtering.
    """
    # One read for everything; ndmin=2 keeps a single-row file 2-D.
    raw = np.loadtxt(filename, comments="#", delimiter=",",
                     usecols=(1, 5) + tuple(range(6, 15)), ndmin=2)
    variance = raw[:, 1]
    # Label in column 0, the nine features in columns 1..9 (a fresh copy,
    # so the in-place normalization below does not touch ``raw``).
    a = np.delete(raw, 1, axis=1)

    # Both criteria are evaluated against the *same* unfiltered rows, so
    # the variance test always lines up with the row it belongs to.
    complete = ~(a == -1).any(axis=1)
    in_band = np.logical_and(variance > 0.005, variance < 0.05)
    a = a[complete & in_band]

    rows, cols = a.shape
    if means is None:
        means = {}
    if std is None:
        std = {}
    for col in range(1, cols):
        column = a[:, col]
        if rows == 0 and (col not in means or col not in std):
            raise ValueError(
                "no usable rows left to estimate normalization statistics")
        if col not in means:
            means[col] = np.mean(column)
        if col not in std:
            std[col] = np.std(column)
        # A constant column has zero spread; dividing by 1 leaves the
        # centred values instead of producing NaN/inf.
        scale = std[col] if std[col] != 0 else 1.0
        a[:, col] = (column - means[col]) / scale

    labels = list(a[:, 0])
    features = [dict(zip(range(1, cols), row)) for row in a[:, 1:]]
    return labels, features, means, std
train_filename = sys.argv[1]
-#test_filename = sys.argv[2]
-y1,x1 = read_file(open(train_filename))
-#x1,ranges = normalize_instances(x1)
-#print ranges
-#exit(0)
-#model = svm_train(y1,x1)
-#y2,x2 = read_file(open(test_filename))
-#x2 = normalize_instances(x2,ranges)
-#p_labels,p_acc,p_vals = svm_predict(y2,x2,model)
# Second command-line argument: CSV holding the evaluation samples.
test_filename = sys.argv[2]

# Fit the normalization statistics and the SVM on the training samples.
y1, x1, means, std = read_normalize(train_filename)
model = svm_train(y1, x1)

# Normalize the evaluation samples with the training statistics, then
# score the trained model on them.
y2, x2, means, std = read_normalize(test_filename, means=means, std=std)
p_labels, p_acc, p_vals = svm_predict(y2, x2, model)