diff options
Diffstat (limited to 'data/class.py')
| -rwxr-xr-x | data/class.py | 51 |
1 files changed, 41 insertions, 10 deletions
diff --git a/data/class.py b/data/class.py index 404f2e9..b021b34 100755 --- a/data/class.py +++ b/data/class.py @@ -4,6 +4,8 @@ import sys from svmutil import * import numpy as np import matplotlib.pyplot as plt +from sets import Set +import itertools def normalize(a,weights=None): if weights == None: @@ -19,29 +21,58 @@ def normalize(a,weights=None): a[:,i] = (column-weights[i][0])/weights[i][1] return a,weights -def read_normalize(filename,weights=None) : - a = np.loadtxt(filename,comments="#",delimiter=",",usecols=(1,4,5,6,7,8,9,10,11,12,13,14,15)) +def read_filter(filename) : + a = np.loadtxt(filename,comments="#",delimiter=",", + usecols=(1,4,5,6,7,8,9,10,11,12,13,14,15)) #remove rows with missing values, filter data a = np.ma.masked_equal(a,-1) a = np.ma.mask_rows(a) a = np.ma.compress_rows(a) - rows,cols = a.shape distance = a[:,1] - #variance = a[:,2] + variance = a[:,2] diff = a[:,3] - a = a[np.logical_and(np.logical_and(distance>2,distance<3.2),diff<0.5)] + a = a[(distance>2) & (distance<3.2) & (diff<0.5)] + + return a + +def normalize_filter(a,weights=None,nameset=None): + a = np.ma.masked_array(a) #normalize data if weights==None: weights = dict(zip(range(4,13),[None for i in range(9)])) a,weights = normalize(a,weights) - return list(a[:,0]),[dict(zip(range(1,11),r)) for r in a[:,4:]],weights + + if nameset != None: + for i in range(len(a)): + if int(a[i][0]) not in nameset: + a.mask[i][0] = True + + a = np.ma.mask_rows(a) + a = np.ma.compress_rows(a) + return list(a[:,0]),[dict(zip(range(1,11),r)) for r in a[:,4:]],weights train_filename = sys.argv[1] test_filename = sys.argv[2] +log_filename = open(sys.argv[3],"w") +a = read_filter(train_filename) +b = read_filter(test_filename) +main_set = Set(range(1,26)).difference(Set([13,19,3])) + +def accuracy_subsets(n): + for s in itertools.combinations(main_set,n): + y1,x1,weights = normalize_filter(a,nameset=s) + model = svm_train(y1,x1) + y2,x2,weights = normalize_filter(b,weights=weights,nameset=s) + p_labels,p_acc,p_vals = svm_predict(y2,x2,model) + log_filename.write(str(n)+"#"+str(s)+"#"+str(p_acc[0])+"\n") + log_filename.flush() + +#y1,x1,weights = normalize_filter(a,nameset=main_set) +#model = svm_train(y1,x1) +#y2,x2,weights = normalize_filter(b,weights=weights,nameset=main_set) +#p_labels,p_acc,p_vals = svm_predict(y2,x2,model) -y1,x1,weights = read_normalize(train_filename) -model = svm_train(y1,x1) -y2,x2,weights = read_normalize(test_filename,weights=weights) -p_labels,p_acc,p_vals = svm_predict(y2,x2,model) +for i in range(2,6): + accuracy_subsets(i) |
