diff options
Diffstat (limited to 'data/class.py')
| -rwxr-xr-x | data/class.py | 78 |
1 files changed, 0 insertions, 78 deletions
#! /usr/bin/python
"""Measure libsvm prediction accuracy over subsets of class labels.

Usage::

    class.py TRAIN_CSV TEST_CSV LOG_FILE

For every combination of 2 to 5 labels drawn from ``main_set`` a model is
trained on the rows of TRAIN_CSV carrying those labels and evaluated on the
matching rows of TEST_CSV.  One line ``<n>#<labels>#<accuracy>`` is appended
to LOG_FILE per combination.

The unused ``matplotlib.pyplot`` import of the earlier revision was dropped:
nothing in this file references it.
"""
import copy
import itertools
import sys

import numpy as np

# Labels eligible for the subset experiments: 1..25 without 3, 13 and 19.
main_set = set(range(1, 26)).difference([13, 19, 3])


def normalize(a, weights=None):
    """Standardise columns of ``a`` in place and return ``(a, weights)``.

    ``weights`` maps a column index to a ``(mean, std)`` pair, or to ``None``
    when the pair still has to be computed from ``a``.  Only the columns
    listed in ``weights`` are touched; when ``weights`` is ``None`` every
    column is standardised.  Missing pairs are filled into the same dict, so
    the returned mapping can be reused to apply identical scaling to another
    data set.

    A column whose standard deviation is zero is only centred (divided by 1)
    instead of being divided by zero.
    """
    if weights is None:
        weights = dict.fromkeys(range(a.shape[1]))

    for i in weights:
        column = a[:, i]
        if weights[i] is None:
            weights[i] = np.mean(column), np.std(column)
        mean, std = weights[i]
        # Guard against constant columns: x / 0 would yield nan/inf (or a
        # fully masked column for masked arrays).
        scale = 1.0 if std == 0 else std
        a[:, i] = (column - mean) / scale
    return a, weights


def read_filter(filename):
    """Load the CSV data and return the rows that pass the filters.

    Columns 1 and 4..15 of the file are read, giving a 13-column array.
    Rows containing ``-1`` (the missing-value marker) are dropped, then only
    rows with ``2 < distance < 3.2`` and ``diff < 0.5`` are kept, where
    ``distance`` is result column 1 and ``diff`` is result column 3.

    ``filename`` is handed to ``numpy.loadtxt`` unchanged.  The result is
    always 2-D, even for a file with a single data row.
    """
    a = np.loadtxt(filename, comments="#", delimiter=",",
                   usecols=(1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
                   ndmin=2)

    # Remove rows with missing values.
    a = a[(a != -1).all(axis=1)]

    distance = a[:, 1]
    diff = a[:, 3]
    return a[(distance > 2) & (distance < 3.2) & (diff < 0.5)]


def normalize_filter(a, weights=None, nameset=None):
    """Normalise the feature columns and select the rows labelled in ``nameset``.

    Works on a copy, so ``a`` itself is never modified and the function can be
    called repeatedly on the same array.

    ``weights`` is passed to :func:`normalize`; by default columns 4..12 are
    standardised using statistics computed from all rows of ``a`` (before the
    label selection).  ``nameset`` is an iterable of integer labels; rows
    whose column 0, truncated to ``int``, is not in it are dropped.  ``None``
    keeps every row.  Rows with masked values are dropped as well.

    Returns ``(labels, features, weights)``: ``labels`` is the list of
    column-0 values, ``features`` holds one dict per row that numbers the
    values from column 4 onwards starting at 1, and ``weights`` is the
    mapping actually used for the scaling.
    """
    data = np.ma.array(a, dtype=float, copy=True)

    if weights is None:
        weights = dict.fromkeys(range(4, 13))
    data, weights = normalize(data, weights)

    # Keep only rows without any masked entry, as a plain 2-D ndarray.
    complete = ~np.ma.getmaskarray(data).any(axis=1)
    data = np.ma.getdata(data)[complete]

    if nameset is not None:
        allowed = set(nameset)
        keep = np.array([int(label) in allowed for label in data[:, 0]],
                        dtype=bool)
        data = data[keep]

    # zip() stops at the shorter input, so at most ten features are numbered.
    features = [dict(zip(range(1, 11), row)) for row in data[:, 4:]]
    return list(data[:, 0]), features, weights


def accuracy_subsets(n):
    """Train and evaluate one model per ``n``-label combination of ``main_set``.

    Reads the module globals ``a`` (training data), ``b`` (test data) and
    ``log_filename`` (despite its name, an open writable file object), which
    the script entry point below sets up.  The test data is scaled with the
    weights computed from the training data.
    """
    # Imported here, by name, so the data helpers above stay usable without
    # libsvm installed and the module namespace is not flooded by ``*``.
    from svmutil import svm_predict, svm_train

    for s in itertools.combinations(sorted(main_set), n):
        y1, x1, weights = normalize_filter(a, nameset=s)
        model = svm_train(y1, x1)
        y2, x2, weights = normalize_filter(b, weights=weights, nameset=s)
        p_labels, p_acc, p_vals = svm_predict(y2, x2, model)
        log_filename.write(str(n) + "#" + str(s) + "#" + str(p_acc[0]) + "\n")
        # Flush per result so partial output survives an interrupted run.
        log_filename.flush()


if __name__ == "__main__":
    train_filename = sys.argv[1]
    test_filename = sys.argv[2]
    # The context manager closes the log even if training fails midway.
    with open(sys.argv[3], "w") as log_filename:
        a = read_filter(train_filename)
        b = read_filter(test_filename)

        # NOTE: an earlier experiment trained a single model on all of
        # main_set (normalize_filter(a, nameset=main_set)) instead of
        # iterating over subsets.
        for i in range(2, 6):
            accuracy_subsets(i)
