-rwxr-xr-x  data/class.py  51
1 file changed, 41 insertions(+), 10 deletions(-)
diff --git a/data/class.py b/data/class.py
index 404f2e9..b021b34 100755
--- a/data/class.py
+++ b/data/class.py
@@ -4,6 +4,8 @@ import sys
 from svmutil import *
 import numpy as np
 import matplotlib.pyplot as plt
+from sets import Set
+import itertools
 
 def normalize(a,weights=None):
     if weights == None:
@@ -19,29 +21,58 @@ def normalize(a,weights=None):
         a[:,i] = (column-weights[i][0])/weights[i][1]
     return a,weights
 
-def read_normalize(filename,weights=None) :
-    a = np.loadtxt(filename,comments="#",delimiter=",",usecols=(1,4,5,6,7,8,9,10,11,12,13,14,15))
+def read_filter(filename) :
+    a = np.loadtxt(filename,comments="#",delimiter=",",
+                   usecols=(1,4,5,6,7,8,9,10,11,12,13,14,15))
     #remove rows with missing values, filter data
     a = np.ma.masked_equal(a,-1)
     a = np.ma.mask_rows(a)
     a = np.ma.compress_rows(a)
-    rows,cols = a.shape
     distance = a[:,1]
-    #variance = a[:,2]
+    variance = a[:,2]
     diff = a[:,3]
-    a = a[np.logical_and(np.logical_and(distance>2,distance<3.2),diff<0.5)]
+    a = a[(distance>2) & (distance<3.2) & (diff<0.5)]
+
+    return a
+
+def normalize_filter(a,weights=None,nameset=None):
+    a = np.ma.masked_array(a)
 
     #normalize data
     if weights==None:
         weights = dict(zip(range(4,13),[None for i in range(9)]))
     a,weights = normalize(a,weights)
-    return list(a[:,0]),[dict(zip(range(1,11),r)) for r in a[:,4:]],weights
+
+    if nameset != None:
+        for i in range(len(a)):
+            if int(a[i][0]) not in nameset:
+                a.mask[i][0] = True
+
+    a = np.ma.mask_rows(a)
+    a = np.ma.compress_rows(a)
+    return list(a[:,0]),[dict(zip(range(1,11),r)) for r in a[:,4:]],weights
 
 
 train_filename = sys.argv[1]
 test_filename = sys.argv[2]
+log_filename = open(sys.argv[3],"w")
+a = read_filter(train_filename)
+b = read_filter(test_filename)
+main_set = Set(range(1,26)).difference(Set([13,19,3]))
+
+def accuracy_subsets(n):
+    for s in itertools.combinations(main_set,n):
+        y1,x1,weights = normalize_filter(a,nameset=s)
+        model = svm_train(y1,x1)
+        y2,x2,weights = normalize_filter(b,weights=weights,nameset=s)
+        p_labels,p_acc,p_vals = svm_predict(y2,x2,model)
+        log_filename.write(str(n)+"#"+str(s)+"#"+str(p_acc[0])+"\n")
+        log_filename.flush()
+
+#y1,x1,weights = normalize_filter(a,nameset=main_set)
+#model = svm_train(y1,x1)
+#y2,x2,weights = normalize_filter(b,weights=weights,nameset=main_set)
+#p_labels,p_acc,p_vals = svm_predict(y2,x2,model)
 
-y1,x1,weights = read_normalize(train_filename)
-model = svm_train(y1,x1)
-y2,x2,weights = read_normalize(test_filename,weights=weights)
-p_labels,p_acc,p_vals = svm_predict(y2,x2,model)
+for i in range(2,6):
+    accuracy_subsets(i)
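
Notes on the techniques in this change follow. The new read_filter leans on numpy masked arrays plus boolean indexing to drop incomplete rows. Below is a minimal, self-contained sketch of that pipeline; the toy data is invented, only the -1 missing-value marker and the distance/diff thresholds come from the diff:

    import numpy as np

    # Toy rows: columns are (id, distance, diff); -1 marks a missing value.
    a = np.array([[1.0,  2.5, 0.1],
                  [2.0, -1.0, 0.2],   # missing value -> row gets dropped
                  [3.0,  3.0, 0.9]])

    a = np.ma.masked_equal(a, -1)    # mask every cell equal to -1
    a = np.ma.mask_rows(a)           # widen the mask to whole rows
    a = np.ma.compress_rows(a)       # drop masked rows; result is a plain ndarray

    # Boolean filtering as in the new read_filter: element-wise & chains
    # replace the nested np.logical_and calls. The parentheses are required
    # because & binds tighter than the comparisons.
    distance, diff = a[:, 1], a[:, 2]
    a = a[(distance > 2) & (distance < 3.2) & (diff < 0.5)]
    print(a)                         # only the (1.0, 2.5, 0.1) row survives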
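
The normalize helper (unchanged by this commit) records per-column statistics on the training pass and replays them on the test pass, so both files are scaled identically. The diff only shows the (x - w[0]) / w[1] application step; the sketch below assumes w = (mean, std), which is the usual choice, and normalizes every column rather than the 4..12 range used in the script:

    import numpy as np

    def normalize(a, weights=None):
        # weights maps column index -> (mean, std); None entries are filled
        # from the data on the first (training) pass and reused afterwards.
        a = np.array(a, dtype=float)
        if weights is None:
            weights = {i: None for i in range(a.shape[1])}
        for i in weights:
            column = a[:, i]
            if weights[i] is None:
                weights[i] = (column.mean(), column.std())
            a[:, i] = (column - weights[i][0]) / weights[i][1]
        return a, weights

    train, w = normalize([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
    test, _ = normalize([[2.0, 20.0]], weights=w)   # reuses the training stats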
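
normalize_filter's nameset branch keeps only rows whose class id (column 0) belongs to the requested subset, by hand-masking the id cell and then reusing the mask_rows/compress_rows pair. A standalone sketch with invented ids and subset; note it builds the mask array explicitly, since a masked array created without one has no element-wise mask to index into:

    import numpy as np

    data = np.array([[1.0, 0.5],
                     [7.0, 0.2],
                     [2.0, 0.9]])
    a = np.ma.masked_array(data, mask=np.zeros(data.shape, dtype=bool))

    nameset = {1, 2}                    # hypothetical subset of class ids
    for i in range(len(a)):
        if int(a[i][0]) not in nameset:
            a.mask[i][0] = True         # mask just the id cell...
    a = np.ma.mask_rows(a)              # ...spread the mask over the row
    a = np.ma.compress_rows(a)          # ...and drop that row entirely
    print(a)                            # rows with ids 1 and 2 remain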
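
Finally, accuracy_subsets walks every n-element combination of the candidate class ids, retrains, and logs one accuracy per subset. Here is a dependency-free sketch of that driver loop: evaluate is a stand-in for the normalize_filter -> svm_train -> svm_predict round trip, the log file name is invented, and the builtin set replaces sets.Set, whose module was removed in Python 3:

    import itertools

    main_set = set(range(1, 26)) - {3, 13, 19}     # 22 candidate class ids

    def evaluate(subset):
        # Placeholder for training and scoring an SVM on this subset;
        # the real script returns p_acc[0] from svm_predict here.
        return float(len(subset))                  # dummy "accuracy"

    with open("subset_accuracy.log", "w") as log:  # hypothetical log file
        for n in range(2, 6):
            for s in itertools.combinations(main_set, n):
                # same n#subset#accuracy record format as the diff writes
                log.write("%d#%s#%s\n" % (n, s, evaluate(s)))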