summaryrefslogtreecommitdiffstats
path: root/data/class.py
diff options
context:
space:
mode:
Diffstat (limited to 'data/class.py')
-rwxr-xr-xdata/class.py78
1 files changed, 0 insertions, 78 deletions
diff --git a/data/class.py b/data/class.py
deleted file mode 100755
index b021b34..0000000
--- a/data/class.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#! /usr/bin/python
-import copy
-import sys
-from svmutil import *
-import numpy as np
-import matplotlib.pyplot as plt
-from sets import Set
-import itertools
-
def normalize(a, weights=None):
    """Z-score normalize columns of a 2-D numeric array IN PLACE.

    Parameters
    ----------
    a : 2-D numpy array (or masked array); modified in place.
    weights : optional dict mapping column index -> (mean, std) or None.
        When omitted, every column is normalized and its computed
        (mean, std) recorded.  An entry of None means "compute the
        statistics from `a`"; a (mean, std) tuple means "apply these
        stored statistics" (used to apply training-set statistics to
        test data).

    Returns
    -------
    (a, weights) : the same array object and the (possibly filled-in)
        statistics dict.
    """
    if weights is None:
        # Default: normalize every column, computing stats from `a`.
        weights = {col: None for col in range(a.shape[1])}

    for col in weights.keys():
        column = a[:, col]
        if weights[col] is None:  # `is None`, not `== None`
            weights[col] = np.mean(column), np.std(column)
        mean, std = weights[col]
        a[:, col] = (column - mean) / std
    return a, weights
-
def read_filter(filename):
    """Load a comma-separated data file and return rows passing filters.

    Reads columns 1 and 4-15 of the file (13 columns total; lines
    starting with '#' are comments).  A value of -1 marks a missing
    entry: any row containing one is discarded.  Of the remaining rows,
    only those whose distance (selected column 1) lies in (2, 3.2) and
    whose diff (selected column 3) is below 0.5 are kept.

    Returns a 2-D ndarray of the surviving rows.
    """
    a = np.loadtxt(filename, comments="#", delimiter=",",
                   usecols=(1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))

    # Mask -1 entries (missing values), then drop every row that
    # contains at least one masked entry.
    a = np.ma.masked_equal(a, -1)
    a = np.ma.mask_rows(a)
    a = np.ma.compress_rows(a)

    # Keep rows inside the accepted distance window with small diff.
    # (The original also bound column 2 as `variance` but never used
    # it; that dead local is removed here.)
    distance = a[:, 1]
    diff = a[:, 3]
    a = a[(distance > 2) & (distance < 3.2) & (diff < 0.5)]

    return a
-
-
def normalize_filter(a, weights=None, nameset=None):
    """Normalize feature columns and convert rows to libsvm input format.

    Parameters
    ----------
    a : 2-D array as returned by read_filter (column 0 is an integer id).
    weights : optional normalization statistics dict, passed through to
        normalize(); supply the dict returned from a training call to
        apply the same statistics to test data.
    nameset : optional collection of integer ids; when given, rows whose
        id (column 0) is not in the set are dropped.

    Returns
    -------
    (labels, features, weights) where labels is list(column 0), features
    is a list of {feature_index: value} dicts built from columns 4
    onward, and weights is the statistics dict.
    """
    a = np.ma.masked_array(a)
    # Normalize feature columns 4-12, computing stats unless supplied.
    if weights is None:  # `is None`, not `== None`
        weights = dict.fromkeys(range(4, 13))
    a, weights = normalize(a, weights)

    # Drop rows whose id is outside nameset by masking column 0 and
    # then discarding masked rows.
    # NOTE(review): relies on a.mask being a full boolean array; with
    # np.ma.nomask this indexing may fail on modern numpy — confirm.
    if nameset is not None:  # `is not None`, not `!= None`
        for row in range(len(a)):
            if int(a[row][0]) not in nameset:
                a.mask[row][0] = True

    a = np.ma.mask_rows(a)
    a = np.ma.compress_rows(a)
    # zip truncates range(1, 11) to the 9 available feature columns,
    # so feature keys run 1..9.
    return list(a[:, 0]), [dict(zip(range(1, 11), r)) for r in a[:, 4:]], weights
-
# Script setup: argv[1] = training data file, argv[2] = test data file,
# argv[3] = output log path.  The log handle stays open for the whole
# run (accuracy_subsets writes to it) and is closed at interpreter exit.
train_filename = sys.argv[1]
test_filename = sys.argv[2]
log_filename = open(sys.argv[3], "w")
a = read_filter(train_filename)
b = read_filter(test_filename)
# Candidate ids 1..25 minus {3, 13, 19}.  The builtin `set` replaces the
# deprecated sets.Set (the `sets` module was removed in Python 3) and is
# available on every Python version this script could run under.
main_set = set(range(1, 26)).difference({13, 19, 3})
-
def accuracy_subsets(n):
    """Train/evaluate an SVM on every size-n id subset of main_set.

    For each combination: normalize the training rows restricted to the
    subset, train a model, apply the training statistics to the test
    rows, predict, and append "n#subset#accuracy" to the log file.
    Uses module globals a, b, main_set and log_filename.
    """
    for subset in itertools.combinations(main_set, n):
        train_labels, train_features, weights = normalize_filter(a, nameset=subset)
        model = svm_train(train_labels, train_features)
        test_labels, test_features, weights = normalize_filter(
            b, weights=weights, nameset=subset)
        p_labels, p_acc, p_vals = svm_predict(test_labels, test_features, model)
        log_filename.write(str(n) + "#" + str(subset) + "#" + str(p_acc[0]) + "\n")
        log_filename.flush()  # make partial results visible immediately
-
-#y1,x1,weights = normalize_filter(a,nameset=main_set)
-#model = svm_train(y1,x1)
-#y2,x2,weights = normalize_filter(b,weights=weights,nameset=main_set)
-#p_labels,p_acc,p_vals = svm_predict(y2,x2,model)
-
# Sweep subset sizes 2 through 5 inclusive.
for subset_size in range(2, 6):
    accuracy_subsets(subset_size)