summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x  data/normality.py  5
-rwxr-xr-x  data/svm/classification.py  30
2 files changed, 19 insertions, 16 deletions
diff --git a/data/normality.py b/data/normality.py
index eb22553..3fb0fe7 100755
--- a/data/normality.py
+++ b/data/normality.py
@@ -1,3 +1,4 @@
+#! /usr/bin/python
import sys
import numpy as np
from scipy.stats import lognorm
@@ -8,7 +9,7 @@ def normtest(x):
s = np.matrix(np.cov(x,bias=1,rowvar=0))
n,p = x.shape
dift = x - np.mean(x,axis=0)
- dj = np.diag(dift*s.I*dift.T)
+ dj = np.matrix(np.diag(dift*s.I*dift.T))
y = x*s.I*x.T
djk = - 2*y.T + np.diag(y.T).T*np.ones((1,n)) + np.ones((n,1))*np.diag(y.T)
b = 1/(math.sqrt(2))*((2*p + 1)/4)**(1/(p + 4))*(n**(1/(p + 4)))
@@ -35,6 +36,6 @@ def normtest(x):
if __name__ == "__main__":
filename = sys.argv[1]
x = np.loadtxt(filename,delimiter=",")
- x = x[:10,7:]
+ x = x[:6000,7:]
p = normtest(x)
print p
diff --git a/data/svm/classification.py b/data/svm/classification.py
index b8c0ae6..c1cec68 100755
--- a/data/svm/classification.py
+++ b/data/svm/classification.py
@@ -19,35 +19,37 @@ def normalize(a,weights=None):
a[:,i] = (column-weights[i][0])/weights[i][1]
return a,weights
-def read_filter(filename) :
+def read_filter(filename,nameset=None) :
a = np.loadtxt(filename,comments="#",delimiter=",",
usecols=(1,4,5,6,7,8,9,10,11,12,13,14,15))
#remove rows with missing values, filter data
indexes = [i for i in range(a.shape[0]) if -1 not in a[i]]
a = a[indexes]
+ #a = a[(a!=-1).all(1)]
+
+ if nameset != None:
+ indexes = [i for i in range(a.shape[0]) if a[i,0] in nameset]
+ a = a[indexes]
+
distance = a[:,1]
variance = a[:,2]
diff = a[:,3]
- a = a[(distance>2) & (distance<3.2) & (diff<0.5)]
+ a = a[(distance>2) & (distance<3.2)]# & (diff<0.5)]
return a
-def normalize_filter(a,weights=None,nameset=None):
+def normalize_filter(a,weights=None):
#normalize data
if weights==None:
weights = {i:None for i in range(4,13)}
a,weights = normalize(a,weights)
- if nameset != None:
- indexes = [i for i in range(a.shape[0]) if a[i,0] in nameset]
- a = a[indexes]
-
return list(a[:,0]),[{i+1:v for i,v in enumerate(row[4:])} for row in a],weights
-def perform_svm(a,b,nameset=None):
- y1,x1,weights = normalize_filter(a,nameset=nameset)
+def perform_svm(a,b):
+ y1,x1,weights = normalize_filter(a)
model = svm_train(y1,x1)
- y2,x2,weights = normalize_filter(b,weights=weights,nameset=nameset)
+ y2,x2,weights = normalize_filter(b,weights=weights)
p_labels,p_acc,p_vals = svm_predict(y2,x2,model)
return p_labels,p_acc,p_vals
@@ -68,11 +70,11 @@ if __name__ == "__main__":
random.seed()
train_filename = sys.argv[1]
test_filename = sys.argv[2]
- log_filename = open(sys.argv[3],"w")
- a = read_filter(train_filename)
- b = read_filter(test_filename)
main_set = Set(range(1,26)).difference(Set([13,19,3]))
- perform_svm(a,b,nameset=main_set)
+ log_filename = open(sys.argv[3],"w")
+ a = read_filter(train_filename,nameset=main_set)
+ b = read_filter(test_filename,nameset=main_set)
+ perform_svm(a,b)
#for i in [6]:
# accuracy_subsets(i)