summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x  data/normality.py  5
-rwxr-xr-x  data/svm/classification.py  30
2 files changed, 19 insertions, 16 deletions
diff --git a/data/normality.py b/data/normality.py
index eb22553..3fb0fe7 100755
--- a/data/normality.py
+++ b/data/normality.py
@@ -1,3 +1,4 @@
+#! /usr/bin/python
import sys
import numpy as np
from scipy.stats import lognorm
@@ -8,7 +9,7 @@ def normtest(x):
s = np.matrix(np.cov(x,bias=1,rowvar=0))
n,p = x.shape
dift = x - np.mean(x,axis=0)
- dj = np.diag(dift*s.I*dift.T)
+ dj = np.matrix(np.diag(dift*s.I*dift.T))
y = x*s.I*x.T
djk = - 2*y.T + np.diag(y.T).T*np.ones((1,n)) + np.ones((n,1))*np.diag(y.T)
b = 1/(math.sqrt(2))*((2*p + 1)/4)**(1/(p + 4))*(n**(1/(p + 4)))
@@ -35,6 +36,6 @@ def normtest(x):
if __name__ == "__main__":
filename = sys.argv[1]
x = np.loadtxt(filename,delimiter=",")
- x = x[:10,7:]
+ x = x[:6000,7:]
p = normtest(x)
print p
diff --git a/data/svm/classification.py b/data/svm/classification.py
index b8c0ae6..c1cec68 100755
--- a/data/svm/classification.py
+++ b/data/svm/classification.py
@@ -19,35 +19,37 @@ def normalize(a,weights=None):
a[:,i] = (column-weights[i][0])/weights[i][1]
return a,weights
-def read_filter(filename) :
+def read_filter(filename,nameset=None) :
a = np.loadtxt(filename,comments="#",delimiter=",",
usecols=(1,4,5,6,7,8,9,10,11,12,13,14,15))
#remove rows with missing values, filter data
indexes = [i for i in range(a.shape[0]) if -1 not in a[i]]
a = a[indexes]
+ #a = a[(a!=-1).all(1)]
+
+ if nameset != None:
+ indexes = [i for i in range(a.shape[0]) if a[i,0] in nameset]
+ a = a[indexes]
+
distance = a[:,1]
variance = a[:,2]
diff = a[:,3]
- a = a[(distance>2) & (distance<3.2) & (diff<0.5)]
+ a = a[(distance>2) & (distance<3.2)]# & (diff<0.5)]
return a
-def normalize_filter(a,weights=None,nameset=None):
+def normalize_filter(a,weights=None):
#normalize data
if weights==None:
weights = {i:None for i in range(4,13)}
a,weights = normalize(a,weights)
- if nameset != None:
- indexes = [i for i in range(a.shape[0]) if a[i,0] in nameset]
- a = a[indexes]
-
return list(a[:,0]),[{i+1:v for i,v in enumerate(row[4:])} for row in a],weights
-def perform_svm(a,b,nameset=None):
- y1,x1,weights = normalize_filter(a,nameset=nameset)
+def perform_svm(a,b):
+ y1,x1,weights = normalize_filter(a)
model = svm_train(y1,x1)
- y2,x2,weights = normalize_filter(b,weights=weights,nameset=nameset)
+ y2,x2,weights = normalize_filter(b,weights=weights)
p_labels,p_acc,p_vals = svm_predict(y2,x2,model)
return p_labels,p_acc,p_vals
@@ -68,11 +70,11 @@ if __name__ == "__main__":
random.seed()
train_filename = sys.argv[1]
test_filename = sys.argv[2]
- log_filename = open(sys.argv[3],"w")
- a = read_filter(train_filename)
- b = read_filter(test_filename)
main_set = Set(range(1,26)).difference(Set([13,19,3]))
- perform_svm(a,b,nameset=main_set)
+ log_filename = open(sys.argv[3],"w")
+ a = read_filter(train_filename,nameset=main_set)
+ b = read_filter(test_filename,nameset=main_set)
+ perform_svm(a,b)
#for i in [6]:
# accuracy_subsets(i)