From ad67d73a9c716446ecb6ed4ab27284f8b5fcc4d4 Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Fri, 24 Feb 2012 15:30:26 -0800
Subject: Halve the loading/filtering time of data files (numpy masked arrays
 are really slow)

more data for accuracy=f(groupsize)
---
 data/svm/classification.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/data/svm/classification.py b/data/svm/classification.py
index 5515364..7f4532c 100755
--- a/data/svm/classification.py
+++ b/data/svm/classification.py
@@ -27,9 +27,8 @@ def read_filter(filename) :
                    usecols=(1,4,5,6,7,8,9,10,11,12,13,14,15))
 
     #remove rows with missing values, filter data
-    a = np.ma.masked_equal(a,-1)
-    a = np.ma.mask_rows(a)
-    a = np.ma.compress_rows(a)
+    indexes = [i for i in range(a.shape[0]) if -1 not in a[i]]
+    a = a[indexes]
     distance = a[:,1]
     variance = a[:,2]
     diff = a[:,3]
@@ -44,7 +43,7 @@ def normalize_filter(a,weights=None,nameset=None):
     a,weights = normalize(a,weights)
 
     if nameset != None:
-        indexes = [i for i,v in enumerate(a[:,0]) if v in nameset]
+        indexes = [i for i in range(a.shape[0]) if a[i][0] in nameset]
         a = a[indexes]
 
     return list(a[:,0]),[dict(zip(range(1,11),r)) for r in a[:,4:]],weights
-- 
cgit v1.2.3-70-g09d2
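
For context, a minimal standalone sketch (not part of the patch) contrasting the removed masked-array row filter with the added index-list filter. The toy array and the names m, old, new, vec are illustrative assumptions; only the two filtering idioms come from the diff above. The final vectorized boolean mask is an alternative not in the patch, and is usually faster still on large arrays.

import numpy as np

# Toy array standing in for the np.loadtxt output; -1 marks a missing value.
a = np.array([[1.0,  2.0, 3.0],
              [4.0, -1.0, 6.0],
              [7.0,  8.0, 9.0]])

# Old approach (removed by the patch): numpy masked arrays.
m = np.ma.masked_equal(a, -1)    # mask every cell equal to -1
m = np.ma.mask_rows(m)           # propagate the mask to whole rows
old = np.ma.compress_rows(m)     # drop rows containing masked cells

# New approach (added by the patch): keep indexes of rows without -1.
indexes = [i for i in range(a.shape[0]) if -1 not in a[i]]
new = a[indexes]

# Alternative (assumption, not in the patch): fully vectorized filter.
vec = a[~(a == -1).any(axis=1)]

assert (old == new).all() and (new == vec).all()

The index-list version beats masked arrays mainly by skipping per-cell mask bookkeeping; the boolean-mask one-liner goes further by pushing the row scan into NumPy's C loop instead of a Python-level comprehension.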