From ad67d73a9c716446ecb6ed4ab27284f8b5fcc4d4 Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Fri, 24 Feb 2012 15:30:26 -0800
Subject: Halve the loading/filtering time of data files (numpy masked arrays
 are really slow)

more data for accuracy=f(groupsize)
---
 data/svm/classification.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/data/svm/classification.py b/data/svm/classification.py
index 5515364..7f4532c 100755
--- a/data/svm/classification.py
+++ b/data/svm/classification.py
@@ -27,9 +27,8 @@ def read_filter(filename) :
                    usecols=(1,4,5,6,7,8,9,10,11,12,13,14,15))
 
     #remove rows with missing values, filter data
-    a = np.ma.masked_equal(a,-1)
-    a = np.ma.mask_rows(a)
-    a = np.ma.compress_rows(a)
+    indexes = [i for i in range(a.shape[0]) if -1 not in a[i]]
+    a = a[indexes]
     distance = a[:,1]
     variance = a[:,2]
     diff = a[:,3]
@@ -44,7 +43,7 @@ def normalize_filter(a,weights=None,nameset=None):
     a,weights = normalize(a,weights)
 
     if nameset != None:
-        indexes = [i for i,v in enumerate(a[:,0]) if v in nameset]
+        indexes = [i for i in range(a.shape[0]) if a[i][0] in nameset]
         a = a[indexes]
 
     return list(a[:,0]),[dict(zip(range(1,11),r)) for r in a[:,4:]],weights
-- 
cgit v1.2.3-70-g09d2
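
For context, a minimal standalone sketch (not part of the patch) contrasting the removed masked-array row filter with the added index-list filter. The toy array and the names m, old, new, vec are illustrative assumptions; only the two filtering idioms come from the diff above. The final vectorized boolean mask is an alternative not in the patch, and is usually faster still on large arrays.

import numpy as np

# Toy array standing in for the np.loadtxt output; -1 marks a missing value.
a = np.array([[1.0,  2.0, 3.0],
              [4.0, -1.0, 6.0],
              [7.0,  8.0, 9.0]])

# Old approach (removed by the patch): numpy masked arrays.
m = np.ma.masked_equal(a, -1)    # mask every cell equal to -1
m = np.ma.mask_rows(m)           # propagate the mask to whole rows
old = np.ma.compress_rows(m)     # drop rows containing masked cells

# New approach (added by the patch): keep indexes of rows without -1.
indexes = [i for i in range(a.shape[0]) if -1 not in a[i]]
new = a[indexes]

# Alternative (assumption, not in the patch): fully vectorized filter.
vec = a[~(a == -1).any(axis=1)]

assert (old == new).all() and (new == vec).all()

The index-list version beats masked arrays mainly by skipping per-cell mask bookkeeping; the boolean-mask one-liner goes further by pushing the row scan into NumPy's C loop instead of a Python-level comprehension.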