diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2012-02-24 15:30:26 -0800 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2012-02-24 15:32:43 -0800 |
| commit | ad67d73a9c716446ecb6ed4ab27284f8b5fcc4d4 (patch) | |
| tree | 2d0506e382c71a5e187760cab2fa6e19d5d63981 /data/svm/classification.py | |
| parent | 79583868fb94bbd187cff06bbcc92fbebe70b110 (diff) | |
| download | kinect-ad67d73a9c716446ecb6ed4ab27284f8b5fcc4d4.tar.gz | |
Halve the loading/filtering time of data files (numpy masked arrays are really slow)
more data for accuracy=f(groupsize)
Diffstat (limited to 'data/svm/classification.py')
| -rwxr-xr-x | data/svm/classification.py | 7 |
1 files changed, 3 insertions, 4 deletions
diff --git a/data/svm/classification.py b/data/svm/classification.py index 5515364..7f4532c 100755 --- a/data/svm/classification.py +++ b/data/svm/classification.py @@ -27,9 +27,8 @@ def read_filter(filename) : usecols=(1,4,5,6,7,8,9,10,11,12,13,14,15)) #remove rows with missing values, filter data - a = np.ma.masked_equal(a,-1) - a = np.ma.mask_rows(a) - a = np.ma.compress_rows(a) + indexes = [i for i in range(a.shape[0]) if -1 not in a[i]] + a = a[indexes] distance = a[:,1] variance = a[:,2] diff = a[:,3] @@ -44,7 +43,7 @@ def normalize_filter(a,weights=None,nameset=None): a,weights = normalize(a,weights) if nameset != None: - indexes = [i for i,v in enumerate(a[:,0]) if v in nameset] + indexes = [i for i in range(a.shape[0]) if a[i][0] in nameset] a = a[indexes] return list(a[:,0]),[dict(zip(range(1,11),r)) for r in a[:,4:]],weights |
