# Source: root/data/class.py
# (blob 2cdfa405023b2cb795ebb3a59c2d3b36004ac7eb, plain view).
# The repository-browser navigation text and the line-number gutter (1-39)
# that preceded the code were extraction residue and are omitted here.
#! /usr/bin/python
import copy
import sys
from svmutil import *
import numpy as np
import matplotlib.pyplot as plt

def read_normalize(filename, means=None, std=None, variance_range=(0.005, 0.05)):
    """Load a comma-separated data file, filter its rows and z-score the features.

    The file is parsed with ``np.loadtxt`` (``#`` starts a comment).  Column 1
    is returned as the first result (it is what the caller passes as ``y`` to
    the SVM routines), column 5 is the per-row variance used for filtering,
    and columns 6-14 are the nine feature columns.

    A row is kept only if
      * none of its label/feature values equals -1 (the missing-value
        marker), and
      * its variance lies strictly inside ``variance_range``.

    Each feature column is then normalized as ``(x - mean) / std`` using the
    population standard deviation (``np.std``).  A column whose standard
    deviation is 0 is only centred, so constant features become 0 instead of
    NaN/inf.

    Parameters
    ----------
    filename : path (or anything ``np.loadtxt`` accepts)
        The file is read once.
    means, std : dict or None
        Per-feature statistics keyed by 1-based feature index.  Entries that
        are present are reused (e.g. training-set statistics applied to a test
        set); missing entries are computed from this file's kept rows.  A
        dict passed in is updated in place and is also returned.
    variance_range : (low, high)
        Exclusive bounds on the variance column; defaults to the original
        hard-coded thresholds.

    Returns
    -------
    (labels, features, means, std)
        ``labels`` is a list of the column-1 values of the kept rows;
        ``features`` is a list with one ``{feature_index: value}`` dict per
        kept row (indices start at 1); ``means`` and ``std`` are the
        statistics dictionaries described above.
    """
    # ndmin=2 keeps a file with a single data row two-dimensional.
    raw = np.loadtxt(filename, comments="#", delimiter=",",
                     usecols=(1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14), ndmin=2)
    variance = raw[:, 1]
    # Drop the variance column: what remains is [label, feature 1..9].
    a = np.delete(raw, 1, axis=1)

    # Rows WITHOUT a -1 marker are the valid ones.  The variance test must be
    # applied to those same rows, so both conditions are combined into one
    # boolean row selector over the unfiltered data.
    low, high = variance_range
    keep = ~(a == -1).any(axis=1)
    keep &= (variance > low) & (variance < high)
    a = a[keep]

    cols = a.shape[1]
    if means is None:
        means = {}
    if std is None:
        std = {}
    for col in range(1, cols):
        if col not in means:
            means[col] = np.mean(a[:, col])
        if col not in std:
            std[col] = np.std(a[:, col])
        # A constant column has std == 0; dividing would yield NaN/inf.
        scale = std[col] if std[col] != 0 else 1.0
        a[:, col] = (a[:, col] - means[col]) / scale

    labels = list(a[:, 0])
    features = [dict(zip(range(1, cols), row)) for row in a[:, 1:]]
    return labels, features, means, std

# Command-line driver: train on the first CSV file, then predict on the second.
# Fail with a usage message instead of a bare IndexError when arguments are
# missing.
if len(sys.argv) < 3:
    sys.exit("usage: %s TRAIN_CSV TEST_CSV" % sys.argv[0])
train_filename = sys.argv[1]
test_filename = sys.argv[2]
# Normalization statistics (means/std) are computed from the training file.
y1,x1,means,std = read_normalize(train_filename)
# svm_train / svm_predict presumably come from the `svmutil` star import
# (LIBSVM's Python interface) -- confirm against the installed package.
model = svm_train(y1,x1)
# The test file is normalized with the statistics returned for the training
# file so both sets share the same feature scaling.
y2,x2,means,std = read_normalize(test_filename,means=means,std=std)
p_labels,p_acc,p_vals = svm_predict(y2,x2,model)