#! /usr/bin/python
"""Train a LIBSVM model on one CSV file and evaluate it on another.

Usage: script TRAIN_CSV TEST_CSV

Feature columns are standardised with the mean / standard deviation of the
training file, and the same statistics are reused for the test file.
"""
import copy
import sys

import numpy as np

try:
    # Only the commented-out debug plot in read_normalize() uses pyplot, so a
    # missing matplotlib must not prevent training / prediction.
    import matplotlib.pyplot as plt
except ImportError:
    plt = None

# CSV column indices (0-based).  The first entry of _LABEL_FEATURE_COLS is
# returned as the label, the remaining nine as features.
_LABEL_FEATURE_COLS = (1, 6, 7, 8, 9, 10, 11, 12, 13, 14)
_DISTANCE_COL = 4
_VARIANCE_COL = 5


def read_normalize(filename, means=None, std=None,
                   variance_range=(0.005, 0.05)):
    """Load a CSV file, drop unusable rows and standardise the features.

    Lines starting with ``#`` are treated as comments and fields are
    comma-separated.  A row is discarded when

    * its label or any of its features equals ``-1`` exactly
      (presumably a "missing value" marker -- confirm with the data
      producer; note this also drops rows whose *label* is ``-1``), or
    * its variance column is not strictly inside ``variance_range``.

    Args:
        filename: path (or anything ``numpy.loadtxt`` accepts) of the CSV.
        means: optional dict ``{feature_index: mean}``.  Missing entries are
            computed from this file and stored in the dict (the dict passed
            in is updated in place).
        std: optional dict ``{feature_index: standard deviation}``, handled
            like ``means``.
        variance_range: ``(low, high)`` exclusive bounds for the variance
            column.

    Returns:
        ``(labels, features, means, std)`` where ``labels`` is a list of
        floats, ``features`` is a list of ``{1: v1, ..., 9: v9}`` dicts (one
        per kept row), and ``means`` / ``std`` are the statistics dicts used
        for the standardisation.
    """
    n_data_cols = len(_LABEL_FEATURE_COLS)
    # One pass over the file; ndmin=2 keeps a single-row file two-dimensional.
    raw = np.loadtxt(
        filename,
        comments="#",
        delimiter=",",
        usecols=_LABEL_FEATURE_COLS + (_DISTANCE_COL, _VARIANCE_COL),
        ndmin=2,
    )
    a = raw[:, :n_data_cols]
    distance = raw[:, n_data_cols]
    variance = raw[:, n_data_cols + 1]

    # Keep only rows without a -1 in the label/feature columns, and apply the
    # very same selection to distance/variance so all arrays stay aligned.
    keep = ~np.any(a == -1, axis=1)
    a = a[keep]  # boolean indexing copies, so `raw` is never modified
    distance = distance[keep]
    variance = variance[keep]

    # Debug aid (needs matplotlib):
    # plt.plot(range(len(variance)), variance, range(len(distance)), distance)
    # plt.show()

    low, high = variance_range
    a = a[np.logical_and(variance > low, variance < high)]

    cols = a.shape[1]
    if means is None:
        means = {}
    if std is None:
        std = {}
    for col in range(1, cols):
        if col not in means:
            means[col] = np.mean(a[:, col])
        if col not in std:
            std[col] = np.std(a[:, col])
        # A constant column has zero spread; dividing by it would yield
        # NaN/inf features, so such a column is only centred.
        scale = std[col] if std[col] != 0 else 1.0
        a[:, col] = (a[:, col] - means[col]) / scale

    labels = a[:, 0].tolist()
    # Feature indices start at 1.
    features = [dict(enumerate(row, 1)) for row in a[:, 1:].tolist()]
    return labels, features, means, std


def main(argv=None):
    """Train on ``argv[1]``, predict on ``argv[2]``; return an exit status."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.stderr.write("usage: %s TRAIN_CSV TEST_CSV\n" % argv[0])
        return 2

    # Imported lazily so read_normalize() can be used without LIBSVM.
    from svmutil import svm_predict, svm_train

    train_filename = argv[1]
    test_filename = argv[2]

    y1, x1, means, std = read_normalize(train_filename)
    model = svm_train(y1, x1)

    # Reuse the training statistics so both sets share one scaling.
    y2, x2, means, std = read_normalize(test_filename, means=means, std=std)
    # NOTE(review): svm_predict presumably reports the accuracy itself;
    # confirm against the LIBSVM Python interface documentation.
    p_labels, p_acc, p_vals = svm_predict(y2, x2, model)
    return 0


if __name__ == "__main__":
    sys.exit(main())