#! /usr/bin/python
"""Train a LIBSVM model on one CSV file and evaluate it on a second one.

Usage: <script> TRAIN_CSV TEST_CSV

Both files are read with the same column selection and row filters. The
feature columns of the test file are standardized with the mean / standard
deviation measured on the training file.
"""
import copy
import sys

import numpy as np

try:
    # Nothing in this script references ``plt``; the import is kept for
    # anyone relying on it, but a missing matplotlib must not stop training.
    import matplotlib.pyplot as plt
except ImportError:
    plt = None


# Zero-based CSV columns that are loaded. After loading they are re-indexed
# 0..12, and every index below refers to that loaded layout.
_USECOLS = (1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
_MISSING_VALUE = -1        # sentinel marking a missing entry in the CSV
_LABEL_COL = 0             # returned as the SVM label
_DISTANCE_COL = 1          # row filter: must lie strictly inside _DISTANCE_RANGE
_DIFF_COL = 3              # row filter: must be strictly below _MAX_DIFF
_FEATURE_START = 4         # first feature column; features run to the last column
_DISTANCE_RANGE = (2, 3.2)
_MAX_DIFF = 0.5


def normalize(a, weights=None):
    """Standardize selected columns of ``a`` in place.

    Each selected column becomes ``(column - mean) / std``.

    Parameters:
        a: 2-D numpy array. It is modified in place, so it should have a
            floating-point dtype.
        weights: dict mapping a column index to either ``None`` (measure the
            statistics on ``a``) or a ``(mean, std)`` pair to reuse. Only the
            columns listed as keys are touched. When the dict itself is
            ``None``, every column of ``a`` is measured and standardized.

    Returns:
        ``(a, weights)``: the same array object, and the weights dict with
        every ``None`` entry replaced by the measured ``(mean, std)``. A dict
        passed in by the caller is updated in place and returned.
    """
    if weights is None:
        weights = dict.fromkeys(range(a.shape[1]))
    for i in weights:
        column = a[:, i]
        if weights[i] is None:
            weights[i] = np.mean(column), np.std(column)
        mean, std = weights[i]
        # A constant column has zero spread; dividing by it would fill the
        # column with NaN/inf. Divide by 1 instead so it is only centred.
        scale = std if std != 0 else 1.0
        a[:, i] = (column - mean) / scale
    return a, weights


def read_normalize(filename, weights=None):
    """Load a CSV file, filter its rows and standardize its feature columns.

    Rows are dropped when any loaded column (the label column included)
    equals the missing-value sentinel -1, when the distance column is not
    strictly between 2 and 3.2, or when the diff column is not below 0.5.

    Parameters:
        filename: path of a comma-separated file; ``#`` starts a comment.
        weights: ``None`` to measure the statistics on this file, or the
            dict returned by an earlier call to reuse its statistics.

    Returns:
        ``(labels, features, weights)`` where ``labels`` is a list of label
        values, ``features`` is a list with one dict per row mapping the
        1-based feature number to its standardized value, and ``weights``
        maps each feature column index to its ``(mean, std)``.
    """
    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # returns a 1-D array and the column indexing below fails.
    a = np.loadtxt(
        filename,
        comments="#",
        delimiter=",",
        usecols=_USECOLS,
        ndmin=2,
    )

    # Remove rows with missing values.
    a = a[~(a == _MISSING_VALUE).any(axis=1)]

    # Filter data on the distance window and the diff ceiling.
    distance = a[:, _DISTANCE_COL]
    diff = a[:, _DIFF_COL]
    low, high = _DISTANCE_RANGE
    a = a[(distance > low) & (distance < high) & (diff < _MAX_DIFF)]

    # Normalize the feature columns only; label and filter columns stay raw.
    if weights is None:
        weights = dict.fromkeys(range(_FEATURE_START, len(_USECOLS)))
    a, weights = normalize(a, weights)

    labels = list(a[:, _LABEL_COL])
    features = [dict(enumerate(row, 1)) for row in a[:, _FEATURE_START:]]
    return labels, features, weights


def main(argv=None):
    """Train on ``argv[1]``, predict on ``argv[2]``; return an exit status.

    Parameters:
        argv: argument list including the program name; defaults to
            ``sys.argv``. Arguments after the second file name are ignored.

    Returns:
        0 after a completed run, 2 when a file name is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        prog = argv[0] if argv else "svm"
        sys.stderr.write("usage: %s TRAIN_CSV TEST_CSV\n" % prog)
        return 2

    # Imported here, by name, so the data helpers above stay importable on
    # machines without LIBSVM and the module namespace is not star-filled.
    from svmutil import svm_predict, svm_train

    train_filename, test_filename = argv[1], argv[2]
    y1, x1, weights = read_normalize(train_filename)
    model = svm_train(y1, x1)
    # Reuse the training statistics so both sets share one scale.
    y2, x2, weights = read_normalize(test_filename, weights=weights)
    # NOTE(review): the returned labels/accuracy/values were never used;
    # LIBSVM's svm_predict is expected to print the accuracy itself.
    svm_predict(y2, x2, model)
    return 0


if __name__ == "__main__":
    sys.exit(main())