1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
#! /usr/bin/python
import copy
import sys
from svmutil import *
import numpy as np
import matplotlib.pyplot as plt
def read_normalize(filename, means=None, std=None):
    """Load a comma-separated data file and return standardized SVM inputs.

    Column 1 of the file is used as the label, columns 6-14 as the features
    and column 5 as a per-row variance that is only used for filtering.  A
    row is dropped when its label or any of its features equals -1, or when
    its variance is not strictly between 0.005 and 0.05.

    Every feature column is then standardized as ``(value - mean) / std``.
    Statistics are looked up in ``means`` / ``std`` by column index (1-based;
    index 0 is the label and is never scaled) and are computed from the data
    only for indices that are missing.  The dictionaries returned for a
    training file can therefore be passed back in to scale a test file with
    the same statistics.

    Args:
        filename: Path of the CSV file; ``#`` starts a comment.
        means: Optional dict mapping column index to mean.  Missing entries
            are filled in place.
        std: Optional dict mapping column index to standard deviation.
            Missing entries are filled in place.

    Returns:
        A tuple ``(labels, features, means, std)``: ``labels`` is a list of
        label values, ``features`` is a list with one ``{index: value}`` dict
        per kept row (indices start at 1), and ``means`` / ``std`` are the
        statistics dictionaries that were used.
    """
    # ndmin keeps the arrays 2-D / 1-D even when the file has a single row.
    a = np.loadtxt(filename, comments="#", delimiter=",",
                   usecols=(1, 6, 7, 8, 9, 10, 11, 12, 13, 14), ndmin=2)
    variance = np.loadtxt(filename, comments="#", delimiter=",",
                          usecols=(5,), ndmin=1)

    # Keep only the rows without a -1 entry, and apply the *same* selection
    # to the variance column so both stay aligned row for row.
    valid = ~np.any(a == -1, axis=1)
    a = a[valid]
    variance = variance[valid]

    a = a[np.logical_and(variance > 0.005, variance < 0.05)]
    cols = a.shape[1]

    if means is None:
        means = {}
    if std is None:
        std = {}

    for col in range(1, cols):
        if col not in means:
            means[col] = np.mean(a[:, col])
        if col not in std:
            std[col] = np.std(a[:, col])
        # A constant column has zero spread; dividing by it would fill the
        # column with NaN/inf, so such a column is centred but not scaled.
        scale = std[col] if std[col] != 0 else 1.0
        a[:, col] = (a[:, col] - means[col]) / scale

    labels = list(a[:, 0])
    features = [dict(zip(range(1, cols), row)) for row in a[:, 1:]]
    return labels, features, means, std
# Command line: <script> TRAIN_CSV TEST_CSV
train_filename = sys.argv[1]
test_filename = sys.argv[2]

# Train on the first file.  The column statistics returned here are handed
# to the second read_normalize call below.
y1, x1, means, std = read_normalize(train_filename)
model = svm_train(y1, x1)

# Read the second file with those statistics and run the trained model on it.
y2, x2, means, std = read_normalize(test_filename, means, std)
p_labels, p_acc, p_vals = svm_predict(y2, x2, model)
|