#! /usr/bin/python
import copy
import sys
from svmutil import *
import numpy as np
import matplotlib.pyplot as plt

def normalize(a, weights=None):
    """Z-score the columns of a in place.

    weights maps column index -> (mean, std); entries that are None are
    computed from a itself, otherwise the given statistics are reused
    (e.g. to apply training-set scaling to the test set).
    """
    if weights is None:
        # default: normalize every column
        weights = {i: None for i in range(a.shape[1])}

    for i in weights.keys():
        column = a[:, i]
        if weights[i] is None:
            weights[i] = np.mean(column), np.std(column)
        a[:, i] = (column - weights[i][0]) / weights[i][1]
    return a, weights
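
# A minimal sketch of how normalize() can be reused across datasets
# (the arrays below are hypothetical, not from the original data files):
#
#   >>> train = np.array([[1.0, 10.0], [3.0, 30.0]])
#   >>> train, w = normalize(train)           # fits mean/std for each column
#   >>> test = np.array([[2.0, 20.0]])
#   >>> test, _ = normalize(test, weights=w)  # reuses the training statistics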

def read_normalize(filename, weights=None):
    """Load a CSV file, drop rows with missing values, filter, and z-score the features."""
    # column 1 of the file holds the class label; columns 4-15 hold distance,
    # variance, diff and the nine features used for classification
    a = np.loadtxt(filename, comments="#", delimiter=",",
                   usecols=(1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))

    # remove rows with missing values (-1), then filter on distance and diff
    a = np.ma.masked_equal(a, -1)
    a = np.ma.mask_rows(a)
    a = np.ma.compress_rows(a)
    distance = a[:, 1]
    #variance = a[:, 2]
    diff = a[:, 3]
    a = a[np.logical_and(np.logical_and(distance > 2, distance < 3.2), diff < 0.5)]

    # normalize the nine feature columns (indices 4-12 of the loaded array),
    # reusing previously fitted statistics when weights is given
    if weights is None:
        weights = dict(zip(range(4, 13), [None] * 9))
    a, weights = normalize(a, weights)

    # return labels, features as libsvm-style dicts keyed 1..9, and the scaling weights
    return list(a[:, 0]), [dict(zip(range(1, 10), r)) for r in a[:, 4:]], weights

# train an SVM on the first file, then score the second file, scaling the
# test data with the statistics fitted on the training data
train_filename = sys.argv[1]
test_filename = sys.argv[2]

y1, x1, weights = read_normalize(train_filename)
model = svm_train(y1, x1)
y2, x2, weights = read_normalize(test_filename, weights=weights)
p_labels, p_acc, p_vals = svm_predict(y2, x2, model)
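
# Usage sketch (the file names are placeholders, not from the original repo):
# both files are comma-separated, may contain "#" comment lines, and use -1
# to mark missing values, as expected by read_normalize() above.
#
#   python class.py train.csv test.csv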