summary refs log tree commit diff stats
path: root/data/class.py
blob: e7975f99da84b0aced2b1de7a3fb38dedf0f007b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#! /usr/bin/python
import copy
import sys
from svmutil import *

# Target interval for scaled attribute values: a column's minimum maps to
# `lower` and its maximum maps to `upper`.
lower = 0.1
upper = 10


def normalize_instances(instances, ranges=None):
    """Linearly rescale every attribute of *instances* into [lower, upper].

    Args:
        instances: list of dicts mapping attribute -> numeric value. The
            attribute set is read from the first instance, so every
            instance must contain those attributes.
        ranges: optional dict mapping attribute -> [minimum, maximum].
            When given, these bounds are used instead of the bounds
            observed in *instances*.

    Returns:
        When *ranges* is None, a tuple ``(normalized_instances, ranges_dict)``
        where ``ranges_dict`` maps each attribute to the ``[minimum, maximum]``
        observed in *instances*. Otherwise only ``normalized_instances``.
        The input list and its dicts are never modified (a deep copy is
        scaled). An empty *instances* list yields an empty result.

    Raises:
        KeyError: if an instance lacks an attribute of the first instance,
            or *ranges* lacks an attribute.
    """
    normalized_instances = copy.deepcopy(instances)
    compute_ranges = ranges is None
    ranges_dict = {}

    # The attribute set comes from the first instance; with no instances
    # there is nothing to scale.
    attributes = list(normalized_instances[0]) if normalized_instances else []

    for attribute in attributes:
        if compute_ranges:
            column = [instance[attribute] for instance in normalized_instances]
            minimum, maximum = min(column), max(column)
            ranges_dict[attribute] = [minimum, maximum]
        else:
            minimum, maximum = ranges[attribute][0], ranges[attribute][1]
        span = maximum - minimum

        for instance in normalized_instances:
            value = instance[attribute]
            if value == minimum or span == 0:
                # A degenerate range (minimum == maximum) carries no scale
                # information; map everything to `lower` rather than divide
                # by zero.
                instance[attribute] = lower
            elif value == maximum:
                # Assigned explicitly so the maximum is exactly `upper`,
                # free of floating-point rounding.
                instance[attribute] = upper
            else:
                instance[attribute] = (
                    lower + (upper - lower) * (value - minimum) / span
                )

    if compute_ranges:
        return normalized_instances, ranges_dict
    return normalized_instances


def read_file(filename) :
    """Parse labelled feature rows from comma-separated text lines.

    Args:
        filename: despite its name, an iterable of text lines (for example
            an open file object), not a path.

    Each data line must carry the integer class label in field 1 and nine
    numeric features in fields 5 to 13. A feature equal to -1 is treated as
    missing. The header line (first field "# dir") and blank lines are
    skipped. A row is kept only when all nine features are present, and
    every kept line is echoed to standard output.

    Returns:
        A tuple ``(y, x)``: ``y`` is the list of int labels and ``x`` the
        list of dicts mapping feature index (1..9) to its float value, one
        entry per kept row.
    """
    y = []
    x = []
    for line in filename:
        stripped = line.rstrip()
        if not stripped:
            # Blank lines (e.g. a trailing newline) hold no fields to parse.
            continue
        values = stripped.split(',')
        if values[0] == "# dir":
            continue
        features = {}
        for i in range(9):
            value = float(values[i + 5])
            if value != -1.:
                features[i + 1] = value
        if len(features) == 9:
            y.append(int(values[1]))
            x.append(features)
            # Parenthesised single-argument form prints identically under
            # Python 2 and Python 3.
            print(stripped)
    # The caller unpacks the result as `y, x`, so the pair must be returned.
    return (y, x)

# Script entry: the first command-line argument is the training data file.
if len(sys.argv) < 2:
    sys.exit("usage: %s TRAIN_FILE" % sys.argv[0])
train_filename = sys.argv[1]
#test_filename = sys.argv[2]
# Use a context manager so the input file is closed once parsing finishes.
with open(train_filename) as train_file:
    y1,x1 = read_file(train_file)
# Disabled normalisation / training / prediction pipeline, kept for reference.
#x1,ranges = normalize_instances(x1)
#print ranges
#exit(0)
#model = svm_train(y1,x1)
#y2,x2 = read_file(open(test_filename))
#x2 = normalize_instances(x2,ranges)
#p_labels,p_acc,p_vals = svm_predict(y2,x2,model)