1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
#! /usr/bin/python
import copy
import sys
from svmutil import *
# Bounds of the interval every attribute is rescaled to by normalize_instances().
lower = 0.1
upper = 10


def normalize_instances(instances, ranges=None):
    """Linearly rescale every attribute of ``instances`` to [lower, upper].

    ``instances`` is a list of dicts mapping attribute -> numeric value.  The
    attribute names are taken from the first instance, so every instance must
    provide at least those keys.  The input is never modified; a deep copy is
    scaled and returned.

    When ``ranges`` is None the per-attribute minimum and maximum are computed
    from ``instances`` and the function returns the tuple
    ``(normalized_instances, ranges_dict)`` where ``ranges_dict`` maps each
    attribute to ``[minimum, maximum]``.

    When ``ranges`` is given (a mapping attribute -> [minimum, maximum], as
    produced by a previous call) those bounds are applied instead and only the
    list of normalized instances is returned.  Values outside the supplied
    bounds are extrapolated linearly and may fall outside [lower, upper].

    An empty ``instances`` list yields an empty result instead of an
    IndexError.
    """
    normalized_instances = copy.deepcopy(instances)
    compute_ranges = ranges is None
    ranges_dict = {}

    # Without a first instance there are no attribute names to iterate over.
    if normalized_instances:
        for attribute in list(normalized_instances[0]):
            column = [instance[attribute] for instance in normalized_instances]
            if compute_ranges:
                minimum = min(column)
                maximum = max(column)
                ranges_dict[attribute] = [minimum, maximum]
            else:
                minimum = ranges[attribute][0]
                maximum = ranges[attribute][1]
            span = maximum - minimum

            for instance, value in zip(normalized_instances, column):
                if value == minimum or span == 0:
                    # Exact bound, or a degenerate (constant) range: there is
                    # no slope to apply, so pin the value to the lower bound
                    # rather than dividing by zero.
                    scaled = lower
                elif value == maximum:
                    # Assign the bound directly to avoid float rounding.
                    scaled = upper
                else:
                    scaled = lower + (upper - lower) * (value - minimum) / span
                instance[attribute] = scaled

    if compute_ranges:
        return normalized_instances, ranges_dict
    return normalized_instances
def read_file(filename):
    """Parse comma-separated rows into labels and feature dictionaries.

    Despite its name, ``filename`` is an iterable of text lines (for example
    an open file object), not a path.

    Each line is stripped of trailing whitespace and split on commas.  Blank
    lines and lines whose first field is "# dir" are skipped.  For every other
    line, fields 5 through 13 are parsed as floats; a value of -1 is treated
    as a missing feature.  Only rows with all nine features present are kept:
    the integer in field 1 becomes the label and the features are stored in a
    dict keyed 1..9.  Every kept row is also echoed to standard output.

    Returns the tuple ``(y, x)`` where ``y`` is the list of integer labels and
    ``x`` the list of feature dicts, in input order.
    """
    y = []
    x = []
    for line in filename:
        row = line.rstrip()
        if not row:
            # A blank line has no fields to index; skip it instead of crashing.
            continue
        values = row.split(',')
        if values[0] == "# dir":
            continue

        features = {}
        for i in range(9):
            value = float(values[i + 5])
            if value != -1.:
                features[i + 1] = value

        if len(features) == 9:
            y.append(int(values[1]))
            x.append(features)
            # NOTE(review): the original indentation was lost; the echo is
            # assumed to apply only to rows that are kept -- confirm.
            print(row)

    # Optional debug aid: dump the kept rows in libsvm text format.
    #for a,b in zip(y,x):
    #    result = str(a)
    #    for i in range(9):
    #        result += " "+str(i+1)+":"+str(b[i+1])
    #    print(result)

    # The caller unpacks two values, so the result must actually be returned.
    return (y, x)
# Path of the training data, taken from the first command-line argument.
train_filename = sys.argv[1]
#test_filename = sys.argv[2]
# Use a context manager so the input file is closed even if parsing fails,
# instead of leaking the handle returned by a bare open().
with open(train_filename) as train_file:
    y1, x1 = read_file(train_file)
# Disabled train/predict pipeline, kept for reference:
#x1,ranges = normalize_instances(x1)
#print ranges
#exit(0)
#model = svm_train(y1,x1)
#y2,x2 = read_file(open(test_filename))
#x2 = normalize_instances(x2,ranges)
#p_labels,p_acc,p_vals = svm_predict(y2,x2,model)
|