1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
|
#!/usr/bin/python
import sys
import numpy as np
# NOTE: normalize() modifies its argument in place.
def normalize(a):
    """Standardize each column of ``a`` to zero mean and unit spread, in place.

    Every column has its mean subtracted and is then divided by its
    standard deviation as computed by ``np.std``.  A column whose standard
    deviation is exactly zero is only centred (it becomes all zeros)
    instead of being divided by zero, which would fill it with NaN.

    The input array is printed to stdout before it is modified.

    Args:
        a: NumPy array indexed as ``a[:, i]``; it is overwritten with the
            standardized values.

    Returns:
        The same array object ``a``.
    """
    # Diagnostic dump of the raw input; the single-argument call form prints
    # identically under Python 2 and Python 3.
    print(a)
    for i in range(a.shape[1]):
        column = a[:, i]
        mean = np.mean(column)
        std = np.std(column)
        if std == 0:
            # Constant column: dividing would give 0/0 = NaN, so only centre it.
            std = 1.0
        a[:, i] = (column - mean) / std
    return a
def knn_search(names, d1, d2, k):
    """Print the ``k`` nearest rows of ``d1`` for every row of ``d2``.

    For each row of ``d2`` the squared Euclidean distance to every row of
    ``d1`` is computed, the ``k`` smallest are selected with ``np.argsort``
    and their labels are looked up in ``names``.  One line is printed per
    ``d2`` row, in the form::

        <name>|<nn_1>,<nn_2>,...|<True or False>

    where ``<name>`` is ``int(names[i])`` for the i-th row of ``d2`` and the
    final flag tells whether that label appears among the neighbours.

    Args:
        names: 1-D sequence of numeric labels.  Neighbour labels are looked
            up by ``d1`` row index, the query's own label by ``d2`` row
            index.
        d1: 2-D array of reference points, one per row.
        d2: 2-D array of query points with the same number of columns.
        k: Number of nearest neighbours to report for each query row.

    Returns:
        None; the results are written to stdout.
    """
    names = np.asarray(names)
    d1 = np.asarray(d1)
    for i, query in enumerate(d2):
        # One vectorised pass over all reference rows.  The square root is
        # skipped because it does not change the ordering of distances.
        distances = ((query - d1) ** 2).sum(axis=1)
        nearest = np.argsort(distances)[:k]
        # Build a real list: it is both joined and searched below, and a
        # Python 3 ``map`` iterator would be exhausted after the first use.
        neighbours = [int(label) for label in names[nearest]]
        name = int(names[i])
        print(str(name) + "|" + ",".join(map(str, neighbours)) + "|"
              + str(name in neighbours))
if __name__ == "__main__":
    # Command line: <script> DATA_FILE NOISE_SCALE
    #   DATA_FILE   comma-separated numeric table; the first column holds the
    #               row label, the remaining columns hold the features.
    #   NOISE_SCALE spread of the Gaussian noise added to the features.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <data_file> <noise_scale>" % sys.argv[0])

    # Called without an argument, so every run draws different noise.
    np.random.seed()

    # Passed as the ``scale`` (standard deviation) argument of
    # np.random.normal below.
    noise_scale = float(sys.argv[2])

    # ndmin=2 keeps the table two-dimensional even when the file contains a
    # single data row, so the column slicing below always works.
    sk_data = np.loadtxt(sys.argv[1], comments="#", delimiter=",", ndmin=2)
    names = sk_data[:, 0]
    sk_data = sk_data[:, 1:]

    # Two independently perturbed copies of the same features.
    noise1 = np.random.normal(0, noise_scale, sk_data.shape)
    noise2 = np.random.normal(0, noise_scale, sk_data.shape)
    sk1 = normalize(sk_data + noise1)
    sk2 = normalize(sk_data + noise2)

    # For each row of the second copy, report its single nearest neighbour in
    # the first copy and whether that neighbour carries the same label.
    knn_search(names, sk1, sk2, 1)
|