#!/usr/bin/python
"""Nearest-neighbour self-matching test on noisy copies of a data set.

The input is a comma-delimited text file ('#' starts a comment) whose first
column holds a numeric identifier and whose remaining columns hold features.
Two independently noised, column-normalized copies of the features are built,
and for every row of the second copy the identifiers of its nearest rows in
the first copy are printed as ``name|nn1,nn2,...|found``.
"""
import sys

import numpy as np


def normalize(a):
    """Standardize every column of a 2-D array to zero mean and unit std.

    A float64 array is modified in place and returned; any other input is
    first converted to a float64 array (so integer data is not truncated)
    and that new array is returned.

    A column whose standard deviation is zero is only centred (it becomes
    all zeros) instead of being divided by zero, which would turn it into
    NaN and poison every distance computed from it afterwards.
    """
    a = np.asarray(a, dtype=float)
    means = a.mean(axis=0)
    stds = a.std(axis=0)
    # Constant columns: divide by 1 so the centred zeros stay zeros.
    stds = np.where(stds == 0, 1.0, stds)
    a -= means
    a /= stds
    return a


def knn_search(names, d1, d2, k):
    """Print the k nearest rows of ``d1`` for every row of ``d2``.

    ``names[j]`` is the identifier of row ``j`` of ``d1`` and, because the
    two sets are treated as parallel, also of row ``j`` of ``d2``.
    Distances are squared Euclidean. One line is written to stdout per row
    of ``d2``::

        <name>|<nn1>,<nn2>,...|<True or False>

    where the last field says whether the row's own identifier is among
    its k nearest neighbours. Identifiers are printed as integers.
    """
    names = np.asarray(names)
    d1 = np.asarray(d1)
    for i, row2 in enumerate(d2):
        # Squared distance from this query row to every row of d1 at once.
        distance = ((row2 - d1) ** 2).sum(axis=1)
        indexes = np.argsort(distance)[:k]
        # Must be a real list: it is both joined and searched below.
        nn = [int(x) for x in names[indexes]]
        name = int(names[i])
        print("|".join([str(name), ",".join(map(str, nn)), str(name in nn)]))


def main(argv):
    """Run the experiment: ``argv[1]`` is the data file, ``argv[2]`` the noise.

    The noise argument is passed to ``np.random.normal`` as its scale, i.e.
    it is the standard deviation of the added Gaussian noise. Returns the
    process exit status (0 on success, 2 on a usage error).
    """
    if len(argv) < 3:
        sys.stderr.write("usage: %s <data.csv> <noise_std>\n" % argv[0])
        return 2

    np.random.seed()
    var = float(argv[2])
    # ndmin=2 keeps a one-row file two-dimensional so the slicing below works.
    sk_data = np.loadtxt(argv[1], comments="#", delimiter=",", ndmin=2)
    names = sk_data[:, 0]
    sk_data = sk_data[:, 1:]

    noise1 = np.random.normal(0, var, sk_data.shape)
    noise2 = np.random.normal(0, var, sk_data.shape)
    sk1 = normalize(sk_data + noise1)
    sk2 = normalize(sk_data + noise2)
    knn_search(names, sk1, sk2, 1)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))