#!/usr/bin/python import sys import random import numpy as np def quadratic_knn_search(data, lidx, ldata, K): """ find K nearest neighbours of data among ldata """ ndata = ldata.shape[1] param = ldata.shape[0] K = K if K < ndata else ndata retval = [] sqd = ((ldata - data[:,:ndata])**2).sum(axis=0) # data.reshape((param,1)).repeat(ndata, axis=1); idx = np.argsort(sqd, kind='mergesort') idx = idx[:K] return zip(sqd[idx], lidx[idx]) def normalize(a,weights=None): if weights == None: weights= {} cols = a.shape[1] for i in range(cols): weights[i] = None for i in weights.keys(): column = a[:,i] if weights[i] == None: weights[i] = np.mean(column), np.std(column) a[:,i] = (column-weights[i][0])/weights[i][1] return a def knn_search( data1, data2, K ): """ find the K nearest neighbours for data points in data, using O(n**2) search """ ndata = data1.shape[1] knn = [] idx = np.arange(ndata) for i in np.arange(ndata): _knn = quadratic_knn_search(data1[:,i], idx, data2, K+1) # see above knn.append( _knn[1:] ) return knn if __name__ == "__main__": random.seed() sk_data = normalize(np.loadtxt(sys.argv[1],comments="#",delimiter=",",usecols=range(1,7,1)).T) gaussify = np.vectorize(lambda x: x+random.gauss(0,float(sys.argv[2]))) sk1 = gaussify(sk_data) sk2 = gaussify(sk_data) print sk1 print sk2 print knn_search(sk1,sk2,1)