diff options
| -rwxr-xr-x | data/nn-goldman.py | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/data/nn-goldman.py b/data/nn-goldman.py new file mode 100755 index 0000000..0a04e7a --- /dev/null +++ b/data/nn-goldman.py @@ -0,0 +1,52 @@ +#!/usr/bin/python +import sys +import random +import numpy as np + + +def quadratic_knn_search(data, lidx, ldata, K): + """ find K nearest neighbours of data among ldata """ + ndata = ldata.shape[1] + param = ldata.shape[0] + K = K if K < ndata else ndata + retval = [] + sqd = ((ldata - data[:,:ndata])**2).sum(axis=0) # data.reshape((param,1)).repeat(ndata, axis=1); + idx = np.argsort(sqd, kind='mergesort') + idx = idx[:K] + return zip(sqd[idx], lidx[idx]) + +def normalize(a,weights=None): + if weights == None: + weights= {} + cols = a.shape[1] + for i in range(cols): + weights[i] = None + + for i in weights.keys(): + column = a[:,i] + if weights[i] == None: + weights[i] = np.mean(column), np.std(column) + a[:,i] = (column-weights[i][0])/weights[i][1] + return a + +def knn_search( data1, data2, K ): + """ find the K nearest neighbours for data points in data, + using O(n**2) search """ + ndata = data1.shape[1] + knn = [] + idx = np.arange(ndata) + for i in np.arange(ndata): + _knn = quadratic_knn_search(data1[:,i], idx, data2, K+1) # see above + knn.append( _knn[1:] ) + return knn + +if __name__ == "__main__": + random.seed() + sk_data = normalize(np.loadtxt(sys.argv[1],comments="#",delimiter=",",usecols=range(1,7,1)).T) + gaussify = np.vectorize(lambda x: x+random.gauss(0,float(sys.argv[2]))) + sk1 = gaussify(sk_data) + sk2 = gaussify(sk_data) + print sk1 + print sk2 + print knn_search(sk1,sk2,1) + |
