diff options
Diffstat (limited to 'data/pair-matching/roc.py')
| -rwxr-xr-x | data/pair-matching/roc.py | 74 |
1 files changed, 74 insertions, 0 deletions
#!/usr/bin/python
"""ROC curves for sketch pair matching (data/pair-matching/roc.py).

Usage: roc.py SKETCH_CSV VAR[,VAR...]

Plots the reference curve stored in ``associatepredict.txt`` and, for every
noise level given on the command line, a ROC curve obtained by thresholding
the Euclidean distance between noisy copies of the sketches in SKETCH_CSV.
"""
import math
import sys

import numpy as np

# Number of random index pairs drawn per batch while looking for unmatched
# pairs.  The original script drew exactly one batch of this size.
_CANDIDATE_BATCH = 2000


def distance(a, b):
    """Return the Euclidean distance between the arrays *a* and *b*."""
    return math.sqrt(np.square(a - b).sum())


def gen_pairs(var, sk_data):
    """Build matched/unmatched distance samples from noisy sketches.

    Two independent noisy copies of the sketches are generated by adding
    zero-mean Gaussian noise (``np.random.normal(0, var, ...)``) to the data.
    For each row index ``j`` the result holds the distance between the two
    noisy copies of the same sketch (a matched pair) and the distance between
    the noisy copies of two different, randomly chosen sketches (an unmatched
    pair).

    Args:
        var: scale passed to ``np.random.normal`` for the additive noise.
        sk_data: 2-D array with one sketch per row.  The first column is
            dropped before use (presumably an identifier or label -- confirm
            against the data file).  The array is NOT modified.

    Returns:
        A list with one ``(matched_distance, unmatched_distance)`` tuple per
        row of *sk_data*.

    Raises:
        ValueError: if *sk_data* is not 2-D or has fewer than 3 rows; with
            fewer rows there are not enough distinct unordered index pairs
            to produce one unmatched pair per row.
    """
    # Work on a shuffled copy so the caller's array keeps its row order.
    shuffled = np.array(sk_data)
    if shuffled.ndim != 2:
        raise ValueError("sk_data must be a 2-D array")
    n = shuffled.shape[0]
    if n < 3:
        raise ValueError("need at least 3 sketches, got %d" % n)
    np.random.shuffle(shuffled)
    sketches = shuffled[:, 1:]

    sk1 = sketches + np.random.normal(0, var, sketches.shape)
    sk2 = sketches + np.random.normal(0, var, sketches.shape)

    # Collect n distinct unordered pairs of different indices.  Candidates
    # are drawn in batches until enough pairs have been accepted, instead of
    # running off the end of a single fixed-size pool.
    seen = set()
    u_pairs = []
    while len(u_pairs) < n:
        for a, b in np.random.randint(0, n, (_CANDIDATE_BATCH, 2)):
            a, b = int(a), int(b)
            key = frozenset((a, b))  # (a, b) and (b, a) count as the same pair
            if a != b and key not in seen:
                seen.add(key)
                u_pairs.append((a, b))
                if len(u_pairs) == n:
                    break

    return [
        (distance(sk1[j], sk2[j]), distance(sk1[a], sk2[b]))
        for j, (a, b) in enumerate(u_pairs)
    ]


def roc_points(result, thresholds, n_folds=4, fold_size=300):
    """Compute fold-averaged true/false positive rates per threshold.

    The samples are split into up to *n_folds* consecutive folds of
    *fold_size* rows (the last used fold may be shorter; rows beyond
    ``n_folds * fold_size`` are ignored, as in the original script).  For
    every threshold the fraction of matched distances below it (true
    positives) and of unmatched distances below it (false positives) is
    computed per fold and averaged over the non-empty folds.

    Args:
        result: sequence of ``(matched_distance, unmatched_distance)`` pairs.
        thresholds: iterable of distance thresholds.
        n_folds: maximum number of folds.
        fold_size: number of rows per fold.

    Returns:
        ``(true_pos, false_pos)``: two lists with one rate per threshold.

    Raises:
        ValueError: if there is no sample to evaluate.
    """
    scores = np.asarray(result, dtype=float).reshape(-1, 2)
    folds = []
    for k in range(n_folds):
        chunk = scores[k * fold_size:(k + 1) * fold_size]
        # Skip empty folds: they would divide by zero and skew the average.
        if len(chunk):
            folds.append(chunk)
    if not folds:
        raise ValueError("no distance pairs to evaluate")

    true_pos = []
    false_pos = []
    for threshold in thresholds:
        true_values = [
            float((fold[:, 0] < threshold).sum()) / len(fold) for fold in folds
        ]
        false_values = [
            float((fold[:, 1] < threshold).sum()) / len(fold) for fold in folds
        ]
        true_pos.append(sum(true_values) / len(folds))
        false_pos.append(sum(false_values) / len(folds))
    return true_pos, false_pos


def main(argv=None):
    """Plot the reference curve and one ROC curve per requested noise level.

    Args:
        argv: argument vector (defaults to ``sys.argv``); ``argv[1]`` is the
            sketch CSV file and ``argv[2]`` a comma-separated list of noise
            scales.

    Returns:
        Process exit status: 0 on success, 2 on a usage error.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        prog = argv[0] if argv else "roc.py"
        sys.stderr.write("usage: %s SKETCH_CSV VAR[,VAR...]\n" % prog)
        return 2

    # Imported here so the helpers above can be used without matplotlib.
    import matplotlib.pyplot as plt

    # NOTE: the eigenfaces baseline ("eigenfaces.txt", space-delimited) was
    # already disabled in the original script and is not plotted.
    ap = np.loadtxt("associatepredict.txt", delimiter=",")
    plt.plot(ap[:, 1], ap[:, 0])
    plt.xlabel("False positive %")
    plt.ylabel("True positive %")

    np.random.seed()
    variances = [float(v) for v in argv[2].split(",")]
    sk_data = np.loadtxt(argv[1], comments="#", delimiter=",")
    thresholds = np.square(np.arange(0, 10, 0.01))
    for v in variances:
        result = gen_pairs(v, sk_data)
        true_pos, false_pos = roc_points(result, thresholds)
        plt.plot(false_pos, true_pos)
    plt.show()
    return 0


if __name__ == "__main__":
    sys.exit(main())
