summaryrefslogtreecommitdiffstats
path: root/data/pair-matching/roc.py
diff options
context:
space:
mode:
Diffstat (limited to 'data/pair-matching/roc.py')
-rwxr-xr-xdata/pair-matching/roc.py74
1 files changed, 74 insertions, 0 deletions
diff --git a/data/pair-matching/roc.py b/data/pair-matching/roc.py
new file mode 100755
index 0000000..ebf8f68
--- /dev/null
+++ b/data/pair-matching/roc.py
@@ -0,0 +1,74 @@
+#!/usr/bin/python
+import sys
+import numpy as np
+import matplotlib.pyplot as plt
+import math
+from sets import ImmutableSet
+
def distance(a, b):
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        a: array-like of numbers (NumPy array, list or tuple).
        b: array-like broadcastable against ``a``.

    Returns:
        The distance as a plain Python ``float``.
    """
    # Convert to float *before* subtracting: with unsigned-integer arrays
    # ``a - b`` would wrap around silently, and plain lists do not support
    # element-wise subtraction at all.
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return math.sqrt(np.square(diff).sum())
+
def gen_pairs(var, sk_data):
    """Build matched / unmatched distance samples from noisy copies of the data.

    The rows of ``sk_data`` are shuffled (on a copy), the first column is
    dropped, and two independently perturbed copies of the remaining columns
    are created by adding zero-mean Gaussian noise.  For every row index ``j``
    the function then computes

    * the Euclidean distance between the two noisy copies of row ``j``
      (a "matched" pair), and
    * the Euclidean distance between noisy copies of two *different* rows
      (an "unmatched" pair); each unordered index pair is used at most once.

    Args:
        var: noise level.  NOTE: it is handed to ``np.random.normal`` as the
            ``scale`` argument, which NumPy defines as the standard
            deviation, not the variance, despite this parameter's name.
        sk_data: 2-D array-like with one sample per row.  Column 0 is
            discarded (presumably an identifier/label column -- confirm
            against the data file).  The caller's array is not modified.

    Returns:
        A list with one ``(matched_distance, unmatched_distance)`` tuple of
        Python floats per row of ``sk_data``.

    Raises:
        ValueError: if there are fewer than 3 rows, because ``n`` distinct
            unordered pairs of different rows only exist when ``n >= 3``.
    """
    # permutation() shuffles a copy, so the caller's array stays untouched
    # (np.random.shuffle would reorder it in place).
    features = np.random.permutation(sk_data)[:, 1:]
    n_rows = features.shape[0]
    if n_rows < 3:
        raise ValueError(
            "need at least 3 rows to draw %d distinct unmatched pairs"
            % n_rows)

    noisy_a = features + np.random.normal(0, var, features.shape)
    noisy_b = features + np.random.normal(0, var, features.shape)

    # Draw candidate index pairs in batches until enough distinct unordered
    # pairs of different rows were found.  A single fixed batch can run out
    # (many rows, or many duplicate draws), so keep drawing as needed.
    # Plain tuples in a builtin set replace the Python-2-only `sets` module.
    seen = set()
    left_idx = []
    right_idx = []
    while len(left_idx) < n_rows:
        for a, b in np.random.randint(0, n_rows, (2000, 2)).tolist():
            if a == b:
                continue
            key = (a, b) if a < b else (b, a)
            if key in seen:
                continue
            seen.add(key)
            left_idx.append(a)
            right_idx.append(b)
            if len(left_idx) == n_rows:
                break

    # Row-wise Euclidean distances, computed for all pairs at once.
    matched = np.sqrt(np.square(noisy_a - noisy_b).sum(axis=1))
    unmatched = np.sqrt(
        np.square(noisy_a[left_idx] - noisy_b[right_idx]).sum(axis=1))
    return list(zip(matched.tolist(), unmatched.tolist()))
+
def fold_averaged_rates(result, thresholds, n_folds=4, fold_size=300):
    """Average per-fold acceptance rates for every threshold.

    ``result`` is split into up to ``n_folds`` consecutive folds of
    ``fold_size`` rows.  Within each fold the fraction of matched distances
    (column 0) and unmatched distances (column 1) that are strictly below a
    threshold is computed, and the fractions are averaged over the folds
    that actually contain data.  Rows beyond ``n_folds * fold_size`` are
    ignored.

    Args:
        result: sequence of ``(matched_distance, unmatched_distance)`` pairs.
        thresholds: sequence of distance thresholds.
        n_folds: maximum number of folds to evaluate.
        fold_size: number of rows per fold (the last used fold may be shorter).

    Returns:
        ``(true_pos, false_pos)``: two lists of floats, one entry per
        threshold.

    Raises:
        ValueError: if no fold contains any data.
    """
    dists = np.asarray(result, dtype=float).reshape(-1, 2)
    # Column vector so that comparisons broadcast to (n_thresholds, n_rows).
    cuts = np.asarray(thresholds, dtype=float).reshape(-1, 1)
    true_sum = np.zeros(len(cuts))
    false_sum = np.zeros(len(cuts))
    used = 0
    for k in range(n_folds):
        fold = dists[k * fold_size:(k + 1) * fold_size]
        if len(fold) == 0:
            # Data exhausted: skip empty folds instead of dividing by a
            # zero or negative fold size.
            break
        true_sum += (fold[:, 0] < cuts).mean(axis=1)
        false_sum += (fold[:, 1] < cuts).mean(axis=1)
        used += 1
    if used == 0:
        raise ValueError("no distance pairs to evaluate")
    # Divide by the number of folds actually used, not by n_folds.
    return (true_sum / used).tolist(), (false_sum / used).tolist()


if __name__ == "__main__":
    # Usage: <script> <data.csv> <noise1,noise2,...>
    # Plots a reference curve read from "associatepredict.txt" plus one ROC
    # curve per requested noise level.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <data.csv> <noise1,noise2,...>" % sys.argv[0])

    # eg = np.loadtxt("eigenfaces.txt",delimiter=" ")
    ap = np.loadtxt("associatepredict.txt", delimiter=",")
    # plt.plot(eg[:,0],eg[:,1])
    # Column 1 goes on the x axis and column 0 on the y axis (presumably
    # false-positive and true-positive rates -- confirm against the file).
    plt.plot(ap[:, 1], ap[:, 0])
    plt.xlabel("False positive %")
    plt.ylabel("True positive %")

    np.random.seed()
    var = [float(level) for level in sys.argv[2].split(",")]
    sk_data = np.loadtxt(sys.argv[1], comments="#", delimiter=",")

    # Thresholds do not depend on the noise level, so compute them once.
    thresholds = np.square(np.arange(0, 10, 0.01))
    for v in var:
        result = gen_pairs(v, sk_data)
        true_pos, false_pos = fold_averaged_rates(result, thresholds)
        plt.plot(false_pos, true_pos)
    plt.show()
+
+
+
+