1 files changed, 116 insertions, 0 deletions
diff --git a/data/pair-matching/roc.py b/data/pair-matching/roc.py
new file mode 100755
index 0000000..ebf8f68
--- /dev/null
+++ b/data/pair-matching/roc.py
@@ -0,0 +1,116 @@
+#!/usr/bin/python
+"""Plot ROC curves for matching pairs of noise-perturbed feature vectors.
+
+Usage: roc.py DATA_CSV SCALE[,SCALE...]
+
+DATA_CSV is a comma-delimited numeric table ('#' starts a comment); its first
+column is dropped before use.  One curve is drawn per noise scale, on top of
+the reference curve read from "associatepredict.txt" in the working directory.
+"""
+import sys
+import numpy as np
+import matplotlib.pyplot as plt
+import math
+
+# Rows per evaluation fold; per-fold rates are averaged to give one curve.
+FOLD_SIZE = 300
+
+
+def distance(a, b):
+    """Return the Euclidean (L2) distance between arrays a and b."""
+    return math.sqrt(np.square(a - b).sum())
+
+
+def gen_pairs(var, sk_data):
+    """Return one (matched, unmatched) distance tuple per row of sk_data.
+
+    Two noisy copies of the data are made by adding independent zero-mean
+    Gaussian noise.  The matched distance compares a row's two noisy copies;
+    the unmatched distance compares noisy copies of two different rows, and
+    no unordered row pair is used twice.
+
+    var     -- noise scale handed to np.random.normal, i.e. a standard
+               deviation despite the name (NOTE(review): confirm a variance
+               was not intended).
+    sk_data -- 2-D array; it is not modified, and column 0 is ignored
+               (presumably an id or label -- verify against the data file).
+
+    Raises ValueError for 1 or 2 rows, which cannot supply enough distinct
+    unmatched pairs.
+    """
+    # permutation() shuffles a copy, so repeated calls never reorder the
+    # caller's array the way an in-place shuffle would.
+    rows = np.random.permutation(sk_data)[:, 1:]
+    n = rows.shape[0]
+    if 0 < n < 3:
+        raise ValueError("need at least 3 rows, got %d" % n)
+    sk1 = rows + np.random.normal(0, var, rows.shape)
+    sk2 = rows + np.random.normal(0, var, rows.shape)
+
+    # Draw candidates in batches until n unique pairs exist; a fixed-size
+    # pool can run dry on large inputs or after many rejections.
+    seen = set()
+    u_pairs = []
+    while len(u_pairs) < n:
+        for a, b in np.random.randint(0, n, (n, 2)).tolist():
+            key = (a, b) if a < b else (b, a)
+            if a == b or key in seen:
+                continue
+            seen.add(key)
+            u_pairs.append((a, b))
+            if len(u_pairs) == n:
+                break
+
+    return [(distance(sk1[j], sk2[j]), distance(sk1[a], sk2[b]))
+            for j, (a, b) in enumerate(u_pairs)]
+
+
+def _roc_points(result, thresholds, fold_size=FOLD_SIZE):
+    """Return (false_pos, true_pos) rate arrays, one entry per threshold.
+
+    result is the list produced by gen_pairs.  Rates are computed within
+    consecutive folds of fold_size pairs (the last may be shorter) and then
+    averaged with equal weight per fold.  Raises ValueError if result is empty.
+    """
+    dists = np.asarray(result, dtype=float).reshape(-1, 2)
+    true_rates = []
+    false_rates = []
+    for start in range(0, len(dists), fold_size):
+        fold = dists[start:start + fold_size]
+        # Broadcast to (rows, 2, thresholds), then average over the rows.
+        rates = (fold[:, :, np.newaxis] < thresholds).mean(axis=0)
+        true_rates.append(rates[0])
+        false_rates.append(rates[1])
+    if not true_rates:
+        raise ValueError("no pairs to evaluate")
+    return np.mean(false_rates, axis=0), np.mean(true_rates, axis=0)
+
+
+def main(argv):
+    """Plot the reference curve and one ROC curve per noise scale."""
+    if len(argv) < 3:
+        sys.exit("usage: %s DATA_CSV SCALE[,SCALE...]" % argv[0])
+
+    # Reference curve: column 0 is plotted as true positives, column 1 as
+    # false positives.  The eigenfaces curve below is currently disabled.
+    # eg = np.loadtxt("eigenfaces.txt", delimiter=" ")
+    # plt.plot(eg[:, 0], eg[:, 1])
+    ap = np.loadtxt("associatepredict.txt", delimiter=",")
+    plt.plot(ap[:, 1], ap[:, 0])
+    plt.xlabel("False positive %")
+    plt.ylabel("True positive %")
+
+    np.random.seed()
+    scales = [float(s) for s in argv[2].split(",")]
+    sk_data = np.loadtxt(argv[1], comments="#", delimiter=",")
+    # Squaring packs the thresholds densely near zero: 0, 0.0001, ... ~99.8.
+    thresholds = np.square(np.arange(0, 10, 0.01))
+    for scale in scales:
+        result = gen_pairs(scale, sk_data)
+        false_pos, true_pos = _roc_points(result, thresholds)
+        plt.plot(false_pos, true_pos)
+    plt.show()
+
+
+if __name__ == "__main__":
+    main(sys.argv)