data/pair-matching/roc.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74

#!/usr/bin/python
import sys
import numpy as np
import matplotlib.pyplot as plt
import math
from sets import ImmutableSet

def distance(a,b):
    """Return the Euclidean (L2) distance between arrays ``a`` and ``b``.

    Both arguments must support element-wise subtraction (e.g. numpy
    arrays of the same shape). The result is a plain Python float.
    """
    delta = a - b
    sum_of_squares = np.square(delta).sum()
    return math.sqrt(sum_of_squares)

def gen_pairs(var,sk_data):
    """Build matched / unmatched distance samples from noisy copies of the data.

    The rows of ``sk_data`` are shuffled (on a private copy), the first
    column is dropped, and two noisy versions of the remaining columns are
    created by adding independent zero-mean Gaussian noise. For every row
    index ``j`` the result contains

    * the Euclidean distance between row ``j`` of the first noisy copy and
      row ``j`` of the second one (a "matched" pair), and
    * the Euclidean distance between two *different* rows, one taken from
      each noisy copy (an "unmatched" pair). Unmatched index pairs are drawn
      at random and are unique regardless of order.

    Args:
        var: Noise level. NOTE: it is passed to ``np.random.normal`` as the
            ``scale`` argument, i.e. it is used as a standard deviation even
            though the name suggests a variance.
        sk_data: 2-D numpy array, one sample per row. The caller's array is
            not modified.

    Returns:
        A list with one ``(matched_distance, unmatched_distance)`` tuple of
        floats per input row; an empty list if there are no rows.

    Raises:
        ValueError: if there are 1 or 2 rows, because fewer distinct
            unordered pairs exist than are required (one per row).
    """
    # Number of candidate index pairs drawn per call to the random generator.
    batch_size = 2000

    # Work on a copy: np.random.shuffle reorders its argument in place and
    # the caller keeps using its array afterwards.
    rows = np.array(sk_data)
    n_rows = rows.shape[0]
    if n_rows == 0:
        return []
    if n_rows < 3:
        raise ValueError(
            "need at least 3 rows to draw %d unique unmatched pairs" % n_rows)

    np.random.shuffle(rows)
    rows = rows[:, 1:]
    sk1 = rows + np.random.normal(0, var, rows.shape)
    sk2 = rows + np.random.normal(0, var, rows.shape)

    # Collect n_rows unique unordered pairs of distinct row indices. Further
    # batches are drawn on demand, so the candidate pool cannot run dry.
    seen = set()
    left = []
    right = []
    while len(left) < n_rows:
        candidates = np.random.randint(0, n_rows, (batch_size, 2)).tolist()
        for first, second in candidates:
            key = frozenset((first, second))
            if first != second and key not in seen:
                seen.add(key)
                left.append(first)
                right.append(second)
                if len(left) == n_rows:
                    break

    # Same Euclidean distance as distance(), computed for all pairs at once.
    matched = np.sqrt(np.square(sk1 - sk2).sum(axis=1))
    unmatched = np.sqrt(np.square(sk1[left] - sk2[right]).sum(axis=1))
    return list(zip(matched.tolist(), unmatched.tolist()))

def _fold_averaged_rates(result, thresholds, fold_size=300, max_folds=4):
    """Compute fold-averaged ROC points from matched/unmatched distances.

    Args:
        result: sequence of ``(matched_distance, unmatched_distance)`` pairs.
        thresholds: iterable of distance thresholds to evaluate.
        fold_size: number of consecutive pairs per fold.
        max_folds: upper bound on the number of folds; pairs beyond
            ``fold_size * max_folds`` are ignored.

    Returns:
        ``(false_pos, true_pos)``: two lists with one value per threshold.
        A true positive is a matched distance below the threshold, a false
        positive an unmatched distance below it. Each value is the mean of
        the per-fold rates, taken over the folds that actually contain data
        (the last fold may be shorter than ``fold_size``).

    Raises:
        ValueError: if ``result`` is empty.
    """
    distances = np.asarray(result, dtype=float).reshape(-1, 2)
    distances = distances[:fold_size * max_folds]
    folds = [distances[start:start + fold_size]
             for start in range(0, len(distances), fold_size)]
    if not folds:
        raise ValueError("no distance pairs to evaluate")

    true_pos = []
    false_pos = []
    for threshold in thresholds:
        # Column 0: matched pairs accepted; column 1: unmatched pairs accepted.
        per_fold = [(fold < threshold).mean(axis=0) for fold in folds]
        matched_rate, unmatched_rate = np.mean(per_fold, axis=0)
        true_pos.append(float(matched_rate))
        false_pos.append(float(unmatched_rate))
    return false_pos, true_pos


if __name__ == "__main__":
    # Usage: <script> <data file> <comma-separated noise levels>
    if len(sys.argv) < 3:
        sys.exit("usage: %s <sk_data.csv> <var[,var,...]>" % sys.argv[0])

    # Reference curve read from a fixed file: column 1 is plotted on the
    # x axis and column 0 on the y axis. The eigenfaces curve is disabled.
#    eg = np.loadtxt("eigenfaces.txt",delimiter=" ")
    ap = np.loadtxt("associatepredict.txt",delimiter=",")
#    plt.plot(eg[:,0],eg[:,1])
    plt.plot(ap[:,1],ap[:,0])
    plt.xlabel("False positive %")
    plt.ylabel("True positive %")

    # Re-seed from a fresh entropy source so every run draws different noise.
    np.random.seed()
    var = [float(level) for level in sys.argv[2].split(",")]
    sk_data  = np.loadtxt(sys.argv[1],comments="#",delimiter=",")

    # The threshold grid does not depend on the noise level; build it once.
    thresholds = np.square(np.arange(0,10,0.01))
    for v in var:
        result = gen_pairs(v,sk_data)
        false_pos, true_pos = _fold_averaged_rates(result, thresholds)
        plt.plot(false_pos,true_pos)
    plt.show()