aboutsummaryrefslogtreecommitdiffstats
path: root/jpa_test/algorithms.py
blob: 2a32f57b21c0a5650715ef10ba8fa8df1bc2a9ce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import numpy as np
import networkx as nx
import cascade_creation
from collections import Counter

from itertools import izip


def greedy_prediction(G, cascades):
    """
    Returns estimated graph from Greedy algorithm in "Learning Epidemic ..."

    For every node, the cascades in which the node was infected after time 0
    are marked as unaccounted. The most frequent candidate infector over the
    unaccounted cascades is then added as a parent of the node, and every
    cascade in which that parent was infected exactly one step before the
    node is marked as accounted for. This repeats until nothing is left
    unaccounted for the node.

    Args:
        G: ground-truth graph; only ``G.nodes()`` is read.
        cascades: sized, re-iterable collection of cascade objects exposing
            ``infection_time(node)`` and ``candidate_infectors(node)``.
            ``infection_time`` is used here as a (possibly empty) list of
            time steps, and ``candidate_infectors`` is added to a
            ``Counter`` -- NOTE(review): confirm against cascade_creation.

    Returns:
        A ``cascade_creation.InfluenceGraph`` holding the nodes of ``G`` and
        one edge ``(parent, node)`` per greedily selected parent.
    """
    G_hat = cascade_creation.InfluenceGraph(max_proba=None)
    G_hat.add_nodes_from(G.nodes())
    for node in G_hat.nodes():
        # A cascade needs explaining only if the node was infected in it and
        # was not infected at time 0.
        unaccounted = np.ones(len(cascades), dtype=bool)
        for t, cascade in enumerate(cascades):
            times = cascade.infection_time(node)
            if not times or times[0] == 0:
                unaccounted[t] = False
        while unaccounted.any():
            # Tally candidate infectors over the still-unexplained cascades.
            parents = Counter()
            for pending, cascade in zip(unaccounted, cascades):
                if pending:
                    parents += cascade.candidate_infectors(node)
            if not parents:
                # Nobody can explain the remaining cascades; stop instead of
                # failing with an IndexError on the empty most_common() list.
                break
            parent = parents.most_common(1)[0][0]
            G_hat.add_edge(parent, node)
            # NOTE(review): termination relies on the chosen parent explaining
            # at least one pending cascade below -- confirm that
            # candidate_infectors() is consistent with infection_time().
            for t, cascade in enumerate(cascades):
                expected = [item - 1 for item in cascade.infection_time(node)]
                if cascade.infection_time(parent) == expected:
                    unaccounted[t] = False
    return G_hat


def correctness_measure(G, G_hat):
    """
    Measures correctness of estimated graph G_hat to ground truth G

    Edges are compared exactly as returned by ``edges()``, so ``(u, v)`` and
    ``(v, u)`` count as different edges.

    Args:
        G: ground-truth graph exposing ``edges()``.
        G_hat: estimated graph exposing ``edges()``.

    Returns:
        Tuple ``(fp, fn, gp)`` of edge sets:
            fp -- false positives: edges of ``G_hat`` that are not in ``G``
            fn -- false negatives: edges of ``G`` that are not in ``G_hat``
            gp -- good positives: edges present in both graphs
    """
    edges = set(G.edges())
    edges_hat = set(G_hat.edges())
    fp = edges_hat - edges
    fn = edges - edges_hat
    # Correctly recovered edges are the intersection; a union would also
    # count every false positive and false negative as a good positive.
    gp = edges & edges_hat
    return fp, fn, gp


def test():
    """
    unit test

    Builds an InfluenceGraph, generates cascades on it, runs the greedy
    prediction and prints the sizes of the three edge sets returned by
    correctness_measure, followed by the elapsed wall-clock time in seconds
    (cascade generation through the last count being printed).
    """
    import time

    truth = cascade_creation.InfluenceGraph(max_proba=.3)
    truth.erdos_init(n=100, p=.1)

    started = time.time()
    observed = cascade_creation.generate_cascades(truth, .1, 100)
    estimate = greedy_prediction(truth, observed)
    false_pos, false_neg, good_pos = correctness_measure(truth, estimate)

    report = (
        ("False Positive: {}", false_pos),
        ("False Negative: {}", false_neg),
        ("Good Positives: {}", good_pos),
    )
    for template, edge_set in report:
        # Single-argument print(...) behaves the same as the print statement.
        print(template.format(len(edge_set)))

    finished = time.time()
    print(finished - started)

# Run the self-test only when this module is executed as a script.
if __name__ == "__main__":
    test()