aboutsummaryrefslogtreecommitdiffstats
path: root/jpa_test/algorithms.py
blob: 5d28300ef46669f31595357215c2c270577787ab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import numpy as np
import networkx as nx
import cascade_creation
from collections import Counter
from itertools import izip
import convex_optimization
import timeout


def greedy_prediction(G, cascades):
    """
    Returns estimated graph from Greedy algorithm in "Learning Epidemic ..."
    """
    #TODO: This function is deprecated!
    G_hat = cascade_creation.InfluenceGraph(max_proba=None)
    G_hat.add_nodes_from(G.nodes())
    for node in G_hat.nodes():
        unaccounted = np.ones(len(cascades), dtype=bool)
        for t, cascade in izip(xrange(len(cascades)), cascades):
            if not cascade.infection_time(node) or \
                    cascade.infection_time(node)[0] == 0:
                unaccounted[t] = False
        while unaccounted.any():
            tmp = [cascade for boolean, cascade in izip(unaccounted,
                    cascades) if boolean]
            parents = Counter()
            for cascade in tmp:
                    parents += cascade.candidate_infectors(node)
            parent = parents.most_common(1)[0][0]
            G_hat.add_edge(parent, node)
            for t, cascade in izip(xrange(len(cascades)), cascades):
                if (cascade.infection_time(parent) == \
                        [item - 1 for item in cascade.infection_time(node)]):
                    unaccounted[t] = False
    return G_hat


def recovery_l1obj_l2constraint(G, cascades, floor_cstt, passed_function,
                                *args, **kwargs):
    """
    Returns estimated graph from convex program specified by passed_function
    passed_function should have similar structure to ones in convex_optimation
    """
    G_hat = cascade_creation.InfluenceGraph(max_proba=None)
    G_hat.add_nodes_from(G.nodes())
    for node in G_hat.nodes():
        print node
        try:
            M, w = cascade_creation.icc_matrixvector_for_node(cascades, node)
            p_node, __ = passed_function(M,w, *args, **kwargs)
            G_hat = cascade_creation.add_edges_from_proba_vector(G=G_hat,
                        p_node=p_node, node=node, floor_cstt=floor_cstt)
        except timeout.TimeoutError:
            print "TimeoutError, skipping to next node"
    return G_hat


def correctness_measure(G, G_hat, print_values=False):
    """
    Measures correctness of estimated graph G_hat to ground truth G
    """
    edges = set(G.edges())
    edges_hat = set(G_hat.edges())
    fp = edges_hat - edges
    fn = edges - edges_hat
    gp = edges | edges_hat
    if print_values:
        print "False Positives: {}".format(len(fp))
        print "False Negatives: {}".format(len(fn))
        print "Good Positives: {}".format(len(gp))
    return fp, fn, gp


def test():
    """
    unit test
    """
    G = cascade_creation.InfluenceGraph(max_proba = .8)
    G.erdos_init(n = 100, p = .3)
    import time
    t0 = time.time()
    A = cascade_creation.generate_cascades(G, .2, 10000)
    G_hat = recovery_l1obj_l2constraint(G, A,
            passed_function=convex_optimization.l1obj_l2penalization,
            floor_cstt=.1, lbda=100)
    correctness_measure(G, G_hat, print_values=True)


if __name__=="__main__":
    test()