jpa_test/algorithms.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

import numpy as np
import networkx as nx
import cascade_creation
from collections import Counter
from itertools import izip
import convex_optimization


def greedy_prediction(G, cascades):
    """
    Returns estimated graph from Greedy algorithm in "Learning Epidemic ..."
    TODO: write cleaner code?
    """
    G_hat = cascade_creation.InfluenceGraph(max_proba=None)
    G_hat.add_nodes_from(G.nodes())
    for node in G_hat.nodes():
        unaccounted = np.ones(len(cascades), dtype=bool)
        for t, cascade in izip(xrange(len(cascades)), cascades):
            if not cascade.infection_time(node) or \
                    cascade.infection_time(node)[0] == 0:
                unaccounted[t] = False
        while unaccounted.any():
            tmp = [cascade for boolean, cascade in izip(unaccounted,
                    cascades) if boolean]
            parents = Counter()
            for cascade in tmp:
                    parents += cascade.candidate_infectors(node)
            parent = parents.most_common(1)[0][0]
            G_hat.add_edge(parent, node)
            for t, cascade in izip(xrange(len(cascades)), cascades):
                if (cascade.infection_time(parent) == \
                        [item - 1 for item in cascade.infection_time(node)]):
                    unaccounted[t] = False
    return G_hat


def recovery_l1obj_l2constraint(G, cascades):
    """
    Returns estimated graph from following convex program:
    min |theta_1| + lbda | exp(M theta) -(1- w)|
    where theta = log (1 - p); w = 1_{infected}; lbda = lagrange cstt
    """
    G_hat = cascade_creation.InfluenceGraph(max_proba=None)
    G_hat.add_nodes_from(G.nodes())
    for node in G_hat.nodes():
        M, w = cascade_creation.icc_matrixvector_for_node(cascades, node)
        M = M.astype("float64")
        edges_node = convex_optimization.l1obj_l2constraint(M,w)
        print edges_node


def correctness_measure(G, G_hat):
    """
    Measures correctness of estimated graph G_hat to ground truth G
    """
    edges = set(G.edges())
    edges_hat = set(G_hat.edges())
    fp = edges_hat - edges
    fn = edges - edges_hat
    gp = edges | edges_hat
    return fp, fn, gp


def test():
    """
    unit test
    """
    G = cascade_creation.InfluenceGraph(max_proba = .3)
    G.erdos_init(n = 100, p = .5)
    import time
    t0 = time.time()
    A = cascade_creation.generate_cascades(G, .2, 2)

    recovery_l1obj_l2constraint(G, A)

    G_hat = greedy_prediction(G, A)
    fp, fn, gp = correctness_measure(G, G_hat)
    print "False Positive: {}".format(len(fp))
    print "False Negative: {}".format(len(fn))
    print "Good Positives: {}".format(len(gp))
    t1 = time.time()
    print t1 - t0

if __name__=="__main__":
    test()