1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
import numpy as np
import networkx as nx
import cascade_creation
from collections import Counter
import convex_optimization
import timeout
def greedy_prediction(G, cascades):
"""
Returns estimated graph from Greedy algorithm in "Learning Epidemic ..."
Only works for independent cascade model!
"""
G_hat = cascade_creation.InfluenceGraph(max_proba=None, min_proba=None)
G_hat.add_nodes_from(G.nodes())
for node in G_hat.nodes():
print(node)
# Avoid cases where infection time is None or 0
tmp = [cascade for cascade in cascades if cascade.infection_time(node)
[0]]
while tmp:
parents = Counter()
for cascade in tmp:
parents += cascade.candidate_infectors(node)
parent = parents.most_common(1)[0][0]
G_hat.add_edge(parent, node)
tmp = [cascade for cascade in tmp if (
cascade.infection_time(parent)[0] is not None and
cascade.infection_time(parent)[0]+1 not in
cascade.infection_time(node))]
return G_hat
def recovery_passed_function(G, cascades, floor_cstt, passed_function,
*args, **kwargs):
"""
Returns estimated graph from convex program specified by passed_function
passed_function should have similar structure to ones in convex_optimation
"""
G_hat = cascade_creation.InfluenceGraph(max_proba=None)
G_hat.add_nodes_from(G.nodes())
f_x, f_xz = passed_function(*args, **kwargs)
for node in G_hat.nodes():
print(node)
try:
M, w = cascade_creation.icc_matrixvector_for_node(cascades, node)
p_node, _ = convex_optimization.diff_and_opt(M, w, f_x, f_xz)
G_hat = cascade_creation.add_edges_from_proba_vector(G=G_hat,
p_node=p_node, node=node, floor_cstt=floor_cstt)
except timeout.TimeoutError:
print("TimeoutError, skipping to next node")
return G_hat
def correctness_measure(G, G_hat, print_values=True):
"""
Measures correctness of estimated graph G_hat to ground truth G
"""
edges = set(G.edges())
edges_hat = set(G_hat.edges())
fp = len(edges_hat - edges)
fn = len(edges - edges_hat)
tp = len(edges & edges_hat)
tn = G.number_of_nodes() ** 2 - fp - fn - tp
#Other metrics
precision = 1. * tp / (tp + fp)
recall = 1. * tp / (tp + fn)
f1_score = 2.* tp / (2 * tp + fp + fn)
fall_out = 1. * fp / (fp + tn)
if print_values:
print("False Positives: {}".format(fp))
print("False Negatives: {}".format(fn))
print("True Positives: {}".format(tp))
print("True Negatives: {}".format(tn))
print("-------------------------------")
print("Precision: {}".format(precision))
print("Recall: {}".format(recall))
print("F1 score: {}".format(f1_score))
print("Fall Out: {}".format(fall_out))
return fp, fn, tp, tn
def test():
"""
unit test
"""
G = cascade_creation.InfluenceGraph(max_proba = .8)
G.erdos_init(n = 100, p = .2)
import time
t0 = time.time()
A = cascade_creation.generate_cascades(G, .2, 1000)
if 0:
G_hat = greedy_prediction(G, A)
if 1:
G_hat = recovery_passed_function(G, A,
passed_function=convex_optimization.type_lasso,
floor_cstt=.1, lbda=10)
correctness_measure(G, G_hat, print_values=True)
if __name__=="__main__":
test()
|