1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
import numpy as np
import numpy.random as nr
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import seaborn
seaborn.set_style("white")
def likelihood(p, x, y):
a = np.dot(x, p)
return np.log(1. - np.exp(-a[y])).sum() - a[~y].sum()
def likelihood_gradient(p, x, y):
a = np.dot(x, p)
l = np.log(1. - np.exp(-a[y])).sum() - a[~y].sum()
g1 = 1. / (np.exp(a[y]) - 1.)
g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0)
return l, g
def test_gradient(x, y):
eps = 1e-10
for i in xrange(x.shape[1]):
p = 0.5 * np.ones(x.shape[1])
a = np.dot(x, p)
g1 = 1. / (np.exp(a[y]) - 1.)
g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0)
p[i] += eps
f1 = likelihood(p, x, y)
p[i] -= 2 * eps
f2 = likelihood(p, x, y)
print g[i], (f1 - f2) / (2 * eps)
def infer(x, y):
def f(p):
l, g = likelihood_gradient(p, x, y)
return -l, -g
x0 = np.ones(x.shape[1])
bounds = [(0, None)] * x.shape[1]
return minimize(f, x0, jac=True, bounds=bounds, method="L-BFGS-B").x
def bootstrap(x, y, n_iter=100):
rval = np.zeros((n_iter, x.shape[1]))
for i in xrange(n_iter):
indices = np.random.choice(len(y), replace=False, size=int(len(y)*.9))
rval[i] = infer(x[indices], y[indices])
return rval
def confidence_interval(counts, bins):
k = 0
while np.sum(counts[len(counts)/2-k:len(counts)/2+k]) <= .95*np.sum(counts):
k += 1
return bins[len(bins)/2-k], bins[len(bins)/2+k]
def build_matrix(cascades, node):
def aux(cascade, node):
xlist, slist = zip(*cascade)
indices = [i for i, s in enumerate(slist) if s[node] and i >= 1]
if indices:
x = np.vstack(xlist[i-1] for i in indices)
y = np.array([xlist[i][node] for i in indices])
return x, y
else:
return None
pairs = (aux(cascade, node) for cascade in cascades)
xs, ys = zip(*(pair for pair in pairs if pair))
x = np.vstack(xs)
y = np.concatenate(ys)
return x, y
def build_cascade_list(cascades):
x, s = [], []
for cascade in cascades:
xlist, slist = zip(*cascade)
x.append(xlist)
s.append(slist)
return x, s
def simulate_cascade(x, graph):
"""
Simulate an IC cascade given a graph and initial state.
For each time step we yield:
- susc: the nodes susceptible at the beginning of this time step
- x: the subset of susc who became infected
"""
susc = np.ones(graph.shape[0], dtype=bool) # t=0, everyone is susceptible
yield x, susc
while np.any(x):
susc = susc ^ x # nodes infected at previous step are now inactive
if not np.any(susc):
break
x = 1 - np.exp(-np.dot(graph.T, x))
y = nr.random(x.shape[0])
x = (x >= y) & susc
yield x, susc
def simulate_cascades(n, graph):
for _ in xrange(n):
x0 = np.zeros(graph.shape[0], dtype=bool)
x0[nr.randint(0, graph.shape[0])] = True
yield simulate_cascade(x0, graph)
if __name__ == "__main__":
g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]])
p = 0.5
g = np.log(1. / (1 - p * g))
sizes = [100, 500, 1000, 5000, 10000]
error = []
for i in sizes:
cascades = simulate_cascades(i, g)
x, y = build_matrix(cascades, 0)
conf = bootstrap(x, y, n_iter=100)
estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1)
error.append(confidence_interval(*np.histogram(estimand, bins=50)))
plt.semilogx(sizes, error)
plt.show()
|