aboutsummaryrefslogtreecommitdiffstats
path: root/simulation/main.py
diff options
context:
space:
mode:
authorjeanpouget-abadie <jean.pougetabadie@gmail.com>2015-11-11 13:39:53 -0500
committerjeanpouget-abadie <jean.pougetabadie@gmail.com>2015-11-11 13:39:53 -0500
commiteab2cc05424475a1b14bf950decde1bae8d8cc9a (patch)
treed93ff5bd5d2aacc7f91f78816a898143e0933a15 /simulation/main.py
parent43b07f68505ee10f2fe9fa94f365b27fb72f953c (diff)
downloadcascades-eab2cc05424475a1b14bf950decde1bae8d8cc9a.tar.gz
moving node-specific methods to new file, adding cascade-level log-likelihood, committing active learning ipython notebook
Diffstat (limited to 'simulation/main.py')
-rw-r--r--simulation/main.py109
1 files changed, 25 insertions, 84 deletions
diff --git a/simulation/main.py b/simulation/main.py
index 3e3de4b..3e458a7 100644
--- a/simulation/main.py
+++ b/simulation/main.py
@@ -1,3 +1,5 @@
+import mleNode as mn
+
import numpy as np
from numpy.linalg import norm
import numpy.random as nr
@@ -9,85 +11,6 @@ from random import random, randint
seaborn.set_style("white")
-def likelihood(p, x, y):
- a = np.dot(x, p)
- return np.log(1. - np.exp(-a[y])).sum() - a[~y].sum()
-
-
-def likelihood_gradient(p, x, y):
- a = np.dot(x, p)
- l = np.log(1. - np.exp(-a[y])).sum() - a[~y].sum()
- g1 = 1. / (np.exp(a[y]) - 1.)
- g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0)
- return l, g
-
-
-def test_gradient(x, y):
- eps = 1e-10
- for i in xrange(x.shape[1]):
- p = 0.5 * np.ones(x.shape[1])
- a = np.dot(x, p)
- g1 = 1. / (np.exp(a[y]) - 1.)
- g = (x[y] * g1[:, np.newaxis]).sum(0) - x[~y].sum(0)
- p[i] += eps
- f1 = likelihood(p, x, y)
- p[i] -= 2 * eps
- f2 = likelihood(p, x, y)
- print g[i], (f1 - f2) / (2 * eps)
-
-
-def infer(x, y):
- def f(p):
- l, g = likelihood_gradient(p, x, y)
- return -l, -g
- x0 = np.ones(x.shape[1])
- bounds = [(1e-10, None)] * x.shape[1]
- return minimize(f, x0, jac=True, bounds=bounds, method="L-BFGS-B").x
-
-
-def bootstrap(x, y, n_iter=100):
- rval = np.zeros((n_iter, x.shape[1]))
- for i in xrange(n_iter):
- indices = np.random.choice(len(y), replace=False, size=int(len(y)*.9))
- rval[i] = infer(x[indices], y[indices])
- return rval
-
-
-def confidence_interval(counts, bins):
- k = 0
- while np.sum(counts[len(counts)/2-k:len(counts)/2+k]) <= .95*np.sum(counts):
- k += 1
- return bins[len(bins)/2-k], bins[len(bins)/2+k]
-
-
-def build_matrix(cascades, node):
-
- def aux(cascade, node):
- xlist, slist = zip(*cascade)
- indices = [i for i, s in enumerate(slist) if s[node] and i >= 1]
- if indices:
- x = np.vstack(xlist[i-1] for i in indices)
- y = np.array([xlist[i][node] for i in indices])
- return x, y
- else:
- return None
-
- pairs = (aux(cascade, node) for cascade in cascades)
- xs, ys = zip(*(pair for pair in pairs if pair))
- x = np.vstack(xs)
- y = np.concatenate(ys)
- return x, y
-
-
-def build_cascade_list(cascades):
- x, s = [], []
- for cascade in cascades:
- xlist, slist = zip(*cascade)
- x.append(xlist)
- s.append(slist)
- return x, s
-
-
def simulate_cascade(x, graph):
"""
Simulate an IC cascade given a graph and initial state.
@@ -120,6 +43,24 @@ def simulate_cascades(n, graph, source=uniform_source):
yield simulate_cascade(x0, graph)
+def build_cascade_list(cascades, collapse=False):
+ x, s = [], []
+ for cascade in cascades:
+ xlist, slist = zip(*cascade)
+ x.append(np.vstack(xlist))
+ s.append(np.vstack(slist))
+ if not collapse:
+ return x, s
+ else:
+ return np.vstack(x), np.vstack(s)
+
+
+def cascadeLkl(graph, infect, sus):
+ # There is a problem with the current implementation
+ a = np.dot(infect, graph)
+ return np.log(1. - np.exp(-a[(infect)*sus])).sum() - a[(~infect)*sus].sum()
+
+
if __name__ == "__main__":
# g = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]])
g = np.array([[0, 0, 1], [0, 0, 0.5], [0, 0, 0]])
@@ -145,17 +86,17 @@ if __name__ == "__main__":
source=lambda graph: source(graph, t))
e = np.zeros(g.shape[0])
for j, s in enumerate(sizes):
- x, y = build_matrix(cascades, 2)
- e += infer(x[:s], y[:s])
+ x, y = mn.build_matrix(cascades, 2)
+ e += mn.infer(x[:s], y[:s])
for i, t in enumerate(thresh):
- plt.plot(sizes, m[:, i], label=str(t))
+ plt.plot(sizes, e[:, i], label=str(t))
plt.legend()
plt.show()
- # conf = bootstrap(x, y, n_iter=100)
+ # conf = mn.bootstrap(x, y, n_iter=100)
# estimand = np.linalg.norm(np.delete(conf - g[0], 0, axis=1), axis=1)
- # error.append(confidence_interval(*np.histogram(estimand, bins=50)))
+ # error.append(mn.confidence_interval(*np.histogram(estimand, bins=50)))
# plt.semilogx(sizes, error)
# plt.show()