summary | refs | log | tree | commit | diff | stats
path: root/experiments/ml.pyx
diff options
context:
space:
mode:
author: Thibaut Horel <thibaut.horel@gmail.com> 2015-06-07 22:23:39 -0700
committer: Thibaut Horel <thibaut.horel@gmail.com> 2015-06-07 22:23:39 -0700
commit: e5dada202c34521618bf82a086093c342841e5e8 (patch)
tree: dd7c640ed2bd77ce5e6b0ae050e6662c21cc3b43 /experiments/ml.pyx
parent: a29d738721db46b0ca3b7b5b3ffd282ad3f25909 (diff)
download: criminal_cascades-e5dada202c34521618bf82a086093c342841e5e8.tar.gz
Project cleanup before handing it to Ben
Diffstat (limited to 'experiments/ml.pyx')
-rw-r--r-- experiments/ml.pyx 84
1 files changed, 61 insertions, 23 deletions
diff --git a/experiments/ml.pyx b/experiments/ml.pyx
index 48d4549..74e5be3 100644
--- a/experiments/ml.pyx
+++ b/experiments/ml.pyx
@@ -6,63 +6,101 @@ from libc.math cimport log, exp
DTYPE = np.float64
ctypedef np.float_t DTYPE_t
+
cdef DTYPE_t weight_success(int dist, int dt, DTYPE_t alpha,
DTYPE_t delta, DTYPE_t gamma):
+ """weight for successful infection, exponential time model"""
cdef DTYPE_t structural, temporal, result
- structural = delta ** dist
+ structural = delta ** (dist)
temporal = exp(-alpha * dt) * (1 - exp(-alpha))
- result = structural * temporal
+ result = log(structural * temporal)
+ return result
+
+
+cdef DTYPE_t weight_success_power(int dist, int dt, DTYPE_t alpha,
+ DTYPE_t delta, DTYPE_t gamma):
+ """weight for successful infection, power-law time model"""
+ cdef DTYPE_t structural, temporal, result
+ structural = delta ** (dist)
+ temporal = 1. / (1. + (dt - 1.)/alpha)**0.01 - 1. / (1. + dt/alpha)**0.01
+ result = log(structural * temporal)
return result
cdef DTYPE_t weight_failure(int dist, int dt, DTYPE_t alpha,
DTYPE_t delta, DTYPE_t gamma):
+ """weight for failed infection, exponential time model"""
cdef DTYPE_t structural, temporal, result
- structural = delta ** dist
+ structural = delta ** (dist)
temporal = 1. - exp(-alpha * dt)
- result = 1. - structural * temporal
+ #result = log(1. - structural)
+ result = log(1. - structural * temporal)
return result
-def ml(dict root_victims, dict victims, dict non_victims,
+cdef DTYPE_t weight_failure_power(int dist, int dt, DTYPE_t alpha,
+ DTYPE_t delta, DTYPE_t gamma):
+ """weight for failed infection, power-law time model"""
+ cdef DTYPE_t structural, temporal, result
+ structural = delta ** (dist)
+ temporal = 1. - 1. / (1. + dt/alpha)**0.01
+ result = log(1. - structural * temporal)
+ return result
+
+def ml(dict root_victims, dict victims, dict non_victims, DTYPE_t age,
DTYPE_t alpha, DTYPE_t delta, DTYPE_t gamma=10):
cdef:
- int n_roots, n_victims, n_nodes, roots, i, dist, dt, t
- DTYPE_t beta, all_failures
+ int n_roots, n_victims, n_nodes, roots, i, dist, dt, t, l
+ DTYPE_t beta, all_failures, ll, beta2
list parents, failures, successes
n_roots, n_victims = len(root_victims), len(victims)
- n_nodes = n_victims + len(non_victims)
+ n_nodes = n_victims + len(non_victims) + n_roots
cdef:
np.ndarray[DTYPE_t] probs = np.zeros(n_victims, dtype=DTYPE)
np.ndarray[DTYPE_t] probs_fail = np.zeros(n_victims, dtype=DTYPE)
np.ndarray[DTYPE_t] probs_nv = np.zeros(len(non_victims), dtype=DTYPE)
for i, parents in enumerate(victims.itervalues()):
- failures = [log(weight_failure(dist, dt, alpha, delta, gamma))
- for (dist, dt) in parents]
+ # for each victim node i, compute the probability that all its parents
+ # fail to infect it, also computes the probability that its most
+ # likely parent infects it
+ failures = [weight_failure(dist, dt, alpha, delta, gamma)
+ for (dist, dt) in parents]
all_failures = sum(failures)
- successes = [log(weight_success(dist, dt, alpha, delta, gamma))
- for (dist, dt) in parents]
- probs[i] = max(s - failures[i] for i, s in enumerate(successes))
+ successes = [weight_success(dist, dt, alpha, delta, gamma)
+ for (dist, dt) in parents]
+ probs[i] = max(s - failures[l] for l, s in enumerate(successes))
probs_fail[i] = all_failures
for i, parents in enumerate(non_victims.itervalues()):
- failures = [log(weight_failure(dist, dt, alpha, delta, gamma))
- for (dist, dt) in parents]
+ # for each non victim node, compute the probability that all its
+ # parents fail to infect it
+ failures = [weight_failure(dist, dt, alpha, delta, gamma)
+ for (dist, dt) in parents]
probs_nv[i] = sum(failures)
probs.sort()
probs = probs[::-1]
cdef:
np.ndarray[DTYPE_t] cums = probs.cumsum()
+ ll = probs_fail.sum()
+ ll += probs_nv.sum()
+
for i in xrange(n_victims - 1, 0, -1):
- roots = n_victims - 1 - i
+ # iterate over all victim nodes to find the optimal threshold
+ roots = n_roots + n_victims - 1 - i
beta = 1. / (1. + exp(-probs[i]))
- if beta > float(roots) / float(n_nodes):
+ if beta > float(roots) / age:
break
else:
print "alpha: {0}, delta: {1}. Everyone is a root".format(alpha, delta)
- roots = n_victims
- beta = float(roots) / float(n_nodes)
- return (beta, roots,
- roots * log(beta) + (n_nodes - roots) * log(1 - beta) + cums[i]
- + probs_nv.sum()
- + probs_fail.sum())
+ roots = n_victims + n_roots
+ i = -1
+ beta = float(roots) / age
+ for i in xrange(n_victims - 1, 0, -1):
+ if probs[i] >= log(beta/(1.- beta)):
+ break
+ ll += age * log(1 - beta)
+ if i >= 0:
+ ll += cums[i]
+ if roots > 0:
+ ll += roots * log(beta) - roots * log(1 - beta)
+ return (beta, roots, ll)