diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2015-06-07 22:23:39 -0700 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-06-07 22:23:39 -0700 |
| commit | e5dada202c34521618bf82a086093c342841e5e8 (patch) | |
| tree | dd7c640ed2bd77ce5e6b0ae050e6662c21cc3b43 /experiments/ml.pyx | |
| parent | a29d738721db46b0ca3b7b5b3ffd282ad3f25909 (diff) | |
| download | criminal_cascades-e5dada202c34521618bf82a086093c342841e5e8.tar.gz | |
Project cleanup before handing it to Ben
Diffstat (limited to 'experiments/ml.pyx')
| -rw-r--r-- | experiments/ml.pyx | 84 |
1 files changed, 61 insertions, 23 deletions
diff --git a/experiments/ml.pyx b/experiments/ml.pyx index 48d4549..74e5be3 100644 --- a/experiments/ml.pyx +++ b/experiments/ml.pyx @@ -6,63 +6,101 @@ from libc.math cimport log, exp DTYPE = np.float64 ctypedef np.float_t DTYPE_t + cdef DTYPE_t weight_success(int dist, int dt, DTYPE_t alpha, DTYPE_t delta, DTYPE_t gamma): + """weight for successful infection, exponential time model""" cdef DTYPE_t structural, temporal, result - structural = delta ** dist + structural = delta ** (dist) temporal = exp(-alpha * dt) * (1 - exp(-alpha)) - result = structural * temporal + result = log(structural * temporal) + return result + + +cdef DTYPE_t weight_success_power(int dist, int dt, DTYPE_t alpha, + DTYPE_t delta, DTYPE_t gamma): + """weight for successful infection, power-law time model""" + cdef DTYPE_t structural, temporal, result + structural = delta ** (dist) + temporal = 1. / (1. + (dt - 1.)/alpha)**0.01 - 1. / (1. + dt/alpha)**0.01 + result = log(structural * temporal) return result cdef DTYPE_t weight_failure(int dist, int dt, DTYPE_t alpha, DTYPE_t delta, DTYPE_t gamma): + """weight for failed infection, exponential time model""" cdef DTYPE_t structural, temporal, result - structural = delta ** dist + structural = delta ** (dist) temporal = 1. - exp(-alpha * dt) - result = 1. - structural * temporal + #result = log(1. - structural) + result = log(1. - structural * temporal) return result -def ml(dict root_victims, dict victims, dict non_victims, +cdef DTYPE_t weight_failure_power(int dist, int dt, DTYPE_t alpha, + DTYPE_t delta, DTYPE_t gamma): + """weight for failed infection, power-law time model""" + cdef DTYPE_t structural, temporal, result + structural = delta ** (dist) + temporal = 1. - 1. / (1. + dt/alpha)**0.01 + result = log(1. - structural * temporal) + return result + +def ml(dict root_victims, dict victims, dict non_victims, DTYPE_t age, DTYPE_t alpha, DTYPE_t delta, DTYPE_t gamma=10): cdef: - int n_roots, n_victims, n_nodes, roots, i, dist, dt, t - DTYPE_t beta, all_failures + int n_roots, n_victims, n_nodes, roots, i, dist, dt, t, l + DTYPE_t beta, all_failures, ll, beta2 list parents, failures, successes n_roots, n_victims = len(root_victims), len(victims) - n_nodes = n_victims + len(non_victims) + n_nodes = n_victims + len(non_victims) + n_roots cdef: np.ndarray[DTYPE_t] probs = np.zeros(n_victims, dtype=DTYPE) np.ndarray[DTYPE_t] probs_fail = np.zeros(n_victims, dtype=DTYPE) np.ndarray[DTYPE_t] probs_nv = np.zeros(len(non_victims), dtype=DTYPE) for i, parents in enumerate(victims.itervalues()): - failures = [log(weight_failure(dist, dt, alpha, delta, gamma)) - for (dist, dt) in parents] + # for each victim node i, compute the probability that all its parents + # fail to infect it, also computes the probability that its most + # likely parent infects it + failures = [weight_failure(dist, dt, alpha, delta, gamma) + for (dist, dt) in parents] all_failures = sum(failures) - successes = [log(weight_success(dist, dt, alpha, delta, gamma)) - for (dist, dt) in parents] - probs[i] = max(s - failures[i] for i, s in enumerate(successes)) + successes = [weight_success(dist, dt, alpha, delta, gamma) + for (dist, dt) in parents] + probs[i] = max(s - failures[l] for l, s in enumerate(successes)) probs_fail[i] = all_failures for i, parents in enumerate(non_victims.itervalues()): - failures = [log(weight_failure(dist, dt, alpha, delta, gamma)) - for (dist, dt) in parents] + # for each non victim node, compute the probability that all its + # parents fail to infect it + failures = [weight_failure(dist, dt, alpha, delta, gamma) + for (dist, dt) in parents] probs_nv[i] = sum(failures) probs.sort() probs = probs[::-1] cdef: np.ndarray[DTYPE_t] cums = probs.cumsum() + ll = probs_fail.sum() + ll += probs_nv.sum() + for i in xrange(n_victims - 1, 0, -1): - roots = n_victims - 1 - i + # iterate over all victim nodes to find the optimal threshold + roots = n_roots + n_victims - 1 - i beta = 1. / (1. + exp(-probs[i])) - if beta > float(roots) / float(n_nodes): + if beta > float(roots) / age: break else: print "alpha: {0}, delta: {1}. Everyone is a root".format(alpha, delta) - roots = n_victims - beta = float(roots) / float(n_nodes) - return (beta, roots, - roots * log(beta) + (n_nodes - roots) * log(1 - beta) + cums[i] - + probs_nv.sum() - + probs_fail.sum()) + roots = n_victims + n_roots + i = -1 + beta = float(roots) / age + for i in xrange(n_victims - 1, 0, -1): + if probs[i] >= log(beta/(1.- beta)): + break + ll += age * log(1 - beta) + if i >= 0: + ll += cums[i] + if roots > 0: + ll += roots * log(beta) - roots * log(1 - beta) + return (beta, roots, ll) |
