diff options
| author | Thibaut Horel <thibaut.horel@gmail.com> | 2015-04-06 13:52:02 -0400 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-04-06 13:52:13 -0400 |
| commit | a29d738721db46b0ca3b7b5b3ffd282ad3f25909 (patch) | |
| tree | 26f881338d1b6e6f44869e9f475920ca8739b8fa /experiments | |
| parent | f0860ef0d66a9b70ac7bc4073716c2ae0f55862a (diff) | |
| download | criminal_cascades-a29d738721db46b0ca3b7b5b3ffd282ad3f25909.tar.gz | |
Updated model, still getting weird results though
Diffstat (limited to 'experiments')
| -rw-r--r-- | experiments/ml.pyx | 57 |
| -rw-r--r-- | experiments/process.py | 9 |
2 files changed, 38 insertions, 28 deletions
diff --git a/experiments/ml.pyx b/experiments/ml.pyx index c6291c5..48d4549 100644 --- a/experiments/ml.pyx +++ b/experiments/ml.pyx @@ -1,59 +1,68 @@ # cython: boundscheck=False, cdivision=True import numpy as np cimport numpy as np -from libc.math cimport log +from libc.math cimport log, exp DTYPE = np.float64 ctypedef np.float_t DTYPE_t -cdef DTYPE_t weight_victim(int dist, int dt, DTYPE_t alpha, +cdef DTYPE_t weight_success(int dist, int dt, DTYPE_t alpha, DTYPE_t delta, DTYPE_t gamma): - cdef DTYPE_t structural, temporal + cdef DTYPE_t structural, temporal, result structural = delta ** dist - temporal = (gamma - 1. / alpha) * 1. / (1. + dt / alpha) ** gamma - return structural * temporal + temporal = exp(-alpha * dt) * (1 - exp(-alpha)) + result = structural * temporal + return result -cdef DTYPE_t weight_non_victim(int dist, int t, DTYPE_t alpha, +cdef DTYPE_t weight_failure(int dist, int dt, DTYPE_t alpha, DTYPE_t delta, DTYPE_t gamma): - cdef DTYPE_t structural, temporal + cdef DTYPE_t structural, temporal, result structural = delta ** dist - temporal = 1. - 1. / (1. + (3012. - t) / alpha) ** gamma - return 1. - structural * temporal + temporal = 1. - exp(-alpha * dt) + result = 1. 
- structural * temporal + return result def ml(dict root_victims, dict victims, dict non_victims, - DTYPE_t alpha, DTYPE_t delta, DTYPE_t gamma=1.01): + DTYPE_t alpha, DTYPE_t delta, DTYPE_t gamma=10): cdef: int n_roots, n_victims, n_nodes, roots, i, dist, dt, t - DTYPE_t beta - list parents, parents_weights + DTYPE_t beta, all_failures + list parents, failures, successes n_roots, n_victims = len(root_victims), len(victims) n_nodes = n_victims + len(non_victims) cdef: np.ndarray[DTYPE_t] probs = np.zeros(n_victims, dtype=DTYPE) + np.ndarray[DTYPE_t] probs_fail = np.zeros(n_victims, dtype=DTYPE) np.ndarray[DTYPE_t] probs_nv = np.zeros(len(non_victims), dtype=DTYPE) for i, parents in enumerate(victims.itervalues()): - parents_weights = [weight_victim(dist, dt, alpha, delta, gamma) + failures = [log(weight_failure(dist, dt, alpha, delta, gamma)) for (dist, dt) in parents] - probs[i] = max(parents_weights) + all_failures = sum(failures) + successes = [log(weight_success(dist, dt, alpha, delta, gamma)) + for (dist, dt) in parents] + probs[i] = max(s - failures[i] for i, s in enumerate(successes)) + probs_fail[i] = all_failures + for i, parents in enumerate(non_victims.itervalues()): - parents_weights = [weight_non_victim(dist, t, alpha, delta, gamma) - for (dist, t) in parents] - probs_nv[i] = max(parents_weights) + failures = [log(weight_failure(dist, dt, alpha, delta, gamma)) + for (dist, dt) in parents] + probs_nv[i] = sum(failures) probs.sort() probs = probs[::-1] cdef: - np.ndarray[DTYPE_t] betas = probs / (1. + probs) - np.ndarray[DTYPE_t] cums = np.log(probs.cumsum()) + np.ndarray[DTYPE_t] cums = probs.cumsum() for i in xrange(n_victims - 1, 0, -1): - roots = n_roots + n_victims - 1 - i - if betas[i] > roots / float(n_nodes): + roots = n_victims - 1 - i + beta = 1. / (1. + exp(-probs[i])) + if beta > float(roots) / float(n_nodes): break else: print "alpha: {0}, delta: {1}. 
Everyone is a root".format(alpha, delta) - roots = n_roots + n_victims - beta = roots / float(n_nodes) + roots = n_victims + beta = float(roots) / float(n_nodes) return (beta, roots, roots * log(beta) + (n_nodes - roots) * log(1 - beta) + cums[i] - + np.log(probs_nv).sum()) + + probs_nv.sum() + + probs_fail.sum()) diff --git a/experiments/process.py b/experiments/process.py index 1ec917e..b5b70ca 100644 --- a/experiments/process.py +++ b/experiments/process.py @@ -23,7 +23,7 @@ def build_network(filename): victims[from_] = [] else: from_, to = int(float(row["from"])), int(float(row["to"])) - parent = (int(row["dist"]), int(row["t1"])) + parent = (int(row["dist"]), 3012 - int(row["t1"])) if to not in victims: non_victims[to] = [] non_victims[to].append(parent) @@ -33,7 +33,7 @@ def build_network(filename): for victim in victims.keys(): if not victims[victim]: del victims[victim] - root_victims[victim] = [] + root_victims[victim] = [] return root_victims, victims, non_victims @@ -42,9 +42,10 @@ if __name__ == "__main__": #dump((root_victims, victims, non_victims), open("network.pickle", "w")) root_victims, victims, non_victims = load(open("network.pickle")) - alpha = np.arange(1000006., 1000007., 1.) - delta = np.arange(0.1, 1., 0.01) + alpha = np.arange(0.0000005, 0.00000051, 0.000001) + delta = np.arange(1., 1.000001, 0.001) with open("out.log", "a") as fh: for a, d in product(alpha, delta): beta, roots, ll = ml(root_victims, victims, non_victims, a, d) fh.write("\t".join(map(str, [a, d, beta, roots, ll])) + "\n") + fh.flush() |
