Updated model; still getting weird results, though

author: Thibaut Horel <thibaut.horel@gmail.com> 2015-04-06 13:52:02 -0400
committer: Thibaut Horel <thibaut.horel@gmail.com> 2015-04-06 13:52:13 -0400
commit: a29d738721db46b0ca3b7b5b3ffd282ad3f25909 (patch)
tree: 26f881338d1b6e6f44869e9f475920ca8739b8fa /experiments
parent: f0860ef0d66a9b70ac7bc4073716c2ae0f55862a (diff)
download: criminal_cascades-a29d738721db46b0ca3b7b5b3ffd282ad3f25909.tar.gz
2 files changed, 38 insertions, 28 deletions
diff --git a/experiments/ml.pyx b/experiments/ml.pyx
index c6291c5..48d4549 100644
--- a/experiments/ml.pyx
+++ b/experiments/ml.pyx
@@ -1,59 +1,68 @@
 # cython: boundscheck=False, cdivision=True
 import numpy as np
 cimport numpy as np
-from libc.math cimport log
+from libc.math cimport log, exp
 
 DTYPE = np.float64
 ctypedef np.float_t DTYPE_t
 
-cdef DTYPE_t weight_victim(int dist, int dt, DTYPE_t alpha,
+cdef DTYPE_t weight_success(int dist, int dt, DTYPE_t alpha,
                            DTYPE_t delta, DTYPE_t gamma):
-    cdef DTYPE_t structural, temporal
+    cdef DTYPE_t structural, temporal, result
     structural = delta ** dist
-    temporal = (gamma - 1. / alpha) * 1. / (1. + dt / alpha) ** gamma
-    return structural * temporal
+    temporal = exp(-alpha * dt) * (1 - exp(-alpha))
+    result = structural * temporal
+    return result
 
 
-cdef DTYPE_t weight_non_victim(int dist, int t, DTYPE_t alpha,
+cdef DTYPE_t weight_failure(int dist, int dt, DTYPE_t alpha,
                                DTYPE_t delta, DTYPE_t gamma):
-    cdef DTYPE_t structural, temporal
+    cdef DTYPE_t structural, temporal, result
     structural = delta ** dist
-    temporal = 1. - 1. / (1. + (3012. - t) / alpha) ** gamma
-    return 1. - structural * temporal
+    temporal = 1. - exp(-alpha * dt)
+    result = 1. - structural * temporal
+    return result
 
 
 def ml(dict root_victims, dict victims, dict non_victims,
-       DTYPE_t alpha, DTYPE_t delta, DTYPE_t gamma=1.01):
+       DTYPE_t alpha, DTYPE_t delta, DTYPE_t gamma=10):
     cdef:
         int n_roots, n_victims, n_nodes, roots, i, dist, dt, t
-        DTYPE_t beta
-        list parents, parents_weights
+        DTYPE_t beta, all_failures
+        list parents, failures, successes
     n_roots, n_victims = len(root_victims), len(victims)
     n_nodes = n_victims + len(non_victims)
     cdef:
         np.ndarray[DTYPE_t] probs = np.zeros(n_victims, dtype=DTYPE)
+        np.ndarray[DTYPE_t] probs_fail = np.zeros(n_victims, dtype=DTYPE)
         np.ndarray[DTYPE_t] probs_nv = np.zeros(len(non_victims), dtype=DTYPE)
     for i, parents in enumerate(victims.itervalues()):
-        parents_weights = [weight_victim(dist, dt, alpha, delta, gamma)
+        failures = [log(weight_failure(dist, dt, alpha, delta, gamma))
                            for (dist, dt) in parents]
-        probs[i] = max(parents_weights)
+        all_failures = sum(failures)
+        successes = [log(weight_success(dist, dt, alpha, delta, gamma))
+                           for (dist, dt) in parents]
+        probs[i] = max(s - failures[i] for i, s in enumerate(successes))
+        probs_fail[i] = all_failures
+
     for i, parents in enumerate(non_victims.itervalues()):
-        parents_weights = [weight_non_victim(dist, t, alpha, delta, gamma)
-                           for (dist, t) in parents]
-        probs_nv[i] = max(parents_weights)
+        failures = [log(weight_failure(dist, dt, alpha, delta, gamma))
+                           for (dist, dt) in parents]
+        probs_nv[i] = sum(failures)
     probs.sort()
     probs = probs[::-1]
     cdef:
-        np.ndarray[DTYPE_t] betas = probs / (1. + probs)
-        np.ndarray[DTYPE_t] cums = np.log(probs.cumsum())
+        np.ndarray[DTYPE_t] cums = probs.cumsum()
     for i in xrange(n_victims - 1, 0, -1):
-        roots = n_roots + n_victims - 1 - i
-        if betas[i] > roots / float(n_nodes):
+        roots = n_victims - 1 - i
+        beta = 1. / (1. + exp(-probs[i]))
+        if beta > float(roots) / float(n_nodes):
             break
     else:
         print "alpha: {0}, delta: {1}. Everyone is a root".format(alpha, delta)
-        roots = n_roots + n_victims
-    beta = roots / float(n_nodes)
+        roots = n_victims
+    beta = float(roots) / float(n_nodes)
     return (beta, roots,
             roots * log(beta) + (n_nodes - roots) * log(1 - beta) + cums[i]
-            + np.log(probs_nv).sum())
+            + probs_nv.sum()
+            + probs_fail.sum())
diff --git a/experiments/process.py b/experiments/process.py
index 1ec917e..b5b70ca 100644
--- a/experiments/process.py
+++ b/experiments/process.py
@@ -23,7 +23,7 @@ def build_network(filename):
                     victims[from_] = []
             else:
                 from_, to = int(float(row["from"])), int(float(row["to"]))
-                parent = (int(row["dist"]), int(row["t1"]))
+                parent = (int(row["dist"]), 3012 - int(row["t1"]))
                 if to not in victims:
                     non_victims[to] = []
                 non_victims[to].append(parent)
@@ -33,7 +33,7 @@ def build_network(filename):
     for victim in victims.keys():
         if not victims[victim]:
             del victims[victim]
-        root_victims[victim] = []
+            root_victims[victim] = []
     return root_victims, victims, non_victims
 
 
@@ -42,9 +42,10 @@ if __name__ == "__main__":
     #dump((root_victims, victims, non_victims), open("network.pickle", "w"))
     root_victims, victims, non_victims = load(open("network.pickle"))
 
-    alpha = np.arange(1000006., 1000007., 1.)
-    delta = np.arange(0.1, 1., 0.01)
+    alpha = np.arange(0.0000005, 0.00000051, 0.000001)
+    delta = np.arange(1., 1.000001, 0.001)
     with open("out.log", "a") as fh:
         for a, d in product(alpha, delta):
             beta, roots, ll = ml(root_victims, victims, non_victims, a, d)
             fh.write("\t".join(map(str, [a, d, beta, roots, ll])) + "\n")
+            fh.flush()
author	Thibaut Horel <thibaut.horel@gmail.com>	2015-04-06 13:52:02 -0400
committer	Thibaut Horel <thibaut.horel@gmail.com>	2015-04-06 13:52:13 -0400
commit	a29d738721db46b0ca3b7b5b3ffd282ad3f25909 (patch)
tree	26f881338d1b6e6f44869e9f475920ca8739b8fa /experiments
parent	f0860ef0d66a9b70ac7bc4073716c2ae0f55862a (diff)
download	criminal_cascades-a29d738721db46b0ca3b7b5b3ffd282ad3f25909.tar.gz