1 files changed, 33 insertions, 53 deletions
diff --git a/experiments/ml.pyx b/experiments/ml.pyx
index 9a786db..8dfea70 100644
--- a/experiments/ml.pyx
+++ b/experiments/ml.pyx
@@ -13,43 +13,21 @@ cdef DTYPE_t weight_success(int dist, int dt, DTYPE_t alpha, DTYPE_t delta,
                             DTYPE_t w1, DTYPE_t w2, DTYPE_t w3):
     """weight for successful infection, exponential time model"""
     cdef DTYPE_t structural, temporal, result
-    structural = delta ** (dist)
+    structural = dist * log(delta)
     # structural = plogis(w1,delta) * plogis(w2,delta) * plogis(w3,delta)
-    temporal = exp(-alpha * dt) * (1 - exp(-alpha))
-    result = log(structural * temporal)
+    temporal = log(alpha) - alpha * dt
+    # temporal = 1. / (1. + (dt - 1.)/alpha)**0.01 - 1. / (1. + dt/alpha)**0.01
+    result = structural + temporal
     return result
 
-
-cdef DTYPE_t weight_success_power(int dist, int dt, DTYPE_t alpha, DTYPE_t delta,
-                                DTYPE_t w1, DTYPE_t w2, DTYPE_t w3):
-    """weight for successful infection, power-law time model"""
-    cdef DTYPE_t structural, temporal, result
-    structural = delta ** (dist)
-    # structural = plogis(w1,delta) * plogis(w2,delta) * plogis(w3,delta)
-    temporal = 1. / (1. + (dt - 1.)/alpha)**0.01 - 1. / (1. + dt/alpha)**0.01
-    result = log(structural * temporal)
-    return result
-
-
 cdef DTYPE_t weight_failure(int dist, int dt, DTYPE_t alpha, DTYPE_t delta,
                             DTYPE_t w1, DTYPE_t w2, DTYPE_t w3):
     """weight for failed infection, exponential time model"""
     cdef DTYPE_t structural, temporal, result
-    structural = delta ** (dist)
+    structural = delta ** dist
     # structural = plogis(w1,delta) * plogis(w2,delta) * plogis(w3,delta)
     temporal = 1. - exp(-alpha * dt)
-    #result = log(1. - structural)
-    result = log(1. - structural * temporal)
-    return result
-
-
-cdef DTYPE_t weight_failure_power(int dist, int dt, DTYPE_t alpha, DTYPE_t delta,
-                                DTYPE_t w1, DTYPE_t w2, DTYPE_t w3):
-    """weight for failed infection, power-law time model"""
-    cdef DTYPE_t structural, temporal, result
-    structural = delta ** (dist)
-    # structural = plogis(w1,delta) * plogis(w2,delta) * plogis(w3,delta)
-    temporal = 1.  - 1. / (1. + dt/alpha)**0.01
+    # temporal = 1.  - 1. / (1. + dt/alpha)**0.01
     result = log(1. - structural * temporal)
     return result
 
@@ -60,7 +38,7 @@ def ml(dict root_victims, dict victims, dict non_victims, DTYPE_t age,
         DTYPE_t beta, ll, beta2
         list parents, failures, successes
     n_roots, n_victims = len(root_victims), len(victims)
-    n_nodes = n_roots + n_victims + len(non_victims)
+    n_nodes = 148152
     cdef:
         np.ndarray[DTYPE_t] probs = np.zeros(n_victims, dtype=DTYPE)
         np.ndarray[DTYPE_t] probs_fail = np.zeros(n_victims, dtype=DTYPE)
@@ -88,29 +66,31 @@ def ml(dict root_victims, dict victims, dict non_victims, DTYPE_t age,
         probs_nv[i] = sum(failures)
 
     # calculate log likelihood
-    probs.sort(); probs = probs[::-1] # sort probs in descending order
-    cdef:
-        np.ndarray[DTYPE_t] cums = probs.cumsum()
-    ll =  probs_fail.sum()
-    ll += probs_nv.sum()
+    # probs.sort(); probs = probs[::-1] # sort probs in descending order
+    # cdef:
+    #     np.ndarray[DTYPE_t] cums = probs.cumsum()
+    ll =  probs_fail.sum() # add probability that all edges to victims fail
+    ll += probs_nv.sum() # add probability that all edges to non_victims fail
+
+    max_i = -1
+    max_beta_add = float('-inf')
+    # iterate over all victim nodes to find the optimal threshold
+    for i in xrange(0, n_victims+1, 1):
+        roots = n_roots + n_victims - i
+        beta = float(roots)/float(n_nodes)
+        thresh = log(beta/(1.-beta))
+
+        # add probability for realized edges and subtract probability these edges fail
+        beta_add = (probs[probs>thresh]).sum()
+        print len(probs[probs>thresh])
+        # add probability for the seeds and non-seeds
+        beta_add += roots * log(beta) + (n_nodes-roots) * log(1 - beta)
+
+        if beta_add > max_beta_add:
+            max_i = i
+            max_beta_add = beta_add
 
-    for i in xrange(n_victims - 1, 0, -1):
-        # iterate over all victim nodes to find the optimal threshold
-        roots = n_roots + n_victims - 1 - i
-        beta = 1. / (1. + exp(-probs[i]))#exp(probs[i])#
-        if beta > float(roots) / age:
-            break
-    else:
-        print "alpha: {0}, delta: {1}. Everyone is a root".format(alpha, delta)
-        roots = n_victims + n_roots
-        i = -1
-    beta = float(roots) / age
-    for i in xrange(n_victims - 1, 0, -1):
-        if probs[i] >= log(beta/(1.- beta)):
-            break
-    ll += age * log(1 - beta)
-    if i >= 0:
-        ll += cums[i]
-    if roots > 0:
-        ll += roots * log(beta) - roots * log(1 - beta)
+    ll += max_beta_add
+    roots = n_roots + n_victims - max_i
+    # print n_nodes, n_roots, n_victims, max_i, roots
     return (beta, roots, ll)