diff options
Diffstat (limited to 'experiments/ml.pyx')
| -rw-r--r-- | experiments/ml.pyx | 86 |
1 files changed, 33 insertions, 53 deletions
diff --git a/experiments/ml.pyx b/experiments/ml.pyx index 9a786db..8dfea70 100644 --- a/experiments/ml.pyx +++ b/experiments/ml.pyx @@ -13,43 +13,21 @@ cdef DTYPE_t weight_success(int dist, int dt, DTYPE_t alpha, DTYPE_t delta, DTYPE_t w1, DTYPE_t w2, DTYPE_t w3): """weight for successful infection, exponential time model""" cdef DTYPE_t structural, temporal, result - structural = delta ** (dist) + structural = dist * log(delta) # structural = plogis(w1,delta) * plogis(w2,delta) * plogis(w3,delta) - temporal = exp(-alpha * dt) * (1 - exp(-alpha)) - result = log(structural * temporal) + temporal = log(alpha) - alpha * dt + # temporal = 1. / (1. + (dt - 1.)/alpha)**0.01 - 1. / (1. + dt/alpha)**0.01 + result = structural + temporal return result - -cdef DTYPE_t weight_success_power(int dist, int dt, DTYPE_t alpha, DTYPE_t delta, - DTYPE_t w1, DTYPE_t w2, DTYPE_t w3): - """weight for successful infection, power-law time model""" - cdef DTYPE_t structural, temporal, result - structural = delta ** (dist) - # structural = plogis(w1,delta) * plogis(w2,delta) * plogis(w3,delta) - temporal = 1. / (1. + (dt - 1.)/alpha)**0.01 - 1. / (1. + dt/alpha)**0.01 - result = log(structural * temporal) - return result - - cdef DTYPE_t weight_failure(int dist, int dt, DTYPE_t alpha, DTYPE_t delta, DTYPE_t w1, DTYPE_t w2, DTYPE_t w3): """weight for failed infection, exponential time model""" cdef DTYPE_t structural, temporal, result - structural = delta ** (dist) + structural = delta ** dist # structural = plogis(w1,delta) * plogis(w2,delta) * plogis(w3,delta) temporal = 1. - exp(-alpha * dt) - #result = log(1. - structural) - result = log(1. - structural * temporal) - return result - - -cdef DTYPE_t weight_failure_power(int dist, int dt, DTYPE_t alpha, DTYPE_t delta, - DTYPE_t w1, DTYPE_t w2, DTYPE_t w3): - """weight for failed infection, power-law time model""" - cdef DTYPE_t structural, temporal, result - structural = delta ** (dist) - # structural = plogis(w1,delta) * plogis(w2,delta) * plogis(w3,delta) - temporal = 1. - 1. / (1. + dt/alpha)**0.01 + # temporal = 1. - 1. / (1. + dt/alpha)**0.01 result = log(1. - structural * temporal) return result @@ -60,7 +38,7 @@ def ml(dict root_victims, dict victims, dict non_victims, DTYPE_t age, DTYPE_t beta, ll, beta2 list parents, failures, successes n_roots, n_victims = len(root_victims), len(victims) - n_nodes = n_roots + n_victims + len(non_victims) + n_nodes = 148152 cdef: np.ndarray[DTYPE_t] probs = np.zeros(n_victims, dtype=DTYPE) np.ndarray[DTYPE_t] probs_fail = np.zeros(n_victims, dtype=DTYPE) @@ -88,29 +66,31 @@ def ml(dict root_victims, dict victims, dict non_victims, DTYPE_t age, probs_nv[i] = sum(failures) # calculate log likelihood - probs.sort(); probs = probs[::-1] # sort probs in descending order - cdef: - np.ndarray[DTYPE_t] cums = probs.cumsum() - ll = probs_fail.sum() - ll += probs_nv.sum() + # probs.sort(); probs = probs[::-1] # sort probs in descending order + # cdef: + # np.ndarray[DTYPE_t] cums = probs.cumsum() + ll = probs_fail.sum() # add probability that all edges to victims fail + ll += probs_nv.sum() # add probability that all edges to non_victims fail + + max_i = -1 + max_beta_add = float('-inf') + # iterate over all victim nodes to find the optimal threshold + for i in xrange(0, n_victims+1, 1): + roots = n_roots + n_victims - i + beta = float(roots)/float(n_nodes) + thresh = log(beta/(1.-beta)) + + # add probability for realized edges and subtract probability these edges fail + beta_add = (probs[probs>thresh]).sum() + print len(probs[probs>thresh]) + # add probability for the seeds and non-seeds + beta_add += roots * log(beta) + (n_nodes-roots) * log(1 - beta) + + if beta_add > max_beta_add: + max_i = i + max_beta_add = beta_add - for i in xrange(n_victims - 1, 0, -1): - # iterate over all victim nodes to find the optimal threshold - roots = n_roots + n_victims - 1 - i - beta = 1. / (1. + exp(-probs[i]))#exp(probs[i])# - if beta > float(roots) / age: - break - else: - print "alpha: {0}, delta: {1}. Everyone is a root".format(alpha, delta) - roots = n_victims + n_roots - i = -1 - beta = float(roots) / age - for i in xrange(n_victims - 1, 0, -1): - if probs[i] >= log(beta/(1.- beta)): - break - ll += age * log(1 - beta) - if i >= 0: - ll += cums[i] - if roots > 0: - ll += roots * log(beta) - roots * log(1 - beta) + ll += max_beta_add + roots = n_roots + n_victims - max_i + # print n_nodes, n_roots, n_victims, max_i, roots return (beta, roots, ll) |
