diff options
| author | Ben Green <bgreen@g.harvard.edu> | 2015-09-13 23:52:40 -0400 |
|---|---|---|
| committer | Ben Green <bgreen@g.harvard.edu> | 2015-09-13 23:52:42 -0400 |
| commit | f23e9e5da58d1d19913703e11f7ff96a7adff96b (patch) | |
| tree | b9459b3670cdf125f8bdf0888288be3a0a1357a8 /R Scripts/find-cascades.R | |
| parent | fffb68b515a5cae198d8b756b562ddea6f2c814c (diff) | |
| download | criminal_cascades-f23e9e5da58d1d19913703e11f7ff96a7adff96b.tar.gz | |
updated prediction code for new model and data format
Diffstat (limited to 'R Scripts/find-cascades.R')
| -rw-r--r-- | R Scripts/find-cascades.R | 79 |
1 files changed, 36 insertions, 43 deletions
diff --git a/R Scripts/find-cascades.R b/R Scripts/find-cascades.R index ded9b9c..77c7b08 100644 --- a/R Scripts/find-cascades.R +++ b/R Scripts/find-cascades.R @@ -1,64 +1,57 @@ library(igraph) -setwd("~/Documents/Cascade Project/") -load('Results/hyper-lcc.RData') -load('Results/dag_dat_vics.RData') +setwd('~/Documents/Violence Cascades/') +load('Raw Data/lcc.RData') +load('Raw Data/dag_dat_lcc.RData') source('criminal_cascades/R Scripts/temporal.R') source('criminal_cascades/R Scripts/structural.R') +source('criminal_cascades/R Scripts/background.R') -vic_ids = which(V(hyp_lcc)$vic==TRUE) +vic_ids = which(lcc_verts$vic==TRUE) +edges = dag_dat_lcc #### Initialize model parameters -alpha = 0.041 -# gamma = 0.3 -delta = 0.1 -# beta = 0.00796964464237 +mu0 = 1.1845e-05 #background +alpha = 0.00317 #structural +beta = 0.0039 #temporal ##### Get weights -# find max n days where infection possible given alpha -edges = dag_dat_test[!is.na(dag_dat_test$t2),] -# edges = edges[(edges$t2-edges$t1)<300,] - -p_t = temporal(edges$t1, edges$t2, alpha) -p_s = structural(delta, edges$dist) -p = p_s * p_t -p_tilde = 1 - p_s + p_s * temporal(edges$t1, edges$t2+1, alpha) -weights = p/p_tilde +# p_t = temporal(edges$t1, edges$t2, beta) +# p_s = structural(alpha, edges$dist) +# p = p_s * p_t +# background = background(mu0,t) ##### Find transmission edges -# load('Results/lm-probs.RData') -# names = attr(lm.probs,'names') -# probs = as.numeric(lm.probs) -# betas = probs[match(hyp_lcc_verts$ir_no[edges$to],names)] -# betas = 0.055 -# thresh = beta/(1-beta) - realized = c() -# edges = edges[weights>thresh,] -# weights = weights[weights>thresh] for (u in vic_ids){ - max_prob = -1 - max_edge = NULL - Ei = which(edges$to==u) - for(i in Ei){ - if(weights[i]>thresh){ - prob = p[i] * prod(p_tilde[setdiff(Ei,i)]) - if(prob>max_prob){ - max_prob = prob - max_edge = i + days = as.numeric(vic_times[u,2:7]) + days = days[!is.na(days)] + for(day in days){ + bkgnd = background(mu0,day) + Ei = which(edges$to==u & edges$t1<day) + p_t = temporal(edges$t1[Ei], day, beta) + p_s = structural(alpha, edges$dist[Ei]) + p = p_s*p_t + + pc = sum(p) + + if (pc>bkgnd){ + max_prob = -1 + max_edge = NULL + for(i in 1:length(Ei)){ + if(p[i]>max_prob){ + max_prob = p[i] + max_edge = i + } } - } + } + realized = c(realized, max_edge) } - realized = c(realized, max_edge) + edges[realized,c('from','to')] } -edges[realized,c('from','to')] -# if (length(Ei)>0){ -# max_edge = Ei[which.max(weights[Ei])] # how to deal with ties???? -# realized = c(realized, max_edge) -# } ##### Create forest of cascades # dag is all of the victim nodes but with only transmission edges, or don't index by vic_ids -dag = graph.data.frame(edges[realized,1:2], directed=TRUE, vertices=hyp_lcc_verts[vic_ids,]) +dag = graph.data.frame(edges[realized,1:2], directed=TRUE, vertices=lcc_verts[vic_ids,]) dag = set.edge.attribute(dag,'weight',value=weights[realized]) table(clusters(dag)$csize) |
