diff options
| author | Ben Green <bgreen@g.harvard.edu> | 2015-08-21 13:06:12 -0400 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-08-22 17:01:37 -0700 |
| commit | ef61ece9773e8a865b57f60ca1e1b9faa903af23 (patch) | |
| tree | 577ff3fad1750cc824c1cb732bc05046c36efc11 /R Scripts/generate-dag-dat.R | |
| parent | 542012fc5ab0b373d85d1d13852daf834193bd33 (diff) | |
| download | criminal_cascades-ef61ece9773e8a865b57f60ca1e1b9faa903af23.tar.gz | |
added age to sim analysis and updated data generation for new model
Diffstat (limited to 'R Scripts/generate-dag-dat.R')
| -rwxr-xr-x | R Scripts/generate-dag-dat.R | 88 |
1 files changed, 51 insertions, 37 deletions
diff --git a/R Scripts/generate-dag-dat.R b/R Scripts/generate-dag-dat.R index a2df165..adef5f6 100755 --- a/R Scripts/generate-dag-dat.R +++ b/R Scripts/generate-dag-dat.R @@ -1,46 +1,60 @@ library(igraph) -setwd("~/Documents/Cascade Project/") -load('Results/hyper-lcc.RData') +setwd("~/Documents/Violence Cascades/") +load('Raw Data/lcc.RData') -vic_ids = which(V(hyp_lcc)$vic==TRUE) +library(foreach) +library(doMC) +registerDoMC(cores=4) -edgeWeights = function(eis){return(c(hyp_lcc_edges$weight[eis],Inf,Inf)[1:3])} +edgeWeights = function(eis){return(c(lcc_edges$weight[eis],Inf,Inf)[1:3])} +lcc2 = remove.edge.attribute(lcc,'weight') -dag_dat_all = data.frame(matrix(nrow=1,ncol=10)) -hyp_lcc2 = remove.edge.attribute(hyp_lcc,'weight') -ei = 1 -ptm=proc.time() -for (u in vic_ids){ - if ((which(vic_ids==u) %% 1000)==0) print(which(vic_ids==u)) - tu = hyp_lcc_verts$vic.day[u] - u_spawn = hyp_lcc_verts$spawn.date[u] - nbhd = unlist(neighborhood(hyp_lcc,nodes=u,order=3)) # get nodes within neighborhood - nbhd = nbhd[-1] # don't want to include u in the neighborhood - tvs = hyp_lcc_verts$vic.day[nbhd] - v_spawn = hyp_lcc_verts$spawn.date[nbhd] - nbhd = nbhd[tu>v_spawn & (is.na(tvs) | tu<tvs)] - tvs = hyp_lcc_verts$vic.day[nbhd] - dists = as.numeric(shortest.paths(hyp_lcc2,u,nbhd)) - - es = get.shortest.paths(hyp_lcc2,u,nbhd,output='epath')$epath - weights = matrix(unlist(lapply(es,edgeWeights),use.names = F),ncol=3,byrow=T) +vics = split(vic_ids, ceiling(seq_along(vic_ids)/98)) +dag_dat_lcc = c() +for(i in 1:length(vics)){ + ptm = proc.time() + print(c(i,length(vics))) + vic_ids = unlist(vics[i], use.names=F) + + ddl = foreach (u = vic_ids, .combine=rbind) %dopar% { + if ((which(vic_ids==u) %% 100)==0) print(which(vic_ids==u)) + + nbhd = unlist(neighborhood(lcc,nodes=u,order=1)) # get nodes within neighborhood + nbhd = nbhd[-1] # don't want to include u in the neighborhood + + dists = as.numeric(shortest.paths(lcc2,u,nbhd)) + es = get.shortest.paths(lcc2,u,nbhd,output='epath')$epath + weights = matrix(unlist(lapply(es,edgeWeights),use.names = F),ncol=3,byrow=T) + + # make edge for every infection + ddlu = data.frame(matrix(nrow=1,ncol=7)) + ei = 1 + for (j in c(17:21,16)){ + tu = lcc_verts[u,j] + if (is.na(tu)) next + + ddlu[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd, + rep(tu,length(nbhd)), dists, + weights, row.names=NULL) + ei = ei + length(nbhd) + } + + return(ddlu) + } - #will be faster to pre-allocate and fill in rather than rbind each time - dag_dat_all[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd, - rep(tu,length(nbhd)), tvs, dists, - weights, u_spawn, v_spawn, row.names=NULL) - ei = ei + length(nbhd) + dag_dat_lcc = rbind(dag_dat_lcc,ddl) + print(proc.time()-ptm) } -print(proc.time()-ptm) #3.5 hours -colnames(dag_dat_all) = c('from','to','t1','t2','dist','w1','w2','w3','spawn1','spawn2') -rownames(dag_dat_all) = NULL -# dag_dat_all$spawn1 = hyp_lcc_verts$spawn.date[dag_dat_all$from] -# dag_dat_all$spawn2 = hyp_lcc_verts$spawn.date[dag_dat_all$to] +colnames(dag_dat_lcc) = c('from','to','t1','dist','w1','w2','w3') +rownames(dag_dat_lcc) = NULL + +save(dag_dat_lcc, file='Results/dag_dat_lcc.RData') +write.csv(dag_dat_lcc, file='Results/dag_dat_lcc.csv') -save(dag_dat_all, file='Results/dag_dat_all.RData') -write.csv(dag_dat_all, file='Results/dag_dat_all.csv') +# dag_dat_vics = dag_dat_lcc[!is.na(dag_dat_lcc$t2),] +# save(dag_dat_lcc_vics, file='Results/dag_dat_lcc_vics.RData') +# write.csv(dag_dat_lcc_vics, file='Results/dag_dat_lcc_vics.csv') -dag_dat_vics = dag_dat_all[!is.na(dag_dat_all$t2),] -save(dag_dat_vics, file='Results/dag_dat_vics.RData') -write.csv(dag_dat_vics, file='Results/dag_dat_vics.csv') +#### +# create lcc_vic_times |
