summaryrefslogtreecommitdiffstats
path: root/R Scripts/generate-dag-dat.R
diff options
context:
space:
mode:
authorBen Green <bgreen@g.harvard.edu>2015-08-21 13:06:12 -0400
committerBen Green <bgreen@g.harvard.edu>2015-08-21 13:06:12 -0400
commite36b7500767da4c0a8dbd29f69667aa8fa275e8c (patch)
tree6e8245fe614ec161bb03cd96263374a35b0008be /R Scripts/generate-dag-dat.R
parent9350ee2c6359562a23cf8efdefdd7de80b2a682e (diff)
downloadcriminal_cascades-e36b7500767da4c0a8dbd29f69667aa8fa275e8c.tar.gz
added age to sim analysis and updated data generation for new model
Diffstat (limited to 'R Scripts/generate-dag-dat.R')
-rwxr-xr-xR Scripts/generate-dag-dat.R88
1 files changed, 51 insertions, 37 deletions
diff --git a/R Scripts/generate-dag-dat.R b/R Scripts/generate-dag-dat.R
index a2df165..adef5f6 100755
--- a/R Scripts/generate-dag-dat.R
+++ b/R Scripts/generate-dag-dat.R
@@ -1,46 +1,60 @@
library(igraph)
-setwd("~/Documents/Cascade Project/")
-load('Results/hyper-lcc.RData')
+setwd("~/Documents/Violence Cascades/")
+load('Raw Data/lcc.RData')
-vic_ids = which(V(hyp_lcc)$vic==TRUE)
+library(foreach)
+library(doMC)
+registerDoMC(cores=4)
-edgeWeights = function(eis){return(c(hyp_lcc_edges$weight[eis],Inf,Inf)[1:3])}
+edgeWeights = function(eis){return(c(lcc_edges$weight[eis],Inf,Inf)[1:3])}
+lcc2 = remove.edge.attribute(lcc,'weight')
-dag_dat_all = data.frame(matrix(nrow=1,ncol=10))
-hyp_lcc2 = remove.edge.attribute(hyp_lcc,'weight')
-ei = 1
-ptm=proc.time()
-for (u in vic_ids){
- if ((which(vic_ids==u) %% 1000)==0) print(which(vic_ids==u))
- tu = hyp_lcc_verts$vic.day[u]
- u_spawn = hyp_lcc_verts$spawn.date[u]
- nbhd = unlist(neighborhood(hyp_lcc,nodes=u,order=3)) # get nodes within neighborhood
- nbhd = nbhd[-1] # don't want to include u in the neighborhood
- tvs = hyp_lcc_verts$vic.day[nbhd]
- v_spawn = hyp_lcc_verts$spawn.date[nbhd]
- nbhd = nbhd[tu>v_spawn & (is.na(tvs) | tu<tvs)]
- tvs = hyp_lcc_verts$vic.day[nbhd]
- dists = as.numeric(shortest.paths(hyp_lcc2,u,nbhd))
-
- es = get.shortest.paths(hyp_lcc2,u,nbhd,output='epath')$epath
- weights = matrix(unlist(lapply(es,edgeWeights),use.names = F),ncol=3,byrow=T)
+vics = split(vic_ids, ceiling(seq_along(vic_ids)/98))
+dag_dat_lcc = c()
+for(i in 1:length(vics)){
+ ptm = proc.time()
+ print(c(i,length(vics)))
+ vic_ids = unlist(vics[i], use.names=F)
+
+ ddl = foreach (u = vic_ids, .combine=rbind) %dopar% {
+ if ((which(vic_ids==u) %% 100)==0) print(which(vic_ids==u))
+
+ nbhd = unlist(neighborhood(lcc,nodes=u,order=1)) # get nodes within neighborhood
+ nbhd = nbhd[-1] # don't want to include u in the neighborhood
+
+ dists = as.numeric(shortest.paths(lcc2,u,nbhd))
+ es = get.shortest.paths(lcc2,u,nbhd,output='epath')$epath
+ weights = matrix(unlist(lapply(es,edgeWeights),use.names = F),ncol=3,byrow=T)
+
+ # make edge for every infection
+ ddlu = data.frame(matrix(nrow=1,ncol=7))
+ ei = 1
+ for (j in c(17:21,16)){
+ tu = lcc_verts[u,j]
+ if (is.na(tu)) next
+
+ ddlu[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd,
+ rep(tu,length(nbhd)), dists,
+ weights, row.names=NULL)
+ ei = ei + length(nbhd)
+ }
+
+ return(ddlu)
+ }
- #will be faster to pre-allocate and fill in rather than rbind each time
- dag_dat_all[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd,
- rep(tu,length(nbhd)), tvs, dists,
- weights, u_spawn, v_spawn, row.names=NULL)
- ei = ei + length(nbhd)
+ dag_dat_lcc = rbind(dag_dat_lcc,ddl)
+ print(proc.time()-ptm)
}
-print(proc.time()-ptm) #3.5 hours
-colnames(dag_dat_all) = c('from','to','t1','t2','dist','w1','w2','w3','spawn1','spawn2')
-rownames(dag_dat_all) = NULL
-# dag_dat_all$spawn1 = hyp_lcc_verts$spawn.date[dag_dat_all$from]
-# dag_dat_all$spawn2 = hyp_lcc_verts$spawn.date[dag_dat_all$to]
+colnames(dag_dat_lcc) = c('from','to','t1','dist','w1','w2','w3')
+rownames(dag_dat_lcc) = NULL
+
+save(dag_dat_lcc, file='Results/dag_dat_lcc.RData')
+write.csv(dag_dat_lcc, file='Results/dag_dat_lcc.csv')
-save(dag_dat_all, file='Results/dag_dat_all.RData')
-write.csv(dag_dat_all, file='Results/dag_dat_all.csv')
+# dag_dat_vics = dag_dat_lcc[!is.na(dag_dat_lcc$t2),]
+# save(dag_dat_lcc_vics, file='Results/dag_dat_lcc_vics.RData')
+# write.csv(dag_dat_lcc_vics, file='Results/dag_dat_lcc_vics.csv')
-dag_dat_vics = dag_dat_all[!is.na(dag_dat_all$t2),]
-save(dag_dat_vics, file='Results/dag_dat_vics.RData')
-write.csv(dag_dat_vics, file='Results/dag_dat_vics.csv')
+####
+# create lcc_vic_times