added age to sim analysis and updated data generation for new model

author: Ben Green <bgreen@g.harvard.edu> 2015-08-21 13:06:12 -0400
committer: Thibaut Horel <thibaut.horel@gmail.com> 2015-08-22 17:01:37 -0700
commit: ef61ece9773e8a865b57f60ca1e1b9faa903af23 (patch)
tree: 577ff3fad1750cc824c1cb732bc05046c36efc11 /R Scripts/generate-dag-dat.R
parent: 542012fc5ab0b373d85d1d13852daf834193bd33 (diff)
download: criminal_cascades-ef61ece9773e8a865b57f60ca1e1b9faa903af23.tar.gz
1 files changed, 51 insertions, 37 deletions
diff --git a/R Scripts/generate-dag-dat.R b/R Scripts/generate-dag-dat.R
index a2df165..adef5f6 100755
--- a/R Scripts/generate-dag-dat.R
+++ b/R Scripts/generate-dag-dat.R
@@ -1,46 +1,60 @@
 library(igraph)
-setwd("~/Documents/Cascade Project/")
-load('Results/hyper-lcc.RData')
+setwd("~/Documents/Violence Cascades/")
+load('Raw Data/lcc.RData')
 
-vic_ids = which(V(hyp_lcc)$vic==TRUE)
+library(foreach)
+library(doMC)
+registerDoMC(cores=4)
 
-edgeWeights = function(eis){return(c(hyp_lcc_edges$weight[eis],Inf,Inf)[1:3])}
+edgeWeights = function(eis){return(c(lcc_edges$weight[eis],Inf,Inf)[1:3])}
+lcc2 = remove.edge.attribute(lcc,'weight')
 
-dag_dat_all = data.frame(matrix(nrow=1,ncol=10))
-hyp_lcc2 = remove.edge.attribute(hyp_lcc,'weight')
-ei = 1
-ptm=proc.time()
-for (u in vic_ids){
-  if ((which(vic_ids==u) %% 1000)==0)  print(which(vic_ids==u))
-  tu    = hyp_lcc_verts$vic.day[u]
-  u_spawn = hyp_lcc_verts$spawn.date[u]
-  nbhd  = unlist(neighborhood(hyp_lcc,nodes=u,order=3)) # get nodes within neighborhood
-  nbhd  = nbhd[-1] # don't want to include u in the neighborhood
-  tvs   = hyp_lcc_verts$vic.day[nbhd]
-  v_spawn = hyp_lcc_verts$spawn.date[nbhd]
-  nbhd  = nbhd[tu>v_spawn & (is.na(tvs) | tu<tvs)]
-  tvs   = hyp_lcc_verts$vic.day[nbhd]
-  dists = as.numeric(shortest.paths(hyp_lcc2,u,nbhd))
-  
-  es = get.shortest.paths(hyp_lcc2,u,nbhd,output='epath')$epath
-  weights = matrix(unlist(lapply(es,edgeWeights),use.names = F),ncol=3,byrow=T)
+vics = split(vic_ids, ceiling(seq_along(vic_ids)/98))
+dag_dat_lcc = c()
+for(i in 1:length(vics)){
+  ptm = proc.time()
+  print(c(i,length(vics)))  
+  vic_ids = unlist(vics[i], use.names=F)
+
+  ddl = foreach (u = vic_ids, .combine=rbind) %dopar% {
+    if ((which(vic_ids==u) %% 100)==0)  print(which(vic_ids==u))
+    
+    nbhd  = unlist(neighborhood(lcc,nodes=u,order=1)) # get nodes within neighborhood
+    nbhd  = nbhd[-1] # don't want to include u in the neighborhood
+    
+    dists = as.numeric(shortest.paths(lcc2,u,nbhd))
+    es = get.shortest.paths(lcc2,u,nbhd,output='epath')$epath
+    weights = matrix(unlist(lapply(es,edgeWeights),use.names = F),ncol=3,byrow=T)
+    
+    # make edge for every infection
+    ddlu = data.frame(matrix(nrow=1,ncol=7))
+    ei = 1
+    for (j in c(17:21,16)){
+      tu = lcc_verts[u,j]
+      if (is.na(tu)) next
+      
+      ddlu[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd, 
+                                                rep(tu,length(nbhd)), dists, 
+                                                weights, row.names=NULL)
+      ei = ei + length(nbhd)
+    }
+    
+    return(ddlu)
+  }
   
-  #will be faster to pre-allocate and fill in rather than rbind each time
-  dag_dat_all[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd, 
-                                                    rep(tu,length(nbhd)), tvs, dists, 
-                                                    weights, u_spawn, v_spawn, row.names=NULL)
-  ei = ei + length(nbhd)
+  dag_dat_lcc = rbind(dag_dat_lcc,ddl)
+  print(proc.time()-ptm)
 }
-print(proc.time()-ptm) #3.5 hours
-colnames(dag_dat_all) = c('from','to','t1','t2','dist','w1','w2','w3','spawn1','spawn2')
-rownames(dag_dat_all) = NULL
 
-# dag_dat_all$spawn1 = hyp_lcc_verts$spawn.date[dag_dat_all$from]
-# dag_dat_all$spawn2 = hyp_lcc_verts$spawn.date[dag_dat_all$to]
+colnames(dag_dat_lcc) = c('from','to','t1','dist','w1','w2','w3')
+rownames(dag_dat_lcc) = NULL
+
+save(dag_dat_lcc, file='Results/dag_dat_lcc.RData')
+write.csv(dag_dat_lcc, file='Results/dag_dat_lcc.csv')
 
-save(dag_dat_all, file='Results/dag_dat_all.RData')
-write.csv(dag_dat_all, file='Results/dag_dat_all.csv')
+# dag_dat_vics = dag_dat_lcc[!is.na(dag_dat_lcc$t2),]
+# save(dag_dat_lcc_vics, file='Results/dag_dat_lcc_vics.RData')
+# write.csv(dag_dat_lcc_vics, file='Results/dag_dat_lcc_vics.csv')
 
-dag_dat_vics = dag_dat_all[!is.na(dag_dat_all$t2),]
-save(dag_dat_vics, file='Results/dag_dat_vics.RData')
-write.csv(dag_dat_vics, file='Results/dag_dat_vics.csv')
+####
+# create lcc_vic_times
author	Ben Green <bgreen@g.harvard.edu>	2015-08-21 13:06:12 -0400
committer	Thibaut Horel <thibaut.horel@gmail.com>	2015-08-22 17:01:37 -0700
commit	ef61ece9773e8a865b57f60ca1e1b9faa903af23 (patch)
tree	577ff3fad1750cc824c1cb732bc05046c36efc11 /R Scripts/generate-dag-dat.R
parent	542012fc5ab0b373d85d1d13852daf834193bd33 (diff)
download	criminal_cascades-ef61ece9773e8a865b57f60ca1e1b9faa903af23.tar.gz