added my R scripts

author: Ben Green <bgreen@g.harvard.edu> 2015-06-08 15:21:51 -0400
committer: Ben Green <bgreen@g.harvard.edu> 2015-06-08 15:21:51 -0400
commit: 1739e9f5706bb8a73de5dbf0b467de49ea040898 (patch)
tree: 6f1d0f166986c5f0757be9b40d8eeb3409ab022c /R Scripts/generate-dag-dat.R
parent: e5dada202c34521618bf82a086093c342841e5e8 (diff)
download: criminal_cascades-1739e9f5706bb8a73de5dbf0b467de49ea040898.tar.gz
1 files changed, 59 insertions, 0 deletions
diff --git a/R Scripts/generate-dag-dat.R b/R Scripts/generate-dag-dat.R
new file mode 100755
index 0000000..660a503
--- /dev/null
+++ b/R Scripts/generate-dag-dat.R
@@ -0,0 +1,59 @@
+library(igraph)
+setwd("~/Documents/Cascade Project/")
+load('Results/hyper-lcc.RData')
+
+vic_ids = which(V(hyp_lcc)$vic==TRUE)
+
+edgeWeights = function(eis){return(c(hyp_lcc_edges$weight[eis],Inf,Inf)[1:3])}
+
+dag_dat_all = data.frame(matrix(nrow=1,ncol=8))
+hyp_lcc2 = remove.edge.attribute(hyp_lcc,'weight')
+ei = 1
+ptm=proc.time()
+for (u in vic_ids){
+  if ((which(vic_ids==u) %% 1000)==0)  print(which(vic_ids==u))
+  tu    = hyp_lcc_verts$vic.day[u]
+  u_spawn = hyp_lcc_verts$spawn.date[u]
+  nbhd  = unlist(neighborhood(hyp_lcc,nodes=u,order=3)) # get nodes within neighborhood
+  nbhd  = nbhd[-1] # don't want to include u in the neighborhood
+  tvs   = hyp_lcc_verts$vic.day[nbhd]
+  v_spawn = hyp_lcc_verts$spawn.date[nbhd]
+  nbhd  = nbhd[tu>v_spawn & (is.na(tvs) | tu<tvs)]
+  tvs   = hyp_lcc_verts$vic.day[nbhd]
+  dists = as.numeric(shortest.paths(hyp_lcc2,u,nbhd))
+  
+  es = get.shortest.paths(hyp_lcc2,u,nbhd,output='epath')$epath
+  weights = matrix(unlist(lapply(es,edgeWeights),use.names = F),ncol=3,byrow=T)
+  
+  #will be faster to pre-allocate and fill in rather than rbind each time
+  dag_dat_all[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd, 
+                                                    rep(tu,length(nbhd)), tvs, dists, 
+                                                    weights, row.names=NULL)
+  ei = ei + length(nbhd)
+}
+print(proc.time()-ptm) #3.5 hours
+colnames(dag_dat_all) = c('from','to','t1','t2','dist','w1','w2','w3')
+rownames(dag_dat_all) = NULL
+
+save(dag_dat_all, file='Results/dag_dat_all.RData')
+write.csv(dag_dat_all, file='Results/dag_dat_all.csv')
+
+dag_dat_vics = dag_dat_all[!is.na(dag_dat_all$t2),]
+save(dag_dat_vics, file='Results/dag_dat_vics.RData')
+write.csv(dag_dat_vics, file='Results/dag_dat_vics.csv')
+
+# analyze min possible infection time
+i = 1
+min_time = 0#rep(Inf,length(unique(dag_dat_vics$to)))
+min_time_dist = 0#rep(Inf,length(unique(dag_dat_vics$to)))
+for(to in unique(dag_dat_vics$to)){
+  rows = which(dag_dat_vics$to==to & dag_dat_vics$dist<2)
+  if(length(rows)>0){
+    min_time[i] = min(dag_dat_vics$t2[rows]-dag_dat_vics$t1[rows])
+    min_time_dist[i] = dag_dat_vics$dist[rows[which.min(dag_dat_vics$t2[rows]-dag_dat_vics$t1[rows])]]
+    i = i + 1
+  }
+}
+median(min_time)
+mean(min_time<100)
+save(min_time_1,min_time_2,min_time_3,file='Results/min_inf_time.RData')
author	Ben Green <bgreen@g.harvard.edu>	2015-06-08 15:21:51 -0400
committer	Ben Green <bgreen@g.harvard.edu>	2015-06-08 15:21:51 -0400
commit	1739e9f5706bb8a73de5dbf0b467de49ea040898 (patch)
tree	6f1d0f166986c5f0757be9b40d8eeb3409ab022c /R Scripts/generate-dag-dat.R
parent	e5dada202c34521618bf82a086093c342841e5e8 (diff)
download	criminal_cascades-1739e9f5706bb8a73de5dbf0b467de49ea040898.tar.gz