summaryrefslogtreecommitdiffstats
path: root/R Scripts/generate-dag-dat.R
diff options
context:
space:
mode:
authorBen Green <bgreen@g.harvard.edu>2015-06-08 15:21:51 -0400
committerBen Green <bgreen@g.harvard.edu>2015-06-08 15:21:51 -0400
commit1739e9f5706bb8a73de5dbf0b467de49ea040898 (patch)
tree6f1d0f166986c5f0757be9b40d8eeb3409ab022c /R Scripts/generate-dag-dat.R
parente5dada202c34521618bf82a086093c342841e5e8 (diff)
downloadcriminal_cascades-1739e9f5706bb8a73de5dbf0b467de49ea040898.tar.gz
added my R scripts
Diffstat (limited to 'R Scripts/generate-dag-dat.R')
-rwxr-xr-xR Scripts/generate-dag-dat.R59
1 files changed, 59 insertions, 0 deletions
diff --git a/R Scripts/generate-dag-dat.R b/R Scripts/generate-dag-dat.R
new file mode 100755
index 0000000..660a503
--- /dev/null
+++ b/R Scripts/generate-dag-dat.R
@@ -0,0 +1,59 @@
+library(igraph)
+setwd("~/Documents/Cascade Project/")
+load('Results/hyper-lcc.RData')
+
+vic_ids = which(V(hyp_lcc)$vic==TRUE)
+
+edgeWeights = function(eis){return(c(hyp_lcc_edges$weight[eis],Inf,Inf)[1:3])}
+
+dag_dat_all = data.frame(matrix(nrow=1,ncol=8))
+hyp_lcc2 = remove.edge.attribute(hyp_lcc,'weight')
+ei = 1
+ptm=proc.time()
+for (u in vic_ids){
+ if ((which(vic_ids==u) %% 1000)==0) print(which(vic_ids==u))
+ tu = hyp_lcc_verts$vic.day[u]
+ u_spawn = hyp_lcc_verts$spawn.date[u]
+ nbhd = unlist(neighborhood(hyp_lcc,nodes=u,order=3)) # get nodes within neighborhood
+ nbhd = nbhd[-1] # don't want to include u in the neighborhood
+ tvs = hyp_lcc_verts$vic.day[nbhd]
+ v_spawn = hyp_lcc_verts$spawn.date[nbhd]
+ nbhd = nbhd[tu>v_spawn & (is.na(tvs) | tu<tvs)]
+ tvs = hyp_lcc_verts$vic.day[nbhd]
+ dists = as.numeric(shortest.paths(hyp_lcc2,u,nbhd))
+
+ es = get.shortest.paths(hyp_lcc2,u,nbhd,output='epath')$epath
+ weights = matrix(unlist(lapply(es,edgeWeights),use.names = F),ncol=3,byrow=T)
+
+ #will be faster to pre-allocate and fill in rather than rbind each time
+ dag_dat_all[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd,
+ rep(tu,length(nbhd)), tvs, dists,
+ weights, row.names=NULL)
+ ei = ei + length(nbhd)
+}
+print(proc.time()-ptm) #3.5 hours
+colnames(dag_dat_all) = c('from','to','t1','t2','dist','w1','w2','w3')
+rownames(dag_dat_all) = NULL
+
+save(dag_dat_all, file='Results/dag_dat_all.RData')
+write.csv(dag_dat_all, file='Results/dag_dat_all.csv')
+
+dag_dat_vics = dag_dat_all[!is.na(dag_dat_all$t2),]
+save(dag_dat_vics, file='Results/dag_dat_vics.RData')
+write.csv(dag_dat_vics, file='Results/dag_dat_vics.csv')
+
+# analyze min possible infection time
+i = 1
+min_time = 0#rep(Inf,length(unique(dag_dat_vics$to)))
+min_time_dist = 0#rep(Inf,length(unique(dag_dat_vics$to)))
+for(to in unique(dag_dat_vics$to)){
+ rows = which(dag_dat_vics$to==to & dag_dat_vics$dist<2)
+ if(length(rows)>0){
+ min_time[i] = min(dag_dat_vics$t2[rows]-dag_dat_vics$t1[rows])
+ min_time_dist[i] = dag_dat_vics$dist[rows[which.min(dag_dat_vics$t2[rows]-dag_dat_vics$t1[rows])]]
+ i = i + 1
+ }
+}
+median(min_time)
+mean(min_time<100)
+save(min_time_1,min_time_2,min_time_3,file='Results/min_inf_time.RData')