diff options
| author | Ben Green <bgreen@g.harvard.edu> | 2015-06-08 15:21:51 -0400 |
|---|---|---|
| committer | Ben Green <bgreen@g.harvard.edu> | 2015-06-08 15:21:51 -0400 |
| commit | 1739e9f5706bb8a73de5dbf0b467de49ea040898 (patch) | |
| tree | 6f1d0f166986c5f0757be9b40d8eeb3409ab022c /R Scripts/generate-dag-dat.R | |
| parent | e5dada202c34521618bf82a086093c342841e5e8 (diff) | |
| download | criminal_cascades-1739e9f5706bb8a73de5dbf0b467de49ea040898.tar.gz | |
added my R scripts
Diffstat (limited to 'R Scripts/generate-dag-dat.R')
| -rwxr-xr-x | R Scripts/generate-dag-dat.R | 59 |
1 files changed, 59 insertions, 0 deletions
diff --git a/R Scripts/generate-dag-dat.R b/R Scripts/generate-dag-dat.R new file mode 100755 index 0000000..660a503 --- /dev/null +++ b/R Scripts/generate-dag-dat.R @@ -0,0 +1,59 @@ +library(igraph) +setwd("~/Documents/Cascade Project/") +load('Results/hyper-lcc.RData') + +vic_ids = which(V(hyp_lcc)$vic==TRUE) + +edgeWeights = function(eis){return(c(hyp_lcc_edges$weight[eis],Inf,Inf)[1:3])} + +dag_dat_all = data.frame(matrix(nrow=1,ncol=8)) +hyp_lcc2 = remove.edge.attribute(hyp_lcc,'weight') +ei = 1 +ptm=proc.time() +for (u in vic_ids){ + if ((which(vic_ids==u) %% 1000)==0) print(which(vic_ids==u)) + tu = hyp_lcc_verts$vic.day[u] + u_spawn = hyp_lcc_verts$spawn.date[u] + nbhd = unlist(neighborhood(hyp_lcc,nodes=u,order=3)) # get nodes within neighborhood + nbhd = nbhd[-1] # don't want to include u in the neighborhood + tvs = hyp_lcc_verts$vic.day[nbhd] + v_spawn = hyp_lcc_verts$spawn.date[nbhd] + nbhd = nbhd[tu>v_spawn & (is.na(tvs) | tu<tvs)] + tvs = hyp_lcc_verts$vic.day[nbhd] + dists = as.numeric(shortest.paths(hyp_lcc2,u,nbhd)) + + es = get.shortest.paths(hyp_lcc2,u,nbhd,output='epath')$epath + weights = matrix(unlist(lapply(es,edgeWeights),use.names = F),ncol=3,byrow=T) + + #will be faster to pre-allocate and fill in rather than rbind each time + dag_dat_all[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd, + rep(tu,length(nbhd)), tvs, dists, + weights, row.names=NULL) + ei = ei + length(nbhd) +} +print(proc.time()-ptm) #3.5 hours +colnames(dag_dat_all) = c('from','to','t1','t2','dist','w1','w2','w3') +rownames(dag_dat_all) = NULL + +save(dag_dat_all, file='Results/dag_dat_all.RData') +write.csv(dag_dat_all, file='Results/dag_dat_all.csv') + +dag_dat_vics = dag_dat_all[!is.na(dag_dat_all$t2),] +save(dag_dat_vics, file='Results/dag_dat_vics.RData') +write.csv(dag_dat_vics, file='Results/dag_dat_vics.csv') + +# analyze min possible infection time +i = 1 +min_time = 0#rep(Inf,length(unique(dag_dat_vics$to))) +min_time_dist = 0#rep(Inf,length(unique(dag_dat_vics$to))) +for(to in unique(dag_dat_vics$to)){ + rows = which(dag_dat_vics$to==to & dag_dat_vics$dist<2) + if(length(rows)>0){ + min_time[i] = min(dag_dat_vics$t2[rows]-dag_dat_vics$t1[rows]) + min_time_dist[i] = dag_dat_vics$dist[rows[which.min(dag_dat_vics$t2[rows]-dag_dat_vics$t1[rows])]] + i = i + 1 + } +} +median(min_time) +mean(min_time<100) +save(min_time_1,min_time_2,min_time_3,file='Results/min_inf_time.RData') |
