R Scripts/generate-dag-dat.R


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59

library(igraph)
setwd("~/Documents/Cascade Project/")
load('Results/hyper-lcc.RData')

vic_ids = which(V(hyp_lcc)$vic==TRUE)

edgeWeights = function(eis){return(c(hyp_lcc_edges$weight[eis],Inf,Inf)[1:3])}

dag_dat_all = data.frame(matrix(nrow=1,ncol=8))
hyp_lcc2 = remove.edge.attribute(hyp_lcc,'weight')
ei = 1
ptm=proc.time()
for (u in vic_ids){
  if ((which(vic_ids==u) %% 1000)==0)  print(which(vic_ids==u))
  tu    = hyp_lcc_verts$vic.day[u]
  u_spawn = hyp_lcc_verts$spawn.date[u]
  nbhd  = unlist(neighborhood(hyp_lcc,nodes=u,order=3)) # get nodes within neighborhood
  nbhd  = nbhd[-1] # don't want to include u in the neighborhood
  tvs   = hyp_lcc_verts$vic.day[nbhd]
  v_spawn = hyp_lcc_verts$spawn.date[nbhd]
  nbhd  = nbhd[tu>v_spawn & (is.na(tvs) | tu<tvs)]
  tvs   = hyp_lcc_verts$vic.day[nbhd]
  dists = as.numeric(shortest.paths(hyp_lcc2,u,nbhd))
  
  es = get.shortest.paths(hyp_lcc2,u,nbhd,output='epath')$epath
  weights = matrix(unlist(lapply(es,edgeWeights),use.names = F),ncol=3,byrow=T)
  
  #will be faster to pre-allocate and fill in rather than rbind each time
  dag_dat_all[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd, 
                                                    rep(tu,length(nbhd)), tvs, dists, 
                                                    weights, row.names=NULL)
  ei = ei + length(nbhd)
}
print(proc.time()-ptm) #3.5 hours
colnames(dag_dat_all) = c('from','to','t1','t2','dist','w1','w2','w3')
rownames(dag_dat_all) = NULL

save(dag_dat_all, file='Results/dag_dat_all.RData')
write.csv(dag_dat_all, file='Results/dag_dat_all.csv')

dag_dat_vics = dag_dat_all[!is.na(dag_dat_all$t2),]
save(dag_dat_vics, file='Results/dag_dat_vics.RData')
write.csv(dag_dat_vics, file='Results/dag_dat_vics.csv')

# analyze min possible infection time
i = 1
min_time = 0#rep(Inf,length(unique(dag_dat_vics$to)))
min_time_dist = 0#rep(Inf,length(unique(dag_dat_vics$to)))
for(to in unique(dag_dat_vics$to)){
  rows = which(dag_dat_vics$to==to & dag_dat_vics$dist<2)
  if(length(rows)>0){
    min_time[i] = min(dag_dat_vics$t2[rows]-dag_dat_vics$t1[rows])
    min_time_dist[i] = dag_dat_vics$dist[rows[which.min(dag_dat_vics$t2[rows]-dag_dat_vics$t1[rows])]]
    i = i + 1
  }
}
median(min_time)
mean(min_time<100)
save(min_time_1,min_time_2,min_time_3,file='Results/min_inf_time.RData')