R Scripts/generate-network.R


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

library(igraph)
setwd("~/Documents/Cascade Project/")
source('criminal_cascades/R Scripts/temporal.R')
source('criminal_cascades/R Scripts/structural.R')

alpha = 0.1
beta = 0.25
delta = 0.1
# lmbda = 1/10
t_max = 1000

N = 5000
g = forest.fire.game(nodes=N, fw.prob=0.3, ambs=1, directed=F)
plot(g, vertex.size=3, vertex.label=NA)

V(g)$seed = runif(vcount(g))<beta
seeds = which(V(g)$seed)
V(g)$vic = V(g)$seed
V(g)$vic.day[V(g)$seed] = sample(1:t_max, sum(V(g)$seed), replace=T) #1 if testing ml2
V(g)$spawn.date = 0
V(g)$infector = NA

for (day in 1:t_max){
  day.vics = match(day,V(g)$vic.day)
  if (is.na(day.vics)) next
  for (vic in day.vics){
    neighbors = (unlist(neighborhood(g,3,vic)))[-1]
    dists = as.numeric(shortest.paths(g,vic,neighbors))
    infected = neighbors[which(runif(length(neighbors))<structural(delta, dists))]
    infected = setdiff(infected,seeds) # don't try to infect seeds
    inf.days = day + ceiling(rexp(length(infected),alpha))
    realized = ((inf.days <= V(g)$vic.day[infected]) %in% c(NA,T)) & (inf.days<=t_max)
    infected = infected[realized]
    if(sum(realized)){
      V(g)$vic[infected] = TRUE
      V(g)$vic.day[infected] = inf.days[realized]
      V(g)$infector[infected] = vic
    }
  }
}

vic_ids = which(V(g)$vic)
print(length(vic_ids))
cols = rep('lightblue',N); cols[V(g)$vic]='red'; cols[V(g)$seed]='darkred'
plot(g, vertex.size=3, vertex.label=NA, vertex.color=cols)

##### generate dag_dat
dag_dat_test = data.frame(matrix(nrow=1,ncol=10))
ei = 1
for (u in vic_ids){
  if ((which(vic_ids==u) %% 1000)==0)  print(which(vic_ids==u))
  tu    = V(g)$vic.day[u]
  u_spawn = V(g)$spawn.date[u]
  nbhd  = (unlist(neighborhood(g,nodes=u,order=3)))[-1] # get nodes within neighborhood
  tvs   = V(g)$vic.day[nbhd]
  v_spawn = V(g)$spawn.date[nbhd]
  nbhd  = nbhd[tu>v_spawn & (is.na(tvs) | tu<tvs)]
  tvs   = V(g)$vic.day[nbhd]
  dists = as.numeric(shortest.paths(g,u,nbhd))
  
  weights = matrix(1,nrow=length(nbhd),ncol=3)
  
  #will be faster to pre-allocate and fill in rather than rbind each time
  dag_dat_test[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd, 
                                                    rep(tu,length(nbhd)), tvs, dists, 
                                                    weights, rep(u_spawn,length(nbhd)),
                                                    rep(0,length(nbhd)), row.names=NULL)
  ei = ei + length(nbhd)
}
colnames(dag_dat_test) = c('from','to','t1','t2','dist','w1','w2','w3','spawn1','spawn2')
rownames(dag_dat_test) = NULL

write.csv(dag_dat_test, file='Results/dag_dat_test.csv')

##### analyze performance of recovery algorithm ------
# recovered = read.csv('Results/infectors.csv',header=F,col.names=c('victim','infector'))
# recovered = recovered[order(recovered$victim),]
# infectors = cbind(setdiff(vic_ids,seeds),
#                   V(g)$infector[setdiff(vic_ids,seeds)],
#                   recovered$infector[recovered$victim %in% setdiff(vic_ids,seeds)])
# mean(infectors[,2]==infectors[,3])
# 
# dag_dat_test[dag_dat_test$to==4984,]