summaryrefslogtreecommitdiffstats
path: root/R Scripts/generate-network.R
blob: 3b40969aef0104b4f1196644ba4a060cf20ffee4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
library(igraph)
setwd("~/Documents/Cascade Project/")
source('criminal_cascades/R Scripts/temporal.R')
source('criminal_cascades/R Scripts/structural.R')

alpha = 1/100
beta = 0.02
delta = 0.15
# lmbda = 1/10
t_max = 1000

N = 5000
g = forest.fire.game(nodes=N, fw.prob=0.3, ambs=1, directed=F)
plot(g, vertex.size=5, vertex.label=NA)

V(g)$seed = runif(vcount(g))<beta
seeds = which(V(g)$seed)
V(g)$vic = V(g)$seed
V(g)$vic.day[V(g)$seed] = sample(1:t_max, sum(V(g)$seed))
V(g)$spawn.date = 0
V(g)$infector = NA

for (day in 1:t_max){
  day.vics = match(day,V(g)$vic.day)
  if (is.na(day.vics)) next
  for (vic in day.vics){
    neighbors = (unlist(neighborhood(g,3,vic)))[-1]
    dists = as.numeric(shortest.paths(g,vic,neighbors))
    infected = neighbors[which(runif(length(neighbors))<structural(delta, dists))]
    infected = setdiff(infected,seeds) # don't try to infect seeds
    inf.days = day + ceiling(alpha*rexp(length(infected),alpha))
    V(g)$vic[infected] = TRUE
    infects = (inf.days <= V(g)$vic.day[infected]) %in% c(NA,T)
    V(g)$vic.day[infected[infects]] = inf.days[infects]
    V(g)$infector[infected[infects]] = vic
  }
}

vic_ids = which(V(g)$vic)
cols = rep('lightblue',N); cols[V(g)$vic]='red'; cols[V(g)$seed]='darkred'
plot(g, vertex.size=5, vertex.label=NA, vertex.color=cols)

##### generate dag_dat
dag_dat_test = data.frame(matrix(nrow=1,ncol=10))
ei = 1
for (u in vic_ids){
  if ((which(vic_ids==u) %% 1000)==0)  print(which(vic_ids==u))
  tu    = V(g)$vic.day[u]
  u_spawn = V(g)$spawn.date[u]
  nbhd  = (unlist(neighborhood(g,nodes=u,order=3)))[-1] # get nodes within neighborhood
  tvs   = V(g)$vic.day[nbhd]
  v_spawn = V(g)$spawn.date[nbhd]
  nbhd  = nbhd[tu>v_spawn & (is.na(tvs) | tu<tvs)]
  tvs   = V(g)$vic.day[nbhd]
  dists = as.numeric(shortest.paths(g,u,nbhd))
  
  weights = matrix(1,nrow=length(nbhd),ncol=3)
  
  #will be faster to pre-allocate and fill in rather than rbind each time
  dag_dat_test[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd, 
                                                    rep(tu,length(nbhd)), tvs, dists, 
                                                    weights, rep(u_spawn,length(nbhd)),
                                                    rep(0,length(nbhd)), row.names=NULL)
  ei = ei + length(nbhd)
}
colnames(dag_dat_test) = c('from','to','t1','t2','dist','w1','w2','w3','spawn1','spawn2')
rownames(dag_dat_test) = NULL

write.csv(dag_dat_test, file='Results/dag_dat_test.csv')

##### analyze performance of recovery algorithm
recovered = read.csv('Results/infectors.csv',header=F,col.names=c('victim','infector'))
recovered = recovered[order(recovered$victim),]
infectors = cbind(setdiff(vic_ids,seeds),
                  V(g)$infector[setdiff(vic_ids,seeds)],
                  recovered$infector[recovered$victim %in% setdiff(vic_ids,seeds)])
mean(infectors[,2]==infectors[,3])

dag_dat_test[dag_dat_test$to==4984,]