summaryrefslogtreecommitdiffstats
path: root/R Scripts/sim-analysis.R
blob: 39605780db375da467a48b6056320c72d3435b1e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
library(igraph)
setwd("~/Documents/Cascade Project/")

load('Results/hyper-lcc.RData')
vic_ids = which(V(hyp_lcc)$vic==1)
n.infections = length(vic_ids)
n.days = max(hyp_lcc_verts$vic.day,na.rm=T) - min(hyp_lcc_verts$vic.day,na.rm=T)
inf.dates = hyp_lcc_verts$vic.day[vic_ids]

ptm = proc.time()
n = 1500
mean.time = matrix(0,3,n)
med.time = matrix(0,3,n)
mean.50 = matrix(0,3,n)
mean.100 = matrix(0,3,n)
n.vicpairs = matrix(0,3,n)
for(sim in 1:3){
  print(paste('sim:',sim))
  for(q in 1:n){
    if (q%%250==0) print(paste('run:',q))
    graph = hyp_lcc
    if (sim<3) sim.dates = sample(n.days, n.infections, replace=TRUE) # sims 1 + 2
    if (sim==3) sim.dates = sample(inf.dates) # sim 3
    if (sim==1) vics = sample(vcount(hyp_lcc), n.infections, replace=FALSE) # sim 1
    if (sim>1) vics = vic_ids # sims 2 + 3
    if (sim==0) {vics = vic_ids; sim.dates = inf.dates} # data
    
    vic.time = c()
    for (i in 1:n.infections){
      u = vics[i]
      nbhd = unlist(neighborhood(graph, nodes=u, order=1))
      nbhd = intersect(vic_ids,nbhd)
      nbhd = setdiff(nbhd,u)
      nbhd = nbhd[u<nbhd] # only looks at pairs with u<v to avoid double-counting
      nbhd = nbhd[hyp_lcc_verts$ir_no[u] != hyp_lcc_verts$ir_no[nbhd]] # don't count infections of the same person
      tu = sim.dates[i]
      tvs = sim.dates[match(nbhd,vic_ids)]
      vic.time = c(vic.time,abs(tu-tvs))
    }
    n.vicpairs[sim,q] = length(vic.time)
    mean.time[sim,q] = mean(vic.time)
    med.time[sim,q] = median(vic.time)
    mean.50[sim,q] = mean(vic.time<=50)
    mean.100[sim,q] = mean(vic.time<=100)
    #     decay.rate[sim,q] = getDecayRate(vic.time) #get lm of histogram. or maybe just cor?
  }
}
print(proc.time()-ptm)


####### plot simulation data #######
data = 0.1176774 # 774.5483, 634, 0.07252472, 0.1176774
simdata = mean.100
xl = c(0.9*min(min(simdata),data),1.1*max(max(simdata),data))
yl = c(0,max(c(hist(simdata[1,],15)$density,hist(simdata[2,],15)$density,hist(simdata[3,],15)$density)))
hist(simdata[1,],15,xlim=xl,ylim=yl,col=rgb(0,1,0,1/4),freq=F,
     xlab='Neighbors Infected Within 100 Days', main=NULL)
hist(simdata[2,],15,col=rgb(0,0,1,1/4),freq=F,add=TRUE)
hist(simdata[3,],15,col=rgb(1,0,1,1/4),freq=F,add=TRUE)
abline(v=data,lwd=4,col='red')


##### inf dates histogram
dates = c(V(lcc)$fatal_date[!is.na(V(lcc)$fatal_date)],
          V(lcc)$nonfatal_date_1[!is.na(V(lcc)$nonfatal_date_1)],
          V(lcc)$nonfatal_date_2[!is.na(V(lcc)$nonfatal_date_2)],
          V(lcc)$nonfatal_date_3[!is.na(V(lcc)$nonfatal_date_3)],
          V(lcc)$nonfatal_date_4[!is.na(V(lcc)$nonfatal_date_4)],
          V(lcc)$nonfatal_date_5[!is.na(V(lcc)$nonfatal_date_5)])
dates = as.Date(dates)
hist(dates, breaks='months',freq=T,col='lightblue',format='%m/%Y',
     xlab='Month',ylab='Number of Shootings',main='')