summaryrefslogtreecommitdiffstats
path: root/R Scripts/sim-analysis.R
blob: 51659f501fb7ba5f4ac488921b168e4f47f3dad5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
library(igraph)
setwd("~/Documents/Violence Cascades/")
load('Raw Data/lcc.RData')
library(foreach)
library(doMC)
registerDoMC(cores=4)

# gather data
vic_days = as.numeric(unlist(lcc_verts[,16:21]))
idxs = which(!is.na(vic_days)) %% vcount(lcc)
vic_days = vic_days[!is.na(vic_days)]
n.infected = sum(lcc_verts$vic)
birthyears = as.numeric(format(as.Date(lcc_verts$dob[idxs]),'%Y'))

# find all pairs of vic neighbors to check
## nbrs = neighborhood(lcc,order=1,nodes=vic_ids); save(nbrs,file='Raw Data/vic-nbrs.RData')
load('Raw Data/vic-nbrs.RData')
vic.nbrs = matrix(NA,9568,2)
idx = 1
for (i in 1:n.infected){
  u = vic_ids[i]
  nbhd = as.numeric(nbrs[[i]])
  nbhd = intersect(vic_ids,nbhd)
  nbhd = nbhd[u<nbhd] # only looks at pairs with u<v to avoid double-counting
  if (length(nbhd)==0) next
  vic.nbrs[idx:(idx+length(nbhd)-1),] = cbind(rep(u,length(nbhd)),nbhd)
  idx = idx + length(nbhd)
}

# run simulation
n = 10000

ptm = proc.time()
simdata = foreach(q=1:n, .combine=rbind) %dopar% {  
  # shuffle infection dates
  sim.dates = rep(0,length(vic_days))
  for(y in unique(birthyears)){
    year_ids = which(birthyears==y)
    if (length(year_ids)>1) sim.dates[year_ids] = sample(vic_days[year_ids])
  }
#   sim.dates = vic_days # data
  
  vic.dt = rep(0,dim(vic.nbrs)[1])
  for(i in 1:dim(vic.nbrs)[1]){
    mindt = 5000
    tus = sim.dates[which(vic.nbrs[i,1]==idxs)]
    tvs = sim.dates[which(vic.nbrs[i,2]==idxs)]
    for(tu in tus){
      for(tv in tvs){
        dt = abs(tu-tv)
        if(dt<mindt) mindt = dt
      }
    }
    vic.dt[i] = mindt
  }
  
#   mean.time[q] = mean(vic.dt)
#   med.time[q] = median(vic.dt)
#   mean.30[q] = mean(vic.dt<=30)
#   mean.60[q] = mean(vic.dt<=60)
  return(c(mean(vic.dt),median(vic.dt),mean(vic.dt<=30),mean(vic.dt<=100)))
}
print(proc.time()-ptm)
simdata = as.data.frame(simdata,row.names=1:dim(simdata)[1])
colnames(simdata)=c('mean.time','med.time','mean.30','mean.100')
# save(simdata,file='Results/sims-Sept21.RData')

####### plot simulation data #######
# mean=662.6, med=488, m30=0.076, m60=0.119, m100=0.167
d = c(662.6,488,0.076,0.167)
xlabs = c('Mean days between infections',
          'Median days between infections',
          'Infections within 30 days',
          'Infections within 100 days')
xlims = matrix(c(660,765,475,625,0.03,0.08,0.1,0.18),ncol=2,byrow=T)
par(mfrow=c(2,2))
for(i in 1:4){
  data = d[i] 
  sdata = simdata[,i]
  xl = xlims[i,]
  h = hist(sdata,50,xlim=xl,col='#1f78b4',freq=T,border=NA,axes=F,
       xlab=xlabs[i],ylab='Relative frequency',main=NULL)
  if(i %in% c(1,2)) axis(1,at=pretty(xl,5))
  if(i %in% c(3,4)) axis(1,at=pretty(xl,5),lab=paste(pretty(xl,5)*100,'%',sep=''))
  axis(2,at=pretty(h$counts,3),lab=pretty(h$counts/n,3))
  abline(v=data,lwd=4,col='#e41a1c')
  box(lwd=1.1)
}

##### inf dates histogram
vic_dates = as.Date(unlist(lcc_verts[,10:15]))
vic_dates = vic_dates[!is.na(vic_dates)]
hist(vic_dates, breaks='months',freq=T,col='lightblue',format='%Y',
     xlab='Month',ylab='Number of Shootings',main='')

### data vs sim hist comparison
hist(vic.time.data,breaks=seq(0,3000,by=10),col=rgb(0,0,1,1/2),freq=F,
     xlim=c(0,max(vic.time,vic.time.data)),
     xlab='Days between infections',main='')
hist(vic.time,breaks=seq(0,3000,by=10),col=rgb(1,0,0,1/2),add=T,freq=F)
legend("topright", inset=0.05, 
       c("Data", "Simulation"), 
       fill=c(rgb(0,0,1,1/2),rgb(1,0,0,1/2)))
box(which='plot')