summaryrefslogtreecommitdiffstats
path: root/R Scripts/sim-analysis.R
blob: 39fd4763e0691b06aa254a0b224e86ed2a4f9b49 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
library(igraph)
setwd("~/Documents/Violence Cascades/")
load('Raw Data/lcc.RData')
library(foreach)
library(doMC)
registerDoMC(cores=4)

# gather data
vic_days = as.numeric(unlist(lcc_verts[,16:21]))
idxs = which(!is.na(vic_days)) %% vcount(lcc)
vic_days = vic_days[!is.na(vic_days)]
n.infected = sum(lcc_verts$vic)
birthyears = as.numeric(format(as.Date(lcc_verts$dob[idxs]),'%Y'))
districts = as.numeric(lcc_verts$district[idxs])

# find all pairs of vic neighbors to check
## nbrs = neighborhood(lcc,order=1,nodes=vic_ids); save(nbrs,file='Raw Data/vic-nbrs.RData')
load('Raw Data/vic-nbrs.RData')
vic.nbrs = matrix(NA,9568,2)
idx = 1
for (i in 1:n.infected){
  u = vic_ids[i]
  nbhd = as.numeric(nbrs[[i]])
  nbhd = intersect(vic_ids,nbhd)
  nbhd = nbhd[u<nbhd] # only looks at pairs with u<v to avoid double-counting
  if (length(nbhd)==0) next
  vic.nbrs[idx:(idx+length(nbhd)-1),] = cbind(rep(u,length(nbhd)),nbhd)
  idx = idx + length(nbhd)
}

# run simulation
n = 10000

ptm = proc.time()
simdata = foreach(q=1:n, .combine=rbind) %dopar% {  
  # shuffle infection dates
  sim.dates = vic_days # data
  for(y in unique(birthyears)){
    for(d in unique(districts)){
      ids = which(birthyears==y & districts==d)
      if (length(ids)>1) sim.dates[ids] = sample(vic_days[ids])
    }
  }
  
  vic.dt = rep(0,dim(vic.nbrs)[1])
  for(i in 1:dim(vic.nbrs)[1]){
    mindt = 50000
    tus = sim.dates[which(vic.nbrs[i,1]==idxs)]
    tvs = sim.dates[which(vic.nbrs[i,2]==idxs)]
    for(tu in tus){
      for(tv in tvs){
        dt = abs(tu-tv)
        if(dt<mindt) mindt = dt
      }
    }
    vic.dt[i] = mindt
  }
  
#   mean.time[q] = mean(vic.dt)
#   med.time[q] = median(vic.dt)
#   mean.30[q] = mean(vic.dt<=30)
#   mean.60[q] = mean(vic.dt<=60)
  return(c(mean(vic.dt),median(vic.dt),mean(vic.dt<=30),mean(vic.dt<=100)))
}
print(proc.time()-ptm)
simdata = as.data.frame(simdata,row.names=1:dim(simdata)[1])
colnames(simdata)=c('mean.time','med.time','mean.30','mean.100')
# save(simdata,file='Results/sims-Sept21.RData')

####### plot simulation data #######
# mean=662.6, med=488, m30=0.076, m60=0.119, m100=0.167
d = c(662.6,488,0.076,0.167)
xlabs = c('Mean days between infections',
          'Median days between infections',
          'Infections within 30 days',
          'Infections within 100 days')
xlims = matrix(c(660,765,475,625,0.03,0.08,0.1,0.18),ncol=2,byrow=T)
yl = c(0,0.11*n)
par(mfrow=c(2,2))
for(i in 1:4){
  data = d[i] 
  sdata = simdata[,i]
  xl = xlims[i,]
  h = hist(sdata,50,xlim=xl,ylim=yl,col='#1f78b4',freq=T,border=NA,axes=F,
       xlab=xlabs[i],ylab='Relative frequency',main=NULL)
  if(i %in% c(1,2)) axis(1,at=pretty(xl,5))
  if(i %in% c(3,4)) axis(1,at=pretty(xl,5),lab=paste(pretty(xl,5)*100,'%',sep=''))
  axis(2,at=c(0,0.05,0.1)*n,lab=c(0,0.05,0.1))
  abline(v=data,lwd=4,col='#e41a1c')
  box(lwd=1.1)
}

##### inf dates histogram
vic_dates = as.Date(unlist(lcc_verts[,10:15]))
vic_dates = vic_dates[!is.na(vic_dates)]
hist(vic_dates, breaks='months',freq=T,col='lightblue',format='%Y',
     xlab='Month',ylab='Number of Shootings',main='')

### data vs sim hist comparison
hist(vic.time.data,breaks=seq(0,3000,by=10),col=rgb(0,0,1,1/2),freq=F,
     xlim=c(0,max(vic.time,vic.time.data)),
     xlab='Days between infections',main='')
hist(vic.time,breaks=seq(0,3000,by=10),col=rgb(1,0,0,1/2),add=T,freq=F)
legend("topright", inset=0.05, 
       c("Data", "Simulation"), 
       fill=c(rgb(0,0,1,1/2),rgb(1,0,0,1/2)))
box(which='plot')