summaryrefslogtreecommitdiffstats
path: root/R Scripts/sim-analysis.R
diff options
context:
space:
mode:
Diffstat (limited to 'R Scripts/sim-analysis.R')
-rwxr-xr-xR Scripts/sim-analysis.R121
1 files changed, 63 insertions, 58 deletions
diff --git a/R Scripts/sim-analysis.R b/R Scripts/sim-analysis.R
index 92c3d26..e56f575 100755
--- a/R Scripts/sim-analysis.R
+++ b/R Scripts/sim-analysis.R
@@ -1,89 +1,94 @@
library(igraph)
setwd("~/Documents/Violence Cascades/")
+load('Raw Data/lcc.RData')
+library(foreach)
+library(doMC)
+registerDoMC(cores=4)
-load('Results/hyper-lcc.RData')
-vic_ids = which(V(hyp_lcc)$vic==1)
-n.infections = length(vic_ids)
-n.days = max(hyp_lcc_verts$vic.day,na.rm=T) - min(hyp_lcc_verts$vic.day,na.rm=T)
-inf.dates = hyp_lcc_verts$vic.day[vic_ids]
-
-birthyears = as.numeric(format(as.Date(hyp_lcc_verts$dob[vic_ids]),'%Y'))
-
-# nbrs = neighborhood(graph, nodes=vic_ids, order=1)
-load('Results/vic-nbrs.RData')
+# gather data
+vic_days = as.numeric(unlist(lcc_verts[,16:21]))
+idxs = which(!is.na(vic_days)) %% vcount(lcc)
+vic_days = vic_days[!is.na(vic_days)]
+n.infected = sum(lcc_verts$vic)
+birthyears = as.numeric(format(as.Date(lcc_verts$dob[idxs]),'%Y'))
# find all pairs of vic neighbors to check
-vic.nbrs = matrix(NA,14885,2)
+## nbrs = neighborhood(lcc,order=1,nodes=vic_ids); save(nbrs,file='Raw Data/vic-nbrs.RData')
+load('Raw Data/vic-nbrs.RData')
+vic.nbrs = matrix(NA,9568,2)
idx = 1
-for (i in 1:n.infections){
+for (i in 1:n.infected){
u = vic_ids[i]
- nbhd = nbrs[[i]]
+ nbhd = as.numeric(nbrs[[i]])
nbhd = intersect(vic_ids,nbhd)
nbhd = nbhd[u<nbhd] # only looks at pairs with u<v to avoid double-counting
- nbhd = setdiff(nbhd,u)
- nbhd = nbhd[hyp_lcc_verts$ir_no[u] != hyp_lcc_verts$ir_no[nbhd]] # don't count infections of the same person
if (length(nbhd)==0) next
- vic.nbrs[idx:(idx+length(nbhd)-1),] = cbind(rep(match(u,vic_ids),length(nbhd)),match(nbhd,vic_ids))
+ vic.nbrs[idx:(idx+length(nbhd)-1),] = cbind(rep(u,length(nbhd)),nbhd)
idx = idx + length(nbhd)
}
# run simulation
n = 10000
-mean.time = rep(NA,n)
-med.time = rep(NA,n)
-mean.30 = rep(NA,n)
-mean.50 = rep(NA,n)
-n.vicpairs = rep(NA,n)
-
-hyp_lcc = upgrade_graph(hyp_lcc)
ptm = proc.time()
-for(q in 1:n){
- if (q%%250==0) print(paste('run:',q))
- graph = hyp_lcc
- vics = vic_ids
-
+simdata = foreach(q=1:n, .combine=rbind) %dopar% {
# shuffle infection dates
- sim.dates = rep(0,length(vic_ids))
+ sim.dates = rep(0,length(vic_days))
for(y in unique(birthyears)){
year_ids = which(birthyears==y)
- if (length(year_ids)>1) sim.dates[year_ids] = sample(inf.dates[year_ids])
+ if (length(year_ids)>1) sim.dates[year_ids] = sample(vic_days[year_ids])
}
-# vics = vic_ids; sim.dates = inf.dates # data
+# sim.dates = vic_days # data
- vic.time = abs(sim.dates[vic.nbrs[,1]] - sim.dates[vic.nbrs[,2]])
-
- n.vicpairs[q] = length(vic.time)
- mean.time[q] = mean(vic.time)
- med.time[q] = median(vic.time)
- mean.30[q] = mean(vic.time<=30)
- mean.50[q] = mean(vic.time<=50)
- # decay.rate[sim,q] = getDecayRate(vic.time) #get lm of histogram. or maybe just cor?
+ vic.dt = rep(0,dim(vic.nbrs)[1])
+ for(i in 1:dim(vic.nbrs)[1]){
+ mindt = 5000
+ tus = sim.dates[which(vic.nbrs[i,1]==idxs)]
+ tvs = sim.dates[which(vic.nbrs[i,2]==idxs)]
+ for(tu in tus){
+ for(tv in tvs){
+ dt = abs(tu-tv)
+ if(dt<mindt) mindt = dt
+ }
+ }
+ vic.dt[i] = mindt
+ }
+
+# mean.time[q] = mean(vic.dt)
+# med.time[q] = median(vic.dt)
+# mean.30[q] = mean(vic.dt<=30)
+# mean.60[q] = mean(vic.dt<=60)
+ return(c(mean(vic.dt),median(vic.dt),mean(vic.dt<=30),mean(vic.dt<=100)))
}
print(proc.time()-ptm)
-
-# save(mean.time,med.time,mean.30,mean.50,file='Results/sims-Aug28.RData')
+simdata = as.data.frame(simdata,row.names=1:dim(simdata)[1])
+colnames(simdata)=c('mean.time','med.time','mean.30','mean.100')
+# save(simdata,file='Results/sims-Sept21.RData')
####### plot simulation data #######
-data = 0.1179039 # 778.2056, 638, 0.05105811, 0.07248908
-simdata = mean.100
-xl = c(0.95*min(min(simdata),data),1.05*max(max(simdata),data))
-yl = c(0,max(hist(simdata,25)$density))
-hist(simdata,25,xlim=xl,ylim=yl,col=rgb(0,1,0,1/4),freq=F,
- xlab='Neighbor Infections Within 100 Days', main=NULL)
-abline(v=data,lwd=4,col='red')
-box()
-
+# mean=662.6, med=488, m30=0.076, m60=0.119, m100=0.167
+d = c(662.6,488,0.076,0.167)
+xlabs = c('Mean time between infections',
+ 'Median time between infections',
+ 'Infections within 30 days',
+ 'Infections within 100 days')
+par(mfrow=c(2,2))
+for(i in 1:4){
+ data = d[i]
+ sdata = simdata[,i]
+ xl = c(0.99*min(min(sdata),data),1.01*max(max(sdata),data))
+ h = hist(sdata,50,xlim=xl,col='#1f78b4',freq=T,border=NA,axes=F,
+ xlab=xlabs[i],ylab='Relative Frequency',main=NULL)
+ axis(1,at=pretty(xl,5))
+ axis(2,at=pretty(h$counts,3),lab=pretty(h$counts/n,3))
+ abline(v=data,lwd=4,col='#e41a1c')
+ box(lwd=1.1)
+}
##### inf dates histogram
-dates = c(V(lcc)$fatal_date[!is.na(V(lcc)$fatal_date)],
- V(lcc)$nonfatal_date_1[!is.na(V(lcc)$nonfatal_date_1)],
- V(lcc)$nonfatal_date_2[!is.na(V(lcc)$nonfatal_date_2)],
- V(lcc)$nonfatal_date_3[!is.na(V(lcc)$nonfatal_date_3)],
- V(lcc)$nonfatal_date_4[!is.na(V(lcc)$nonfatal_date_4)],
- V(lcc)$nonfatal_date_5[!is.na(V(lcc)$nonfatal_date_5)])
-dates = as.Date(dates)
-hist(dates, breaks='months',freq=T,col='lightblue',format='%m/%Y',
+vic_dates = as.Date(unlist(lcc_verts[,10:15]))
+vic_dates = vic_dates[!is.na(vic_dates)]
+hist(vic_dates, breaks='months',freq=T,col='lightblue',format='%Y',
xlab='Month',ylab='Number of Shootings',main='')
### data vs sim hist comparison