diff options
| author | Ben Green <bgreen@g.harvard.edu> | 2015-09-22 08:28:13 -0400 |
|---|---|---|
| committer | Ben Green <bgreen@g.harvard.edu> | 2015-09-22 08:28:16 -0400 |
| commit | 28945245f49fafed3d05579166a46530845d2d8a (patch) | |
| tree | 9703409e142334339d5c145f9415501be8597ec7 /R Scripts/sim-analysis.R | |
| parent | 8c2d3d070a3db1b469e3e32e3c20ef67bc274b3b (diff) | |
| download | criminal_cascades-28945245f49fafed3d05579166a46530845d2d8a.tar.gz | |
script to plot entire lcc
Diffstat (limited to 'R Scripts/sim-analysis.R')
| -rwxr-xr-x | R Scripts/sim-analysis.R | 121 |
1 files changed, 63 insertions, 58 deletions
diff --git a/R Scripts/sim-analysis.R b/R Scripts/sim-analysis.R index 92c3d26..e56f575 100755 --- a/R Scripts/sim-analysis.R +++ b/R Scripts/sim-analysis.R @@ -1,89 +1,94 @@ library(igraph) setwd("~/Documents/Violence Cascades/") +load('Raw Data/lcc.RData') +library(foreach) +library(doMC) +registerDoMC(cores=4) -load('Results/hyper-lcc.RData') -vic_ids = which(V(hyp_lcc)$vic==1) -n.infections = length(vic_ids) -n.days = max(hyp_lcc_verts$vic.day,na.rm=T) - min(hyp_lcc_verts$vic.day,na.rm=T) -inf.dates = hyp_lcc_verts$vic.day[vic_ids] - -birthyears = as.numeric(format(as.Date(hyp_lcc_verts$dob[vic_ids]),'%Y')) - -# nbrs = neighborhood(graph, nodes=vic_ids, order=1) -load('Results/vic-nbrs.RData') +# gather data +vic_days = as.numeric(unlist(lcc_verts[,16:21])) +idxs = which(!is.na(vic_days)) %% vcount(lcc) +vic_days = vic_days[!is.na(vic_days)] +n.infected = sum(lcc_verts$vic) +birthyears = as.numeric(format(as.Date(lcc_verts$dob[idxs]),'%Y')) # find all pairs of vic neighbors to check -vic.nbrs = matrix(NA,14885,2) +## nbrs = neighborhood(lcc,order=1,nodes=vic_ids); save(nbrs,file='Raw Data/vic-nbrs.RData') +load('Raw Data/vic-nbrs.RData') +vic.nbrs = matrix(NA,9568,2) idx = 1 -for (i in 1:n.infections){ +for (i in 1:n.infected){ u = vic_ids[i] - nbhd = nbrs[[i]] + nbhd = as.numeric(nbrs[[i]]) nbhd = intersect(vic_ids,nbhd) nbhd = nbhd[u<nbhd] # only looks at pairs with u<v to avoid double-counting - nbhd = setdiff(nbhd,u) - nbhd = nbhd[hyp_lcc_verts$ir_no[u] != hyp_lcc_verts$ir_no[nbhd]] # don't count infections of the same person if (length(nbhd)==0) next - vic.nbrs[idx:(idx+length(nbhd)-1),] = cbind(rep(match(u,vic_ids),length(nbhd)),match(nbhd,vic_ids)) + vic.nbrs[idx:(idx+length(nbhd)-1),] = cbind(rep(u,length(nbhd)),nbhd) idx = idx + length(nbhd) } # run simulation n = 10000 -mean.time = rep(NA,n) -med.time = rep(NA,n) -mean.30 = rep(NA,n) -mean.50 = rep(NA,n) -n.vicpairs = rep(NA,n) - -hyp_lcc = upgrade_graph(hyp_lcc) ptm = proc.time() -for(q in 1:n){ - if (q%%250==0) print(paste('run:',q)) - graph = hyp_lcc - vics = vic_ids - +simdata = foreach(q=1:n, .combine=rbind) %dopar% { # shuffle infection dates - sim.dates = rep(0,length(vic_ids)) + sim.dates = rep(0,length(vic_days)) for(y in unique(birthyears)){ year_ids = which(birthyears==y) - if (length(year_ids)>1) sim.dates[year_ids] = sample(inf.dates[year_ids]) + if (length(year_ids)>1) sim.dates[year_ids] = sample(vic_days[year_ids]) } -# vics = vic_ids; sim.dates = inf.dates # data +# sim.dates = vic_days # data - vic.time = abs(sim.dates[vic.nbrs[,1]] - sim.dates[vic.nbrs[,2]]) - - n.vicpairs[q] = length(vic.time) - mean.time[q] = mean(vic.time) - med.time[q] = median(vic.time) - mean.30[q] = mean(vic.time<=30) - mean.50[q] = mean(vic.time<=50) - # decay.rate[sim,q] = getDecayRate(vic.time) #get lm of histogram. or maybe just cor? + vic.dt = rep(0,dim(vic.nbrs)[1]) + for(i in 1:dim(vic.nbrs)[1]){ + mindt = 5000 + tus = sim.dates[which(vic.nbrs[i,1]==idxs)] + tvs = sim.dates[which(vic.nbrs[i,2]==idxs)] + for(tu in tus){ + for(tv in tvs){ + dt = abs(tu-tv) + if(dt<mindt) mindt = dt + } + } + vic.dt[i] = mindt + } + +# mean.time[q] = mean(vic.dt) +# med.time[q] = median(vic.dt) +# mean.30[q] = mean(vic.dt<=30) +# mean.60[q] = mean(vic.dt<=60) + return(c(mean(vic.dt),median(vic.dt),mean(vic.dt<=30),mean(vic.dt<=100))) } print(proc.time()-ptm) - -# save(mean.time,med.time,mean.30,mean.50,file='Results/sims-Aug28.RData') +simdata = as.data.frame(simdata,row.names=1:dim(simdata)[1]) +colnames(simdata)=c('mean.time','med.time','mean.30','mean.100') +# save(simdata,file='Results/sims-Sept21.RData') ####### plot simulation data ####### -data = 0.1179039 # 778.2056, 638, 0.05105811, 0.07248908 -simdata = mean.100 -xl = c(0.95*min(min(simdata),data),1.05*max(max(simdata),data)) -yl = c(0,max(hist(simdata,25)$density)) -hist(simdata,25,xlim=xl,ylim=yl,col=rgb(0,1,0,1/4),freq=F, - xlab='Neighbor Infections Within 100 Days', main=NULL) -abline(v=data,lwd=4,col='red') -box() - +# mean=662.6, med=488, m30=0.076, m60=0.119, m100=0.167 +d = c(662.6,488,0.076,0.167) +xlabs = c('Mean time between infections', + 'Median time between infections', + 'Infections within 30 days', + 'Infections within 100 days') +par(mfrow=c(2,2)) +for(i in 1:4){ + data = d[i] + sdata = simdata[,i] + xl = c(0.99*min(min(sdata),data),1.01*max(max(sdata),data)) + h = hist(sdata,50,xlim=xl,col='#1f78b4',freq=T,border=NA,axes=F, + xlab=xlabs[i],ylab='Relative Frequency',main=NULL) + axis(1,at=pretty(xl,5)) + axis(2,at=pretty(h$counts,3),lab=pretty(h$counts/n,3)) + abline(v=data,lwd=4,col='#e41a1c') + box(lwd=1.1) +} ##### inf dates histogram -dates = c(V(lcc)$fatal_date[!is.na(V(lcc)$fatal_date)], - V(lcc)$nonfatal_date_1[!is.na(V(lcc)$nonfatal_date_1)], - V(lcc)$nonfatal_date_2[!is.na(V(lcc)$nonfatal_date_2)], - V(lcc)$nonfatal_date_3[!is.na(V(lcc)$nonfatal_date_3)], - V(lcc)$nonfatal_date_4[!is.na(V(lcc)$nonfatal_date_4)], - V(lcc)$nonfatal_date_5[!is.na(V(lcc)$nonfatal_date_5)]) -dates = as.Date(dates) -hist(dates, breaks='months',freq=T,col='lightblue',format='%m/%Y', +vic_dates = as.Date(unlist(lcc_verts[,10:15])) +vic_dates = vic_dates[!is.na(vic_dates)] +hist(vic_dates, breaks='months',freq=T,col='lightblue',format='%Y', xlab='Month',ylab='Number of Shootings',main='') ### data vs sim hist comparison |
