diff options
| -rwxr-xr-x | R Scripts/analyze-cascades.R | 2 | ||||
| -rw-r--r-- | R Scripts/fit-background.R | 8 | ||||
| -rw-r--r-- | R Scripts/plot-network.R | 19 | ||||
| -rwxr-xr-x | R Scripts/sim-analysis.R | 121 | ||||
| -rw-r--r-- | supplements/background.png | bin | 120272 -> 0 bytes |
5 files changed, 88 insertions, 62 deletions
diff --git a/R Scripts/analyze-cascades.R b/R Scripts/analyze-cascades.R index 9a47ece..f03af36 100755 --- a/R Scripts/analyze-cascades.R +++ b/R Scripts/analyze-cascades.R @@ -43,7 +43,7 @@ start_date + range(times) cb = brewer.pal(3,'Paired') cols = rep('#1f78b4',vcount(cc)) seed = which(degree(cc,mode='in')==0) -cols[seed] = '#d95f02' +cols[seed] = '#e41a1c' plot(cc,vertex.size=8,edge.arrow.size=0.2,vertex.color=cols,vertex.label=NA, layout=layout.reingold.tilford(cc,root=seed), edge.color='black',vertex.frame.color=NA) diff --git a/R Scripts/fit-background.R b/R Scripts/fit-background.R index 06e91c9..5bc1ce0 100644 --- a/R Scripts/fit-background.R +++ b/R Scripts/fit-background.R @@ -7,14 +7,16 @@ load('Raw Data/lcc.RData') vic_days = as.numeric(unlist(lcc_verts[,16:21])) vic_days = vic_days[!is.na(vic_days)] -hist(as.numeric(vic_days,2000,col='lightblue', +hist(vic_days,2000,col='lightblue', xlab='Day of Study Period',main='Infections During the Study Period') # get infection counts per day days = 1:3012 -t = table(factor(hyp_lcc_verts$vic.day[vic_ids],levels=days)) +t = table(factor(vic_days,levels=days)) counts = as.vector(t) -infs = data.frame(days,counts) +start_date = as.Date("2005-12-31") +dates = start_date + days +infs = data.frame(days,counts,dates) # define background function fit = function(x, lambda, A, phi) {lambda + A*(sin((2*pi/365.24)*x+phi))} diff --git a/R Scripts/plot-network.R b/R Scripts/plot-network.R new file mode 100644 index 0000000..38e241d --- /dev/null +++ b/R Scripts/plot-network.R @@ -0,0 +1,19 @@ +library(igraph) +setwd('~/Documents/Violence Cascades/') +load('Raw Data/lcc.RData') + +non_vic_ids = setdiff(1:vcount(lcc),vic_ids) +perm = rep(0,vcount(lcc)) +perm[non_vic_ids] = 1:length(non_vic_ids) +perm[vic_ids] = (length(non_vic_ids)+1):vcount(lcc) +lcc2 = permute(lcc,perm) +vic_ids2 = which(V(lcc2)$vic==T) + +cols = rep('#1f78b4',vcount(lcc)) +cols[vic_ids2] = '#e41a1c' + +layout.drl = layout_with_drl(lcc2,,weights=NULL) +save(layout.drl,file='Results/layout.RData') + +plot(lcc,vertex.size=1,vertex.color=cols,vertex.label=NA, + layout=layout.drl,edge.color=NA,vertex.frame.color=NA) diff --git a/R Scripts/sim-analysis.R b/R Scripts/sim-analysis.R index 92c3d26..e56f575 100755 --- a/R Scripts/sim-analysis.R +++ b/R Scripts/sim-analysis.R @@ -1,89 +1,94 @@ library(igraph) setwd("~/Documents/Violence Cascades/") +load('Raw Data/lcc.RData') +library(foreach) +library(doMC) +registerDoMC(cores=4) -load('Results/hyper-lcc.RData') -vic_ids = which(V(hyp_lcc)$vic==1) -n.infections = length(vic_ids) -n.days = max(hyp_lcc_verts$vic.day,na.rm=T) - min(hyp_lcc_verts$vic.day,na.rm=T) -inf.dates = hyp_lcc_verts$vic.day[vic_ids] - -birthyears = as.numeric(format(as.Date(hyp_lcc_verts$dob[vic_ids]),'%Y')) - -# nbrs = neighborhood(graph, nodes=vic_ids, order=1) -load('Results/vic-nbrs.RData') +# gather data +vic_days = as.numeric(unlist(lcc_verts[,16:21])) +idxs = which(!is.na(vic_days)) %% vcount(lcc) +vic_days = vic_days[!is.na(vic_days)] +n.infected = sum(lcc_verts$vic) +birthyears = as.numeric(format(as.Date(lcc_verts$dob[idxs]),'%Y')) # find all pairs of vic neighbors to check -vic.nbrs = matrix(NA,14885,2) +## nbrs = neighborhood(lcc,order=1,nodes=vic_ids); save(nbrs,file='Raw Data/vic-nbrs.RData') +load('Raw Data/vic-nbrs.RData') +vic.nbrs = matrix(NA,9568,2) idx = 1 -for (i in 1:n.infections){ +for (i in 1:n.infected){ u = vic_ids[i] - nbhd = nbrs[[i]] + nbhd = as.numeric(nbrs[[i]]) nbhd = intersect(vic_ids,nbhd) nbhd = nbhd[u<nbhd] # only looks at pairs with u<v to avoid double-counting - nbhd = setdiff(nbhd,u) - nbhd = nbhd[hyp_lcc_verts$ir_no[u] != hyp_lcc_verts$ir_no[nbhd]] # don't count infections of the same person if (length(nbhd)==0) next - vic.nbrs[idx:(idx+length(nbhd)-1),] = cbind(rep(match(u,vic_ids),length(nbhd)),match(nbhd,vic_ids)) + vic.nbrs[idx:(idx+length(nbhd)-1),] = cbind(rep(u,length(nbhd)),nbhd) idx = idx + length(nbhd) } # run simulation n = 10000 -mean.time = rep(NA,n) -med.time = rep(NA,n) -mean.30 = rep(NA,n) -mean.50 = rep(NA,n) -n.vicpairs = rep(NA,n) - -hyp_lcc = upgrade_graph(hyp_lcc) ptm = proc.time() -for(q in 1:n){ - if (q%%250==0) print(paste('run:',q)) - graph = hyp_lcc - vics = vic_ids - +simdata = foreach(q=1:n, .combine=rbind) %dopar% { # shuffle infection dates - sim.dates = rep(0,length(vic_ids)) + sim.dates = rep(0,length(vic_days)) for(y in unique(birthyears)){ year_ids = which(birthyears==y) - if (length(year_ids)>1) sim.dates[year_ids] = sample(inf.dates[year_ids]) + if (length(year_ids)>1) sim.dates[year_ids] = sample(vic_days[year_ids]) } -# vics = vic_ids; sim.dates = inf.dates # data +# sim.dates = vic_days # data - vic.time = abs(sim.dates[vic.nbrs[,1]] - sim.dates[vic.nbrs[,2]]) - - n.vicpairs[q] = length(vic.time) - mean.time[q] = mean(vic.time) - med.time[q] = median(vic.time) - mean.30[q] = mean(vic.time<=30) - mean.50[q] = mean(vic.time<=50) - # decay.rate[sim,q] = getDecayRate(vic.time) #get lm of histogram. or maybe just cor? + vic.dt = rep(0,dim(vic.nbrs)[1]) + for(i in 1:dim(vic.nbrs)[1]){ + mindt = 5000 + tus = sim.dates[which(vic.nbrs[i,1]==idxs)] + tvs = sim.dates[which(vic.nbrs[i,2]==idxs)] + for(tu in tus){ + for(tv in tvs){ + dt = abs(tu-tv) + if(dt<mindt) mindt = dt + } + } + vic.dt[i] = mindt + } + +# mean.time[q] = mean(vic.dt) +# med.time[q] = median(vic.dt) +# mean.30[q] = mean(vic.dt<=30) +# mean.60[q] = mean(vic.dt<=60) + return(c(mean(vic.dt),median(vic.dt),mean(vic.dt<=30),mean(vic.dt<=100))) } print(proc.time()-ptm) - -# save(mean.time,med.time,mean.30,mean.50,file='Results/sims-Aug28.RData') +simdata = as.data.frame(simdata,row.names=1:dim(simdata)[1]) +colnames(simdata)=c('mean.time','med.time','mean.30','mean.100') +# save(simdata,file='Results/sims-Sept21.RData') ####### plot simulation data ####### -data = 0.1179039 # 778.2056, 638, 0.05105811, 0.07248908 -simdata = mean.100 -xl = c(0.95*min(min(simdata),data),1.05*max(max(simdata),data)) -yl = c(0,max(hist(simdata,25)$density)) -hist(simdata,25,xlim=xl,ylim=yl,col=rgb(0,1,0,1/4),freq=F, - xlab='Neighbor Infections Within 100 Days', main=NULL) -abline(v=data,lwd=4,col='red') -box() - +# mean=662.6, med=488, m30=0.076, m60=0.119, m100=0.167 +d = c(662.6,488,0.076,0.167) +xlabs = c('Mean time between infections', + 'Median time between infections', + 'Infections within 30 days', + 'Infections within 100 days') +par(mfrow=c(2,2)) +for(i in 1:4){ + data = d[i] + sdata = simdata[,i] + xl = c(0.99*min(min(sdata),data),1.01*max(max(sdata),data)) + h = hist(sdata,50,xlim=xl,col='#1f78b4',freq=T,border=NA,axes=F, + xlab=xlabs[i],ylab='Relative Frequency',main=NULL) + axis(1,at=pretty(xl,5)) + axis(2,at=pretty(h$counts,3),lab=pretty(h$counts/n,3)) + abline(v=data,lwd=4,col='#e41a1c') + box(lwd=1.1) +} ##### inf dates histogram -dates = c(V(lcc)$fatal_date[!is.na(V(lcc)$fatal_date)], - V(lcc)$nonfatal_date_1[!is.na(V(lcc)$nonfatal_date_1)], - V(lcc)$nonfatal_date_2[!is.na(V(lcc)$nonfatal_date_2)], - V(lcc)$nonfatal_date_3[!is.na(V(lcc)$nonfatal_date_3)], - V(lcc)$nonfatal_date_4[!is.na(V(lcc)$nonfatal_date_4)], - V(lcc)$nonfatal_date_5[!is.na(V(lcc)$nonfatal_date_5)]) -dates = as.Date(dates) -hist(dates, breaks='months',freq=T,col='lightblue',format='%m/%Y', +vic_dates = as.Date(unlist(lcc_verts[,10:15])) +vic_dates = vic_dates[!is.na(vic_dates)] +hist(vic_dates, breaks='months',freq=T,col='lightblue',format='%Y', xlab='Month',ylab='Number of Shootings',main='') ### data vs sim hist comparison diff --git a/supplements/background.png b/supplements/background.png Binary files differdeleted file mode 100644 index 79c3895..0000000 --- a/supplements/background.png +++ /dev/null |
