From 3969a594c49e58aafe04ff352b02d0d61eb228cf Mon Sep 17 00:00:00 2001 From: Ben Green Date: Tue, 29 Sep 2015 18:36:38 -0400 Subject: finalizing plots! --- R Scripts/analyze-cascades.R | 7 +++-- R Scripts/arrest-vs-infection-times.R | 35 ++++++++++++++++++++++- R Scripts/data-exploration.R | 6 ++-- R Scripts/fit-background.R | 5 ++-- R Scripts/plot-crime-data.R | 54 +++++++++++++++++++++++++++++++++++ R Scripts/plot-network.R | 4 +-- R Scripts/sim-analysis.R | 12 ++++---- 7 files changed, 107 insertions(+), 16 deletions(-) create mode 100644 R Scripts/plot-crime-data.R (limited to 'R Scripts') diff --git a/R Scripts/analyze-cascades.R b/R Scripts/analyze-cascades.R index 02f9349..8bbb598 100755 --- a/R Scripts/analyze-cascades.R +++ b/R Scripts/analyze-cascades.R @@ -12,9 +12,10 @@ counts = rep(0,max(sizes)) counts[data$V1] = data$V2 pl = power.law.fit(counts) plot(sizes,counts,log='xy',pch=20,col='#1f78b4',xaxt='n',yaxt='n', - xlab='Size of Cascade', ylab='Number of Cascades', main='') -axis(1,at=c(1,5,10,50,100,500)) -axis(2,at=c(1,10,100,1000)) + xlab='Size of cascade', ylab='Number of cascades', main='', + cex.axis=0.6,cex.lab=0.6) +axis(1,at=c(1,5,10,50,100,500),cex.axis=0.6) +axis(2,at=c(1,10,100,1000),cex.axis=0.6) sizes_pl = displ$new(counts[counts>1]) est = estimate_xmin(sizes_pl) diff --git a/R Scripts/arrest-vs-infection-times.R b/R Scripts/arrest-vs-infection-times.R index 222be3e..7c0bfcf 100644 --- a/R Scripts/arrest-vs-infection-times.R +++ b/R Scripts/arrest-vs-infection-times.R @@ -1,6 +1,38 @@ library(igraph) -setwd('~/Documents/Cascade Project/Raw Data/') +setwd('~/Documents/Violence Cascades/Raw Data/') + +person_verts = get.data.frame(person,'vertices') +vics = person_verts$vic +vic_verts = person_verts[vics,14:19] +first_vics = apply(vic_verts,1,min,na.rm=T) + + +load('arrests.RData') +arr = arrests[order(arrests$date2),] +arr = arr[match(unique(arr$ir2),arr$ir2),] +arr = arr[match(rownames(person_verts),arr$ir2),] +arr = arr[vics,] +offset = 16802 +first_arrests = as.numeric(arr$date2) - offset + +lag = first_vics - first_arrests +hist(lag,100,col='') +h = hist(lag,150,col='#1f78b4',border=NA,axes=T, + xlab='',ylab='Frequency',main=NULL) +box(lwd=1.1) + + + + + + + + + + + +#### OLD VERSION ##### Load arrest data arrests <- read.csv("2006to2014arrests2.csv", header=T, colClass=c("character")) arrests$ir2 <- paste("ir", arrests$ir_no) @@ -41,3 +73,4 @@ murders = murders[murders$ir2 %in% arrests$ir2,] mur_dates = murders$INJURY_DATE arrest_dates = sub.arrests$dates[match(murders$ir2, sub.arrests$individuals)] sum(mur_dates==arrest_dates) + diff --git a/R Scripts/data-exploration.R b/R Scripts/data-exploration.R index 16e7bba..f8f6892 100755 --- a/R Scripts/data-exploration.R +++ b/R Scripts/data-exploration.R @@ -1,6 +1,6 @@ library(igraph) setwd("~/Documents/Violence Cascades/") -load('Results/hyper-lcc.RData') +load('Raw Data/lcc.RData') d = remove.edge.attribute(person,'weight') lcc = induced.subgraph(d,which(clusters(d)$membership==which.max(clusters(d)$csize))) @@ -29,8 +29,8 @@ S ##### Degree Distribution plot(1:max(degree(lcc)),degree.distribution(lcc)[-1]*vcount(lcc), - log='xy',col='#377EB8',pch=20, - xlab='Degree', ylab='Number of Vertices', main='') + log='xy',col='#1f78b4',pch=20,cex.axis=0.6,cex.lab=0.6, + xlab='Degree', ylab='Number of vertices', main='') pl = power.law.fit(degree.distribution(lcc)) ##### Victims diff --git a/R Scripts/fit-background.R b/R Scripts/fit-background.R index a5f935b..49fb64d 100644 --- a/R Scripts/fit-background.R +++ b/R Scripts/fit-background.R @@ -30,8 +30,9 @@ res = nls(formula=fit_form, data=infs, start=list(lambda=3, A=2, phi=4)) co = coef(res); co plot(t) -plot(days,counts,pch=20,cex=0.5,col='#1f78b4', - xlab='Day',ylab='Number of Infections') +plot(days,counts,pch=20,cex=0.5,col='#1f78b4',xaxt='n', + xlab='',ylab='Number of shootings',cex.axis=0.6,cex.lab=0.6) curve(fit(x, lambda=co["lambda"], A=co["A"], phi=co["phi"]), add=TRUE ,lwd=3, col="#1a9850") +axis(1,at=seq(1,max(days),365),lab=2006:2014,cex.axis=.6) diff --git a/R Scripts/plot-crime-data.R b/R Scripts/plot-crime-data.R new file mode 100644 index 0000000..13800cc --- /dev/null +++ b/R Scripts/plot-crime-data.R @@ -0,0 +1,54 @@ +################### +# load crime data +library(xlsx) +setwd("~/Documents/Violence Cascades/Raw Data/") +data = read.xlsx('Chicago-crime-data.xlsx',1) + +# crime rate +plot(data$Year,data$Index.Rate.per.100.000,type='l',col='#1f78b4',lwd=2, + xlab='',ylab='Rate per 100,000',cex.lab=.6,cex.axis=.6) + +# homocide rate +plot(data$Year,data$Homicide.Rate,type='l',col='#1f78b4',lwd=2, + xlab='',ylab='Rate per 100,000',cex.lab=.6,cex.axis=.6) + +################### +# load shootings data +# for lcc +load('lcc.RData') +vic_dates = as.Date(unlist(lcc_verts[,10:15])) +vic_dates = vic_dates[!is.na(vic_dates)] +vdh = hist(vic_dates, breaks='months') + +plot(vdh$mids,vdh$counts,type='l',col='#1f78b4',lwd=2, + xlab='',ylab='Shootings',xaxt='n',cex.lab=.6,cex.axis=.6) +axis(1,at=vdh$breaks[seq(1,102,12)], + lab=2006:2014,cex.axis=.6) + + +# for all recorded shootings +shootings <- read.csv("shooting-data-withdate2.csv", header = T) +victims = shootings[shootings$INV_PARTY_TYPE_CD=="VIC",] +victims = victims[!is.na(victims$IR_NO),] +victims$ir2 <- paste("ir", victims$IR_NO) + +# get murder victim attributes +murders = read.csv("murder-victims-13nov.csv", header=T) +murders = murders[!is.na(murders$VICTIM_IR_NO),] +murders = murders[murders$INJURY_DESCR=="SHOT",] +murders = murders[match(unique(murders$VICTIM_IR_NO),murders$VICTIM_IR_NO),] +murders = murders[as.Date(murders$INJURY_DATE,format='%m/%d/%y')>=start_date,] +murders$ir2 = paste("ir", murders$VICTIM_IR_NO) + +# clear nonfatals that led to death +v = victims[victims$IR_NO %in% murders$VICTIM_IR_NO,] +rows = c() +for(i in 1:dim(v)[1]){ + row = which(rownames(victims)==as.numeric(rownames(v[i,]))) + m = murders[murders$VICTIM_IR_NO==v$IR_NO[i],] + dup = as.Date(v$INCIDENT_DATE[i],format='%m/%d/%y') %in% as.Date(m$INJURY_DATE,format='%m/%d/%y') + if(dup==T) rows = c(rows,row) +} +victims = victims[-rows,] + +vic_dates = c(as.Date(murders$INJURY_DATE,format='%m/%d/%y'),as.Date(victims$INCIDENT_DATE,format='%m/%d/%y')) \ No newline at end of file diff --git a/R Scripts/plot-network.R b/R Scripts/plot-network.R index 93c48fb..1339dd6 100644 --- a/R Scripts/plot-network.R +++ b/R Scripts/plot-network.R @@ -21,5 +21,5 @@ plot(lcc,vertex.size=1,vertex.color=cols,vertex.label=NA, layout=layout.drl,edge.color=NA,vertex.frame.color=NA) # library(rgl) -rglplot(lcc,vertex.size=1,vertex.color=cols,vertex.label=NA, - layout=layout.drl,edge.color=NA,vertex.frame.color=NA) +# rglplot(lcc,vertex.size=1,vertex.color=cols,vertex.label=NA, +# layout=layout.drl,edge.color=NA,vertex.frame.color=NA) diff --git a/R Scripts/sim-analysis.R b/R Scripts/sim-analysis.R index e56f575..51659f5 100755 --- a/R Scripts/sim-analysis.R +++ b/R Scripts/sim-analysis.R @@ -68,18 +68,20 @@ colnames(simdata)=c('mean.time','med.time','mean.30','mean.100') ####### plot simulation data ####### # mean=662.6, med=488, m30=0.076, m60=0.119, m100=0.167 d = c(662.6,488,0.076,0.167) -xlabs = c('Mean time between infections', - 'Median time between infections', +xlabs = c('Mean days between infections', + 'Median days between infections', 'Infections within 30 days', 'Infections within 100 days') +xlims = matrix(c(660,765,475,625,0.03,0.08,0.1,0.18),ncol=2,byrow=T) par(mfrow=c(2,2)) for(i in 1:4){ data = d[i] sdata = simdata[,i] - xl = c(0.99*min(min(sdata),data),1.01*max(max(sdata),data)) + xl = xlims[i,] h = hist(sdata,50,xlim=xl,col='#1f78b4',freq=T,border=NA,axes=F, - xlab=xlabs[i],ylab='Relative Frequency',main=NULL) - axis(1,at=pretty(xl,5)) + xlab=xlabs[i],ylab='Relative frequency',main=NULL) + if(i %in% c(1,2)) axis(1,at=pretty(xl,5)) + if(i %in% c(3,4)) axis(1,at=pretty(xl,5),lab=paste(pretty(xl,5)*100,'%',sep='')) axis(2,at=pretty(h$counts,3),lab=pretty(h$counts/n,3)) abline(v=data,lwd=4,col='#e41a1c') box(lwd=1.1) -- cgit v1.2.3-70-g09d2