diff options
| author | Ben Green <bgreen@g.harvard.edu> | 2015-10-04 17:18:05 -0400 |
|---|---|---|
| committer | Ben Green <bgreen@g.harvard.edu> | 2015-10-04 17:18:05 -0400 |
| commit | 75868035a6e363c2ad67ff0342440aff3972a970 (patch) | |
| tree | 5bf65d9f3d5fa47dda8c1695ce34aa24e0f811dd /R Scripts | |
| parent | 3969a594c49e58aafe04ff352b02d0d61eb228cf (diff) | |
| download | criminal_cascades-75868035a6e363c2ad67ff0342440aff3972a970.tar.gz | |
..
Diffstat (limited to 'R Scripts')
| -rwxr-xr-x | R Scripts/analyze-cascades.R | 1 | ||||
| -rw-r--r-- | R Scripts/arrest-vs-infection-times.R | 8 | ||||
| -rwxr-xr-x | R Scripts/data-exploration.R | 14 | ||||
| -rwxr-xr-x | R Scripts/data-prep.R | 21 | ||||
| -rw-r--r-- | R Scripts/plot-crime-data.R | 29 | ||||
| -rw-r--r-- | R Scripts/plot-data-prep.R | 16 | ||||
| -rw-r--r-- | R Scripts/predict-victims.R | 8 | ||||
| -rwxr-xr-x | R Scripts/sim-analysis.R | 17 |
8 files changed, 79 insertions, 35 deletions
diff --git a/R Scripts/analyze-cascades.R b/R Scripts/analyze-cascades.R index 8bbb598..affd77a 100755 --- a/R Scripts/analyze-cascades.R +++ b/R Scripts/analyze-cascades.R @@ -17,6 +17,7 @@ plot(sizes,counts,log='xy',pch=20,col='#1f78b4',xaxt='n',yaxt='n', axis(1,at=c(1,5,10,50,100,500),cex.axis=0.6) axis(2,at=c(1,10,100,1000),cex.axis=0.6) +mean(rep(sizes,counts)) sizes_pl = displ$new(counts[counts>1]) est = estimate_xmin(sizes_pl) sizes_pl$setXmin(est) diff --git a/R Scripts/arrest-vs-infection-times.R b/R Scripts/arrest-vs-infection-times.R index 7c0bfcf..4d2a81d 100644 --- a/R Scripts/arrest-vs-infection-times.R +++ b/R Scripts/arrest-vs-infection-times.R @@ -19,11 +19,13 @@ first_arrests = as.numeric(arr$date2) - offset lag = first_vics - first_arrests hist(lag,100,col='') h = hist(lag,150,col='#1f78b4',border=NA,axes=T, - xlab='',ylab='Frequency',main=NULL) + xlab='Days between first arrest and first gunshot victimization', + ylab='Frequency',main=NULL,cex.axis=0.6,cex.lab=0.6) box(lwd=1.1) - - +dobs = as.Date(arr$birth_date,format='%m/%d/%Y') +ages = as.numeric(start_date + first_vics - dobs) +plot(table(round(ages/365))) diff --git a/R Scripts/data-exploration.R b/R Scripts/data-exploration.R index f8f6892..22ec60c 100755 --- a/R Scripts/data-exploration.R +++ b/R Scripts/data-exploration.R @@ -33,6 +33,17 @@ plot(1:max(degree(lcc)),degree.distribution(lcc)[-1]*vcount(lcc), xlab='Degree', ylab='Number of vertices', main='') pl = power.law.fit(degree.distribution(lcc)) +##### small-worldness of lcc +trg = transitivity(lcc,type='global') +trl = mean(transitivity(lcc,type='local',isolates='zero')) +apl = average.path.length(lcc) + +erg = erdos.renyi.game(n=vcount(lcc),p.or.m=ecount(lcc),type='gnm') +transitivity(erg,type='global') +mean(transitivity(erg,type='local',isolates='zero')) +average.path.length(erg) + + ##### Victims vic_ids = which(V(lcc)$vic==TRUE) non_vic_ids = which(V(lcc)$vic==FALSE) @@ -53,3 +64,6 @@ sum(sum(!is.na(lcc_verts$fatal_day)), sum(!is.na(lcc_verts$nonfatal_day_3)), sum(!is.na(lcc_verts$nonfatal_day_4)), sum(!is.na(lcc_verts$nonfatal_day_5))) + + + diff --git a/R Scripts/data-prep.R b/R Scripts/data-prep.R index ca2fdc2..a6c31e6 100755 --- a/R Scripts/data-prep.R +++ b/R Scripts/data-prep.R @@ -86,15 +86,30 @@ for (i in 1:dim(a)[1]){ ir = a$ir_no[i] arr = arrests[arrests$ir_no==ir,] arr = arr[arr$sex_code_cd != 'X',] - if(dim(arr)[1]>0){ + if(dim(arr)[1]>0){# need to match rownames like districts? arrests$sex_code_cd[as.numeric(rownames(a[i,]))] = names(which.max(table(arr$sex_code_cd))) } } arrests$sex_code_cd[arrests$sex_code_cd=='X'] = 'M' +##### residential districts +arrests$o_district[arrests$o_district=='31'] = '' +a = arrests[arrests$o_district=='' & arrests$o_city=='CHICAGO',] +for (i in 1:dim(a)[1]){ + if(i%%200==0) print(i) + ir = a$ir_no[i] + arr = arrests[arrests$ir_no==ir,] + arr = arr[arr$o_district != '',] + if(dim(arr)[1]>0){ + arrests$o_district[match(as.numeric(rownames(a[i,])),rownames(arrests))] = names(which.max(table(arr$o_district))) + } +} +arrests$o_district[arrests$o_district==''] = 0 +# lcc_verts$district = arrests$o_district[match(lcc_verts$ir_no,arrests$ir2)] +# V(lcc)$district = arrests$o_district[match(lcc_verts$ir_no,arrests$ir2)] + #I need to add the "ir" for this to make sense when I "project" arrests$ir2 <- paste("ir", arrests$ir_no) - # save altered arrests data save(arrests,file='arrests.RData') @@ -180,7 +195,7 @@ murders = read.csv("murder-victims-13nov.csv", header=T) murders = murders[!is.na(murders$VICTIM_IR_NO),] murders = murders[murders$INJURY_DESCR=="SHOT",] murders = murders[match(unique(murders$VICTIM_IR_NO),murders$VICTIM_IR_NO),] -murders = murders[as.Date(murders$INJURY_DATE,format='%m/%d/%y')>=start_date,] +murders = murders[as.Date(murders$INJURY_DATE,format='%m/%d/%y')>start_date,] murders$ir2 = paste("ir", murders$VICTIM_IR_NO) # clear nonfatals that led to death diff --git a/R Scripts/plot-crime-data.R b/R Scripts/plot-crime-data.R index 13800cc..7bb768a 100644 --- a/R Scripts/plot-crime-data.R +++ b/R Scripts/plot-crime-data.R @@ -15,17 +15,10 @@ plot(data$Year,data$Homicide.Rate,type='l',col='#1f78b4',lwd=2, ################### # load shootings data # for lcc -load('lcc.RData') -vic_dates = as.Date(unlist(lcc_verts[,10:15])) -vic_dates = vic_dates[!is.na(vic_dates)] -vdh = hist(vic_dates, breaks='months') - -plot(vdh$mids,vdh$counts,type='l',col='#1f78b4',lwd=2, - xlab='',ylab='Shootings',xaxt='n',cex.lab=.6,cex.axis=.6) -axis(1,at=vdh$breaks[seq(1,102,12)], - lab=2006:2014,cex.axis=.6) - - +# load('lcc.RData') +# vic_dates = as.Date(unlist(lcc_verts[,10:15])) +# vic_dates = vic_dates[!is.na(vic_dates)] +start_date = as.Date("2005-12-31") # for all recorded shootings shootings <- read.csv("shooting-data-withdate2.csv", header = T) victims = shootings[shootings$INV_PARTY_TYPE_CD=="VIC",] @@ -37,7 +30,7 @@ murders = read.csv("murder-victims-13nov.csv", header=T) murders = murders[!is.na(murders$VICTIM_IR_NO),] murders = murders[murders$INJURY_DESCR=="SHOT",] murders = murders[match(unique(murders$VICTIM_IR_NO),murders$VICTIM_IR_NO),] -murders = murders[as.Date(murders$INJURY_DATE,format='%m/%d/%y')>=start_date,] +murders = murders[as.Date(murders$INJURY_DATE,format='%m/%d/%y')>start_date,] murders$ir2 = paste("ir", murders$VICTIM_IR_NO) # clear nonfatals that led to death @@ -51,4 +44,14 @@ for(i in 1:dim(v)[1]){ } victims = victims[-rows,] -vic_dates = c(as.Date(murders$INJURY_DATE,format='%m/%d/%y'),as.Date(victims$INCIDENT_DATE,format='%m/%d/%y'))
\ No newline at end of file +vic_dates = c(as.Date(murders$INJURY_DATE,format='%m/%d/%y'),as.Date(victims$INCIDENT_DATE,format='%m/%d/%y')) + +vdh = hist(vic_dates, breaks='months') + +plot(vdh$mids[1:99],vdh$counts[1:99],type='l',col='#1f78b4',lwd=2,ylim=c(11,289), + xlab='',ylab='Shootings',xaxt='n',cex.lab=.6,cex.axis=.6) +axis(1,at=vdh$breaks[seq(1,100,12)], + lab=2006:2014,cex.axis=.6) + +plot(colMeans(matrix(vdh$counts[1:96],ncol=12,byrow=T))) + diff --git a/R Scripts/plot-data-prep.R b/R Scripts/plot-data-prep.R index 27c2416..b91d8de 100644 --- a/R Scripts/plot-data-prep.R +++ b/R Scripts/plot-data-prep.R @@ -28,7 +28,8 @@ plot.new() vps <- baseViewports() pushViewport(vps$figure) vp1 <-plotViewport() -grid.table(data) +theme = ttheme_default(core=list(bg_params = list(fill = c("grey95")))) +grid.table(data,theme = theme) # (B) bipartite person-event graph layout = matrix(c(rep(seq(4,0,-1),2),rep(0,5),rep(1,5)),ncol=2) @@ -36,7 +37,8 @@ labels = c( 1:5, toupper(letters[1:5]) ) cols = c( rep('#1a9850',5), rep('#1f78b4',5) ) plot(g, layout=layout[,2:1], vertex.color=cols,edge.color='black', vertex.frame.color=NA,vertex.size=30,vertex.label=labels, - vertex.label.color='white',vertex.label.family='sans') + vertex.label.color='white',vertex.label.family='sans', + vertex.label.font=2,vertex.label.cex=1.25) # (C) unipartite person-person graph par(mar=rep(1,4)) @@ -44,13 +46,13 @@ g2 = bipartite.projection(g)$proj1 layout.g2 = matrix(c(0,.33,-.75,-.33,0,-1,.75,-.33,0,1),ncol=2,byrow=T) plot(g2, vertex.color='#1f78b4', edge.color='black',layout=layout.g2,rescale=F, vertex.frame.color=NA,vertex.size=30,vertex.label=toupper(letters[1:5]), - vertex.label.color='white',vertex.label.family='sans') + vertex.label.color='white',vertex.label.family='sans', + vertex.label.font=2,vertex.label.cex=1.25) ############################################################################## ############################################################################## #### Hawkes Process Diagram layout(matrix(c(1,2,3,3,3,3), ncol=2, byrow = TRUE)) -par(mar=c(1,1,1,1)) vics = data.frame(IC = toupper(letters[1:5]), Victim = c('TRUE','FALSE','FALSE','TRUE','FALSE'), @@ -62,15 +64,17 @@ plot.new() vps <- baseViewports() pushViewport(vps$figure) vp1 <-plotViewport() -grid.table(vics) +grid.table(vics,theme=theme) cols = rep('#1f78b4',vcount(g2)) cols[vics$Victim==T] = '#e41a1c' # B +# par(mar=c(1,1,1,1)) plot(g2, vertex.color=cols, edge.color='black',layout=layout.g2,rescale=F, vertex.frame.color=NA,vertex.size=30,vertex.label=toupper(letters[1:5]), - vertex.label.color='white',vertex.label.family='sans',axes=F) + vertex.label.color='white',vertex.label.family='sans',axes=F, + vertex.label.font=2,vertex.label.cex=1.25) # C diff --git a/R Scripts/predict-victims.R b/R Scripts/predict-victims.R index d17ef83..3cffcbe 100644 --- a/R Scripts/predict-victims.R +++ b/R Scripts/predict-victims.R @@ -22,7 +22,8 @@ edges_all = data.table(dag_dat_lcc) vic_ids = which(lcc_verts$vic==T) lcc_verts = lcc_verts[,c('id','ir_no','nonfatal_day_1','nonfatal_day_2', 'nonfatal_day_3','nonfatal_day_4','nonfatal_day_5', - 'fatal_day','sex','race','dob','gang.member','gang.name')] + 'fatal_day','sex','race','dob','gang.member', + 'gang.name','district')] lcc_verts$vic.day = NA lcc_verts$vic.day[vic_ids] = as.numeric(apply(vic_times_lcc[vic_ids,2:7],1,min,na.rm=T)) lcc_verts$age = FALSE @@ -35,10 +36,11 @@ vic_times_lcc$ir_no = lcc_verts$ir_no rownames(vic_times_lcc) = vic_times_lcc$ir_no ##### Initialize data -formula = vic ~ sex + race + age + gang.member + gang.name + arrests +formula = vic ~ sex + race + age + gang.member + gang.name + arrests + district lcc_verts$sex = as.factor(lcc_verts$sex) lcc_verts$race = as.factor(lcc_verts$race) lcc_verts$gang.name = as.factor(lcc_verts$gang.name) +lcc_verts$district = as.factor(lcc_verts$district) dt = data.table(ir=lcc_verts$ir_no, dem=0, cas=0, comb=0) ##### @@ -48,7 +50,7 @@ days = Reduce(union, list(lcc_verts$fatal_day,lcc_verts$nonfatal_day_1, days = days[!is.na(days)] days = sort(days) days = split(days, ceiling(seq_along(days)/92)) -lambdas = c(0, exp(seq(log(0.0001), log(0.01), length.out=100)), 1) +lambdas = c(0, exp(seq(log(0.00001), log(0.01), length.out=100)), 1) ##### Loop through days correct_rank = c() diff --git a/R Scripts/sim-analysis.R b/R Scripts/sim-analysis.R index 51659f5..39fd476 100755 --- a/R Scripts/sim-analysis.R +++ b/R Scripts/sim-analysis.R @@ -11,6 +11,7 @@ idxs = which(!is.na(vic_days)) %% vcount(lcc) vic_days = vic_days[!is.na(vic_days)] n.infected = sum(lcc_verts$vic) birthyears = as.numeric(format(as.Date(lcc_verts$dob[idxs]),'%Y')) +districts = as.numeric(lcc_verts$district[idxs]) # find all pairs of vic neighbors to check ## nbrs = neighborhood(lcc,order=1,nodes=vic_ids); save(nbrs,file='Raw Data/vic-nbrs.RData') @@ -33,16 +34,17 @@ n = 10000 ptm = proc.time() simdata = foreach(q=1:n, .combine=rbind) %dopar% { # shuffle infection dates - sim.dates = rep(0,length(vic_days)) + sim.dates = vic_days # data for(y in unique(birthyears)){ - year_ids = which(birthyears==y) - if (length(year_ids)>1) sim.dates[year_ids] = sample(vic_days[year_ids]) + for(d in unique(districts)){ + ids = which(birthyears==y & districts==d) + if (length(ids)>1) sim.dates[ids] = sample(vic_days[ids]) + } } -# sim.dates = vic_days # data vic.dt = rep(0,dim(vic.nbrs)[1]) for(i in 1:dim(vic.nbrs)[1]){ - mindt = 5000 + mindt = 50000 tus = sim.dates[which(vic.nbrs[i,1]==idxs)] tvs = sim.dates[which(vic.nbrs[i,2]==idxs)] for(tu in tus){ @@ -73,16 +75,17 @@ xlabs = c('Mean days between infections', 'Infections within 30 days', 'Infections within 100 days') xlims = matrix(c(660,765,475,625,0.03,0.08,0.1,0.18),ncol=2,byrow=T) +yl = c(0,0.11*n) par(mfrow=c(2,2)) for(i in 1:4){ data = d[i] sdata = simdata[,i] xl = xlims[i,] - h = hist(sdata,50,xlim=xl,col='#1f78b4',freq=T,border=NA,axes=F, + h = hist(sdata,50,xlim=xl,ylim=yl,col='#1f78b4',freq=T,border=NA,axes=F, xlab=xlabs[i],ylab='Relative frequency',main=NULL) if(i %in% c(1,2)) axis(1,at=pretty(xl,5)) if(i %in% c(3,4)) axis(1,at=pretty(xl,5),lab=paste(pretty(xl,5)*100,'%',sep='')) - axis(2,at=pretty(h$counts,3),lab=pretty(h$counts/n,3)) + axis(2,at=c(0,0.05,0.1)*n,lab=c(0,0.05,0.1)) abline(v=data,lwd=4,col='#e41a1c') box(lwd=1.1) } |
