summaryrefslogtreecommitdiffstats
path: root/R Scripts
diff options
context:
space:
mode:
authorBen Green <bgreen@g.harvard.edu>2015-09-22 08:28:13 -0400
committerBen Green <bgreen@g.harvard.edu>2015-09-22 08:28:16 -0400
commit28945245f49fafed3d05579166a46530845d2d8a (patch)
tree9703409e142334339d5c145f9415501be8597ec7 /R Scripts
parent8c2d3d070a3db1b469e3e32e3c20ef67bc274b3b (diff)
downloadcriminal_cascades-28945245f49fafed3d05579166a46530845d2d8a.tar.gz
script to plot entire lcc
Diffstat (limited to 'R Scripts')
-rwxr-xr-xR Scripts/analyze-cascades.R2
-rw-r--r--R Scripts/fit-background.R8
-rw-r--r--R Scripts/plot-network.R19
-rwxr-xr-xR Scripts/sim-analysis.R121
4 files changed, 88 insertions, 62 deletions
diff --git a/R Scripts/analyze-cascades.R b/R Scripts/analyze-cascades.R
index 9a47ece..f03af36 100755
--- a/R Scripts/analyze-cascades.R
+++ b/R Scripts/analyze-cascades.R
@@ -43,7 +43,7 @@ start_date + range(times)
cb = brewer.pal(3,'Paired')
cols = rep('#1f78b4',vcount(cc))
seed = which(degree(cc,mode='in')==0)
-cols[seed] = '#d95f02'
+cols[seed] = '#e41a1c'
plot(cc,vertex.size=8,edge.arrow.size=0.2,vertex.color=cols,vertex.label=NA,
layout=layout.reingold.tilford(cc,root=seed),
edge.color='black',vertex.frame.color=NA)
diff --git a/R Scripts/fit-background.R b/R Scripts/fit-background.R
index 06e91c9..5bc1ce0 100644
--- a/R Scripts/fit-background.R
+++ b/R Scripts/fit-background.R
@@ -7,14 +7,16 @@ load('Raw Data/lcc.RData')
vic_days = as.numeric(unlist(lcc_verts[,16:21]))
vic_days = vic_days[!is.na(vic_days)]
-hist(as.numeric(vic_days,2000,col='lightblue',
+hist(vic_days,2000,col='lightblue',
xlab='Day of Study Period',main='Infections During the Study Period')
# get infection counts per day
days = 1:3012
-t = table(factor(hyp_lcc_verts$vic.day[vic_ids],levels=days))
+t = table(factor(vic_days,levels=days))
counts = as.vector(t)
-infs = data.frame(days,counts)
+start_date = as.Date("2005-12-31")
+dates = start_date + days
+infs = data.frame(days,counts,dates)
# define background function
fit = function(x, lambda, A, phi) {lambda + A*(sin((2*pi/365.24)*x+phi))}
diff --git a/R Scripts/plot-network.R b/R Scripts/plot-network.R
new file mode 100644
index 0000000..38e241d
--- /dev/null
+++ b/R Scripts/plot-network.R
@@ -0,0 +1,19 @@
+library(igraph)
+setwd('~/Documents/Violence Cascades/')
+load('Raw Data/lcc.RData')
+
+non_vic_ids = setdiff(1:vcount(lcc),vic_ids)
+perm = rep(0,vcount(lcc))
+perm[non_vic_ids] = 1:length(non_vic_ids)
+perm[vic_ids] = (length(non_vic_ids)+1):vcount(lcc)
+lcc2 = permute(lcc,perm)
+vic_ids2 = which(V(lcc2)$vic==T)
+
+cols = rep('#1f78b4',vcount(lcc))
+cols[vic_ids2] = '#e41a1c'
+
+layout.drl = layout_with_drl(lcc2,,weights=NULL)
+save(layout.drl,file='Results/layout.RData')
+
+plot(lcc,vertex.size=1,vertex.color=cols,vertex.label=NA,
+ layout=layout.drl,edge.color=NA,vertex.frame.color=NA)
diff --git a/R Scripts/sim-analysis.R b/R Scripts/sim-analysis.R
index 92c3d26..e56f575 100755
--- a/R Scripts/sim-analysis.R
+++ b/R Scripts/sim-analysis.R
@@ -1,89 +1,94 @@
library(igraph)
setwd("~/Documents/Violence Cascades/")
+load('Raw Data/lcc.RData')
+library(foreach)
+library(doMC)
+registerDoMC(cores=4)
-load('Results/hyper-lcc.RData')
-vic_ids = which(V(hyp_lcc)$vic==1)
-n.infections = length(vic_ids)
-n.days = max(hyp_lcc_verts$vic.day,na.rm=T) - min(hyp_lcc_verts$vic.day,na.rm=T)
-inf.dates = hyp_lcc_verts$vic.day[vic_ids]
-
-birthyears = as.numeric(format(as.Date(hyp_lcc_verts$dob[vic_ids]),'%Y'))
-
-# nbrs = neighborhood(graph, nodes=vic_ids, order=1)
-load('Results/vic-nbrs.RData')
+# gather data
+vic_days = as.numeric(unlist(lcc_verts[,16:21]))
+idxs = which(!is.na(vic_days)) %% vcount(lcc)
+vic_days = vic_days[!is.na(vic_days)]
+n.infected = sum(lcc_verts$vic)
+birthyears = as.numeric(format(as.Date(lcc_verts$dob[idxs]),'%Y'))
# find all pairs of vic neighbors to check
-vic.nbrs = matrix(NA,14885,2)
+## nbrs = neighborhood(lcc,order=1,nodes=vic_ids); save(nbrs,file='Raw Data/vic-nbrs.RData')
+load('Raw Data/vic-nbrs.RData')
+vic.nbrs = matrix(NA,9568,2)
idx = 1
-for (i in 1:n.infections){
+for (i in 1:n.infected){
u = vic_ids[i]
- nbhd = nbrs[[i]]
+ nbhd = as.numeric(nbrs[[i]])
nbhd = intersect(vic_ids,nbhd)
nbhd = nbhd[u<nbhd] # only looks at pairs with u<v to avoid double-counting
- nbhd = setdiff(nbhd,u)
- nbhd = nbhd[hyp_lcc_verts$ir_no[u] != hyp_lcc_verts$ir_no[nbhd]] # don't count infections of the same person
if (length(nbhd)==0) next
- vic.nbrs[idx:(idx+length(nbhd)-1),] = cbind(rep(match(u,vic_ids),length(nbhd)),match(nbhd,vic_ids))
+ vic.nbrs[idx:(idx+length(nbhd)-1),] = cbind(rep(u,length(nbhd)),nbhd)
idx = idx + length(nbhd)
}
# run simulation
n = 10000
-mean.time = rep(NA,n)
-med.time = rep(NA,n)
-mean.30 = rep(NA,n)
-mean.50 = rep(NA,n)
-n.vicpairs = rep(NA,n)
-
-hyp_lcc = upgrade_graph(hyp_lcc)
ptm = proc.time()
-for(q in 1:n){
- if (q%%250==0) print(paste('run:',q))
- graph = hyp_lcc
- vics = vic_ids
-
+simdata = foreach(q=1:n, .combine=rbind) %dopar% {
# shuffle infection dates
- sim.dates = rep(0,length(vic_ids))
+ sim.dates = rep(0,length(vic_days))
for(y in unique(birthyears)){
year_ids = which(birthyears==y)
- if (length(year_ids)>1) sim.dates[year_ids] = sample(inf.dates[year_ids])
+ if (length(year_ids)>1) sim.dates[year_ids] = sample(vic_days[year_ids])
}
-# vics = vic_ids; sim.dates = inf.dates # data
+# sim.dates = vic_days # data
- vic.time = abs(sim.dates[vic.nbrs[,1]] - sim.dates[vic.nbrs[,2]])
-
- n.vicpairs[q] = length(vic.time)
- mean.time[q] = mean(vic.time)
- med.time[q] = median(vic.time)
- mean.30[q] = mean(vic.time<=30)
- mean.50[q] = mean(vic.time<=50)
- # decay.rate[sim,q] = getDecayRate(vic.time) #get lm of histogram. or maybe just cor?
+ vic.dt = rep(0,dim(vic.nbrs)[1])
+ for(i in 1:dim(vic.nbrs)[1]){
+ mindt = 5000
+ tus = sim.dates[which(vic.nbrs[i,1]==idxs)]
+ tvs = sim.dates[which(vic.nbrs[i,2]==idxs)]
+ for(tu in tus){
+ for(tv in tvs){
+ dt = abs(tu-tv)
+ if(dt<mindt) mindt = dt
+ }
+ }
+ vic.dt[i] = mindt
+ }
+
+# mean.time[q] = mean(vic.dt)
+# med.time[q] = median(vic.dt)
+# mean.30[q] = mean(vic.dt<=30)
+# mean.60[q] = mean(vic.dt<=60)
+ return(c(mean(vic.dt),median(vic.dt),mean(vic.dt<=30),mean(vic.dt<=100)))
}
print(proc.time()-ptm)
-
-# save(mean.time,med.time,mean.30,mean.50,file='Results/sims-Aug28.RData')
+simdata = as.data.frame(simdata,row.names=1:dim(simdata)[1])
+colnames(simdata)=c('mean.time','med.time','mean.30','mean.100')
+# save(simdata,file='Results/sims-Sept21.RData')
####### plot simulation data #######
-data = 0.1179039 # 778.2056, 638, 0.05105811, 0.07248908
-simdata = mean.100
-xl = c(0.95*min(min(simdata),data),1.05*max(max(simdata),data))
-yl = c(0,max(hist(simdata,25)$density))
-hist(simdata,25,xlim=xl,ylim=yl,col=rgb(0,1,0,1/4),freq=F,
- xlab='Neighbor Infections Within 100 Days', main=NULL)
-abline(v=data,lwd=4,col='red')
-box()
-
+# mean=662.6, med=488, m30=0.076, m60=0.119, m100=0.167
+d = c(662.6,488,0.076,0.167)
+xlabs = c('Mean time between infections',
+ 'Median time between infections',
+ 'Infections within 30 days',
+ 'Infections within 100 days')
+par(mfrow=c(2,2))
+for(i in 1:4){
+ data = d[i]
+ sdata = simdata[,i]
+ xl = c(0.99*min(min(sdata),data),1.01*max(max(sdata),data))
+ h = hist(sdata,50,xlim=xl,col='#1f78b4',freq=T,border=NA,axes=F,
+ xlab=xlabs[i],ylab='Relative Frequency',main=NULL)
+ axis(1,at=pretty(xl,5))
+ axis(2,at=pretty(h$counts,3),lab=pretty(h$counts/n,3))
+ abline(v=data,lwd=4,col='#e41a1c')
+ box(lwd=1.1)
+}
##### inf dates histogram
-dates = c(V(lcc)$fatal_date[!is.na(V(lcc)$fatal_date)],
- V(lcc)$nonfatal_date_1[!is.na(V(lcc)$nonfatal_date_1)],
- V(lcc)$nonfatal_date_2[!is.na(V(lcc)$nonfatal_date_2)],
- V(lcc)$nonfatal_date_3[!is.na(V(lcc)$nonfatal_date_3)],
- V(lcc)$nonfatal_date_4[!is.na(V(lcc)$nonfatal_date_4)],
- V(lcc)$nonfatal_date_5[!is.na(V(lcc)$nonfatal_date_5)])
-dates = as.Date(dates)
-hist(dates, breaks='months',freq=T,col='lightblue',format='%m/%Y',
+vic_dates = as.Date(unlist(lcc_verts[,10:15]))
+vic_dates = vic_dates[!is.na(vic_dates)]
+hist(vic_dates, breaks='months',freq=T,col='lightblue',format='%Y',
xlab='Month',ylab='Number of Shootings',main='')
### data vs sim hist comparison