summaryrefslogtreecommitdiffstats
path: root/R Scripts/sim-analysis.R
diff options
context:
space:
mode:
authorBen Green <bgreen@g.harvard.edu>2015-08-21 13:06:12 -0400
committerThibaut Horel <thibaut.horel@gmail.com>2015-08-22 17:01:37 -0700
commitef61ece9773e8a865b57f60ca1e1b9faa903af23 (patch)
tree577ff3fad1750cc824c1cb732bc05046c36efc11 /R Scripts/sim-analysis.R
parent542012fc5ab0b373d85d1d13852daf834193bd33 (diff)
downloadcriminal_cascades-ef61ece9773e8a865b57f60ca1e1b9faa903af23.tar.gz
added age to sim analysis and updated data generation for new model
Diffstat (limited to 'R Scripts/sim-analysis.R')
-rwxr-xr-xR Scripts/sim-analysis.R94
1 files changed, 52 insertions, 42 deletions
diff --git a/R Scripts/sim-analysis.R b/R Scripts/sim-analysis.R
index 3960578..5010485 100755
--- a/R Scripts/sim-analysis.R
+++ b/R Scripts/sim-analysis.R
@@ -1,5 +1,5 @@
library(igraph)
-setwd("~/Documents/Cascade Project/")
+setwd("~/Documents/Violence Cascades/")
load('Results/hyper-lcc.RData')
vic_ids = which(V(hyp_lcc)$vic==1)
@@ -7,56 +7,66 @@ n.infections = length(vic_ids)
n.days = max(hyp_lcc_verts$vic.day,na.rm=T) - min(hyp_lcc_verts$vic.day,na.rm=T)
inf.dates = hyp_lcc_verts$vic.day[vic_ids]
+birthyears = as.numeric(format(as.Date(hyp_lcc_verts$dob[vic_ids]),'%Y'))
+
+# nbrs = neighborhood(graph, nodes=vic_ids, order=1)
+load('Results/vic-nbrs.RData')
+
+n = 150
+mean.time = matrix(0,1,n)
+med.time = matrix(0,1,n)
+mean.50 = matrix(0,1,n)
+mean.100 = matrix(0,1,n)
+n.vicpairs = matrix(0,1,n)
+
+hyp_lcc = upgrade_graph(hyp_lcc)
+
ptm = proc.time()
-n = 1500
-mean.time = matrix(0,3,n)
-med.time = matrix(0,3,n)
-mean.50 = matrix(0,3,n)
-mean.100 = matrix(0,3,n)
-n.vicpairs = matrix(0,3,n)
-for(sim in 1:3){
- print(paste('sim:',sim))
- for(q in 1:n){
- if (q%%250==0) print(paste('run:',q))
- graph = hyp_lcc
- if (sim<3) sim.dates = sample(n.days, n.infections, replace=TRUE) # sims 1 + 2
- if (sim==3) sim.dates = sample(inf.dates) # sim 3
- if (sim==1) vics = sample(vcount(hyp_lcc), n.infections, replace=FALSE) # sim 1
- if (sim>1) vics = vic_ids # sims 2 + 3
- if (sim==0) {vics = vic_ids; sim.dates = inf.dates} # data
-
- vic.time = c()
- for (i in 1:n.infections){
- u = vics[i]
- nbhd = unlist(neighborhood(graph, nodes=u, order=1))
- nbhd = intersect(vic_ids,nbhd)
- nbhd = setdiff(nbhd,u)
- nbhd = nbhd[u<nbhd] # only looks at pairs with u<v to avoid double-counting
- nbhd = nbhd[hyp_lcc_verts$ir_no[u] != hyp_lcc_verts$ir_no[nbhd]] # don't count infections of the same person
- tu = sim.dates[i]
- tvs = sim.dates[match(nbhd,vic_ids)]
- vic.time = c(vic.time,abs(tu-tvs))
- }
- n.vicpairs[sim,q] = length(vic.time)
- mean.time[sim,q] = mean(vic.time)
- med.time[sim,q] = median(vic.time)
- mean.50[sim,q] = mean(vic.time<=50)
- mean.100[sim,q] = mean(vic.time<=100)
- # decay.rate[sim,q] = getDecayRate(vic.time) #get lm of histogram. or maybe just cor?
+for(q in 1:n){
+ if (q%%250==0) print(paste('run:',q))
+ graph = hyp_lcc
+ vics = vic_ids
+
+ sim.dates = rep(0,length(vic_ids))
+ for(y in unique(birthyears)){
+ year_ids = which(birthyears==y)
+ if (length(year_ids)>1) sim.dates[year_ids] = sample(inf.dates[year_ids])
+ }
+# {vics = vic_ids; sim.dates = inf.dates} # data
+
+ vic.time = rep(NA,14885)
+ idx = 1
+ for (i in 1:n.infections){
+ u = vics[i]
+ nbhd = nbrs[[i]]
+ nbhd = intersect(vic_ids,nbhd)
+ nbhd = nbhd[u<nbhd] # only looks at pairs with u<v to avoid double-counting
+ nbhd = setdiff(nbhd,u)
+ nbhd = nbhd[hyp_lcc_verts$ir_no[u] != hyp_lcc_verts$ir_no[nbhd]] # don't count infections of the same person
+ if (length(nbhd)==0) next
+ tu = sim.dates[i]
+ tvs = sim.dates[match(nbhd,vic_ids)]
+ vic.time[idx:(idx+length(nbhd)-1)] = abs(tu-tvs)
+ idx = idx + length(nbhd)
}
+
+ n.vicpairs[q] = length(vic.time)
+ mean.time[q] = mean(vic.time)
+ med.time[q] = median(vic.time)
+ mean.50[q] = mean(vic.time<=50)
+ mean.100[q] = mean(vic.time<=100)
+ # decay.rate[sim,q] = getDecayRate(vic.time) #get lm of histogram. or maybe just cor?
}
print(proc.time()-ptm)
####### plot simulation data #######
-data = 0.1176774 # 774.5483, 634, 0.07252472, 0.1176774
-simdata = mean.100
+data = 634 # 774.5483, 634, 0.07252472, 0.1176774
+simdata = med.time
xl = c(0.9*min(min(simdata),data),1.1*max(max(simdata),data))
-yl = c(0,max(c(hist(simdata[1,],15)$density,hist(simdata[2,],15)$density,hist(simdata[3,],15)$density)))
-hist(simdata[1,],15,xlim=xl,ylim=yl,col=rgb(0,1,0,1/4),freq=F,
+yl = c(0,max(hist(simdata,15)$density))
+hist(simdata,15,xlim=xl,ylim=yl,col=rgb(0,1,0,1/4),freq=F,
xlab='Neighbors Infected Within 100 Days', main=NULL)
-hist(simdata[2,],15,col=rgb(0,0,1,1/4),freq=F,add=TRUE)
-hist(simdata[3,],15,col=rgb(1,0,1,1/4),freq=F,add=TRUE)
abline(v=data,lwd=4,col='red')