diff options
| author | Ben Green <bgreen@g.harvard.edu> | 2015-09-13 23:52:40 -0400 |
|---|---|---|
| committer | Ben Green <bgreen@g.harvard.edu> | 2015-09-13 23:52:42 -0400 |
| commit | f23e9e5da58d1d19913703e11f7ff96a7adff96b (patch) | |
| tree | b9459b3670cdf125f8bdf0888288be3a0a1357a8 /R Scripts/predict-victims.R | |
| parent | fffb68b515a5cae198d8b756b562ddea6f2c814c (diff) | |
| download | criminal_cascades-f23e9e5da58d1d19913703e11f7ff96a7adff96b.tar.gz | |
updated prediction code for new model and data format
Diffstat (limited to 'R Scripts/predict-victims.R')
| -rw-r--r-- | R Scripts/predict-victims.R | 71 |
1 files changed, 52 insertions, 19 deletions
diff --git a/R Scripts/predict-victims.R b/R Scripts/predict-victims.R index c662651..fc6cb7f 100644 --- a/R Scripts/predict-victims.R +++ b/R Scripts/predict-victims.R @@ -5,26 +5,50 @@ library(doMC) registerDoMC(cores=4) setwd('~/Documents/Violence Cascades/') load('Raw Data/lcc.RData') -load('Results/hyper-lcc.RData') -load('Results/dag_dat_all.RData') +load('Raw Data/dag_dat_lcc.RData') +load('Raw Data/vic_times_lcc.RData') +load('Raw Data/prior-arrests.RData') source('criminal_cascades/R Scripts/temporal.R') source('criminal_cascades/R Scripts/structural.R') +nArrests = function(arrests,day){return(sum(arrests<day))} + +##### Set parameters +alpha = 0.00317 +beta = 0.0039 +start_date = as.Date("2005-12-31") + +edges_all = data.table(dag_dat_lcc) +vic_ids = which(lcc_verts$vic==T) +lcc_verts = lcc_verts[,c('id','ir_no','nonfatal_day_1','nonfatal_day_2', + 'nonfatal_day_3','nonfatal_day_4','nonfatal_day_5', + 'fatal_day','sex','race','dob','gang.member','gang.name')] +lcc_verts$vic.day = NA +lcc_verts$vic.day[vic_ids] = as.numeric(apply(vic_times_lcc[vic_ids,2:7],1,min,na.rm=T)) +lcc_verts$age = FALSE +lcc_verts$age[vic_ids] = as.numeric(start_date + lcc_verts$vic.day[vic_ids] - as.Date(lcc_verts$dob[vic_ids])) +lcc_verts$arrests = NA +lcc_verts$arrests[vic_ids] = unlist(mapply(nArrests,prior_arrests[vic_ids],day=lcc_verts$vic.day[vic_ids])) +lcc_verts$vic = FALSE +rownames(lcc_verts) = lcc_verts$ir_no +vic_times_lcc$ir_no = lcc_verts$ir_no +rownames(vic_times_lcc) = vic_times_lcc$ir_no + ##### Initialize data -formula = vic ~ sex + race + age + gang.member + gang.name +formula = vic ~ sex + race + age + gang.member + gang.name + arrests lcc_verts$sex = as.factor(lcc_verts$sex) lcc_verts$race = as.factor(lcc_verts$race) -lcc_verts$age = as.numeric(lcc_verts$age) lcc_verts$gang.name = as.factor(lcc_verts$gang.name) dt = data.table(ir=lcc_verts$ir_no, dem=0, cas=0, comb=0) -alpha = 0.0028 -delta = 0.06 -days = sort(unique(hyp_lcc_verts$vic.day)) # 70:max(hyp_lcc_verts$vic.day, na.rm=T) +##### +days = Reduce(union, list(lcc_verts$fatal_day,lcc_verts$nonfatal_day_1, + lcc_verts$nonfatal_day_2,lcc_verts$nonfatal_day_3, + lcc_verts$nonfatal_day_4,lcc_verts$nonfatal_day_5)) +days = days[!is.na(days)] +days = sort(days) days = split(days, ceiling(seq_along(days)/456)) -lambdas = c(0, exp(seq(log(0.0000001), log(.95), length.out=50)), 1) -nvics = sum(hyp_lcc_verts$vic) -edges_all = data.table(dag_dat_all) +lambdas = c(0, exp(seq(log(0.0000001), log(.95), length.out=100)), 1) ##### Loop through days correct_rank = c() @@ -35,22 +59,31 @@ for(i in 1:length(days)){ cr = foreach (day = ds, .combine=rbind) %dopar% { ##### Demographics model - vics = match(unique(hyp_lcc_verts$ir_no[which(hyp_lcc_verts$vic.day<day)]),lcc_verts$name) - victims = lcc_verts[,c('vic','sex','race','age','gang.member','gang.name')] + victims = lcc_verts + + vics = which(victims$vic.day<day) + vic.days = victims$vic.day[vics] + + victims$age[-vics] = as.numeric(start_date + day - 1 - as.Date(victims$dob[-vics])) + victims$arrests[-vics] = unlist(lapply(prior_arrests[-vics],nArrests,day=day)) + victims$vic[vics] = TRUE victims$vic[-vics] = FALSE -# fit = lm(formula, data=victims) + fit = glm(formula, data=victims, family=binomial) - # fit = randomForest(formula, data=victims[,1:5], ntree=100) - probs = predict(fit, newdata=lcc_verts, type='response') + + lcc_verts_day = victims + lcc_verts_day$age[vics] = as.numeric(start_date + day - 1 - as.Date(victims$dob[vics])) + lcc_verts_day$arrests[vics] = unlist(lapply(prior_arrests[vics],nArrests,day=day)) + probs = predict(fit, newdata=lcc_verts_day, type='response') ##### Cascade Model edges = edges_all[which(edges_all$t1<day),] - f = temporal(edges$t1, day, alpha) - h = structural(delta,edges$dist) + f = temporal(edges$t1, day, beta) + h = structural(alpha,edges$dist) weights = f*h ids = edges$to - irs = hyp_lcc_verts$ir_no[ids] + irs = lcc_verts$ir_no[ids] risk = data.table(id=ids, ir=irs, weight=weights) if (dim(risk)[1]>0) risk = risk[, list(weight=sum(weight)), by=ir] # max or sum @@ -61,7 +94,7 @@ for(i in 1:length(days)){ combined$cas[match(risk$ir, dt$ir)] = risk$weight ##### Gather results - infected_irs = hyp_lcc_verts$ir_no[which(hyp_lcc_verts$vic.day==day)] + infected_irs = attr(which(rowSums(vic_times_lcc[,2:7]==day,na.rm=T)==1),'name') crday = matrix(nrow=length(infected_irs), ncol=length(lambdas)) for (lambda in lambdas){ combined$comb = lambda*combined$dem + (1-lambda)*combined$cas |
