diff options
Diffstat (limited to 'R Scripts/predict-victims.R')
| -rw-r--r-- | R Scripts/predict-victims.R | 39 |
1 files changed, 23 insertions, 16 deletions
diff --git a/R Scripts/predict-victims.R b/R Scripts/predict-victims.R index 470815d..2bda7e2 100644 --- a/R Scripts/predict-victims.R +++ b/R Scripts/predict-victims.R @@ -1,4 +1,7 @@ library(igraph) +library(foreach) +library(doMC) +registerDoMC(cores=4) setwd('~/Documents/Cascade Project') load('Raw Data/lcc.RData') load('Results/hyper-lcc.RData') @@ -7,34 +10,36 @@ source('criminal_cascades/R Scripts/temporal.R') source('criminal_cascades/R Scripts/structural.R') ##### Initialize data -formula = vic ~ sex + race + age + gang.member + gang.name +formula = vic ~ sex + race + age + gang.member #+ gang.name lcc_verts$sex = as.factor(lcc_verts$sex) lcc_verts$race = as.factor(lcc_verts$race) lcc_verts$age = as.numeric(lcc_verts$age) lcc_verts$gang.name = as.factor(lcc_verts$gang.name) # sum(hyp_lcc_verts$vic)/length(days) +df = data.frame(ir=lcc_verts$ir_no, dem=0, cas=0, comb=0) alpha = 0.0028 delta = 0.06 days = sort(unique(hyp_lcc_verts$vic.day)) # 70:max(hyp_lcc_verts$vic.day, na.rm=T) lambdas = c(0,1)#c(0, exp(seq(log(0.0000001), log(.0005), length.out=150)), 1) -nvics = sum(lcc_verts$vic)#sum(hyp_lcc_verts$vic.day %in% days) -correct_rank = matrix(nrow=nvics, ncol=length(lambdas)) +nvics = sum(hyp_lcc_verts$vic.day %in% days) edges_all = dag_dat_all ##### Loop through days +writeLines(c(""), "Results/log.txt") ptm = proc.time() -for (day in days){ - if (which(day==days) %% 100 == 0) print(day) - +correct_rank = foreach (day = days, .combine=rbind) %dopar% { + if (which(day==days) %% 100 == 0){sink("Results/log.txt", append=TRUE);cat(paste("day:",day,"\n"))} + ##### Demographics model vics = match(unique(hyp_lcc_verts$ir_no[which(hyp_lcc_verts$vic.day<day)]),lcc_verts$name) victims = lcc_verts[,c('vic','sex','race','age','gang.member','gang.name')] victims$vic[vics] = TRUE victims$vic[-vics] = FALSE -# glm.fit = glm(formula, data=victims, family=binomial) - glm.fit = lm(formula, data=victims) - glm.probs = predict(glm.fit, newdata=lcc_verts, type='response') + fit = lm(formula, data=victims) +# fit = glm(formula, data=victims, family=binomial) +# fit = randomForest(formula, data=victims[,1:5], ntree=100) + probs = predict(fit, newdata=lcc_verts, type='response') ##### Cascade Model edges = edges_all[which(edges_all$t1<day),] @@ -49,19 +54,21 @@ for (day in days){ # maybe need to change this to reflect new algorithm that accounts for \tilde{p} ##### Combined Model - combined = data.frame(ir=attr(glm.probs,'name'), dem=as.numeric(glm.probs), cas=0, comb=0) - combined$cas[match(risk$ir, attr(glm.probs,'name'))] = risk$weight + combined = df#data.frame(ir=attr(probs,'name'), dem=as.numeric(probs), cas=0, comb=0) + combined$dem[match(attr(probs,'name'), df$ir)] = as.numeric(probs) + combined$cas[match(risk$ir, attr(probs,'name'))] = risk$weight ##### Gather results infected_irs = hyp_lcc_verts$ir_no[which(hyp_lcc_verts$vic.day==day)] + crday = matrix(nrow=length(infected_irs), ncol=length(lambdas)) for (lambda in lambdas){ combined$comb = lambda*combined$dem + (1-lambda)*combined$cas c_idx = which(lambdas==lambda) - r_idx = head(which(is.na(correct_rank[,c_idx])),length(infected_irs)) - # !! order should be first: rank of (3,5,5,7) should be (1,2,2,4), may need to do n-rank - correct_rank[r_idx,c_idx] = match(infected_irs, combined$ir[order(combined$comb, decreasing=T)]) - # maybe should also mark down vic/nonvic status of each? + crday[,c_idx] = rank(-combined$comb,ties.method='average')[match(infected_irs,combined$ir)] } - + + return(crday) } print(proc.time()-ptm) + +# save(correct_rank, file='Results/correct_rank_62815.RData')
\ No newline at end of file |
