summaryrefslogtreecommitdiffstats
path: root/R Scripts/predict-victims.R
diff options
context:
space:
mode:
authorThibaut Horel <thibaut.horel@gmail.com>2015-07-02 22:37:45 -0700
committerThibaut Horel <thibaut.horel@gmail.com>2015-07-02 22:37:45 -0700
commit58ed50d980a1ba240bb50b27c42db0a679f00b43 (patch)
treee7f410984ead0c8b2163edbae9d95dae66a7134b /R Scripts/predict-victims.R
parent5a76e2393e4f2d89f885ea99c473da840d0cd7db (diff)
parent110069d77815a3d62e3526f18b2a34fb79beff1e (diff)
downloadcriminal_cascades-58ed50d980a1ba240bb50b27c42db0a679f00b43.tar.gz
Merge branch 'master' of github.com:Thibauth/criminal_cascades
Diffstat (limited to 'R Scripts/predict-victims.R')
-rw-r--r--R Scripts/predict-victims.R39
1 files changed, 23 insertions, 16 deletions
diff --git a/R Scripts/predict-victims.R b/R Scripts/predict-victims.R
index 470815d..2bda7e2 100644
--- a/R Scripts/predict-victims.R
+++ b/R Scripts/predict-victims.R
@@ -1,4 +1,7 @@
library(igraph)
+library(foreach)
+library(doMC)
+registerDoMC(cores=4)
setwd('~/Documents/Cascade Project')
load('Raw Data/lcc.RData')
load('Results/hyper-lcc.RData')
@@ -7,34 +10,36 @@ source('criminal_cascades/R Scripts/temporal.R')
source('criminal_cascades/R Scripts/structural.R')
##### Initialize data
-formula = vic ~ sex + race + age + gang.member + gang.name
+formula = vic ~ sex + race + age + gang.member #+ gang.name
lcc_verts$sex = as.factor(lcc_verts$sex)
lcc_verts$race = as.factor(lcc_verts$race)
lcc_verts$age = as.numeric(lcc_verts$age)
lcc_verts$gang.name = as.factor(lcc_verts$gang.name)
# sum(hyp_lcc_verts$vic)/length(days)
+df = data.frame(ir=lcc_verts$ir_no, dem=0, cas=0, comb=0)
alpha = 0.0028
delta = 0.06
days = sort(unique(hyp_lcc_verts$vic.day)) # 70:max(hyp_lcc_verts$vic.day, na.rm=T)
lambdas = c(0,1)#c(0, exp(seq(log(0.0000001), log(.0005), length.out=150)), 1)
-nvics = sum(lcc_verts$vic)#sum(hyp_lcc_verts$vic.day %in% days)
-correct_rank = matrix(nrow=nvics, ncol=length(lambdas))
+nvics = sum(hyp_lcc_verts$vic.day %in% days)
edges_all = dag_dat_all
##### Loop through days
+writeLines(c(""), "Results/log.txt")
ptm = proc.time()
-for (day in days){
- if (which(day==days) %% 100 == 0) print(day)
-
+correct_rank = foreach (day = days, .combine=rbind) %dopar% {
+ if (which(day==days) %% 100 == 0){sink("Results/log.txt", append=TRUE);cat(paste("day:",day,"\n"))}
+
##### Demographics model
vics = match(unique(hyp_lcc_verts$ir_no[which(hyp_lcc_verts$vic.day<day)]),lcc_verts$name)
victims = lcc_verts[,c('vic','sex','race','age','gang.member','gang.name')]
victims$vic[vics] = TRUE
victims$vic[-vics] = FALSE
-# glm.fit = glm(formula, data=victims, family=binomial)
- glm.fit = lm(formula, data=victims)
- glm.probs = predict(glm.fit, newdata=lcc_verts, type='response')
+ fit = lm(formula, data=victims)
+# fit = glm(formula, data=victims, family=binomial)
+# fit = randomForest(formula, data=victims[,1:5], ntree=100)
+ probs = predict(fit, newdata=lcc_verts, type='response')
##### Cascade Model
edges = edges_all[which(edges_all$t1<day),]
@@ -49,19 +54,21 @@ for (day in days){
# maybe need to change this to reflect new algorithm that accounts for \tilde{p}
##### Combined Model
- combined = data.frame(ir=attr(glm.probs,'name'), dem=as.numeric(glm.probs), cas=0, comb=0)
- combined$cas[match(risk$ir, attr(glm.probs,'name'))] = risk$weight
+ combined = df#data.frame(ir=attr(probs,'name'), dem=as.numeric(probs), cas=0, comb=0)
+ combined$dem[match(attr(probs,'name'), df$ir)] = as.numeric(probs)
+ combined$cas[match(risk$ir, attr(probs,'name'))] = risk$weight
##### Gather results
infected_irs = hyp_lcc_verts$ir_no[which(hyp_lcc_verts$vic.day==day)]
+ crday = matrix(nrow=length(infected_irs), ncol=length(lambdas))
for (lambda in lambdas){
combined$comb = lambda*combined$dem + (1-lambda)*combined$cas
c_idx = which(lambdas==lambda)
- r_idx = head(which(is.na(correct_rank[,c_idx])),length(infected_irs))
- # !! order should be first: rank of (3,5,5,7) should be (1,2,2,4), may need to do n-rank
- correct_rank[r_idx,c_idx] = match(infected_irs, combined$ir[order(combined$comb, decreasing=T)])
- # maybe should also mark down vic/nonvic status of each?
+ crday[,c_idx] = rank(-combined$comb,ties.method='average')[match(infected_irs,combined$ir)]
}
-
+
+ return(crday)
}
print(proc.time()-ptm)
+
+# save(correct_rank, file='Results/correct_rank_62815.RData') \ No newline at end of file