diff options
Diffstat (limited to 'R Scripts/benchmarking.R')
| -rw-r--r-- | R Scripts/benchmarking.R | 130 |
1 files changed, 0 insertions, 130 deletions
diff --git a/R Scripts/benchmarking.R b/R Scripts/benchmarking.R deleted file mode 100644 index 1cf99e0..0000000 --- a/R Scripts/benchmarking.R +++ /dev/null @@ -1,130 +0,0 @@ -library(igraph) -setwd('~/Documents/Cascade Project') -load('Raw Data/lcc.RData') -load('Results/hyper-lcc.RData') -load('Results/dag_dat_all.RData') -source('Scripts/temporal.R') -source('Scripts/structural.R') - -##### Initialize data -formula = vic ~ sex + race + age + gang.member + gang.name -lcc_verts$sex = as.factor(lcc_verts$sex) -lcc_verts$race = as.factor(lcc_verts$race) -lcc_verts$age = as.numeric(lcc_verts$age) -lcc_verts$gang.name = as.factor(lcc_verts$gang.name) -# sum(hyp_lcc_verts$vic)/length(days) - -##### Loop through days -alpha = 1/100 -gamma = 0.18 -days = 70:max(hyp_lcc_verts$vic.day, na.rm=T) -lambdas = 0#c(0, exp(seq(log(0.0000001), log(.0005), length.out=150)), 1) -nvics = sum(hyp_lcc_verts$vic.day %in% days) -correct_rank = matrix(nrow=nvics, ncol=length(lambdas)) -# correct_rank1 = correct_rank2 = correct_rank3 = c() -edges_all = dag_dat_all[dag_dat_all$dist<2,] -ptm = proc.time() -for (day in days){ - if (day %% 100 == 0) print(day) - - ##### Demographics model -# vics = match(unique(hyp_lcc_verts$ir_no[which(hyp_lcc_verts$vic.day<day)]),lcc_verts$name) -# victims = lcc_verts[,c('vic','sex','race','age','gang.member','gang.name')] -# victims$vic[vics] = TRUE -# victims$vic[-vics] = FALSE -# # glm.fit = glm(formula, data=victims, family=binomial) -# glm.fit = lm(formula, data=victims) -# glm.probs = predict(glm.fit, newdata=lcc_verts, type='response') - - ##### Cascade Model - edges = edges_all[which(edges_all$t1<day),] - f = temporal(edges$t1, day, alpha) - h = structural(gamma,edges$dist) - weights = f*h - ids = edges$to - irs = hyp_lcc_verts$ir_no[ids] - risk = data.frame(id=ids, ir=irs, weight=weights) - risk = risk[order(weights, decreasing=T),] - risk = risk[match(unique(risk$ir),risk$ir),] -# maybe need to change this to reflect new algorithm that accounts for \tilde{p} - - ##### Combined Model -# combined = data.frame(ir=attr(glm.probs,'name'), dem=as.numeric(glm.probs), cas=0, comb=0) - combined$cas[match(risk$ir, attr(glm.probs,'name'))] = risk$weight - - ##### Gather results - infected_irs = hyp_lcc_verts$ir_no[which(hyp_lcc_verts$vic.day==day)] - for (lambda in lambdas){ - combined$comb = combined$cas#lambda*combined$dem + (1-lambda)*(1-combined$dem)*combined$cas - c_idx = which(lambdas==lambda) - r_idx = head(which(is.na(correct_rank[,c_idx])),length(infected_irs)) - # !! order should be first: rank of (3,5,5,7) should be (1,2,2,4), may need to do n-rank - correct_rank[r_idx,c_idx] = match(infected_irs, combined$ir[order(combined$comb, decreasing=T)]) - # maybe should also mark down vic/nonvic status of each? - } - -} -print(proc.time()-ptm) - - -##### Plot results -hist(correct_rank3,150,xlim=c(0,vcount(lcc)),col=rgb(0,0,1,1/8), - xlab='Risk Ranking of Victims',main='') -hist(correct_rank1,150,xlim=c(0,vcount(lcc)),col=rgb(1,0,1,1/8),add=T) -hist(correct_rank2,150,xlim=c(0,vcount(lcc)),col=rgb(1,0,1,1/8),add=T) -legend("topright", c("Demographics Model", "Cascade Model"), - fill=c(rgb(1,0,1,1/8), rgb(0,0,1,1/8))) - -counts = matrix(c(colSums(correct_rank<(vcount(lcc)/1000))*100/nvics, - colSums(correct_rank<(vcount(lcc)/200))*100/nvics, - colSums(correct_rank<(vcount(lcc)/100))*100/nvics), - nrow=3, byrow=T) -plot(lambdas,counts[1,],log='x',type='l') - -correct_rank1 = correct_rank[,length(lambdas)] -correct_rank2 = correct_rank[,1] -correct_rank3 = correct_rank[,which.min(colMeans(correct_rank))] -counts = matrix(c(sum(correct_rank1<(vcount(lcc)*0.001)), - sum(correct_rank1<(vcount(lcc)*0.005)), - sum(correct_rank1<(vcount(lcc)*0.01)), - sum(correct_rank2<(vcount(lcc)*0.001)), - sum(correct_rank2<(vcount(lcc)*0.005)), - sum(correct_rank2<(vcount(lcc)*0.01)), - sum(correct_rank3<(vcount(lcc)*0.001)), - sum(correct_rank3<(vcount(lcc)*0.005)), - sum(correct_rank3<(vcount(lcc)*0.01))), - nrow=3, byrow=T) -counts = counts*100/nvics -barplot(counts, - xlab="Size of High-Risk Population", - ylab="Percent of Victims Predicted", - names.arg=c('0.1%','0.5%','1%'),ylim=c(0,max(counts)*1.1), - col=c(rgb(0,0,1,1/2),rgb(1,0,0,1/2),rgb(0,1,0,1/2)), - beside=TRUE) -legend("topleft", inset=0.05, - c("Demographics Model", "Cascade Model", "Combined Model"), - fill=c(rgb(0,0,1,1/2),rgb(1,0,0,1/2),rgb(0,1,0,1/2))) -box(which='plot') -par(new=T) -counts = counts/(100/nvics) -barplot(counts, - ylim=c(0,max(counts)*1.1), - col=c(rgb(0,0,1,0),rgb(1,0,0,0),rgb(0,1,0,0)), - beside=TRUE) -axis(side = 4) -mtext(side = 4, line = 3, "Number of Victims Predicted") - -popsizes = c(0.1, 0.5, 1) -plot(popsizes,counts[1,],type='l',ylim=c(0,max(counts))) -lines(popsizes,counts[2,]) -lines(popsizes,counts[3,]) -lines(c(0,1),c(0,1)) - -#### Precision-Recall Curve -plot(ecdf(correct_rank1),col='red',xlim=c(0,vcount(lcc)),lwd=2) -plot(ecdf(correct_rank2),col='darkblue',lwd=2,add=T) -plot(ecdf(correct_rank3),col='darkgreen',lwd=2,add=T) -legend("bottomright", inset=0.05, - c("Demographics Model", "Cascade Model", "Combined Model"), - fill=c('red','darkblue','darkgreen')) -lines(c(0,vcount(lcc)),c(0,1)) |
