summaryrefslogtreecommitdiffstats
path: root/R Scripts/benchmarking.R
diff options
context:
space:
mode:
Diffstat (limited to 'R Scripts/benchmarking.R')
-rw-r--r--R Scripts/benchmarking.R130
1 files changed, 0 insertions, 130 deletions
diff --git a/R Scripts/benchmarking.R b/R Scripts/benchmarking.R
deleted file mode 100644
index 1cf99e0..0000000
--- a/R Scripts/benchmarking.R
+++ /dev/null
@@ -1,130 +0,0 @@
-library(igraph)
-setwd('~/Documents/Cascade Project')
-load('Raw Data/lcc.RData')
-load('Results/hyper-lcc.RData')
-load('Results/dag_dat_all.RData')
-source('Scripts/temporal.R')
-source('Scripts/structural.R')
-
-##### Initialize data
-formula = vic ~ sex + race + age + gang.member + gang.name
-lcc_verts$sex = as.factor(lcc_verts$sex)
-lcc_verts$race = as.factor(lcc_verts$race)
-lcc_verts$age = as.numeric(lcc_verts$age)
-lcc_verts$gang.name = as.factor(lcc_verts$gang.name)
-# sum(hyp_lcc_verts$vic)/length(days)
-
-##### Loop through days
-alpha = 1/100
-gamma = 0.18
-days = 70:max(hyp_lcc_verts$vic.day, na.rm=T)
-lambdas = 0#c(0, exp(seq(log(0.0000001), log(.0005), length.out=150)), 1)
-nvics = sum(hyp_lcc_verts$vic.day %in% days)
-correct_rank = matrix(nrow=nvics, ncol=length(lambdas))
-# correct_rank1 = correct_rank2 = correct_rank3 = c()
-edges_all = dag_dat_all[dag_dat_all$dist<2,]
-ptm = proc.time()
-for (day in days){
- if (day %% 100 == 0) print(day)
-
- ##### Demographics model
-# vics = match(unique(hyp_lcc_verts$ir_no[which(hyp_lcc_verts$vic.day<day)]),lcc_verts$name)
-# victims = lcc_verts[,c('vic','sex','race','age','gang.member','gang.name')]
-# victims$vic[vics] = TRUE
-# victims$vic[-vics] = FALSE
-# # glm.fit = glm(formula, data=victims, family=binomial)
-# glm.fit = lm(formula, data=victims)
-# glm.probs = predict(glm.fit, newdata=lcc_verts, type='response')
-
- ##### Cascade Model
- edges = edges_all[which(edges_all$t1<day),]
- f = temporal(edges$t1, day, alpha)
- h = structural(gamma,edges$dist)
- weights = f*h
- ids = edges$to
- irs = hyp_lcc_verts$ir_no[ids]
- risk = data.frame(id=ids, ir=irs, weight=weights)
- risk = risk[order(weights, decreasing=T),]
- risk = risk[match(unique(risk$ir),risk$ir),]
-# maybe need to change this to reflect new algorithm that accounts for \tilde{p}
-
- ##### Combined Model
-# combined = data.frame(ir=attr(glm.probs,'name'), dem=as.numeric(glm.probs), cas=0, comb=0)
- combined$cas[match(risk$ir, attr(glm.probs,'name'))] = risk$weight
-
- ##### Gather results
- infected_irs = hyp_lcc_verts$ir_no[which(hyp_lcc_verts$vic.day==day)]
- for (lambda in lambdas){
- combined$comb = combined$cas#lambda*combined$dem + (1-lambda)*(1-combined$dem)*combined$cas
- c_idx = which(lambdas==lambda)
- r_idx = head(which(is.na(correct_rank[,c_idx])),length(infected_irs))
- # !! order should be first: rank of (3,5,5,7) should be (1,2,2,4), may need to do n-rank
- correct_rank[r_idx,c_idx] = match(infected_irs, combined$ir[order(combined$comb, decreasing=T)])
- # maybe should also mark down vic/nonvic status of each?
- }
-
-}
-print(proc.time()-ptm)
-
-
-##### Plot results
-hist(correct_rank3,150,xlim=c(0,vcount(lcc)),col=rgb(0,0,1,1/8),
- xlab='Risk Ranking of Victims',main='')
-hist(correct_rank1,150,xlim=c(0,vcount(lcc)),col=rgb(1,0,1,1/8),add=T)
-hist(correct_rank2,150,xlim=c(0,vcount(lcc)),col=rgb(1,0,1,1/8),add=T)
-legend("topright", c("Demographics Model", "Cascade Model"),
- fill=c(rgb(1,0,1,1/8), rgb(0,0,1,1/8)))
-
-counts = matrix(c(colSums(correct_rank<(vcount(lcc)/1000))*100/nvics,
- colSums(correct_rank<(vcount(lcc)/200))*100/nvics,
- colSums(correct_rank<(vcount(lcc)/100))*100/nvics),
- nrow=3, byrow=T)
-plot(lambdas,counts[1,],log='x',type='l')
-
-correct_rank1 = correct_rank[,length(lambdas)]
-correct_rank2 = correct_rank[,1]
-correct_rank3 = correct_rank[,which.min(colMeans(correct_rank))]
-counts = matrix(c(sum(correct_rank1<(vcount(lcc)*0.001)),
- sum(correct_rank1<(vcount(lcc)*0.005)),
- sum(correct_rank1<(vcount(lcc)*0.01)),
- sum(correct_rank2<(vcount(lcc)*0.001)),
- sum(correct_rank2<(vcount(lcc)*0.005)),
- sum(correct_rank2<(vcount(lcc)*0.01)),
- sum(correct_rank3<(vcount(lcc)*0.001)),
- sum(correct_rank3<(vcount(lcc)*0.005)),
- sum(correct_rank3<(vcount(lcc)*0.01))),
- nrow=3, byrow=T)
-counts = counts*100/nvics
-barplot(counts,
- xlab="Size of High-Risk Population",
- ylab="Percent of Victims Predicted",
- names.arg=c('0.1%','0.5%','1%'),ylim=c(0,max(counts)*1.1),
- col=c(rgb(0,0,1,1/2),rgb(1,0,0,1/2),rgb(0,1,0,1/2)),
- beside=TRUE)
-legend("topleft", inset=0.05,
- c("Demographics Model", "Cascade Model", "Combined Model"),
- fill=c(rgb(0,0,1,1/2),rgb(1,0,0,1/2),rgb(0,1,0,1/2)))
-box(which='plot')
-par(new=T)
-counts = counts/(100/nvics)
-barplot(counts,
- ylim=c(0,max(counts)*1.1),
- col=c(rgb(0,0,1,0),rgb(1,0,0,0),rgb(0,1,0,0)),
- beside=TRUE)
-axis(side = 4)
-mtext(side = 4, line = 3, "Number of Victims Predicted")
-
-popsizes = c(0.1, 0.5, 1)
-plot(popsizes,counts[1,],type='l',ylim=c(0,max(counts)))
-lines(popsizes,counts[2,])
-lines(popsizes,counts[3,])
-lines(c(0,1),c(0,1))
-
-#### Precision-Recall Curve
-plot(ecdf(correct_rank1),col='red',xlim=c(0,vcount(lcc)),lwd=2)
-plot(ecdf(correct_rank2),col='darkblue',lwd=2,add=T)
-plot(ecdf(correct_rank3),col='darkgreen',lwd=2,add=T)
-legend("bottomright", inset=0.05,
- c("Demographics Model", "Cascade Model", "Combined Model"),
- fill=c('red','darkblue','darkgreen'))
-lines(c(0,vcount(lcc)),c(0,1))