summaryrefslogtreecommitdiffstats
path: root/R Scripts/predict-victims-plots.R
diff options
context:
space:
mode:
authorBen Green <ben@SEASITs-MacBook-Pro.local>2015-06-28 17:38:33 -0400
committerBen Green <ben@SEASITs-MacBook-Pro.local>2015-06-28 17:38:33 -0400
commit6e527bbf612465bf5d739b9652abc0165550993c (patch)
tree9525bed16d9e4568747855afd84a03937090f1cb /R Scripts/predict-victims-plots.R
parent7167a81cfb8b872dd1547e5a8669004b191417db (diff)
downloadcriminal_cascades-6e527bbf612465bf5d739b9652abc0165550993c.tar.gz
Worked on synthetic data recovery so we can tell how high the actual
infector is ranked among all potential parents. Cleaned up code for the predicting victims benchmarking test.
Diffstat (limited to 'R Scripts/predict-victims-plots.R')
-rw-r--r--R Scripts/predict-victims-plots.R61
1 files changed, 61 insertions, 0 deletions
diff --git a/R Scripts/predict-victims-plots.R b/R Scripts/predict-victims-plots.R
new file mode 100644
index 0000000..8a93667
--- /dev/null
+++ b/R Scripts/predict-victims-plots.R
@@ -0,0 +1,61 @@
+##### Plot results
+# Overlaid histograms of the risk ranking given to each victim, one per model.
+# The rank vectors are derived here because the histograms need them before
+# the bar-chart section further down assigns the same values.
+correct_rank1 <- correct_rank[, length(lambdas)]
+correct_rank2 <- correct_rank[, 1]
+correct_rank3 <- correct_rank[, which.min(colMeans(correct_rank))]
+
+# One translucent colour per model, in the same model order (and hues) that
+# the bar-chart legend in this script uses for correct_rank1..3.
+hist_cols <- c(rgb(0, 0, 1, 1/8), rgb(1, 0, 0, 1/8), rgb(0, 1, 0, 1/8))
+
+# correct_rank3 is drawn first, so it is the call that sets up the axes.
+hist(correct_rank3, breaks = 150, xlim = c(0, vcount(lcc)),
+     col = hist_cols[3], xlab = "Risk Ranking of Victims", main = "")
+hist(correct_rank1, breaks = 150, xlim = c(0, vcount(lcc)),
+     col = hist_cols[1], add = TRUE)
+hist(correct_rank2, breaks = 150, xlim = c(0, vcount(lcc)),
+     col = hist_cols[2], add = TRUE)
+legend("topright",
+       c("Demographics Model", "Cascade Model", "Combined Model"),
+       fill = hist_cols)
+
+# Percent of victims (count * 100 / nvics) whose rank is below 1/1000, 1/200
+# and 1/100 of vcount(lcc): one row per cutoff, one column per column of
+# correct_rank.
+counts <- local({
+  rank_cutoffs <- vcount(lcc) / c(1000, 200, 100)
+  pct_below <- lapply(rank_cutoffs, function(cutoff) {
+    colSums(correct_rank < cutoff) * 100 / nvics
+  })
+  matrix(unlist(pct_below), nrow = 3, byrow = TRUE)
+})
+# First cutoff (1/1000) against lambda, with lambda on a log-scaled x axis.
+plot(lambdas, counts[1, ], type = "l", log = "x")
+
+# Rank vectors taken from three columns of correct_rank: column
+# length(lambdas), column 1, and the column with the lowest mean rank. The
+# legend below labels them Demographics, Cascade and Combined respectively.
+correct_rank1 <- correct_rank[, length(lambdas)]
+correct_rank2 <- correct_rank[, 1]
+correct_rank3 <- correct_rank[, which.min(colMeans(correct_rank))]
+
+# Number of victims each model ranks below 0.1%, 0.5% and 1% of vcount(lcc):
+# one row per model, one column per population size.
+rank_cutoffs <- vcount(lcc) * c(0.001, 0.005, 0.01)
+n_below <- function(ranks) {
+  vapply(rank_cutoffs, function(cutoff) sum(ranks < cutoff), numeric(1))
+}
+n_predicted <- matrix(c(n_below(correct_rank1),
+                        n_below(correct_rank2),
+                        n_below(correct_rank3)),
+                      nrow = 3, byrow = TRUE)
+
+# Left axis: the same numbers expressed as a percentage of all victims.
+counts <- n_predicted * 100 / nvics
+model_cols <- c(rgb(0, 0, 1, 1/2), rgb(1, 0, 0, 1/2), rgb(0, 1, 0, 1/2))
+barplot(counts,
+        xlab = "Size of High-Risk Population",
+        ylab = "Percent of Victims Predicted",
+        names.arg = c("0.1%", "0.5%", "1%"),
+        ylim = c(0, max(counts) * 1.1),
+        col = model_cols,
+        beside = TRUE)
+legend("topleft", inset = 0.05,
+       c("Demographics Model", "Cascade Model", "Combined Model"),
+       fill = model_cols)
+box(which = "plot")
+
+# Right axis: redraw the bars invisibly on the raw-count scale so axis(4)
+# picks up count units. axes = FALSE keeps this overlay from painting a second
+# left-hand axis (in counts) over the percent axis drawn above.
+par(new = TRUE)
+counts <- n_predicted
+barplot(counts,
+        ylim = c(0, max(counts) * 1.1),
+        col = c(rgb(0, 0, 1, 0), rgb(1, 0, 0, 0), rgb(0, 1, 0, 0)),
+        beside = TRUE,
+        axes = FALSE)
+axis(side = 4)
+# NOTE(review): this label sits on margin line 3; R's default right margin is
+# 2.1 lines, so confirm par(mar) is widened before this section is run.
+mtext(side = 4, line = 3, "Number of Victims Predicted")
+
+# Victims predicted by each model (rows of counts) against the size of the
+# high-risk population, given in percent on the x axis.
+popsizes <- c(0.1, 0.5, 1)
+plot(popsizes, counts[1, ], type = "l", ylim = c(0, max(counts)))
+lines(popsizes, counts[2, ])
+lines(popsizes, counts[3, ])
+# Diagonal reference line, presumably the random-selection baseline (x% of
+# the population containing x% of the victims). counts holds raw victim
+# counts at this point, not percentages, so the endpoint at 1% is nvics / 100
+# rather than 1.
+lines(c(0, 1), c(0, nvics / 100))
+
+#### Empirical CDF of victim ranks, one curve per model
+# The first call creates the axes (x spanning 0..vcount(lcc)); the other two
+# curves are added onto the same plot.
+plot(ecdf(correct_rank1), xlim = c(0, vcount(lcc)), lwd = 2, col = "red")
+plot(ecdf(correct_rank2), add = TRUE, lwd = 2, col = "darkblue")
+plot(ecdf(correct_rank3), add = TRUE, lwd = 2, col = "darkgreen")
+legend("bottomright",
+       legend = c("Demographics Model", "Cascade Model", "Combined Model"),
+       fill = c("red", "darkblue", "darkgreen"),
+       inset = 0.05)
+# Straight reference line from (0, 0) to (vcount(lcc), 1).
+lines(x = c(0, vcount(lcc)), y = c(0, 1))