Worked on synthetic data recovery so we can tell how high the actual infector is ranked among all potential parents.

Cleaned up code for the predicting victims benchmarking test.
author: Ben Green <ben@SEASITs-MacBook-Pro.local> 2015-06-28 17:38:33 -0400
committer: Ben Green <ben@SEASITs-MacBook-Pro.local> 2015-06-28 17:38:33 -0400
commit: 6e527bbf612465bf5d739b9652abc0165550993c (patch)
tree: 9525bed16d9e4568747855afd84a03937090f1cb /R Scripts/predict-victims-plots.R
parent: 7167a81cfb8b872dd1547e5a8669004b191417db (diff)
download: criminal_cascades-6e527bbf612465bf5d739b9652abc0165550993c.tar.gz
1 files changed, 61 insertions, 0 deletions
diff --git a/R Scripts/predict-victims-plots.R b/R Scripts/predict-victims-plots.R
new file mode 100644
index 0000000..8a93667
--- /dev/null
+++ b/R Scripts/predict-victims-plots.R
@@ -0,0 +1,61 @@
+##### Plot results
+# Overlaid histograms of the rank given to each victim, one per model.
+# correct_rank1/2/3 must already exist in the session: this script only
+# assigns them further down, from columns of correct_rank.
+# Series order and labels follow the other legends in this script
+# (1 = Demographics, 2 = Cascade, 3 = Combined). Each series has its own
+# colour and legend entry so that all three can be told apart.
+hist_cols <- c(rgb(0, 0, 1, 1/8), rgb(1, 0, 0, 1/8), rgb(0, 1, 0, 1/8))
+# correct_rank3 is drawn first, so its bin counts fix the y-axis range.
+hist(correct_rank3, 150, xlim = c(0, vcount(lcc)), col = hist_cols[3],
+     xlab = 'Risk Ranking of Victims', main = '')
+hist(correct_rank1, 150, xlim = c(0, vcount(lcc)), col = hist_cols[1],
+     add = TRUE)
+hist(correct_rank2, 150, xlim = c(0, vcount(lcc)), col = hist_cols[2],
+     add = TRUE)
+legend("topright",
+       c("Demographics Model", "Cascade Model", "Combined Model"),
+       fill = hist_cols)
+
+# Percentage of victims ranked inside the top 0.1%, 0.5% and 1% of the
+# network, computed for every column of correct_rank (one row per cut-off).
+lambda_cutoffs <- vcount(lcc) / c(1000, 200, 100)
+pct_by_cutoff <- lapply(
+  lambda_cutoffs,
+  function(cutoff) colSums(correct_rank < cutoff) * 100 / nvics
+)
+counts <- matrix(unlist(pct_by_cutoff), nrow = 3, byrow = TRUE)
+# First row (the tightest cut-off) against lambdas, x-axis on a log scale.
+plot(lambdas, counts[1, ], type = 'l', log = 'x')
+
+# Rank vectors for the three models being compared, taken from columns of
+# correct_rank: the last lambda, the first lambda, and the column whose mean
+# rank is lowest. The legend labels them Demographics, Cascade and Combined.
+correct_rank1 <- correct_rank[, length(lambdas)]
+correct_rank2 <- correct_rank[, 1]
+correct_rank3 <- correct_rank[, which.min(colMeans(correct_rank))]
+
+# Number of victims ranked inside the top 0.1%, 0.5% and 1% of the network:
+# one row per model, one column per cut-off.
+top_cutoffs <- vcount(lcc) * c(0.001, 0.005, 0.01)
+count_below <- function(ranks) {
+  vapply(top_cutoffs, function(cutoff) sum(ranks < cutoff), numeric(1))
+}
+n_predicted <- matrix(c(count_below(correct_rank1),
+                        count_below(correct_rank2),
+                        count_below(correct_rank3)),
+                      nrow = 3, byrow = TRUE)
+counts <- n_predicted * 100 / nvics
+
+# The count axis is labelled on line 3 of the right-hand margin. The default
+# right margin (2.1 lines) is too narrow for that and the label would fall
+# off the device, so widen it for this figure and restore it afterwards.
+old_mar <- par("mar")
+par(mar = pmax(old_mar, c(0, 0, 0, 5.1)))
+
+# Grouped bars: one group per cut-off, one bar per model, in percent.
+bar_cols <- c(rgb(0, 0, 1, 1/2), rgb(1, 0, 0, 1/2), rgb(0, 1, 0, 1/2))
+barplot(counts,
+        xlab = "Size of High-Risk Population",
+        ylab = "Percent of Victims Predicted",
+        names.arg = c('0.1%', '0.5%', '1%'),
+        ylim = c(0, max(counts) * 1.1),
+        col = bar_cols,
+        beside = TRUE)
+legend("topleft", inset = 0.05,
+       c("Demographics Model", "Cascade Model", "Combined Model"),
+       fill = bar_cols)
+box(which = 'plot')
+
+# Overlay the same bars, fully transparent, on the raw-count scale so that a
+# second y-axis can be drawn on the right. axes = FALSE stops barplot() from
+# drawing another left-hand axis, in count units, over the percent axis.
+par(new = TRUE)
+# counts is left holding the raw victim counts once this section has run.
+counts <- n_predicted
+barplot(counts,
+        ylim = c(0, max(counts) * 1.1),
+        col = c(rgb(0, 0, 1, 0), rgb(1, 0, 0, 0), rgb(0, 1, 0, 0)),
+        axes = FALSE,
+        beside = TRUE)
+axis(side = 4)
+mtext(side = 4, line = 3, "Number of Victims Predicted")
+par(mar = old_mar)
+
+# Share of victims captured as the high-risk population grows from 0.1% to
+# 1% of the network, one line per model, with a reference diagonal.
+# By this point counts holds raw victim counts (the bar chart section above
+# converts it back for its right-hand axis), whereas popsizes and the
+# diagonal are in percent, so the curves are rescaled to percent here.
+popsizes <- c(0.1, 0.5, 1)
+pct_predicted <- counts * 100 / nvics
+plot(popsizes, pct_predicted[1, ], type = 'l',
+     ylim = c(0, max(pct_predicted)),
+     xlab = "Size of High-Risk Population (%)",
+     ylab = "Percent of Victims Predicted")
+lines(popsizes, pct_predicted[2, ])
+lines(popsizes, pct_predicted[3, ])
+# Reference diagonal y = x: x% of the population holding x% of the victims.
+lines(c(0, 1), c(0, 1))
+
+#### Precision-Recall Curve
+# Drawn as the empirical CDF of the victims' ranks, one curve per model; the
+# x-axis runs from 0 to the number of vertices in lcc.
+plot(ecdf(correct_rank1), col = 'red', xlim = c(0, vcount(lcc)), lwd = 2)
+# Add the other two rank vectors to the same axes.
+invisible(Map(
+  function(ranks, shade) plot(ecdf(ranks), col = shade, lwd = 2, add = TRUE),
+  list(correct_rank2, correct_rank3),
+  c('darkblue', 'darkgreen')
+))
+legend("bottomright", inset = 0.05,
+       c("Demographics Model", "Cascade Model", "Combined Model"),
+       fill = c('red', 'darkblue', 'darkgreen'))
+# Straight reference line from (0, 0) to (vcount(lcc), 1).
+lines(c(0, vcount(lcc)), c(0, 1))
author	Ben Green <ben@SEASITs-MacBook-Pro.local>	2015-06-28 17:38:33 -0400
committer	Ben Green <ben@SEASITs-MacBook-Pro.local>	2015-06-28 17:38:33 -0400
commit	6e527bbf612465bf5d739b9652abc0165550993c (patch)
tree	9525bed16d9e4568747855afd84a03937090f1cb /R Scripts/predict-victims-plots.R
parent	7167a81cfb8b872dd1547e5a8669004b191417db (diff)
download	criminal_cascades-6e527bbf612465bf5d739b9652abc0165550993c.tar.gz