From c2eec4bb82cffe6ac454f4de62e828fe176c7e5c Mon Sep 17 00:00:00 2001 From: Ben Green Date: Thu, 17 Sep 2015 23:37:30 -0400 Subject: some plotting stuff --- R Scripts/analyze-cascades.R | 41 +++++++++------------------------------ R Scripts/predict-victims-plots.R | 31 ++++++++++++++++------------- R Scripts/predict-victims.R | 2 +- 3 files changed, 27 insertions(+), 47 deletions(-) diff --git a/R Scripts/analyze-cascades.R b/R Scripts/analyze-cascades.R index b1ce3c3..60c59fb 100755 --- a/R Scripts/analyze-cascades.R +++ b/R Scripts/analyze-cascades.R @@ -1,5 +1,6 @@ library(igraph) setwd("~/Documents/Violence Cascades/") +start_date = as.Date("2005-12-31") # plot cascade sizes data = read.csv('Results/components_dist-91515.csv',header=F) @@ -13,52 +14,28 @@ plot(sizes,counts,log='xy',type='o',lwd=3, # plot cascades edges = read.csv('Results/edges-91515.csv',header=F, col.names=c('v1','t1','v2','t2','dist')) -for(id in unique(union(edges$v1,edges$v2))){ - e = edges[union(match(id,edges$v1), match(id,edges$v2)),] - times = sort(union(e$t1[e$v1==id],e$t2[e$v2==id])) - if (length(times)>1){ - for(time in times[-1]){ - idx = which(time==times) - edges$v1[as.numeric(rownames(e))[e$t1==time]] = e$v1[e$t1==time] + (idx-1)/length(times) - edges$v2[as.numeric(rownames(e))[e$t2==time]] = e$v2[e$t2==time] + (idx-1)/length(times) - } - } -} +edges$v1 = edges$v1*10000 + edges$t1 +edges$v2 = edges$v2*10000 + edges$t2 dag = graph.data.frame(edges[,c(1,3)], directed=TRUE) -table(clusters(dag)$csize) - - - +table(components(dag,mode='weak')$csize) clusters = clusters(dag) membership = clusters$membership csize = clusters$csize order = rev(order(csize)) - -i = 4 +i = 95 V = which(clusters(dag)$membership==order[i]) # get all nodes in cluster cc = induced.subgraph(dag,V) -Vi = vic_ids[V] -Ei = intersect(which(dag_dat_vics$from[realized] %in% Vi),which(dag_dat_vics$to[realized] %in% Vi)) -cc_dat = (dag_dat_vics[realized,])[Ei,] +times = as.numeric(V(cc)$name) %% 10000 +start_date + range(times) ### plot cascade ### cols = rep('lightblue',vcount(cc)) seed = which(degree(cc,mode='in')==0) cols[seed] = 'red' -plot(cc,vertex.size=10,edge.arrow.size=0.5,vertex.color=cols,vertex.label.cex=1, - edge.width=E(cc)$weight*20/max(E(cc)$weight),layout=layout.reingold.tilford(cc,root=seed), - vertex.label=V(hyp_lcc)$vic.day[Vi]) -plot(cc,vertex.size=10,edge.arrow.size=0.5,vertex.color=cols,vertex.label.cex=1, +plot(cc,vertex.size=5,edge.arrow.size=0.2,vertex.color=cols,vertex.label=NA, layout=layout.reingold.tilford(cc,root=seed)) - -### basic graph statistics -trl = mean(transitivity(cc,type='local',isolates='zero')) -apl = average.path.length(cc) -indeg = degree(cc,mode='in') -outdeg = degree(cc,mode='out') -ds = mean(cc_dat$dist) - +par(mfrow = c(2,3)) diff --git a/R Scripts/predict-victims-plots.R b/R Scripts/predict-victims-plots.R index 3dfba60..179a60b 100644 --- a/R Scripts/predict-victims-plots.R +++ b/R Scripts/predict-victims-plots.R @@ -1,5 +1,11 @@ ##### Plot results -# load('Results/correct_rank_91415.RData') +library(RColorBrewer) +library(igraph) +setwd('~/Documents/Violence Cascades/') +load('Raw Data/lcc.RData') +load('Results/correct_rank_91415.RData') + +####### nvics = dim(correct_rank)[1] correct_rank1 = correct_rank[,length(lambdas)] # demographics model @@ -18,33 +24,30 @@ counts = matrix(c( sum(correct_rank1<(vcount(lcc)*popsizes[1])), sum(correct_rank3<(vcount(lcc)*popsizes[2])), sum(correct_rank3<(vcount(lcc)*popsizes[3]))), nrow=3, byrow=T) -barplot(counts, +cols = brewer.pal(3,'Paired'); cols=cols[c(3,1,2)] +barplot(counts, border=NA, xlab="Size of High-Risk Population", ylab="Victims Predicted", names.arg=paste(as.character(popsizes*100),'%',sep=''), ylim=c(0,max(counts)*1.1),axes=F, - col=c(rgb(0,0,1,1/2),rgb(1,0,0,1/2),rgb(0,1,0,1/2)), + col=cols, beside=TRUE) axis(2, at=pretty(counts*100/nvics)*nvics/100, lab=paste0(pretty(counts*100/nvics), "%"), las=TRUE) legend("topleft", inset=0.05, c("Demographics Model", "Contagion Model", "Combined Model"), - fill=c(rgb(0,0,1,1/2),rgb(1,0,0,1/2),rgb(0,1,0,1/2))) + fill=cols,border=NA,bty='n') box(which='plot') - par(new=T) -barplot(counts, - ylim=c(0,max(counts)*1.1), - col=c(rgb(0,0,1,0),rgb(1,0,0,0),rgb(0,1,0,0)), - beside=TRUE,axes=F) axis(side=4, at=pretty(counts), lab=pretty(counts)) mtext(side = 4, line = 3, "Number of Victims Predicted") #### Precision-Recall Curve -plot(ecdf(correct_rank1),col='red',lwd=2,main='') -plot(ecdf(correct_rank2),col='darkblue',lwd=2,add=T) -plot(ecdf(correct_rank3),col='darkgreen',lwd=2,add=T) +plot(ecdf(correct_rank1),col=cols[1],lwd=2,main='', + xlab='Ranking',ylab='CDF',xlim=c(0,140000)) +plot(ecdf(correct_rank2),col=cols[2],lwd=2,add=T) +plot(ecdf(correct_rank3),col=cols[3],lwd=2,add=T) legend("bottomright", inset=0.05, - c("Demographics Model", "Cascade Model", "Combined Model"), - fill=c('red','darkblue','darkgreen')) + c("Demographics Model", "Contagion Model", "Combined Model"), + fill=cols,border=NA,bty='n') diff --git a/R Scripts/predict-victims.R b/R Scripts/predict-victims.R index 3a531db..d17ef83 100644 --- a/R Scripts/predict-victims.R +++ b/R Scripts/predict-victims.R @@ -106,4 +106,4 @@ for(i in 1:length(days)){ print(proc.time()-ptm) } -# save(correct_rank, file='Results/correct_rank_91515.RData') \ No newline at end of file +# save(correct_rank, lambdas, file='Results/correct_rank_91515.RData') \ No newline at end of file -- cgit v1.2.3-70-g09d2