diff options
Diffstat (limited to 'R Scripts')
| -rwxr-xr-x | R Scripts/analyze-cascades.R | 32 | ||||
| -rwxr-xr-x | R Scripts/data-exploration.R | 7 | ||||
| -rw-r--r-- | R Scripts/plot-power-law.R | 26 | ||||
| -rw-r--r-- | R Scripts/predict-victims-plots.R | 8 |
4 files changed, 56 insertions, 17 deletions
diff --git a/R Scripts/analyze-cascades.R b/R Scripts/analyze-cascades.R index 60c59fb..04cb0e3 100755 --- a/R Scripts/analyze-cascades.R +++ b/R Scripts/analyze-cascades.R @@ -1,15 +1,25 @@ library(igraph) +library(RColorBrewer) +library(poweRlaw) setwd("~/Documents/Violence Cascades/") start_date = as.Date("2005-12-31") # plot cascade sizes data = read.csv('Results/components_dist-91515.csv',header=F) data = data[order(data$V1),] -sizes = data$V1 -counts = data$V2 -plot(sizes,counts,log='xy',type='o',lwd=3, - xlab='Size of Cascade', ylab='Number of Cascades', main='Distribution of Cascade Sizes') +sizes = 1:max(data$V1) +counts = rep(0,max(sizes)) +counts[data$V1] = data$V2 +pl = power.law.fit(counts) +plot(sizes,counts,log='xy',pch=20,col='#377EB8', + xlab='Size of Cascade', ylab='Number of Cascades', main='') +lines(c(1,1)) +sizes_pl = displ$new(counts[counts>1]) +est = estimate_xmin(sizes_pl) +sizes_pl$setXmin(est) +plot(sizes_pl) +lines(sizes_pl, col=2) # plot cascades edges = read.csv('Results/edges-91515.csv',header=F, @@ -24,18 +34,20 @@ membership = clusters$membership csize = clusters$csize order = rev(order(csize)) -i = 95 +i = 25 V = which(clusters(dag)$membership==order[i]) # get all nodes in cluster cc = induced.subgraph(dag,V) times = as.numeric(V(cc)$name) %% 10000 start_date + range(times) ### plot cascade ### -cols = rep('lightblue',vcount(cc)) +cb = brewer.pal(3,'Paired') +cols = rep('#1f78b4',vcount(cc)) seed = which(degree(cc,mode='in')==0) -cols[seed] = 'red' -plot(cc,vertex.size=5,edge.arrow.size=0.2,vertex.color=cols,vertex.label=NA, - layout=layout.reingold.tilford(cc,root=seed)) +cols[seed] = '#d95f02' +plot(cc,vertex.size=8,edge.arrow.size=0.2,vertex.color=cols,vertex.label=NA, + layout=layout.reingold.tilford(cc,root=seed), + edge.color='black',vertex.frame.color=NA) -par(mfrow = c(2,3)) +par(mfrow = c(1,3)) diff --git a/R Scripts/data-exploration.R b/R Scripts/data-exploration.R index f9617f9..16e7bba 100755 --- a/R Scripts/data-exploration.R +++ b/R Scripts/data-exploration.R @@ -28,9 +28,10 @@ S = data.frame(C_dat = trl, S ##### Degree Distribution -plot(degree.distribution(lcc)*vcount(lcc),log='xy',type='l',col='red',lwd=2, - xlab='Degree', ylab='Number of Vertices', main='Degree Distribution') - +plot(1:max(degree(lcc)),degree.distribution(lcc)[-1]*vcount(lcc), + log='xy',col='#377EB8',pch=20, + xlab='Degree', ylab='Number of Vertices', main='') +pl = power.law.fit(degree.distribution(lcc)) ##### Victims vic_ids = which(V(lcc)$vic==TRUE) diff --git a/R Scripts/plot-power-law.R b/R Scripts/plot-power-law.R new file mode 100644 index 0000000..8cca228 --- /dev/null +++ b/R Scripts/plot-power-law.R @@ -0,0 +1,26 @@ +plot(1:max(degree(lcc)),degree.distribution(lcc)[-1], + log='xy',col='#377EB8',pch=20, + xlab='Degree', ylab='Number of Vertices', main='') + + +# plot and fit the power law distribution +# calculate degree +d = degree(graph, mode = "all") +dd = degree.distribution(graph, mode = "all", cumulative = FALSE) +degree = 1:max(d) +probability = dd[-1] +# delete blank values +nonzero.position = which(probability != 0) +probability = probability[nonzero.position] +degree = degree[nonzero.position] +reg = lm(log(probability) ~ log(degree)) +cozf = coef(reg) +power.law.fit = function(x) exp(cozf[[1]] + cozf[[2]] * log(x)) +alpha = -cozf[[2]] +print(paste("Alpha =", round(alpha, 3))) +# plot +print(d) +curve(power.law.fit, col = "red", add = T, n = length(d)) + + +# fit_power_law(lcc) diff --git a/R Scripts/predict-victims-plots.R b/R Scripts/predict-victims-plots.R index 179a60b..0e1b669 100644 --- a/R Scripts/predict-victims-plots.R +++ b/R Scripts/predict-victims-plots.R @@ -24,7 +24,7 @@ counts = matrix(c( sum(correct_rank1<(vcount(lcc)*popsizes[1])), sum(correct_rank3<(vcount(lcc)*popsizes[2])), sum(correct_rank3<(vcount(lcc)*popsizes[3]))), nrow=3, byrow=T) -cols = brewer.pal(3,'Paired'); cols=cols[c(3,1,2)] +cols = brewer.pal(3,'Dark2');# cols=cols[c(3,1,2)] barplot(counts, border=NA, xlab="Size of High-Risk Population", ylab="Victims Predicted", @@ -43,10 +43,10 @@ mtext(side = 4, line = 3, "Number of Victims Predicted") #### Precision-Recall Curve -plot(ecdf(correct_rank1),col=cols[1],lwd=2,main='', +plot(ecdf(correct_rank1),col=cols[1],lwd=3,main='', xlab='Ranking',ylab='CDF',xlim=c(0,140000)) -plot(ecdf(correct_rank2),col=cols[2],lwd=2,add=T) -plot(ecdf(correct_rank3),col=cols[3],lwd=2,add=T) +plot(ecdf(correct_rank2),col=cols[2],lwd=3,add=T) +plot(ecdf(correct_rank3),col=cols[3],lwd=3,add=T) legend("bottomright", inset=0.05, c("Demographics Model", "Contagion Model", "Combined Model"), fill=cols,border=NA,bty='n') |
