4 files changed, 56 insertions, 17 deletions
diff --git a/R Scripts/analyze-cascades.R b/R Scripts/analyze-cascades.R
index 60c59fb..04cb0e3 100755
--- a/R Scripts/analyze-cascades.R
+++ b/R Scripts/analyze-cascades.R
@@ -1,15 +1,25 @@
 library(igraph)
+library(RColorBrewer)
+library(poweRlaw)
 setwd("~/Documents/Violence Cascades/")
 start_date = as.Date("2005-12-31")
 
 # plot cascade sizes
 data = read.csv('Results/components_dist-91515.csv',header=F)
 data = data[order(data$V1),]
-sizes = data$V1
-counts = data$V2
-plot(sizes,counts,log='xy',type='o',lwd=3,
-     xlab='Size of Cascade', ylab='Number of Cascades', main='Distribution of Cascade Sizes')
+sizes = 1:max(data$V1)
+counts = rep(0,max(sizes))
+counts[data$V1] = data$V2
+pl = power.law.fit(counts)
+plot(sizes,counts,log='xy',pch=20,col='#377EB8',
+     xlab='Size of Cascade', ylab='Number of Cascades', main='')
+lines(c(1,1))
 
+sizes_pl = displ$new(counts[counts>1])
+est = estimate_xmin(sizes_pl)
+sizes_pl$setXmin(est)
+plot(sizes_pl)
+lines(sizes_pl, col=2)
 
 # plot cascades
 edges = read.csv('Results/edges-91515.csv',header=F,
@@ -24,18 +34,20 @@ membership = clusters$membership
 csize = clusters$csize
 order = rev(order(csize))
 
-i = 95
+i = 25
 V = which(clusters(dag)$membership==order[i]) # get all nodes in cluster
 cc = induced.subgraph(dag,V)
 times = as.numeric(V(cc)$name) %% 10000
 start_date + range(times)
 
 ### plot cascade ###
-cols = rep('lightblue',vcount(cc))
+cb = brewer.pal(3,'Paired')
+cols = rep('#1f78b4',vcount(cc))
 seed = which(degree(cc,mode='in')==0)
-cols[seed] = 'red'
-plot(cc,vertex.size=5,edge.arrow.size=0.2,vertex.color=cols,vertex.label=NA,
-     layout=layout.reingold.tilford(cc,root=seed))
+cols[seed] = '#d95f02'
+plot(cc,vertex.size=8,edge.arrow.size=0.2,vertex.color=cols,vertex.label=NA,
+     layout=layout.reingold.tilford(cc,root=seed),
+     edge.color='black',vertex.frame.color=NA)
 
-par(mfrow = c(2,3))
+par(mfrow = c(1,3))
 
diff --git a/R Scripts/data-exploration.R b/R Scripts/data-exploration.R
index f9617f9..16e7bba 100755
--- a/R Scripts/data-exploration.R
+++ b/R Scripts/data-exploration.R
@@ -28,9 +28,10 @@ S = data.frame(C_dat = trl,
 S
 
 ##### Degree Distribution
-plot(degree.distribution(lcc)*vcount(lcc),log='xy',type='l',col='red',lwd=2,
-      xlab='Degree', ylab='Number of Vertices', main='Degree Distribution')
-
+plot(1:max(degree(lcc)),degree.distribution(lcc)[-1]*vcount(lcc),
+     log='xy',col='#377EB8',pch=20,
+     xlab='Degree', ylab='Number of Vertices', main='')
+pl = power.law.fit(degree.distribution(lcc))
 
 ##### Victims
 vic_ids = which(V(lcc)$vic==TRUE)
diff --git a/R Scripts/plot-power-law.R b/R Scripts/plot-power-law.R
new file mode 100644
index 0000000..8cca228
--- /dev/null
+++ b/R Scripts/plot-power-law.R
@@ -0,0 +1,26 @@
+plot(1:max(degree(lcc)),degree.distribution(lcc)[-1],
+     log='xy',col='#377EB8',pch=20,
+     xlab='Degree', ylab='Number of Vertices', main='')
+
+
+# plot and fit the power law distribution
+# calculate degree
+d = degree(graph, mode = "all")
+dd = degree.distribution(graph, mode = "all", cumulative = FALSE)
+degree = 1:max(d)
+probability = dd[-1]
+# delete blank values
+nonzero.position = which(probability != 0)
+probability = probability[nonzero.position]
+degree = degree[nonzero.position]
+reg = lm(log(probability) ~ log(degree))
+cozf = coef(reg)
+power.law.fit = function(x) exp(cozf[[1]] + cozf[[2]] * log(x))
+alpha = -cozf[[2]]
+print(paste("Alpha =", round(alpha, 3)))
+# plot
+print(d)
+curve(power.law.fit, col = "red", add = T, n = length(d))
+
+
+# fit_power_law(lcc)
diff --git a/R Scripts/predict-victims-plots.R b/R Scripts/predict-victims-plots.R
index 179a60b..0e1b669 100644
--- a/R Scripts/predict-victims-plots.R
+++ b/R Scripts/predict-victims-plots.R
@@ -24,7 +24,7 @@ counts = matrix(c( sum(correct_rank1<(vcount(lcc)*popsizes[1])),
                   sum(correct_rank3<(vcount(lcc)*popsizes[2])),
                   sum(correct_rank3<(vcount(lcc)*popsizes[3]))),
                 nrow=3, byrow=T)
-cols = brewer.pal(3,'Paired'); cols=cols[c(3,1,2)]
+cols = brewer.pal(3,'Dark2');# cols=cols[c(3,1,2)]
 barplot(counts, border=NA,
         xlab="Size of High-Risk Population",
         ylab="Victims Predicted",
@@ -43,10 +43,10 @@ mtext(side = 4, line = 3, "Number of Victims Predicted")
 
 
 #### Precision-Recall Curve
-plot(ecdf(correct_rank1),col=cols[1],lwd=2,main='',
+plot(ecdf(correct_rank1),col=cols[1],lwd=3,main='',
      xlab='Ranking',ylab='CDF',xlim=c(0,140000))
-plot(ecdf(correct_rank2),col=cols[2],lwd=2,add=T)
-plot(ecdf(correct_rank3),col=cols[3],lwd=2,add=T)
+plot(ecdf(correct_rank2),col=cols[2],lwd=3,add=T)
+plot(ecdf(correct_rank3),col=cols[3],lwd=3,add=T)
 legend("bottomright", inset=0.05, 
        c("Demographics Model", "Contagion Model", "Combined Model"), 
        fill=cols,border=NA,bty='n')