summaryrefslogtreecommitdiffstats
path: root/R Scripts
diff options
context:
space:
mode:
Diffstat (limited to 'R Scripts')
-rwxr-xr-xR Scripts/analyze-cascades.R32
-rwxr-xr-xR Scripts/data-exploration.R7
-rw-r--r--R Scripts/plot-power-law.R26
-rw-r--r--R Scripts/predict-victims-plots.R8
4 files changed, 56 insertions, 17 deletions
diff --git a/R Scripts/analyze-cascades.R b/R Scripts/analyze-cascades.R
index 60c59fb..04cb0e3 100755
--- a/R Scripts/analyze-cascades.R
+++ b/R Scripts/analyze-cascades.R
@@ -1,15 +1,25 @@
library(igraph)
+library(RColorBrewer)
+library(poweRlaw)
setwd("~/Documents/Violence Cascades/")
start_date = as.Date("2005-12-31")
# plot cascade sizes
data = read.csv('Results/components_dist-91515.csv',header=F)
data = data[order(data$V1),]
-sizes = data$V1
-counts = data$V2
-plot(sizes,counts,log='xy',type='o',lwd=3,
- xlab='Size of Cascade', ylab='Number of Cascades', main='Distribution of Cascade Sizes')
+sizes = 1:max(data$V1)
+counts = rep(0,max(sizes))
+counts[data$V1] = data$V2
+pl = power.law.fit(counts)
+plot(sizes,counts,log='xy',pch=20,col='#377EB8',
+ xlab='Size of Cascade', ylab='Number of Cascades', main='')
+lines(c(1,1))
+sizes_pl = displ$new(counts[counts>1])
+est = estimate_xmin(sizes_pl)
+sizes_pl$setXmin(est)
+plot(sizes_pl)
+lines(sizes_pl, col=2)
# plot cascades
edges = read.csv('Results/edges-91515.csv',header=F,
@@ -24,18 +34,20 @@ membership = clusters$membership
csize = clusters$csize
order = rev(order(csize))
-i = 95
+i = 25
V = which(clusters(dag)$membership==order[i]) # get all nodes in cluster
cc = induced.subgraph(dag,V)
times = as.numeric(V(cc)$name) %% 10000
start_date + range(times)
### plot cascade ###
-cols = rep('lightblue',vcount(cc))
+cb = brewer.pal(3,'Paired')
+cols = rep('#1f78b4',vcount(cc))
seed = which(degree(cc,mode='in')==0)
-cols[seed] = 'red'
-plot(cc,vertex.size=5,edge.arrow.size=0.2,vertex.color=cols,vertex.label=NA,
- layout=layout.reingold.tilford(cc,root=seed))
+cols[seed] = '#d95f02'
+plot(cc,vertex.size=8,edge.arrow.size=0.2,vertex.color=cols,vertex.label=NA,
+ layout=layout.reingold.tilford(cc,root=seed),
+ edge.color='black',vertex.frame.color=NA)
-par(mfrow = c(2,3))
+par(mfrow = c(1,3))
diff --git a/R Scripts/data-exploration.R b/R Scripts/data-exploration.R
index f9617f9..16e7bba 100755
--- a/R Scripts/data-exploration.R
+++ b/R Scripts/data-exploration.R
@@ -28,9 +28,10 @@ S = data.frame(C_dat = trl,
S
##### Degree Distribution
-plot(degree.distribution(lcc)*vcount(lcc),log='xy',type='l',col='red',lwd=2,
- xlab='Degree', ylab='Number of Vertices', main='Degree Distribution')
-
+plot(1:max(degree(lcc)),degree.distribution(lcc)[-1]*vcount(lcc),
+ log='xy',col='#377EB8',pch=20,
+ xlab='Degree', ylab='Number of Vertices', main='')
+pl = power.law.fit(degree.distribution(lcc))
##### Victims
vic_ids = which(V(lcc)$vic==TRUE)
diff --git a/R Scripts/plot-power-law.R b/R Scripts/plot-power-law.R
new file mode 100644
index 0000000..8cca228
--- /dev/null
+++ b/R Scripts/plot-power-law.R
@@ -0,0 +1,26 @@
+plot(1:max(degree(lcc)),degree.distribution(lcc)[-1],
+ log='xy',col='#377EB8',pch=20,
+ xlab='Degree', ylab='Number of Vertices', main='')
+
+
+# plot and fit the power law distribution
+# calculate degree
+d = degree(graph, mode = "all")
+dd = degree.distribution(graph, mode = "all", cumulative = FALSE)
+degree = 1:max(d)
+probability = dd[-1]
+# delete blank values
+nonzero.position = which(probability != 0)
+probability = probability[nonzero.position]
+degree = degree[nonzero.position]
+reg = lm(log(probability) ~ log(degree))
+cozf = coef(reg)
+power.law.fit = function(x) exp(cozf[[1]] + cozf[[2]] * log(x))
+alpha = -cozf[[2]]
+print(paste("Alpha =", round(alpha, 3)))
+# plot
+print(d)
+curve(power.law.fit, col = "red", add = T, n = length(d))
+
+
+# fit_power_law(lcc)
diff --git a/R Scripts/predict-victims-plots.R b/R Scripts/predict-victims-plots.R
index 179a60b..0e1b669 100644
--- a/R Scripts/predict-victims-plots.R
+++ b/R Scripts/predict-victims-plots.R
@@ -24,7 +24,7 @@ counts = matrix(c( sum(correct_rank1<(vcount(lcc)*popsizes[1])),
sum(correct_rank3<(vcount(lcc)*popsizes[2])),
sum(correct_rank3<(vcount(lcc)*popsizes[3]))),
nrow=3, byrow=T)
-cols = brewer.pal(3,'Paired'); cols=cols[c(3,1,2)]
+cols = brewer.pal(3,'Dark2');# cols=cols[c(3,1,2)]
barplot(counts, border=NA,
xlab="Size of High-Risk Population",
ylab="Victims Predicted",
@@ -43,10 +43,10 @@ mtext(side = 4, line = 3, "Number of Victims Predicted")
#### Precision-Recall Curve
-plot(ecdf(correct_rank1),col=cols[1],lwd=2,main='',
+plot(ecdf(correct_rank1),col=cols[1],lwd=3,main='',
xlab='Ranking',ylab='CDF',xlim=c(0,140000))
-plot(ecdf(correct_rank2),col=cols[2],lwd=2,add=T)
-plot(ecdf(correct_rank3),col=cols[3],lwd=2,add=T)
+plot(ecdf(correct_rank2),col=cols[2],lwd=3,add=T)
+plot(ecdf(correct_rank3),col=cols[3],lwd=3,add=T)
legend("bottomright", inset=0.05,
c("Demographics Model", "Contagion Model", "Combined Model"),
fill=cols,border=NA,bty='n')