From 28de63defb949f402df2bd06c11637e5ef6402e6 Mon Sep 17 00:00:00 2001 From: Ben Green Date: Tue, 15 Sep 2015 22:42:01 -0400 Subject: update code to analyze cascades --- R Scripts/analyze-cascades.R | 53 ++++++++++++++++++++++++-------------------- R Scripts/predict-victims.R | 6 ++--- R Scripts/structural.R | 2 +- 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/R Scripts/analyze-cascades.R b/R Scripts/analyze-cascades.R index e758d60..b1ce3c3 100755 --- a/R Scripts/analyze-cascades.R +++ b/R Scripts/analyze-cascades.R @@ -1,25 +1,40 @@ -# library(igraph) -# setwd("~/Documents/Cascade Project/") -# -# load('Results/dag_dat_all.RData') -# load('Results/weight-12-1-14.RData') -# load('Results/hyper-lcc.RData') - -# dag = graph.edgelist(as.matrix(dag_dat[,1:2])) -# dag = set.edge.attribute(dag,'weight',value=weight) -# dag_dat = dag_dat[which(E(dag)$weight>=0.4),] -# dag = delete.edges(dag, which(E(dag)$weight<0.4)) +library(igraph) +setwd("~/Documents/Violence Cascades/") +# plot cascade sizes +data = read.csv('Results/components_dist-91515.csv',header=F) +data = data[order(data$V1),] +sizes = data$V1 +counts = data$V2 +plot(sizes,counts,log='xy',type='o',lwd=3, + xlab='Size of Cascade', ylab='Number of Cascades', main='Distribution of Cascade Sizes') + + +# plot cascades +edges = read.csv('Results/edges-91515.csv',header=F, + col.names=c('v1','t1','v2','t2','dist')) +for(id in unique(union(edges$v1,edges$v2))){ + e = edges[union(match(id,edges$v1), match(id,edges$v2)),] + times = sort(union(e$t1[e$v1==id],e$t2[e$v2==id])) + if (length(times)>1){ + for(time in times[-1]){ + idx = which(time==times) + edges$v1[as.numeric(rownames(e))[e$t1==time]] = e$v1[e$t1==time] + (idx-1)/length(times) + edges$v2[as.numeric(rownames(e))[e$t2==time]] = e$v2[e$t2==time] + (idx-1)/length(times) + } + } +} +dag = graph.data.frame(edges[,c(1,3)], directed=TRUE) table(clusters(dag)$csize) + + + clusters = clusters(dag) membership = clusters$membership csize = clusters$csize order = rev(order(csize)) -#use table not hist -plot(sizes,counts,log='xy',type='o',lwd=3, - xlab='Size of Cascade', ylab='Number of Cascades', main='Distribution of Cascade Sizes') i = 4 V = which(clusters(dag)$membership==order[i]) # get all nodes in cluster @@ -46,14 +61,4 @@ indeg = degree(cc,mode='in') outdeg = degree(cc,mode='out') ds = mean(cc_dat$dist) -### node demographic statistics -from = vic_ids[cc_dat$from] -to = vic_ids[cc_dat$to] -V(lcc)$sex[from] == V(lcc)$sex[to] -V(lcc)$sex[Vi] -V(lcc)$race[Vi] -as.numeric(V(lcc)$age[Vi]) -V(lcc)$gang.member[Vi] -V(lcc)$gang.name[Vi] -V(lcc)$faction.name[Vi] diff --git a/R Scripts/predict-victims.R b/R Scripts/predict-victims.R index ccf894a..7caaf13 100644 --- a/R Scripts/predict-victims.R +++ b/R Scripts/predict-victims.R @@ -2,7 +2,7 @@ library(igraph) library(data.table) library(foreach) library(doMC) -registerDoMC(cores=4) +registerDoMC(cores=6) setwd('~/Documents/Violence Cascades/') load('Raw Data/lcc.RData') load('Raw Data/dag_dat_lcc.RData') @@ -14,8 +14,8 @@ source('criminal_cascades/R Scripts/structural.R') nArrests = function(arrests,day){return(sum(arrests