summary | refs | log | tree | commit | diff | stats
path: root/R Scripts/analyze-cascades.R
diff options
context:
space:
mode:
author: Ben Green <bgreen@g.harvard.edu> 2015-09-15 22:42:01 -0400
committer: Ben Green <bgreen@g.harvard.edu> 2015-09-15 22:42:04 -0400
commit: 28de63defb949f402df2bd06c11637e5ef6402e6 (patch)
tree: 146a16793efcf73ff98d2a44daca6b143abcc82d /R Scripts/analyze-cascades.R
parent: 6db9e7baeda21cd5d38f390cc84f91c3d23e054b (diff)
download: criminal_cascades-28de63defb949f402df2bd06c11637e5ef6402e6.tar.gz
update code to analyze cascades
Diffstat (limited to 'R Scripts/analyze-cascades.R')
-rwxr-xr-x  R Scripts/analyze-cascades.R  51
1 files changed, 28 insertions, 23 deletions
diff --git a/R Scripts/analyze-cascades.R b/R Scripts/analyze-cascades.R
index e758d60..b1ce3c3 100755
--- a/R Scripts/analyze-cascades.R
+++ b/R Scripts/analyze-cascades.R
@@ -1,25 +1,40 @@
-# library(igraph)
-# setwd("~/Documents/Cascade Project/")
-#
-# load('Results/dag_dat_all.RData')
-# load('Results/weight-12-1-14.RData')
-# load('Results/hyper-lcc.RData')
+library(igraph)
+setwd("~/Documents/Violence Cascades/")
-# dag = graph.edgelist(as.matrix(dag_dat[,1:2]))
-# dag = set.edge.attribute(dag,'weight',value=weight)
-# dag_dat = dag_dat[which(E(dag)$weight>=0.4),]
-# dag = delete.edges(dag, which(E(dag)$weight<0.4))
+# plot cascade sizes
+data = read.csv('Results/components_dist-91515.csv',header=F)
+data = data[order(data$V1),]
+sizes = data$V1
+counts = data$V2
+plot(sizes,counts,log='xy',type='o',lwd=3,
+ xlab='Size of Cascade', ylab='Number of Cascades', main='Distribution of Cascade Sizes')
+
+# plot cascades
+edges = read.csv('Results/edges-91515.csv',header=F,
+ col.names=c('v1','t1','v2','t2','dist'))
+for(id in unique(union(edges$v1,edges$v2))){
+ e = edges[union(match(id,edges$v1), match(id,edges$v2)),]
+ times = sort(union(e$t1[e$v1==id],e$t2[e$v2==id]))
+ if (length(times)>1){
+ for(time in times[-1]){
+ idx = which(time==times)
+ edges$v1[as.numeric(rownames(e))[e$t1==time]] = e$v1[e$t1==time] + (idx-1)/length(times)
+ edges$v2[as.numeric(rownames(e))[e$t2==time]] = e$v2[e$t2==time] + (idx-1)/length(times)
+ }
+ }
+}
+dag = graph.data.frame(edges[,c(1,3)], directed=TRUE)
table(clusters(dag)$csize)
+
+
+
clusters = clusters(dag)
membership = clusters$membership
csize = clusters$csize
order = rev(order(csize))
-#use table not hist
-plot(sizes,counts,log='xy',type='o',lwd=3,
- xlab='Size of Cascade', ylab='Number of Cascades', main='Distribution of Cascade Sizes')
i = 4
V = which(clusters(dag)$membership==order[i]) # get all nodes in cluster
@@ -46,14 +61,4 @@ indeg = degree(cc,mode='in')
outdeg = degree(cc,mode='out')
ds = mean(cc_dat$dist)
-### node demographic statistics
-from = vic_ids[cc_dat$from]
-to = vic_ids[cc_dat$to]
-V(lcc)$sex[from] == V(lcc)$sex[to]
-V(lcc)$sex[Vi]
-V(lcc)$race[Vi]
-as.numeric(V(lcc)$age[Vi])
-V(lcc)$gang.member[Vi]
-V(lcc)$gang.name[Vi]
-V(lcc)$faction.name[Vi]