Worked on process to generate cascades in R, recover them in ml2, and

compare true/recovered. Results look good: ml2 finds close-to-actual parameters and gets ~60% of the infectors right. I will check how many recovery gets in the top 2 or 3 infectors.
author: Ben Green <ben@SEASITs-MacBook-Pro.local> 2015-06-25 23:56:26 -0400
committer: Ben Green <ben@SEASITs-MacBook-Pro.local> 2015-06-25 23:56:26 -0400
commit: 7167a81cfb8b872dd1547e5a8669004b191417db (patch)
tree: dd160b9a7f438837118d8ca9fe83ae6c35a0d71a /R Scripts
parent: c8fc620dda6096932707566dc8f20a3e65cb26b0 (diff)
download: criminal_cascades-7167a81cfb8b872dd1547e5a8669004b191417db.tar.gz
2 files changed, 80 insertions, 2 deletions
diff --git a/R Scripts/generate-network.R b/R Scripts/generate-network.R
new file mode 100644
index 0000000..dc4a4f8
--- /dev/null
+++ b/R Scripts/generate-network.R
@@ -0,0 +1,78 @@
+library(igraph)
+setwd("~/Documents/Cascade Project/")
+source('criminal_cascades/R Scripts/temporal.R')
+source('criminal_cascades/R Scripts/structural.R')
+
+alpha = 1/100
+beta = 0.02
+delta = 0.15
+# lmbda = 1/10
+t_max = 1000
+
+# g = watts.strogatz.game(1, 100, 3, 0.25)
+N = 5000
+g = forest.fire.game(nodes=N, fw.prob=0.3, ambs=1, directed=F)
+plot(g, vertex.size=5, vertex.label=NA)
+
+V(g)$seed = runif(vcount(g))<beta
+seeds = which(V(g)$seed)
+V(g)$vic = V(g)$seed
+V(g)$vic.day[V(g)$seed] = sample(1:t_max, sum(V(g)$seed))
+V(g)$spawn.date = 0
+V(g)$infector = NA
+
+for (day in 1:t_max){
+  day.vics = match(day,V(g)$vic.day)
+  if (is.na(day.vics)) next
+  for (vic in day.vics){
+    neighbors = (unlist(neighborhood(g,3,vic)))[-1]
+    dists = as.numeric(shortest.paths(g,vic,neighbors))
+    infected = neighbors[which(runif(length(neighbors))<structural(delta, dists))]
+    infected = setdiff(infected,seeds) # don't try to infect seeds
+    inf.days = day + ceiling(alpha*rexp(length(infected),alpha))
+    V(g)$vic[infected] = TRUE
+    infects = (inf.days < V(g)$vic.day[infected]) %in% c(NA,T)
+    V(g)$vic.day[infected[infects]] = inf.days[infects]
+    V(g)$infector[infected[infects]] = vic
+  }
+}
+
+vic_ids = which(V(g)$vic)
+cols = rep('lightblue',N); cols[V(g)$vic]='red'; cols[V(g)$seed]='darkred'
+plot(g, vertex.size=5, vertex.label=NA, vertex.color=cols)
+
+##### generate dag_dat
+dag_dat_test = data.frame(matrix(nrow=1,ncol=10))
+ei = 1
+for (u in vic_ids){
+  if ((which(vic_ids==u) %% 1000)==0)  print(which(vic_ids==u))
+  tu    = V(g)$vic.day[u]
+  u_spawn = V(g)$spawn.date[u]
+  nbhd  = (unlist(neighborhood(g,nodes=u,order=3)))[-1] # get nodes within neighborhood
+  tvs   = V(g)$vic.day[nbhd]
+  v_spawn = V(g)$spawn.date[nbhd]
+  nbhd  = nbhd[tu>v_spawn & (is.na(tvs) | tu<tvs)]
+  tvs   = V(g)$vic.day[nbhd]
+  dists = as.numeric(shortest.paths(g,u,nbhd))
+  
+  weights = matrix(1,nrow=length(nbhd),ncol=3)
+  
+  #will be faster to pre-allocate and fill in rather than rbind each time
+  dag_dat_test[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd, 
+                                                    rep(tu,length(nbhd)), tvs, dists, 
+                                                    weights, rep(u_spawn,length(nbhd)),
+                                                    rep(0,length(nbhd)), row.names=NULL)
+  ei = ei + length(nbhd)
+}
+colnames(dag_dat_test) = c('from','to','t1','t2','dist','w1','w2','w3','spawn1','spawn2')
+rownames(dag_dat_test) = NULL
+
+write.csv(dag_dat_test, file='Results/dag_dat_test.csv')
+
+##### analyze performance of recovery algorithm
+recovered = read.csv('Results/infectors.csv',header=F,col.names=c('victim','infector'))
+recovered = recovered[order(recovered$victim),]
+infectors = cbind(setdiff(vic_ids,seeds),
+                  V(g)$infector[setdiff(vic_ids,seeds)],
+                  recovered$infector[recovered$victim %in% setdiff(vic_ids,seeds)])
+mean(infectors[,2]==infectors[,3])
diff --git a/R Scripts/structural.R b/R Scripts/structural.R
index 132aa6c..b68711c 100755
--- a/R Scripts/structural.R
+++ b/R Scripts/structural.R
@@ -1,5 +1,5 @@
-# structural = function(w1,w2,w3,gamma){
-#   return (plogis(w1,scale=gamma) * plogis(w2,scale=gamma) * plogis(w3,scale=gamma))
+# structural = function(delta,lmbda,w1,w2,w3){
+#   return (delta/(1. + 1./(w1*lmbda) + 1./(w2*lmbda) + 1./(w3*lmbda)))
 # }
 
 structural = function(delta,dist){
author	Ben Green <ben@SEASITs-MacBook-Pro.local>	2015-06-25 23:56:26 -0400
committer	Ben Green <ben@SEASITs-MacBook-Pro.local>	2015-06-25 23:56:26 -0400
commit	7167a81cfb8b872dd1547e5a8669004b191417db (patch)
tree	dd160b9a7f438837118d8ca9fe83ae6c35a0d71a /R Scripts
parent	c8fc620dda6096932707566dc8f20a3e65cb26b0 (diff)
download	criminal_cascades-7167a81cfb8b872dd1547e5a8669004b191417db.tar.gz