summary | refs | log | tree | commit | diff | stats
path: root/R Scripts/create-hyper-lcc.R
diff options
context:
space:
mode:
author: Ben Green <ben@SEASITs-MacBook-Pro.local> 2015-07-02 00:41:49 -0400
committer: Ben Green <ben@SEASITs-MacBook-Pro.local> 2015-07-02 00:41:49 -0400
commit: 375f29ab4306821c888fd9ef0637f9ab2879e375 (patch)
tree: 30d1c40e429abda8c9cbae953b719543f9cd9cc8 /R Scripts/create-hyper-lcc.R
parent: 8e09ca6ca68c71bdab65525b529e2adfa281823c (diff)
download: criminal_cascades-375f29ab4306821c888fd9ef0637f9ab2879e375.tar.gz
in which the model works!!
Diffstat (limited to 'R Scripts/create-hyper-lcc.R')
-rw-r--r-- R Scripts/create-hyper-lcc.R 126
1 files changed, 0 insertions, 126 deletions
diff --git a/R Scripts/create-hyper-lcc.R b/R Scripts/create-hyper-lcc.R
deleted file mode 100644
index 786b694..0000000
--- a/R Scripts/create-hyper-lcc.R
+++ /dev/null
@@ -1,126 +0,0 @@
-library(igraph)
-setwd("~/Documents/Cascade Project/")
-load('Raw Data/lcc.RData')
-
-lcc_verts = get.data.frame(lcc,'vertices')
-lcc_edges = get.data.frame(lcc,'edges')
-
-
-##### Create new vertices dataframe
-cols = c('id','ir_no','sex','race','dob','age.arrest','arrest.day',
- 'gang.member','gang.name','faction.name')
-hyp_lcc_verts = data.frame(matrix(ncol=length(cols)+1,nrow=0))
-colnames(hyp_lcc_verts) = c(cols,'spawn.date')
-
-ptm = proc.time()
-ri = 1
-for (i in 1:dim(lcc_verts)[1]){
- if (i%%10000==0) print(i)
- if (lcc_verts$vic[i]){
- if (lcc_verts$vic.nonfatal[i]>0){
- # create nodes for each nonfatal shooting
- for (nf in 1:lcc_verts$vic.nonfatal[i]){
- hyp_lcc_verts[ri,cols] = lcc_verts[i,cols]
- hyp_lcc_verts$id[ri] = ri
- hyp_lcc_verts$vic[ri] = T
- hyp_lcc_verts$vic.type[ri] = 'nonfatal'
- if(nf==1){
- hyp_lcc_verts$vic.day[ri] = lcc_verts$nonfatal_day_1[i]
- hyp_lcc_verts$spawn.date[ri] = 0
- } else if(nf==2){
- hyp_lcc_verts$vic.day[ri] = lcc_verts$nonfatal_day_2[i]
- hyp_lcc_verts$spawn.date[ri] = lcc_verts$nonfatal_day_1[i]
- } else if(nf==3){
- hyp_lcc_verts$vic.day[ri] = lcc_verts$nonfatal_day_3[i]
- hyp_lcc_verts$spawn.date[ri] = lcc_verts$nonfatal_day_2[i]
- } else if(nf==4){
- hyp_lcc_verts$vic.day[ri] = lcc_verts$nonfatal_day_4[i]
- hyp_lcc_verts$spawn.date[ri] = lcc_verts$nonfatal_day_3[i]
- } else if(nf==5){
- hyp_lcc_verts$vic.day[ri] = lcc_verts$nonfatal_day_5[i]
- hyp_lcc_verts$spawn.date[ri] = lcc_verts$nonfatal_day_4[i]
- }
- ri = ri+1
- }
- # if no fatal infection, create uninfected duplicate
- if (!lcc_verts$vic.fatal[i]){
- hyp_lcc_verts[ri,cols] = lcc_verts[i,cols]
- hyp_lcc_verts$id[ri] = ri
- hyp_lcc_verts$vic[ri] = F
- hyp_lcc_verts$vic.type[ri] = NA
- hyp_lcc_verts$vic.day[ri] = NA
- hyp_lcc_verts$spawn.date[ri] = max(lcc_verts$nonfatal_day_1[i],lcc_verts$nonfatal_day_2[i],
- lcc_verts$nonfatal_day_3[i],lcc_verts$nonfatal_day_4[i],
- lcc_verts$nonfatal_day_5[i],na.rm=T)
- ri = ri+1
- }
- }
- # create a node for each fatal shooting
- # if also nonfatal shootings, spawn at last nonfatal shooting
- if (lcc_verts$vic.fatal[i]){
- hyp_lcc_verts[ri,cols] = lcc_verts[i,cols]
- hyp_lcc_verts$id[ri] = ri
- hyp_lcc_verts$vic[ri] = T
- hyp_lcc_verts$vic.type[ri] = 'fatal'
- hyp_lcc_verts$vic.day[ri] = lcc_verts$fatal_day[i]
- if (lcc_verts$vic.nonfatal[i]>0){
- hyp_lcc_verts$spawn.date[ri] = max(lcc_verts$nonfatal_day_1[i],lcc_verts$nonfatal_day_2[i],
- lcc_verts$nonfatal_day_3[i],lcc_verts$nonfatal_day_4[i],
- lcc_verts$nonfatal_day_5[i],na.rm=T)
- } else {
- hyp_lcc_verts$spawn.date[ri] = 0
- }
- ri = ri+1
- }
- }
- # create an uninfected node for each uninfected person
- else{
- hyp_lcc_verts[ri,cols] = lcc_verts[i,cols]
- hyp_lcc_verts$id[ri] = ri
- hyp_lcc_verts$vic[ri] = F
- hyp_lcc_verts$vic.type[ri] = NA
- hyp_lcc_verts$vic.day[ri] = NA
- hyp_lcc_verts$spawn.date[ri] = 0
- ri = ri+1
- }
-}
-print(proc.time()-ptm) # 1.5 hrs
-row.names(hyp_lcc_verts) = NULL
-n.nodes = sum(sum(V(lcc)$vic.fatal),sum(V(lcc)$vic.nonfatal),
- sum(V(lcc)$vic.nonfatal>0 & !V(lcc)$vic.fatal),sum(V(lcc)$vic==FALSE))
-stopifnot(dim(hyp_lcc_verts)[1] == n.nodes)
-
-##### Create new edgelist
-print('Edges')
-hyp_lcc_edges = data.frame(from=0, to=0, weight=0)
-ei = 1
-ptm = proc.time()
-for(i in 1:dim(hyp_lcc_verts)[1]){
- if (i%%10000==0) print(i)
- ego_id = hyp_lcc_verts$id[i]
- ego_ir = hyp_lcc_verts$ir_no[i]
- alter_irs = union(lcc_edges$from[which(lcc_edges$to==ego_ir)],
- lcc_edges$to[which(lcc_edges$from==ego_ir)])
-# alter_irs = union(ego_ir, alter_irs) # add edges to other infected selves
- alter_ids = hyp_lcc_verts$id[which(hyp_lcc_verts$ir_no %in% alter_irs)]
- alter_ids = alter_ids[ego_id<alter_ids] # avoid double-counting edges
- for(alter_id in alter_ids){
- weight=Inf
- alter_ir = hyp_lcc_verts$ir_no[alter_id]
- if(ego_ir!=alter_ir){
- edge_id = which((lcc_edges$from %in% c(ego_ir,alter_ir) + lcc_edges$to %in% c(ego_ir,alter_ir))==2)
- weight = lcc_edges$weight[edge_id]
- }
- hyp_lcc_edges[ei,] = c(ego_id, alter_id, weight)
- ei = ei + 1
- }
-}
-print(proc.time()-ptm) #10 hrs
-
-##### Create new graph
-hyp_lcc = graph.data.frame(hyp_lcc_edges, directed=FALSE, vertices=hyp_lcc_verts)
-
-save(hyp_lcc_edges, hyp_lcc_verts, hyp_lcc, file='Results/hyper-lcc.RData')
-write.csv(hyp_lcc_edges, file='Results/hyp_lcc_edges.csv')
-write.csv(hyp_lcc_verts[,c('id','spawn.date','vic','vic.type','vic.day')], file='Results/hyp_lcc_verts.csv')
-