diff options
Diffstat (limited to 'R Scripts/create-hyper-lcc.R')
| -rw-r--r-- | R Scripts/create-hyper-lcc.R | 126 |
1 files changed, 126 insertions, 0 deletions
# R Scripts/create-hyper-lcc.R
#
# Expands the network stored in `lcc` into a "hyper" network in which every
# person is replaced by one node per shooting event:
#   * one node per nonfatal shooting (in chronological slot order 1..5),
#   * one uninfected duplicate for victims with nonfatal shootings only,
#   * one node for a fatal shooting,
#   * one uninfected node for every non-victim.
# Each node carries a `spawn.date`: 0 for a person's first node, otherwise the
# day of the preceding nonfatal shooting. Every edge of `lcc` is then copied
# between all hyper nodes of its two endpoints.
#
# Outputs: Results/hyper-lcc.RData, Results/hyp_lcc_edges.csv,
#          Results/hyp_lcc_verts.csv

library(igraph)
setwd("~/Documents/Cascade Project/")
load("Raw Data/lcc.RData")

lcc_verts <- get.data.frame(lcc, "vertices")
lcc_edges <- get.data.frame(lcc, "edges")


##### Create new vertices dataframe
cols <- c("id", "ir_no", "sex", "race", "dob", "age.arrest", "arrest.day",
          "gang.member", "gang.name", "faction.name")

# Columns holding the day of the 1st..5th nonfatal shooting of each person.
day_cols <- paste0("nonfatal_day_", 1:5)
nonfatal_days <- as.matrix(lcc_verts[, day_cols])

is_vic <- as.logical(lcc_verts$vic)
is_fatal <- as.logical(lcc_verts$vic.fatal)
n_nonfatal <- lcc_verts$vic.nonfatal
fatal_day <- lcc_verts$fatal_day

# Fail loudly instead of silently leaving vic.day unset when a victim has
# more nonfatal shootings than there are day columns.
stopifnot(all(n_nonfatal[is_vic] <= length(day_cols)))

# Expected number of hyper nodes: one per fatal shooting, one per nonfatal
# shooting, one uninfected duplicate per nonfatal-only victim, one per
# non-victim. Also used to preallocate the per-node vectors below.
n.nodes <- sum(sum(V(lcc)$vic.fatal), sum(V(lcc)$vic.nonfatal),
               sum(V(lcc)$vic.nonfatal > 0 & !V(lcc)$vic.fatal),
               sum(V(lcc)$vic == FALSE))

# Per-node attributes are collected in plain vectors and assembled into a
# data frame once; growing a data frame row by row copies it on every append.
src <- integer(n.nodes)                    # row of lcc_verts each node copies
node_vic <- logical(n.nodes)               # default FALSE (uninfected)
node_type <- rep(NA_character_, n.nodes)   # default NA (uninfected)
node_day <- rep(NA_real_, n.nodes)         # default NA (uninfected)
node_spawn <- numeric(n.nodes)             # default 0 (first node of a person)

ptm <- proc.time()
ri <- 0L
for (i in seq_len(nrow(lcc_verts))) {
  if (i %% 10000 == 0) print(i)
  if (is_vic[i]) {
    last_nonfatal <- 0
    if (n_nonfatal[i] > 0) {
      # create nodes for each nonfatal shooting
      for (nf in seq_len(n_nonfatal[i])) {
        ri <- ri + 1L
        src[ri] <- i
        node_vic[ri] <- TRUE
        node_type[ri] <- "nonfatal"
        node_day[ri] <- nonfatal_days[i, nf]
        # the first shooting spawns at day 0, later ones at the previous one
        node_spawn[ri] <- if (nf == 1) 0 else nonfatal_days[i, nf - 1]
      }
      last_nonfatal <- max(nonfatal_days[i, ], na.rm = TRUE)
      # if no fatal infection, create uninfected duplicate
      if (!is_fatal[i]) {
        ri <- ri + 1L
        src[ri] <- i
        node_vic[ri] <- FALSE
        node_type[ri] <- NA
        node_day[ri] <- NA
        node_spawn[ri] <- last_nonfatal
      }
    }
    # create a node for each fatal shooting
    # if also nonfatal shootings, spawn at last nonfatal shooting
    if (is_fatal[i]) {
      ri <- ri + 1L
      src[ri] <- i
      node_vic[ri] <- TRUE
      node_type[ri] <- "fatal"
      node_day[ri] <- fatal_day[i]
      node_spawn[ri] <- last_nonfatal
    }
  } else {
    # create an uninfected node for each uninfected person
    ri <- ri + 1L
    src[ri] <- i
    node_vic[ri] <- FALSE
    node_type[ri] <- NA
    node_day[ri] <- NA
    node_spawn[ri] <- 0
  }
}
print(proc.time() - ptm)
stopifnot(ri == n.nodes)

hyp_lcc_verts <- lcc_verts[src, cols]
# Integer ids: a double such as 100000 is rendered as "1e+05" by
# as.character() and write.csv(); an integer is not.
hyp_lcc_verts$id <- seq_len(n.nodes)
hyp_lcc_verts$spawn.date <- node_spawn
hyp_lcc_verts$vic <- node_vic
hyp_lcc_verts$vic.type <- node_type
hyp_lcc_verts$vic.day <- node_day
row.names(hyp_lcc_verts) <- NULL
stopifnot(dim(hyp_lcc_verts)[1] == n.nodes)

##### Create new edgelist
print("Edges")
ptm <- proc.time()

# Join every original edge to all hyper nodes of its `from` person and all
# hyper nodes of its `to` person; each resulting row is one hyper edge.
# Edge endpoints are matched against `ir_no`, as in the original lookup.
from_copies <- data.frame(from = hyp_lcc_verts$ir_no,
                          from_id = hyp_lcc_verts$id)
to_copies <- data.frame(to = hyp_lcc_verts$ir_no,
                        to_id = hyp_lcc_verts$id)
pairs <- merge(lcc_edges[, c("from", "to", "weight")], from_copies,
               by = "from")
pairs <- merge(pairs, to_copies, by = "to")

# Edges between two nodes of the same person (only possible if `lcc` has a
# self-loop) get infinite weight. For those, the join yields each pair in
# both orders plus node-with-itself rows, so keep only from_id < to_id.
# NOTE(review): parallel edges in `lcc` are kept as parallel hyper edges;
# the previous row-by-row version stopped with an error in that case.
same_person <- pairs$from == pairs$to
edge_weight <- as.numeric(pairs$weight)
edge_weight[same_person] <- Inf
keep <- !same_person | pairs$from_id < pairs$to_id

# Store each undirected edge once with the smaller id first, ordered by
# (from, to).
lo <- pmin(pairs$from_id, pairs$to_id)[keep]
hi <- pmax(pairs$from_id, pairs$to_id)[keep]
edge_weight <- edge_weight[keep]
ord <- order(lo, hi)
hyp_lcc_edges <- data.frame(from = lo[ord], to = hi[ord],
                            weight = edge_weight[ord])
row.names(hyp_lcc_edges) <- NULL
print(proc.time() - ptm)

##### Create new graph
# The first vertex column (`id`) is what the edge endpoints refer to.
hyp_lcc <- graph.data.frame(hyp_lcc_edges, directed = FALSE,
                            vertices = hyp_lcc_verts)

save(hyp_lcc_edges, hyp_lcc_verts, hyp_lcc, file = "Results/hyper-lcc.RData")
write.csv(hyp_lcc_edges, file = "Results/hyp_lcc_edges.csv")
write.csv(hyp_lcc_verts[, c("id", "spawn.date", "vic", "vic.type", "vic.day")],
          file = "Results/hyp_lcc_verts.csv")
