library(igraph)
setwd("~/Documents/Cascade Project/")
load('Raw Data/lcc.RData')

lcc_verts = get.data.frame(lcc,'vertices')
lcc_edges = get.data.frame(lcc,'edges')

##### Create new vertices dataframe
# Every person (row of lcc_verts) is expanded into one or more rows of
# hyp_lcc_verts:
#   * one row per nonfatal shooting (vic.type 'nonfatal'),
#   * one row for a fatal shooting, if any (vic.type 'fatal'),
#   * one non-victim row (vic FALSE) for people with nonfatal shootings but no
#     fatal one, and for people who were never shot.
# spawn.date is 0 for a person's first row and otherwise the day of the
# preceding nonfatal shooting (the latest nonfatal day for the fatal /
# non-victim row that follows the nonfatal ones).
cols = c('id','ir_no','sex','race','dob','age.arrest','arrest.day',
         'gang.member','gang.name','faction.name')
hyp_lcc_verts = data.frame(matrix(ncol=length(cols)+1,nrow=0))
colnames(hyp_lcc_verts) = c(cols,'spawn.date')

# Columns of lcc_verts holding the day of the 1st..5th nonfatal shooting.
nonfatal_day_cols = paste0('nonfatal_day_', 1:5)

# Day of the k-th nonfatal shooting of person i.
nonfatal_day = function(i, k) {
  lcc_verts[[nonfatal_day_cols[k]]][i]
}

# Latest recorded nonfatal shooting day of person i (NAs ignored).
last_nonfatal_day = function(i) {
  days = lapply(nonfatal_day_cols, function(nm) lcc_verts[[nm]][i])
  do.call(max, c(days, list(na.rm=TRUE)))
}

# Write one node into row `row` of `verts`: copy the `cols` fields from the
# one-row data frame `src`, use the row number as the new id, and fill in the
# victimization fields. Returns the updated data frame.
add_hyp_node = function(verts, row, src, vic, vic_type, vic_day, spawn_date) {
  verts[row,cols] = src
  verts$id[row] = row
  verts$vic[row] = vic
  verts$vic.type[row] = vic_type
  verts$vic.day[row] = vic_day
  verts$spawn.date[row] = spawn_date
  verts
}

ptm = proc.time()
ri = 1
# seq_len() rather than 1:n so that an empty vertex table yields no iterations.
for (i in seq_len(nrow(lcc_verts))){
  if (i%%10000==0) print(i)
  person = lcc_verts[i,cols]
  if (lcc_verts$vic[i]){
    if (lcc_verts$vic.nonfatal[i]>0){
      # create nodes for each nonfatal shooting
      for (nf in 1:lcc_verts$vic.nonfatal[i]){
        if (nf > length(nonfatal_day_cols)){
          # Only five nonfatal day columns are read; any further shooting
          # gets a row with vic.day and spawn.date left as NA.
          vic_day = NA
          spawn = NA
        } else {
          vic_day = nonfatal_day(i, nf)
          # The first shooting spawns at day 0, later ones at the previous
          # nonfatal shooting.
          spawn = if (nf==1) 0 else nonfatal_day(i, nf-1)
        }
        hyp_lcc_verts = add_hyp_node(hyp_lcc_verts, ri, person,
                                     TRUE, 'nonfatal', vic_day, spawn)
        ri = ri+1
      }
      # if no fatal infection, create uninfected duplicate
      if (!lcc_verts$vic.fatal[i]){
        hyp_lcc_verts = add_hyp_node(hyp_lcc_verts, ri, person,
                                     FALSE, NA, NA, last_nonfatal_day(i))
        ri = ri+1
      }
    }
    # create a node for each fatal shooting
    # if also nonfatal shootings, spawn at last nonfatal shooting
    if (lcc_verts$vic.fatal[i]){
      if (lcc_verts$vic.nonfatal[i]>0){
        spawn = last_nonfatal_day(i)
      } else {
        spawn = 0
      }
      hyp_lcc_verts = add_hyp_node(hyp_lcc_verts, ri, person,
                                   TRUE, 'fatal', lcc_verts$fatal_day[i], spawn)
      ri = ri+1
    }
  }
  # create an uninfected node for each uninfected person
  else{
    hyp_lcc_verts = add_hyp_node(hyp_lcc_verts, ri, person,
                                 FALSE, NA, NA, 0)
    ri = ri+1
  }
}
print(proc.time()-ptm) # 1.5 hrs
row.names(hyp_lcc_verts) = NULL

# Sanity check: expected row count = fatal shootings + nonfatal shootings
# + non-victim duplicates (nonfatal victims without a fatal shooting)
# + people who were never shot.
n.nodes = sum(sum(V(lcc)$vic.fatal),sum(V(lcc)$vic.nonfatal),
              sum(V(lcc)$vic.nonfatal>0 & !V(lcc)$vic.fatal),sum(V(lcc)$vic==FALSE))
stopifnot(nrow(hyp_lcc_verts) == n.nodes)

##### Create new edgelist
print('Edges')
hyp_lcc_edges = data.frame(from=0, to=0, weight=0)
ei = 1
ptm = proc.time()
for(i in 1:dim(hyp_lcc_verts)[1]){
  if (i%%10000==0) print(i)
  ego_id = hyp_lcc_verts$id[i]
  ego_ir = hyp_lcc_verts$ir_no[i]
  alter_irs = union(lcc_edges$from[which(lcc_edges$to==ego_ir)],
                    lcc_edges$to[which(lcc_edges$from==ego_ir)])
  # alter_irs = union(ego_ir, alter_irs) # add edges to other infected selves
  alter_ids = hyp_lcc_verts$id[which(hyp_lcc_verts$ir_no %in% alter_irs)]
  alter_ids = alter_ids[ego_id