diff options
| author | Ben Green <bgreen@g.harvard.edu> | 2015-08-21 13:06:12 -0400 |
|---|---|---|
| committer | Thibaut Horel <thibaut.horel@gmail.com> | 2015-08-22 17:01:37 -0700 |
| commit | ef61ece9773e8a865b57f60ca1e1b9faa903af23 (patch) | |
| tree | 577ff3fad1750cc824c1cb732bc05046c36efc11 /R Scripts/data-prep.R | |
| parent | 542012fc5ab0b373d85d1d13852daf834193bd33 (diff) | |
| download | criminal_cascades-ef61ece9773e8a865b57f60ca1e1b9faa903af23.tar.gz | |
added age to sim analysis and updated data generation for new model
Diffstat (limited to 'R Scripts/data-prep.R')
| -rwxr-xr-x | R Scripts/data-prep.R | 24 |
1 files changed, 12 insertions, 12 deletions
```diff
diff --git a/R Scripts/data-prep.R b/R Scripts/data-prep.R
index 3104ea2..2994575 100755
--- a/R Scripts/data-prep.R
+++ b/R Scripts/data-prep.R
@@ -1,5 +1,5 @@
 library(igraph)
-setwd('~/Documents/Cascade Project/Raw Data/')
+setwd('~/Documents/Violence Cascades/Raw Data/')
 
 #================
 # (1) load data
@@ -99,14 +99,6 @@ start_date = as.Date("2005-12-31")
 ## Get first arrest date in the study period for each person
 sub.arrests$dates = as.Date(arrests$arrest_date,format='%m/%d/%Y')
 sub.arrests = sub.arrests[order(sub.arrests$dates),]
-sub.arrests = sub.arrests[match(unique(sub.arrests$individuals),sub.arrests$individuals),]
-arrest.dates = as.Date(sub.arrests$dates,format='%m/%d/%y')
-arrest.days = as.numeric(arrest.dates-start_date)
-V(person)$arrest.day = arrest.days[match(V(person)$name, sub.arrests$individuals)]
-
-V(person)$age.arrest = floor(difftime(arrest.dates[match(V(person)$name, sub.arrests$individuals)],
-                                      V(person)$dob,
-                                      units='days')/365.25)
 
 
 #===================================================================
@@ -192,22 +184,30 @@ V(person)$faction.name <- as.character(gangs$FACTION_NAME[match_vector])
 
 #===================================================================
 # create id number
-V(person)$id = rank(V(person)$name)
 
 # save data
 # person = remove.edge.attribute(person,'weight')
 # person_data = get.data.frame(person,'both')
-save(person, file="chi-19mar2015.RData")
+save(person, file="chi-19aug2015.RData")
 
 #===================================================================
 # get LCC of the network
 lcc = induced.subgraph(person,which(clusters(person)$membership==which.max(clusters(person)$csize)))
+V(lcc)$id = rank(V(lcc)$name)
 V(lcc)$ir_no = V(lcc)$name
+V(lcc)$name = V(lcc)$id
 vic_ids = which(V(lcc)$vic)
+lcc_edges = as_data_frame(lcc,'edges')
+
+# update lcc_verts
 lcc_verts = get.data.frame(lcc,'vertices')
-lcc_edges = get.data.frame(lcc,'edges')
+lcc_verts = lcc_verts[,c(1,23,24,2:22)]
+
+# save file
 save(lcc, lcc_verts, lcc_edges, vic_ids, file="lcc.RData")
 
+#####
+# old stuff
 lcc_data = get.data.frame(lcc,'both')
 lcc = set.vertex.attribute(graph=lcc, name='name', value=V(lcc)$id)
 row.names(lcc_data$vertices) = lcc_data$vertices$id
```
