1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
library(igraph)
library(data.table)
library(foreach)
library(doMC)
registerDoMC(cores=6)
setwd('~/Documents/Violence Cascades/')
load('Raw Data/lcc.RData')
load('Raw Data/dag_dat_lcc.RData')
load('Raw Data/vic_times_lcc.RData')
load('Raw Data/prior-arrests.RData')
source('criminal_cascades/R Scripts/temporal.R')
source('criminal_cascades/R Scripts/structural.R')
nArrests = function(arrests,day){return(sum(arrests<day))}
##### Set parameters
alpha = 0.00781529533133
beta = 0.00373882477787
start_date = as.Date("2005-12-31")
edges_all = data.table(dag_dat_lcc)
vic_ids = which(lcc_verts$vic==T)
lcc_verts = lcc_verts[,c('id','ir_no','nonfatal_day_1','nonfatal_day_2',
'nonfatal_day_3','nonfatal_day_4','nonfatal_day_5',
'fatal_day','sex','race','dob','gang.member','gang.name')]
lcc_verts$vic.day = NA
lcc_verts$vic.day[vic_ids] = as.numeric(apply(vic_times_lcc[vic_ids,2:7],1,min,na.rm=T))
lcc_verts$age = FALSE
lcc_verts$age[vic_ids] = as.numeric(start_date + lcc_verts$vic.day[vic_ids] - as.Date(lcc_verts$dob[vic_ids]))
lcc_verts$arrests = NA
lcc_verts$arrests[vic_ids] = unlist(mapply(nArrests,prior_arrests[vic_ids],day=lcc_verts$vic.day[vic_ids]))
lcc_verts$vic = FALSE
rownames(lcc_verts) = lcc_verts$ir_no
vic_times_lcc$ir_no = lcc_verts$ir_no
rownames(vic_times_lcc) = vic_times_lcc$ir_no
##### Initialize data
formula = vic ~ sex + race + age + gang.member + gang.name + arrests
lcc_verts$sex = as.factor(lcc_verts$sex)
lcc_verts$race = as.factor(lcc_verts$race)
lcc_verts$gang.name = as.factor(lcc_verts$gang.name)
dt = data.table(ir=lcc_verts$ir_no, dem=0, cas=0, comb=0)
#####
days = Reduce(union, list(lcc_verts$fatal_day,lcc_verts$nonfatal_day_1,
lcc_verts$nonfatal_day_2,lcc_verts$nonfatal_day_3,
lcc_verts$nonfatal_day_4,lcc_verts$nonfatal_day_5))
days = days[!is.na(days)]
days = sort(days)
days = split(days, ceiling(seq_along(days)/92))
lambdas = c(0, exp(seq(log(0.0001), log(0.01), length.out=100)), 1)
##### Loop through days
correct_rank = c()
for(i in 1:length(days)){
ptm = proc.time()
print(c(i,length(days)))
ds = unlist(days[i], use.names=F)
cr = foreach (day = ds, .combine=rbind) %dopar% {
##### Demographics model
victims = lcc_verts
vics = which(victims$vic.day<day)
victims$age[-vics] = as.numeric(start_date + day - 1 - as.Date(victims$dob[-vics]))
victims$arrests[-vics] = unlist(lapply(prior_arrests[-vics],nArrests,day=day))
victims$vic[vics] = TRUE
victims$vic[-vics] = FALSE
fit = glm(formula, data=victims, family=binomial)
lcc_verts_day = victims
lcc_verts_day$age[vics] = as.numeric(start_date + day - 1 - as.Date(victims$dob[vics]))
lcc_verts_day$arrests[vics] = unlist(lapply(prior_arrests[vics],nArrests,day=day))
probs = predict(fit, newdata=lcc_verts_day, type='response')
##### Cascade Model
edges = edges_all[which(edges_all$t1<day),]
f = temporal(edges$t1, day, beta)
h = structural(alpha,edges$dist)
weights = f*h
ids = edges$to
irs = lcc_verts$ir_no[ids]
risk = data.table(id=ids, ir=irs, weight=weights)
if (dim(risk)[1]>0) risk = risk[, list(weight=sum(weight)), by=ir]
##### Combined Model
combined = dt
combined$dem[match(attr(probs,'name'), dt$ir)] = as.numeric(probs)
combined$cas[match(risk$ir, dt$ir)] = risk$weight
##### Gather results
infected_irs = attr(which(rowSums(vic_times_lcc[,2:7]==day,na.rm=T)==1),'name')
crday = matrix(nrow=length(infected_irs), ncol=length(lambdas))
for (lambda in lambdas){
combined$comb = lambda*combined$dem + (1-lambda)*combined$cas
c_idx = which(lambdas==lambda)
crday[,c_idx] = rank(-combined$comb,ties.method='average')[match(infected_irs,combined$ir)]
}
return(crday)
}
correct_rank = rbind(correct_rank,cr)
print(proc.time()-ptm)
}
# save(correct_rank, lambdas, file='Results/correct_rank_91515.RData')
|