summaryrefslogtreecommitdiffstats
path: root/R Scripts/predict-victims.R
blob: c662651327485979934fca3e7f1615f6e771220d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
library(igraph)
library(data.table)
library(foreach)
library(doMC)
registerDoMC(cores=4)
setwd('~/Documents/Violence Cascades/')
load('Raw Data/lcc.RData')
load('Results/hyper-lcc.RData')
load('Results/dag_dat_all.RData')
source('criminal_cascades/R Scripts/temporal.R')
source('criminal_cascades/R Scripts/structural.R')

##### Initialize data
formula = vic ~ sex + race + age + gang.member + gang.name
lcc_verts$sex = as.factor(lcc_verts$sex)
lcc_verts$race = as.factor(lcc_verts$race)
lcc_verts$age = as.numeric(lcc_verts$age)
lcc_verts$gang.name = as.factor(lcc_verts$gang.name)
dt = data.table(ir=lcc_verts$ir_no, dem=0, cas=0, comb=0)

alpha = 0.0028
delta = 0.06
days = sort(unique(hyp_lcc_verts$vic.day)) # 70:max(hyp_lcc_verts$vic.day, na.rm=T)
days = split(days, ceiling(seq_along(days)/456))
lambdas = c(0, exp(seq(log(0.0000001), log(.95), length.out=50)), 1)
nvics = sum(hyp_lcc_verts$vic)
edges_all = data.table(dag_dat_all)

##### Loop through days
correct_rank = c()
for(i in 1:length(days)){
  ptm = proc.time()
  print(c(i,length(days)))
  ds = unlist(days[i], use.names=F)
  cr = foreach (day = ds, .combine=rbind) %dopar% {
  
    ##### Demographics model
    vics = match(unique(hyp_lcc_verts$ir_no[which(hyp_lcc_verts$vic.day<day)]),lcc_verts$name)
    victims = lcc_verts[,c('vic','sex','race','age','gang.member','gang.name')]
    victims$vic[vics] = TRUE
    victims$vic[-vics] = FALSE
#     fit = lm(formula, data=victims)
    fit = glm(formula, data=victims, family=binomial)
  #   fit = randomForest(formula, data=victims[,1:5], ntree=100)
    probs = predict(fit, newdata=lcc_verts, type='response')
    
    ##### Cascade Model
    edges = edges_all[which(edges_all$t1<day),]
    f = temporal(edges$t1, day, alpha)
    h = structural(delta,edges$dist)
    weights = f*h
    ids = edges$to
    irs = hyp_lcc_verts$ir_no[ids]
    
    risk = data.table(id=ids, ir=irs, weight=weights)
    if (dim(risk)[1]>0) risk = risk[, list(weight=sum(weight)), by=ir] # max or sum
  
    ##### Combined Model
    combined = dt
    combined$dem[match(attr(probs,'name'), dt$ir)] = as.numeric(probs)
    combined$cas[match(risk$ir, dt$ir)] = risk$weight
    
    ##### Gather results
    infected_irs = hyp_lcc_verts$ir_no[which(hyp_lcc_verts$vic.day==day)]
    crday = matrix(nrow=length(infected_irs), ncol=length(lambdas))
    for (lambda in lambdas){
      combined$comb = lambda*combined$dem + (1-lambda)*combined$cas
      c_idx = which(lambdas==lambda)
      crday[,c_idx] = rank(-combined$comb,ties.method='average')[match(infected_irs,combined$ir)]
    }
  
    return(crday)
  }
  correct_rank = rbind(correct_rank,cr)
  print(proc.time()-ptm)
}

# save(correct_rank, file='Results/correct_rank_62815.RData')