summaryrefslogtreecommitdiffstats
path: root/R Scripts/find-parents.R
blob: 3ec880920c181a1b1eb9552bd4217e693e9f1295 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# library(igraph)
# setwd("~/Documents/Cascade Project/")
# load('Results/hyper-lcc.RData')
# load('Results/dag_dat_vics.RData')
# source('criminal_cascades/R Scripts/temporal.R')
# source('criminal_cascades/R Scripts/structural.R')

##### Model parameters, set to the values ml2 found
alpha <- 0.061
delta <- 0.082

##### Compute a weight for every candidate edge
# Keep only the rows of dag_dat_test whose t2 is not missing.
edges <- dag_dat_test[!is.na(dag_dat_test$t2), ]

# Gap between the two time stamps of each edge.
dt <- edges$t2 - edges$t1

# p_s comes from structural() (defined outside this script) applied to the
# edge distances; p_t depends only on alpha and the time gap.
p_s <- structural(delta, edges$dist)
p_t <- (exp(alpha) - 1) * exp(-alpha * dt)
p <- p_s * p_t

# Denominator of the weight, then the per-edge weight itself, stored as a
# new column on edges.
p_tilde <- 1 - p_s + p_s * exp(-alpha * dt)
weights <- p / p_tilde
edges$weight <- weights

##### Find most likely parents
# For every victim that is not a seed, sort its candidate parents (edges
# pointing at it) by weight, largest first, and record the position of the
# infector stored on the graph within that ordering.
vics <- setdiff(vic_ids, seeds)

# The vertex attribute does not change inside the loop, so read it once.
# NOTE(review): it is indexed by victim id below, which assumes victim ids are
# valid positions into V(g) -- confirm against how g was built.
infectors <- V(g)$infector

# Preallocate one row per victim instead of growing a dummy one-row frame.
# par_rank stays NA when the stored infector is not among the candidates
# (previously this made the row assignment fail with a length mismatch).
parents <- data.frame(
  vic = vics,
  Npars = rep(NA_integer_, length(vics)),
  par_rank = rep(NA_integer_, length(vics))
)

for (i in seq_along(vics)) {
  u <- vics[i]
  u_parents <- edges[edges$to == u, ]
  u_parents <- u_parents[order(u_parents$weight, decreasing = TRUE), ]
  parents$Npars[i] <- nrow(u_parents)

  # which() can return zero hits (infector missing or NA) or several hits
  # (duplicate candidate rows); keep the best-ranked hit, or leave NA.
  infector_rank <- which(u_parents$from == infectors[u])
  if (length(infector_rank) > 0) {
    parents$par_rank[i] <- infector_rank[1]
  }
}

##### Summary statistics on how well the weight ordering places the stored
##### infector: median rank among victims with more than 9 / more than 99
##### candidate parents. Victims without a rank (NA) are ignored.
median(parents$par_rank[parents$Npars > 9], na.rm = TRUE)
median(parents$par_rank[parents$Npars > 99], na.rm = TRUE)