summaryrefslogtreecommitdiffstats
path: root/R Scripts/find-cascades.R
blob: 0a69c5ca43f0eaae51e8f1efc2ee1f0555e5360d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
library(igraph)
setwd("~/Documents/Cascade Project/")
load('Results/hyper-lcc.RData')
load('Results/dag_dat_vics.RData')
source('criminal_cascades/R Scripts/temporal.R')
# source('Scripts/homophily.R')
source('criminal_cascades/R Scripts/structural.R')

vic_ids = which(V(hyp_lcc)$vic==TRUE)

#### Initialize model parameters
alpha = 1/21
# gamma = 0.3
delta = 0.001
beta = 0.00796964464237

##### Get weights
# find max n days where infection possible given alpha
edges = dag_dat_vics
# edges = edges[(edges$t2-edges$t1)<300,]

p_t = temporal(edges$t1, edges$t2, alpha)
p_s = structural(delta, edges$dist)
p = p_s * p_t
p_tilde = 1 - p_s + p_s * temporal(edges$t1, edges$t2+1, alpha)
weights = p/p_tilde

##### Find transmission edges
# load('Results/lm-probs.RData')
# names = attr(lm.probs,'names')
# probs = as.numeric(lm.probs)
# betas = probs[match(hyp_lcc_verts$ir_no[edges$to],names)]
# betas = 0.055
thresh = beta/(1-beta)

realized = c()
# edges = edges[weights>thresh,]
# weights = weights[weights>thresh]
for (u in vic_ids){
  max_prob = -1
  max_edge = NULL
  Ei = which(edges$to==u)
  for(i in Ei){
    if(weights[i]>thresh){
      prob = p[i] * prod(p_tilde[setdiff(Ei,i)])
      if(prob>max_prob){
        max_prob = prob
        max_edge = i
      }
    }
  }
  realized = c(realized, max_edge)
}
#   if (length(Ei)>0){
#     max_edge = Ei[which.max(weights[Ei])] # how to deal with ties????
#     realized = c(realized, max_edge)
#   }

##### Create forest of cascades
# dag is all of the victim nodes but with only transmission edges, or don't index by vic_ids
dag = graph.data.frame(edges[realized,1:2], directed=TRUE, vertices=hyp_lcc_verts[vic_ids,])
dag = set.edge.attribute(dag,'weight',value=weights[realized])
table(clusters(dag)$csize)

t=table(clusters(dag)$csize)
sizes = as.numeric(attr(t,'name'))
counts = as.numeric(t)
plot(sizes,counts,log='xy',type='o',lwd=3)
n.seeds = sum(counts)
n.nodes = sum(sizes*counts) # for vics case is just length(vic_ids)
n.infections = n.nodes-n.seeds

# dag = graph.edgelist(as.matrix(edges[realized,1:2]))