summaryrefslogtreecommitdiffstats
path: root/R Scripts/sim-analysis.R
blob: 501048502a9be2a2e22263d3fa17d00dfcec7fee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
library(igraph)
setwd("~/Documents/Violence Cascades/")

load('Results/hyper-lcc.RData')
vic_ids = which(V(hyp_lcc)$vic==1)
n.infections = length(vic_ids)
n.days = max(hyp_lcc_verts$vic.day,na.rm=T) - min(hyp_lcc_verts$vic.day,na.rm=T)
inf.dates = hyp_lcc_verts$vic.day[vic_ids]

birthyears = as.numeric(format(as.Date(hyp_lcc_verts$dob[vic_ids]),'%Y'))

# nbrs = neighborhood(graph, nodes=vic_ids, order=1)
load('Results/vic-nbrs.RData')

n = 150
mean.time = matrix(0,1,n)
med.time = matrix(0,1,n)
mean.50 = matrix(0,1,n)
mean.100 = matrix(0,1,n)
n.vicpairs = matrix(0,1,n)

hyp_lcc = upgrade_graph(hyp_lcc)

ptm = proc.time()
for(q in 1:n){
  if (q%%250==0) print(paste('run:',q))
  graph = hyp_lcc
  vics = vic_ids
  
  sim.dates = rep(0,length(vic_ids))
  for(y in unique(birthyears)){
    year_ids = which(birthyears==y)
    if (length(year_ids)>1) sim.dates[year_ids] = sample(inf.dates[year_ids])
  }
#   {vics = vic_ids; sim.dates = inf.dates} # data
  
  vic.time = rep(NA,14885)
  idx = 1
  for (i in 1:n.infections){
    u = vics[i]
    nbhd = nbrs[[i]]
    nbhd = intersect(vic_ids,nbhd)
    nbhd = nbhd[u<nbhd] # only looks at pairs with u<v to avoid double-counting
    nbhd = setdiff(nbhd,u)
    nbhd = nbhd[hyp_lcc_verts$ir_no[u] != hyp_lcc_verts$ir_no[nbhd]] # don't count infections of the same person
    if (length(nbhd)==0) next
    tu = sim.dates[i]
    tvs = sim.dates[match(nbhd,vic_ids)]
    vic.time[idx:(idx+length(nbhd)-1)] = abs(tu-tvs)
    idx = idx + length(nbhd)
  }

  n.vicpairs[q] = length(vic.time)
  mean.time[q] = mean(vic.time)
  med.time[q] = median(vic.time)
  mean.50[q] = mean(vic.time<=50)
  mean.100[q] = mean(vic.time<=100)
  #     decay.rate[sim,q] = getDecayRate(vic.time) #get lm of histogram. or maybe just cor?
}
print(proc.time()-ptm)


####### plot simulation data #######
data = 634 # 774.5483, 634, 0.07252472, 0.1176774
simdata = med.time
xl = c(0.9*min(min(simdata),data),1.1*max(max(simdata),data))
yl = c(0,max(hist(simdata,15)$density))
hist(simdata,15,xlim=xl,ylim=yl,col=rgb(0,1,0,1/4),freq=F,
     xlab='Neighbors Infected Within 100 Days', main=NULL)
abline(v=data,lwd=4,col='red')


##### inf dates histogram
dates = c(V(lcc)$fatal_date[!is.na(V(lcc)$fatal_date)],
          V(lcc)$nonfatal_date_1[!is.na(V(lcc)$nonfatal_date_1)],
          V(lcc)$nonfatal_date_2[!is.na(V(lcc)$nonfatal_date_2)],
          V(lcc)$nonfatal_date_3[!is.na(V(lcc)$nonfatal_date_3)],
          V(lcc)$nonfatal_date_4[!is.na(V(lcc)$nonfatal_date_4)],
          V(lcc)$nonfatal_date_5[!is.na(V(lcc)$nonfatal_date_5)])
dates = as.Date(dates)
hist(dates, breaks='months',freq=T,col='lightblue',format='%m/%Y',
     xlab='Month',ylab='Number of Shootings',main='')