# repository viewer navigation: summary | refs | log | tree | commit | diff | stats
# path: root/R Scripts/data-exploration.R
# blob: 22ec60c6cba4749493f00fb697ed89bbc27de5ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
library(igraph)
# Work from the project directory and load the raw data.
# NOTE(review): the path is machine-specific; `person` is presumably one of the
# objects restored by load() -- confirm against the .RData contents.
setwd("~/Documents/Violence Cascades/")
load("Raw Data/lcc.RData")

# Strip the 'weight' edge attribute from `person`, then keep only the vertices
# that belong to its largest connected component.
d <- remove.edge.attribute(person, "weight")
d_comps <- clusters(d)
largest_comp <- which.max(d_comps$csize)
lcc <- induced.subgraph(d, which(d_comps$membership == largest_comp))

##### Small-World Analysis
# Observed statistics for lcc: mean local transitivity (computed with
# isolates = "zero") and average path length.
trl <- mean(transitivity(lcc, type = "local", isolates = "zero"))
apl <- average.path.length(lcc)
# Each statistic is written on its own newline-terminated line so the progress
# output of the loop below does not run into the last value.
cat("Local Transitivity = ", trl, "\n", sep = "")
cat("Average Path Length = ", apl, "\n", sep = "")

# Null model: nsim G(n, m) random graphs with the same number of vertices and
# edges as lcc. Each random graph is reduced to its largest connected component
# before the same two statistics are computed on it.
nsim <- 5
n_vertices <- vcount(lcc)
n_edges <- ecount(lcc)
ER_sim <- data.frame(trl = numeric(nsim), apl = numeric(nsim))
for (i in seq_len(nsim)) {
  print(i)  # progress indicator
  erg <- erdos.renyi.game(n = n_vertices, p.or.m = n_edges, type = "gnm")
  erg_comps <- clusters(erg)  # computed once per simulated graph
  erg <- induced.subgraph(
    erg,
    which(erg_comps$membership == which.max(erg_comps$csize))
  )
  ER_sim[i, "trl"] <- mean(transitivity(erg, type = "local", isolates = "zero"))
  ER_sim[i, "apl"] <- average.path.length(erg)
}

# Summary table:
#   C_dat / L_dat  observed clustering / path length
#   C_ER  / L_ER   means over the simulated random graphs
#   S_ER           mean over simulations of (C_dat / C_i) / (L_dat / L_i)
S <- data.frame(
  C_dat = trl,
  L_dat = apl,
  C_ER = mean(ER_sim$trl),
  L_ER = mean(ER_sim$apl),
  S_ER = mean((trl / ER_sim$trl) / (apl / ER_sim$apl))
)
S

##### Degree Distribution
# Number of vertices at each degree 1..max(degree), on log-log axes.
# degree.distribution() returns relative frequencies starting at degree 0, so
# the first element is dropped and the rest is rescaled to counts.
deg <- degree(lcc)
plot(seq_len(max(deg)), degree.distribution(lcc)[-1] * vcount(lcc),
     log = "xy", col = "#1f78b4", pch = 20, cex.axis = 0.6, cex.lab = 0.6,
     xlab = "Degree", ylab = "Number of vertices", main = "")

# Fit the power law to the degree sequence itself: power.law.fit() expects the
# raw observations, not the vector of relative frequencies produced by
# degree.distribution().
pl <- power.law.fit(deg)

##### small-worldness of lcc
# Global transitivity, mean local transitivity and average path length of the
# observed component, stored in trg / trl / apl.
apl <- average.path.length(lcc)
trg <- transitivity(lcc, type = "global")
trl <- mean(transitivity(lcc, type = "local", isolates = "zero"))

# The same three statistics for a single G(n, m) random graph with as many
# vertices and edges as lcc; the values are displayed, not stored.
# NOTE(review): unlike the simulation loop in the Small-World Analysis section,
# this random graph is not reduced to its largest component -- confirm intended.
erg <- erdos.renyi.game(
  n = vcount(lcc),
  p.or.m = ecount(lcc),
  type = "gnm"
)
transitivity(erg, type = "global")
mean(transitivity(erg, type = "local", isolates = "zero"))
average.path.length(erg)


##### Victims
# Split vertex indices by the `vic` vertex attribute.
vic_flag <- V(lcc)$vic
vic_ids <- which(vic_flag == TRUE)
non_vic_ids <- which(vic_flag == FALSE)

# Histogram (100 breaks) of the `vic_date` attribute, coerced to numeric, for
# the vertices flagged as victims.
vic_days <- as.numeric(V(lcc)$vic_date[vic_ids])
hist(vic_days,
     breaks = 100,
     col = "lightblue",
     xlab = "Day of Study Period",
     main = "Infections During the Study Period")

# n infections
# Count the non-missing entries in each event-day column of lcc_verts in one
# pass, so the per-column counts and the total come from the same values.
# NOTE(review): lcc_verts is not defined in this part of the file; presumably
# it is restored by load() -- confirm.
infection_cols <- c("fatal_day", paste0("nonfatal_day_", 1:5))
n_infections <- vapply(
  infection_cols,
  function(col) sum(!is.na(lcc_verts[[col]])),
  integer(1)
)

# Per-column counts (named by column), followed by the total over all columns.
n_infections
sum(n_infections)