summaryrefslogtreecommitdiffstats
path: root/R Scripts
diff options
context:
space:
mode:
authorBen Green <bgreen@g.harvard.edu>2015-09-26 16:26:19 -0400
committerBen Green <bgreen@g.harvard.edu>2015-09-26 16:26:21 -0400
commit82bcc4121025f1212b3fcc07224cababa410f32a (patch)
treefd16ecc292a6f0ae9dd11ab8ec6ad25693648935 /R Scripts
parent786714e2a9acdbd650092bcaf351128dbebd160d (diff)
downloadcriminal_cascades-82bcc4121025f1212b3fcc07224cababa410f32a.tar.gz
demographic stats
Diffstat (limited to 'R Scripts')
-rwxr-xr-xR Scripts/analyze-cascades.R4
-rw-r--r--R Scripts/demographics.R41
-rw-r--r--R Scripts/fit-background.R4
-rw-r--r--R Scripts/plot-data-prep.R20
4 files changed, 46 insertions, 23 deletions
diff --git a/R Scripts/analyze-cascades.R b/R Scripts/analyze-cascades.R
index f03af36..02f9349 100755
--- a/R Scripts/analyze-cascades.R
+++ b/R Scripts/analyze-cascades.R
@@ -11,8 +11,10 @@ sizes = 1:max(data$V1)
counts = rep(0,max(sizes))
counts[data$V1] = data$V2
pl = power.law.fit(counts)
-plot(sizes,counts,log='xy',pch=20,col='#377EB8',
+plot(sizes,counts,log='xy',pch=20,col='#1f78b4',xaxt='n',yaxt='n',
xlab='Size of Cascade', ylab='Number of Cascades', main='')
+axis(1,at=c(1,5,10,50,100,500))
+axis(2,at=c(1,10,100,1000))
sizes_pl = displ$new(counts[counts>1])
est = estimate_xmin(sizes_pl)
diff --git a/R Scripts/demographics.R b/R Scripts/demographics.R
index 349eda9..196d485 100644
--- a/R Scripts/demographics.R
+++ b/R Scripts/demographics.R
@@ -2,23 +2,42 @@ library(igraph)
setwd('~/Documents/Violence Cascades/')
load('Raw Data/lcc.RData')
+dim(lcc_verts)[1]
length(vic_ids)
+dim(lcc_verts)[1] - length(vic_ids)
-mean(lcc_verts$sex=="M")
-mean(lcc_verts$sex[vic_ids]=="M")
+birthyears = as.numeric(format(as.Date(lcc_verts$dob),'%Y'))
+mean(birthyears)
+mean(birthyears[vic_ids])
+mean(birthyears[-vic_ids])
-mean(lcc_verts$race=="BLK")
+mean(lcc_verts$sex=="M")*100
+mean(lcc_verts$sex[vic_ids]=="M")*100
+mean(lcc_verts$sex[-vic_ids]=="M")*100
-mean(lcc_verts$race %in% c("WHI", "WWH"))
+mean(lcc_verts$race=="BLK")*100
+mean(lcc_verts$race[vic_ids]=="BLK")*100
+mean(lcc_verts$race[-vic_ids]=="BLK")*100
-mean(lcc_verts$gang.member)
+mean(lcc_verts$race %in% c("WHI", "WWH"))*100
+mean(lcc_verts$race[vic_ids] %in% c("WHI", "WWH"))*100
+mean(lcc_verts$race[-vic_ids] %in% c("WHI", "WWH"))*100
+
+mean(lcc_verts$gang.member)*100
+mean(lcc_verts$gang.member[vic_ids])*100
+mean(lcc_verts$gang.member[-vic_ids])*100
d = degree(lcc)
mean(d)
+mean(d[vic_ids])
+mean(d[-vic_ids])
+
+is_vic = lcc_verts$vic
+vic_nbrs = function(neighbors){ mean(is_vic[neighbors[-1]]) }
+nbrs = neighborhood(lcc, order=1)
+vic_frac = lapply(nbrs, vic_nbrs)
+vic_frac = unlist(vic_frac)
-vic_frac = rep(NA,vcount(lcc))
-for(i in 1:vcount(lcc)){
- if (i %% 10000==0) print(i)
- neighbors = unlist(neighborhood(lcc, nodes=i, order=2, mode='all'))[-1]
- vic_frac[i] = sum(lcc_verts$vic[neighbors])/length(neighbors)
-} \ No newline at end of file
+mean(vic_frac)
+mean(vic_frac[vic_ids])
+mean(vic_frac[-vic_ids])
diff --git a/R Scripts/fit-background.R b/R Scripts/fit-background.R
index 5bc1ce0..a5f935b 100644
--- a/R Scripts/fit-background.R
+++ b/R Scripts/fit-background.R
@@ -30,8 +30,8 @@ res = nls(formula=fit_form, data=infs, start=list(lambda=3, A=2, phi=4))
co = coef(res); co
plot(t)
-plot(days,counts,pch=20,cex=0.9,col='#1f78b4',
+plot(days,counts,pch=20,cex=0.5,col='#1f78b4',
xlab='Day',ylab='Number of Infections')
curve(fit(x, lambda=co["lambda"], A=co["A"], phi=co["phi"]),
- add=TRUE ,lwd=5, col="#1b9e77")
+ add=TRUE ,lwd=3, col="#1a9850")
diff --git a/R Scripts/plot-data-prep.R b/R Scripts/plot-data-prep.R
index 937512a..27c2416 100644
--- a/R Scripts/plot-data-prep.R
+++ b/R Scripts/plot-data-prep.R
@@ -8,15 +8,15 @@ library(gridExtra)
# create bipartite graph
edges = c(1,6,
1,7,
- 1,10,
+ 1,9,
2,8,
3,7,
3,8,
- 3,10,
+ 3,9,
4,7,
4,8,
5,6,
- 5,9)
+ 5,10)
g = graph.bipartite(c(rep(T,5),rep(F,5)),edges)
par(mfrow=c(1,3), mar=c(1,1,1,1))
@@ -41,7 +41,7 @@ plot(g, layout=layout[,2:1], vertex.color=cols,edge.color='black',
# (C) unipartite person-person graph
par(mar=rep(1,4))
g2 = bipartite.projection(g)$proj1
-layout.g2 = matrix(c(0,.33,-.75,-.33,0,-1,0,1,.75,-.33),ncol=2,byrow=T)
+layout.g2 = matrix(c(0,.33,-.75,-.33,0,-1,.75,-.33,0,1),ncol=2,byrow=T)
plot(g2, vertex.color='#1f78b4', edge.color='black',layout=layout.g2,rescale=F,
vertex.frame.color=NA,vertex.size=30,vertex.label=toupper(letters[1:5]),
vertex.label.color='white',vertex.label.family='sans')
@@ -50,10 +50,11 @@ plot(g2, vertex.color='#1f78b4', edge.color='black',layout=layout.g2,rescale=F,
##############################################################################
#### Hawkes Process Diagram
layout(matrix(c(1,2,3,3,3,3), ncol=2, byrow = TRUE))
+par(mar=c(1,1,1,1))
vics = data.frame(IC = toupper(letters[1:5]),
- Victim = c(T,F,F,F,T),
- Day = c(2,NA,NA,NA,4))
+ Victim = c('TRUE','FALSE','FALSE','TRUE','FALSE'),
+ Day = c(2,NA,NA,4,NA))
colnames(vics) = c('Identity Code (IC)','Victim','Infection Date')
# A
@@ -78,7 +79,7 @@ rate = function(x){
print(x)
r = rep(base+0.3,length(x))
for (time in times){
- r[x>=time] = r[x>=time] + 1.5*exp((time-x[x>=time])/2)
+ r[x>=time] = r[x>=time] + 1.5*exp((time-x[x>=time])/2.5)
}
return(r)
}
@@ -86,7 +87,7 @@ rate = function(x){
tmin = 1
tmax = 6
n = 1000
-plot(rate,from=tmin,to=tmax,ylim=c(0,15),lwd=0,bty='l',col='white',
+plot(rate,from=tmin,to=tmax,ylim=c(0,13),lwd=0,bty='l',col='white',
yaxt='n',xlab='Date',ylab='Infection Rate')
for(i in 1:vcount(g2)){
base <<- 3*(5-i)
@@ -98,5 +99,6 @@ for(i in 1:vcount(g2)){
polygon(xs,ys,col='#1f78b4',border=NA)
}
axis(2, at=seq(0.3,12.3,3), lab=toupper(letters[seq(5,1,-1)]), tick=F, las=T,lwd=0)
-
+polygon(c(1.98,1.98,2.02,2.02),c(12,14,14,12),col='#e41a1c',border=NA)
+polygon(c(3.98,3.98,4.02,4.02),c(3,5,5,3),col='#e41a1c',border=NA)