summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBen Green <bgreen@g.harvard.edu>2015-08-30 20:59:39 -0400
committerBen Green <bgreen@g.harvard.edu>2015-08-30 20:59:39 -0400
commit485ebd6d5b34ff2d8f6761f2dda44ea3e2b32aa6 (patch)
tree96e610b53c18f73a155974964e17f98bd1ade871
parentd1e9e8cfabfee86fd7ac9e108fab0d463da24ce8 (diff)
downloadcriminal_cascades-485ebd6d5b34ff2d8f6761f2dda44ea3e2b32aa6.tar.gz
wrote script to find background rate using nls
-rw-r--r--R Scripts/fit-background.R32
-rwxr-xr-xR Scripts/generate-dag-dat-unweighted.R59
-rwxr-xr-xR Scripts/sim-analysis.R8
3 files changed, 95 insertions, 4 deletions
diff --git a/R Scripts/fit-background.R b/R Scripts/fit-background.R
new file mode 100644
index 0000000..d385e7f
--- /dev/null
+++ b/R Scripts/fit-background.R
@@ -0,0 +1,32 @@
+library(igraph)
+setwd("~/Documents/Violence Cascades/")
+load('Results/hyper-lcc.RData')
+
+# load data
+hyp_lcc = upgrade_graph(hyp_lcc)
+vic_ids = which(V(hyp_lcc)$vic==TRUE)
+non_vic_ids = which(V(hyp_lcc)$vic==FALSE)
+
+hist(as.numeric(hyp_lcc_verts$vic.day[vic_ids]),2000,col='lightblue',
+ xlab='Day of Study Period',main='Infections During the Study Period')
+
+# get infection counts per day
+days = 1:3012
+t = table(factor(hyp_lcc_verts$vic.day[vic_ids],levels=days))
+counts = as.vector(t)
+infs = data.frame(days,counts)
+
+# define background function
+fit = function(x, mu, A, phi) {mu + A*(sin((pi/365)*x+phi))^2}
+fit_form = counts ~ mu + A*(sin((pi/365)*days+phi))^2
+
+# explore data
+plot(days,counts)
+curve(fit(x, mu=1, A=12, phi=2.8), add=TRUE ,lwd=2, col="steelblue")
+
+res = nls(formula=fit_form, data=infs, start=list(mu=1, A=12, phi=2.8))
+co = coef(res); co
+
+plot(t)
+plot(days,counts,pch=20,cex=0.4,xlab='Day',ylab='Number of Infections')
+curve(fit(x, mu=co["mu"], A=co["A"], phi=co["phi"]), add=TRUE ,lwd=5, col="steelblue")
diff --git a/R Scripts/generate-dag-dat-unweighted.R b/R Scripts/generate-dag-dat-unweighted.R
new file mode 100755
index 0000000..61db26c
--- /dev/null
+++ b/R Scripts/generate-dag-dat-unweighted.R
@@ -0,0 +1,59 @@
+library(igraph)
+setwd("~/Documents/Violence Cascades/")
+load('Raw Data/lcc.RData')
+
+library(foreach)
+library(doMC)
+registerDoMC(cores=4)
+
+lcc2 = remove.edge.attribute(lcc,'weight')
+vics = split(vic_ids, ceiling(seq_along(vic_ids)/500))
+dag_dat_lcc = c()
+for(i in 1:length(vics)){
+ ptm = proc.time()
+ print(c(i,length(vics)))
+ vic_ids = unlist(vics[i], use.names=F)
+
+ ddl = foreach (u = vic_ids, .combine=rbind) %dopar% {
+ if ((which(vic_ids==u) %% 100)==0) print(which(vic_ids==u))
+
+ nbhd = unlist(neighborhood(lcc2,nodes=u,order=3)) # get nodes within neighborhood
+ nbhd = nbhd[-1] # don't want to include u in the neighborhood
+ dists = as.numeric(shortest.paths(lcc2,u,nbhd))
+
+ # make edge for every infection
+ ddlu = data.frame(matrix(nrow=1,ncol=4))
+ ei = 1
+ for (j in c(17:21,16)){
+ tu = lcc_verts[u,j]
+ if (is.na(tu)) next
+
+ ddlu[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd,
+ rep(tu,length(nbhd)), dists, row.names=NULL)
+ ei = ei + length(nbhd)
+ }
+
+ return(ddlu)
+ }
+
+ dag_dat_lcc = rbind(dag_dat_lcc,ddl)
+ print(proc.time()-ptm)
+}
+
+colnames(dag_dat_lcc) = c('from','to','t1','dist')
+rownames(dag_dat_lcc) = NULL
+
+save(dag_dat_lcc, file='Results/dag_dat_lcc.RData')
+write.csv(dag_dat_lcc, file='Results/dag_dat_lcc.csv')
+
+# dag_dat_vics = dag_dat_lcc[!is.na(dag_dat_lcc$t2),]
+# save(dag_dat_lcc_vics, file='Results/dag_dat_lcc_vics.RData')
+# write.csv(dag_dat_lcc_vics, file='Results/dag_dat_lcc_vics.csv')
+
+####
+# create lcc_vic_times
+vic_times_lcc = lcc_verts[,c('name','nonfatal_day_1','nonfatal_day_2',
+ 'nonfatal_day_3','nonfatal_day_4',
+ 'nonfatal_day_5','fatal_day')]
+save(vic_times_lcc, file='Results/vic_times_lcc.RData')
+write.csv(vic_times_lcc, file='Results/vic_times_lcc.csv')
diff --git a/R Scripts/sim-analysis.R b/R Scripts/sim-analysis.R
index e368419..af280cc 100755
--- a/R Scripts/sim-analysis.R
+++ b/R Scripts/sim-analysis.R
@@ -31,8 +31,8 @@ for (i in 1:n.infections){
n = 10000
mean.time = rep(NA,n)
med.time = rep(NA,n)
+mean.30 = rep(NA,n)
mean.50 = rep(NA,n)
-mean.100 = rep(NA,n)
n.vicpairs = rep(NA,n)
hyp_lcc = upgrade_graph(hyp_lcc)
@@ -56,16 +56,16 @@ for(q in 1:n){
n.vicpairs[q] = length(vic.time)
mean.time[q] = mean(vic.time)
med.time[q] = median(vic.time)
+ mean.30[q] = mean(vic.time<=30)
mean.50[q] = mean(vic.time<=50)
- mean.100[q] = mean(vic.time<=100)
# decay.rate[sim,q] = getDecayRate(vic.time) #get lm of histogram. or maybe just cor?
}
print(proc.time()-ptm)
-# save(mean.time,med.time,mean.50,mean.100,file='Results/sims-Aug28.RData')
+# save(mean.time,med.time,mean.30,mean.50,file='Results/sims-Aug28.RData')
####### plot simulation data #######
-data = 0.1179039 # 778.2056, 638, 0.07248908, 0.1179039
+data = 0.1179039 # 778.2056, 638, 0.05105811, 0.07248908
simdata = mean.100
xl = c(0.95*min(min(simdata),data),1.05*max(max(simdata),data))
yl = c(0,max(hist(simdata,25)$density))