diff options
| author | Ben Green <bgreen@g.harvard.edu> | 2015-08-30 20:59:39 -0400 |
|---|---|---|
| committer | Ben Green <bgreen@g.harvard.edu> | 2015-08-30 20:59:39 -0400 |
| commit | 485ebd6d5b34ff2d8f6761f2dda44ea3e2b32aa6 (patch) | |
| tree | 96e610b53c18f73a155974964e17f98bd1ade871 /R Scripts | |
| parent | d1e9e8cfabfee86fd7ac9e108fab0d463da24ce8 (diff) | |
| download | criminal_cascades-485ebd6d5b34ff2d8f6761f2dda44ea3e2b32aa6.tar.gz | |
wrote script to find background rate using nls
Diffstat (limited to 'R Scripts')
| -rw-r--r-- | R Scripts/fit-background.R | 32 | ||||
| -rwxr-xr-x | R Scripts/generate-dag-dat-unweighted.R | 59 | ||||
| -rwxr-xr-x | R Scripts/sim-analysis.R | 8 |
3 files changed, 95 insertions, 4 deletions
diff --git a/R Scripts/fit-background.R b/R Scripts/fit-background.R new file mode 100644 index 0000000..d385e7f --- /dev/null +++ b/R Scripts/fit-background.R @@ -0,0 +1,32 @@ +library(igraph) +setwd("~/Documents/Violence Cascades/") +load('Results/hyper-lcc.RData') + +# load data +hyp_lcc = upgrade_graph(hyp_lcc) +vic_ids = which(V(hyp_lcc)$vic==TRUE) +non_vic_ids = which(V(hyp_lcc)$vic==FALSE) + +hist(as.numeric(hyp_lcc_verts$vic.day[vic_ids]),2000,col='lightblue', + xlab='Day of Study Period',main='Infections During the Study Period') + +# get infection counts per day +days = 1:3012 +t = table(factor(hyp_lcc_verts$vic.day[vic_ids],levels=days)) +counts = as.vector(t) +infs = data.frame(days,counts) + +# define background function +fit = function(x, mu, A, phi) {mu + A*(sin((pi/365)*x+phi))^2} +fit_form = counts ~ mu + A*(sin((pi/365)*days+phi))^2 + +# explore data +plot(days,counts) +curve(fit(x, mu=1, A=12, phi=2.8), add=TRUE ,lwd=2, col="steelblue") + +res = nls(formula=fit_form, data=infs, start=list(mu=1, A=12, phi=2.8)) +co = coef(res); co + +plot(t) +plot(days,counts,pch=20,cex=0.4,xlab='Day',ylab='Number of Infections') +curve(fit(x, mu=co["mu"], A=co["A"], phi=co["phi"]), add=TRUE ,lwd=5, col="steelblue") diff --git a/R Scripts/generate-dag-dat-unweighted.R b/R Scripts/generate-dag-dat-unweighted.R new file mode 100755 index 0000000..61db26c --- /dev/null +++ b/R Scripts/generate-dag-dat-unweighted.R @@ -0,0 +1,59 @@ +library(igraph) +setwd("~/Documents/Violence Cascades/") +load('Raw Data/lcc.RData') + +library(foreach) +library(doMC) +registerDoMC(cores=4) + +lcc2 = remove.edge.attribute(lcc,'weight') +vics = split(vic_ids, ceiling(seq_along(vic_ids)/500)) +dag_dat_lcc = c() +for(i in 1:length(vics)){ + ptm = proc.time() + print(c(i,length(vics))) + vic_ids = unlist(vics[i], use.names=F) + + ddl = foreach (u = vic_ids, .combine=rbind) %dopar% { + if ((which(vic_ids==u) %% 100)==0) print(which(vic_ids==u)) + + nbhd = unlist(neighborhood(lcc2,nodes=u,order=3)) # get nodes within neighborhood + nbhd = nbhd[-1] # don't want to include u in the neighborhood + dists = as.numeric(shortest.paths(lcc2,u,nbhd)) + + # make edge for every infection + ddlu = data.frame(matrix(nrow=1,ncol=4)) + ei = 1 + for (j in c(17:21,16)){ + tu = lcc_verts[u,j] + if (is.na(tu)) next + + ddlu[ei:(ei+length(nbhd)-1),] = data.frame(rep(u,length(nbhd)), nbhd, + rep(tu,length(nbhd)), dists, row.names=NULL) + ei = ei + length(nbhd) + } + + return(ddlu) + } + + dag_dat_lcc = rbind(dag_dat_lcc,ddl) + print(proc.time()-ptm) +} + +colnames(dag_dat_lcc) = c('from','to','t1','dist') +rownames(dag_dat_lcc) = NULL + +save(dag_dat_lcc, file='Results/dag_dat_lcc.RData') +write.csv(dag_dat_lcc, file='Results/dag_dat_lcc.csv') + +# dag_dat_vics = dag_dat_lcc[!is.na(dag_dat_lcc$t2),] +# save(dag_dat_lcc_vics, file='Results/dag_dat_lcc_vics.RData') +# write.csv(dag_dat_lcc_vics, file='Results/dag_dat_lcc_vics.csv') + +#### +# create lcc_vic_times +vic_times_lcc = lcc_verts[,c('name','nonfatal_day_1','nonfatal_day_2', + 'nonfatal_day_3','nonfatal_day_4', + 'nonfatal_day_5','fatal_day')] +save(vic_times_lcc, file='Results/vic_times_lcc.RData') +write.csv(vic_times_lcc, file='Results/vic_times_lcc.csv') diff --git a/R Scripts/sim-analysis.R b/R Scripts/sim-analysis.R index e368419..af280cc 100755 --- a/R Scripts/sim-analysis.R +++ b/R Scripts/sim-analysis.R @@ -31,8 +31,8 @@ for (i in 1:n.infections){ n = 10000 mean.time = rep(NA,n) med.time = rep(NA,n) +mean.30 = rep(NA,n) mean.50 = rep(NA,n) -mean.100 = rep(NA,n) n.vicpairs = rep(NA,n) hyp_lcc = upgrade_graph(hyp_lcc) @@ -56,16 +56,16 @@ for(q in 1:n){ n.vicpairs[q] = length(vic.time) mean.time[q] = mean(vic.time) med.time[q] = median(vic.time) + mean.30[q] = mean(vic.time<=30) mean.50[q] = mean(vic.time<=50) - mean.100[q] = mean(vic.time<=100) # decay.rate[sim,q] = getDecayRate(vic.time) #get lm of histogram. or maybe just cor? } print(proc.time()-ptm) -# save(mean.time,med.time,mean.50,mean.100,file='Results/sims-Aug28.RData') +# save(mean.time,med.time,mean.30,mean.50,file='Results/sims-Aug28.RData') ####### plot simulation data ####### -data = 0.1179039 # 778.2056, 638, 0.07248908, 0.1179039 +data = 0.1179039 # 778.2056, 638, 0.05105811, 0.07248908 simdata = mean.100 xl = c(0.95*min(min(simdata),data),1.05*max(max(simdata),data)) yl = c(0,max(hist(simdata,25)$density)) |
