library(pROC) library(igraph) setwd('~/Documents/Cascade Project') load('Raw Data/lcc.RData') load('Results/lcc_bw.RData') is_vic = V(lcc)$vic vics = which(V(lcc)$vic==T) nonvics = which(V(lcc)$vic==F) thresh = length(vics)/length(union(vics,nonvics)) nbhdVics = function(neighbs){return(sum(is_vic[neighbs[-1]]))} #setdiff(is_vic order-(order-1) to get only those at the given order n1s = neighborhood(lcc,1,V(lcc)) n2s = neighborhood(lcc,2,V(lcc)) n1vics = unlist(lapply(n1s,nbhdVics)) n2vics = unlist(lapply(n2s,nbhdVics)) data = data.frame(vic = V(lcc)$vic, sex = V(lcc)$sex, race = V(lcc)$race, age = V(lcc)$age, gang.member = V(lcc)$gang.member, gang.name = V(lcc)$gang.name, # fname = V(lcc)$faction.name, deg = degree(lcc), bw = bw, n1v = n1vics, n2v = n2vics ) data$age = as.numeric(data$age) data$gname = as.factor(data$gname) train = 1:dim(data)[1]; test = 1:dim(data)[1] # train = sort(union(sample(vics,length(vics)*0.8),sample(nonvics,length(nonvics)*0.8))) # test = sort((1:length(is_vic))[-train]) formula = vic ~ sex + race + age + gmember + gname + deg + bw + n1v + n2v formula = vic ~ sex + race + age + gmember + gname formula = vic ~ deg + bw formula = vic ~ n1v + n2v glm.fit = glm(formula, data=data, family=binomial) glm.probs = predict(glm.fit, newdata=data, type='response') roc = roc(data$vic[test],glm.probs,ci=TRUE) plot(roc,print.auc=TRUE) roc$auc lm.fit = lm(formula, data=data) lm.probs = predict(lm.fit, newdata=data, type='response') p = 1/(1+exp(-lm.probs)) z = log(lm.probs/(1-lm.probs)) logit = 1/(1+exp(-z)) save(glm.probs, file='Results/lm-probs.RData') ########## lm.pred = lm.probs>thresh tab = table(lm.pred,data$vic) tab (tab[1,1]+tab[2,2])/sum(tab) propensity = as.numeric(pred) similarity = 1 - abs(propensity[u]-propensity[v]) ############### # Random forest approach -- not as good library(randomForest) data$vic = factor(data$vic) rf.fit = randomForest(formula,data=data[train,],do.trace=50,ntree=20) rf.probs = (predict(rf.fit,newdata=data[test,],type='prob'))[,1] roc = roc(data$vic[test],rf.probs,ci=TRUE) plot(roc,print.auc=TRUE)