summaryrefslogtreecommitdiffstats
path: root/R Scripts/-recover-data.R
blob: b34ffc66371de22a4c7a6890747faae2e0ff35a0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
library(igraph)
# Rebuild the DAG edge table `dag_dat`: for every edge listed in Data/dag.csv,
# attach the shortest-path distance between its endpoints inside the largest
# connected component of the graph, plus the `vic_date` of each endpoint, and
# write the result to Data/dag_dat.RData and Data/dag_dat.csv.

# NOTE(review): machine-specific working directory. It is kept because the
# relative 'Data/...' paths below are resolved against it; consider replacing
# it with a project-relative path once the callers of this script are known.
setwd("/Users/Ben/Documents/Harvard/Fall 2014/CS 284r Social Data Mining/Cascade Project/")

# NOTE(review): `d` is not created anywhere in this script; presumably it is
# the full igraph graph left in the workspace by an earlier step -- confirm.
# Fail early with a readable message instead of "object 'd' not found".
if (!exists("d")) {
  stop("graph object 'd' not found; load the full network before running ",
       "this script", call. = FALSE)
}

el <- read.csv('Data/dag.csv')

# Largest connected component of `d`. The component search is run once and
# reused for both the membership vector and the component sizes.
comp <- clusters(d)
lcc <- induced.subgraph(d, which(comp$membership == which.max(comp$csize)))

# Vertex indices (within `lcc`) whose `vic` attribute equals TRUE. The explicit
# comparison is kept so that NA / non-logical values are dropped by which().
vic_ids <- which(V(lcc)$vic == TRUE)

# The edge list's from/to columns are used as positions into `vic_ids`;
# translate them into vertex indices of `lcc`.
from <- vic_ids[el$from]
to <- vic_ids[el$to]

# `vic_date` of each endpoint, parsed as month/day/two-digit-year.
t1 <- as.Date(V(lcc)$vic_date[from], format = '%m/%d/%y')
t2 <- as.Date(V(lcc)$vic_date[to], format = '%m/%d/%y')

# One shortest-path query per distinct source vertex, covering all of that
# source's targets at once, instead of one query per edge.
sources <- unique(from)
dist <- numeric(length(from))
# seq_along() keeps the loop from running when the edge list is empty
# (1:length(x) would iterate over c(1, 0)).
for (i in seq_along(sources)) {
  if (i %% 1000 == 0) print(i)  # progress indicator
  src <- sources[i]
  rows <- which(from == src)    # rows of the edge list that start at `src`
  dist[rows] <- as.numeric(shortest.paths(lcc, v = src, to = to[rows]))
}

# Columns: original edge-list indices, path length, and both endpoint dates.
dag_dat <- data.frame(from = el$from, to = el$to, dist, t1, t2)

save(dag_dat, file = 'Data/dag_dat.RData')
write.csv(dag_dat, 'Data/dag_dat.csv')