summaryrefslogtreecommitdiffstats
path: root/R Scripts/-generate-dag-dat-sim.R
blob: a6d115d7590b9fede0710fc303944842930bc5e7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
library(igraph)
# Make the project directory the working directory so that the relative
# 'Data/...' path below resolves.
# NOTE(review): hard-coded absolute path -- the script only runs unchanged on a
# machine that has this exact directory layout.
setwd("/Users/Ben/Documents/Harvard/Fall 2014/CS 284r Social Data Mining/Cascade Project/")
# Restore the saved workspace. Presumably this is what defines `lcc.sim` and
# `vic_ids.sim`, which the script reads later -- confirm against the .RData file.
load('Data/lcc_sim2a.RData')

# d = remove.edge.attribute(person,'weight')
# lcc = induced.subgraph(d,which(clusters(d)$membership==which.max(clusters(d)$csize)))
# vic_ids = which(V(lcc)$vic==TRUE)
# dag = graph.empty(vcount(lcc))
# dag = set.edge.attribute(dag,'weight',value=0)

# Build the edge list of a victim-to-victim DAG over the simulated network.
# For every victim u, each *other* victim v that lies within 3 hops of u and
# whose vic_date is strictly later than u's contributes one row
#   (u, v, graph distance u->v, tu, tv)
# to the numeric matrix `dag_dat`.
lcc <- lcc.sim
vic_ids <- vic_ids.sim
ei <- 1  # running edge counter; starts at 1 and ends at 1 + number of rows

# One block of rows per source victim. The blocks are bound together once after
# the loop instead of calling rbind() on the full matrix for every edge, which
# copies the whole matrix each time.
edge_rows <- vector("list", length(vic_ids))

for (i in seq_along(vic_ids)) {
  if (i %% 100 == 0) print(i)  # progress indicator every 100 victims

  u <- vic_ids[i]
  nbhd <- unlist(neighborhood(lcc, nodes = u, order = 3)) # nodes within 3 hops
  nbhd <- intersect(vic_ids, nbhd) # only want victim nodes
  nbhd <- setdiff(nbhd, u) # don't want to include u in the neighborhood

  tu <- as.numeric(V(lcc)$vic_date[u])
  tvs <- as.numeric(V(lcc)$vic_date[nbhd])

  # Indices of neighbours victimised strictly after u. which() silently drops
  # NA comparisons, so a missing date skips that pair instead of aborting.
  later <- which(tu < tvs)
  if (length(later) == 0) next

  nbhd <- nbhd[later]
  tvs <- tvs[later]
  dists <- as.numeric(shortest.paths(lcc, u, nbhd))

  # Scalars u and tu are recycled across all selected neighbours;
  # deparse.level = 0 keeps the matrix free of column names.
  edge_rows[[i]] <- cbind(u, nbhd, dists, tu, tvs, deparse.level = 0)
  ei <- ei + length(later)
}

# rbind() ignores the NULL entries left by victims without outgoing edges.
dag_dat <- do.call(rbind, edge_rows)
if (is.null(dag_dat)) dag_dat <- matrix(0, 0, 5)  # no edges at all

# Keep only rows with a positive row sum (this discards any all-zero filler
# rows), then turn the matrix into a data frame with named columns.
# drop = FALSE keeps the result a 5-column matrix even when exactly one row
# survives; without it the single row collapses to a plain vector and the
# column naming below fails.
dag_dat <- dag_dat[rowSums(dag_dat) > 0, , drop = FALSE]
dag_dat <- as.data.frame(dag_dat)
colnames(dag_dat) <- c('from','to','dist','t1','t2')
# save(dag_dat,file='Data/dag_dat_sim2a.RData')