aboutsummaryrefslogtreecommitdiffstats
path: root/src/cascade_creation.py
diff options
context:
space:
mode:
authorjeanpouget-abadie <jean.pougetabadie@gmail.com>2014-12-07 12:08:31 -0500
committerjeanpouget-abadie <jean.pougetabadie@gmail.com>2014-12-07 12:08:31 -0500
commit9de35421f25bf45158187daea4ddfedd1c93f3d8 (patch)
treef917008b6363a2b9dbff7855781f4fd5a10a6e94 /src/cascade_creation.py
parent6c874852773329f6fecbbc54476b30a37aa85b79 (diff)
downloadcascades-9de35421f25bf45158187daea4ddfedd1c93f3d8.tar.gz
renaming directory + creating dataset directory
Diffstat (limited to 'src/cascade_creation.py')
-rw-r--r--src/cascade_creation.py162
1 files changed, 162 insertions, 0 deletions
diff --git a/src/cascade_creation.py b/src/cascade_creation.py
new file mode 100644
index 0000000..9a26c03
--- /dev/null
+++ b/src/cascade_creation.py
@@ -0,0 +1,162 @@
+import networkx as nx
+import numpy as np
+import collections
+from itertools import izip
+from sklearn.preprocessing import normalize
+
+class InfluenceGraph(nx.Graph):
+ """
+ networkX graph with mat and logmat attributes
+ """
+ def __init__(self, max_proba, *args, **kwargs):
+ self.max_proba = max_proba
+ super(InfluenceGraph, self).__init__(*args, **kwargs)
+
+ def erdos_init(self, n, p):
+ G = nx.erdos_renyi_graph(n, p, directed=True)
+ self.add_edges_from(G.edges())
+
+ def import_from_file(self, file_name):
+ """
+ Takes a file from the Stanford collection of networks
+ """
+ with open(file_name, 'r') as f:
+ for edge in f:
+ if "#" not in edge:
+ u, v = [int(node) for node in edge.split()]
+ self.add_edge(u, v)
+
+ @property
+ def mat(self):
+ if not hasattr(self, '_mat'):
+ self._mat = (self.max_proba * np.random.rand(len(self), len(self))
+ * np.asarray(nx.adjacency_matrix(self)))
+ return self._mat
+
+ @property
+ def logmat(self):
+ if not hasattr(self, '_logmat'):
+ self._logmat = np.log(1 - self.mat)
+ return self._logmat
+
+
+class Cascade(list):
+ """
+ Cascade object: list with attributes
+ """
+ def __init__(self, *args, **kwargs):
+ super(Cascade, self).__init__(*args, **kwargs)
+
+ def infection_time(self, node):
+ """
+ Returns lists of infections times for node i in cascade
+ """
+ infected_times = []
+ for t, infected_set in izip(xrange(len(self)), self):
+ if infected_set[node]:
+ infected_times.append(t)
+ if not infected_times:
+ infected_times.append(None)
+ return infected_times
+
+ def candidate_infectors(self, node):
+ """
+ Returns Counter of nodes infected just before node i was
+ """
+ candidate_infectors = collections.Counter()
+ for t in self.infection_time(node):
+ if t: # avoid cases where t=0 or t is None
+ candidate_infectors.update(np.where(self[t-1])[0])
+ return candidate_infectors
+
+
+def icc_cascade(G, p_init):
+ """
+ Returns boolean vectors for one cascade
+ True means node was active at that time step
+ p_init: proba that node in seed set
+ """
+ susceptible = np.ones(G.number_of_nodes(), dtype=bool)
+ active = np.random.rand(G.number_of_nodes()) < p_init
+ susceptible = susceptible & np.logical_not(active)
+ cascade = Cascade()
+ while active.any():
+ cascade.append(active)
+ active = np.exp(np.dot(G.logmat, active)) \
+ < np.random.rand(G.number_of_nodes())
+ active = active & susceptible
+ susceptible = susceptible & np.logical_not(active)
+ if not cascade:
+ print "Empty cascade, consider changing p_init or n_nodes. Retrying."
+ return icc_cascade(G, p_init)
+ return cascade
+
+
+def generate_cascades(G, p_init, n_cascades):
+ """
+ returns list of cascades
+ """""
+ return [icc_cascade(G,p_init) for i in xrange(n_cascades)]
+
+
+def icc_matrixvector_for_node(cascades, node):
+ """
+ for the ICC model:
+ Returns M, w in matrix form where rows of M are i = t + k.T
+ Excludes all (t,k) after node infection time; w = 1_{infected}
+ """
+ #TODO: you need to remove the variable corresponding to the node
+ #you are solving for!!!!
+ if node is None:
+ return np.vstack(cascades), None
+ else:
+ w = []
+ M = []
+ for cascade in cascades:
+ t_i = cascade.infection_time(node)[0]
+ if t_i != 0:
+ indicator = np.zeros(len(cascade[:t_i]))
+ if t_i > 0:
+ indicator[-1] = 1
+ w.append(indicator)
+ M.append(np.array(cascade[:t_i]))
+ M = np.vstack(M)
+ w = np.hstack(w)
+ return M, w
+
+
+def normalize_matrix(M):
+ """
+ returns M with normalized (L_1 norm) columns
+ """
+ return normalize(M.astype("float32"), axis=0, norm="l2")
+
+
+def add_edges_from_proba_vector(G, p_node, node, floor_cstt):
+ """
+ Takes proba vector, node and adds edges to G by flooring very small
+ probabilities
+ Also updates G's mat matrix
+ """
+ floor_parent = np.nonzero(p_node*(p_node > floor_cstt))
+ for parent in floor_parent[0]:
+ G.add_edge(parent, node)
+ #TODO: update G's mat matrix
+ return G
+
+
+def test():
+ """
+ unit test
+ """
+ G = InfluenceGraph(max_proba = .3)
+ G.erdos_init(n = 10, p = 1)
+ import time
+ t0 = time.time()
+ A = generate_cascades(G, .1, 2)
+ M, w = icc_matrixvector_for_node(A, 0)
+ t1 = time.time()
+ print t1 - t0
+
+if __name__ == "__main__":
+ test()