aboutsummaryrefslogtreecommitdiffstats
path: root/jpa_test/cascade_creation.py
diff options
context:
space:
mode:
Diffstat (limited to 'jpa_test/cascade_creation.py')
-rw-r--r--jpa_test/cascade_creation.py162
1 files changed, 0 insertions, 162 deletions
diff --git a/jpa_test/cascade_creation.py b/jpa_test/cascade_creation.py
deleted file mode 100644
index 9a26c03..0000000
--- a/jpa_test/cascade_creation.py
+++ /dev/null
@@ -1,162 +0,0 @@
-import networkx as nx
-import numpy as np
-import collections
-from itertools import izip
-from sklearn.preprocessing import normalize
-
-class InfluenceGraph(nx.Graph):
- """
- networkX graph with mat and logmat attributes
- """
- def __init__(self, max_proba, *args, **kwargs):
- self.max_proba = max_proba
- super(InfluenceGraph, self).__init__(*args, **kwargs)
-
- def erdos_init(self, n, p):
- G = nx.erdos_renyi_graph(n, p, directed=True)
- self.add_edges_from(G.edges())
-
- def import_from_file(self, file_name):
- """
- Takes a file from the Stanford collection of networks
- """
- with open(file_name, 'r') as f:
- for edge in f:
- if "#" not in edge:
- u, v = [int(node) for node in edge.split()]
- self.add_edge(u, v)
-
- @property
- def mat(self):
- if not hasattr(self, '_mat'):
- self._mat = (self.max_proba * np.random.rand(len(self), len(self))
- * np.asarray(nx.adjacency_matrix(self)))
- return self._mat
-
- @property
- def logmat(self):
- if not hasattr(self, '_logmat'):
- self._logmat = np.log(1 - self.mat)
- return self._logmat
-
-
-class Cascade(list):
- """
- Cascade object: list with attributes
- """
- def __init__(self, *args, **kwargs):
- super(Cascade, self).__init__(*args, **kwargs)
-
- def infection_time(self, node):
- """
- Returns lists of infections times for node i in cascade
- """
- infected_times = []
- for t, infected_set in izip(xrange(len(self)), self):
- if infected_set[node]:
- infected_times.append(t)
- if not infected_times:
- infected_times.append(None)
- return infected_times
-
- def candidate_infectors(self, node):
- """
- Returns Counter of nodes infected just before node i was
- """
- candidate_infectors = collections.Counter()
- for t in self.infection_time(node):
- if t: # avoid cases where t=0 or t is None
- candidate_infectors.update(np.where(self[t-1])[0])
- return candidate_infectors
-
-
-def icc_cascade(G, p_init):
- """
- Returns boolean vectors for one cascade
- True means node was active at that time step
- p_init: proba that node in seed set
- """
- susceptible = np.ones(G.number_of_nodes(), dtype=bool)
- active = np.random.rand(G.number_of_nodes()) < p_init
- susceptible = susceptible & np.logical_not(active)
- cascade = Cascade()
- while active.any():
- cascade.append(active)
- active = np.exp(np.dot(G.logmat, active)) \
- < np.random.rand(G.number_of_nodes())
- active = active & susceptible
- susceptible = susceptible & np.logical_not(active)
- if not cascade:
- print "Empty cascade, consider changing p_init or n_nodes. Retrying."
- return icc_cascade(G, p_init)
- return cascade
-
-
-def generate_cascades(G, p_init, n_cascades):
- """
- returns list of cascades
- """""
- return [icc_cascade(G,p_init) for i in xrange(n_cascades)]
-
-
-def icc_matrixvector_for_node(cascades, node):
- """
- for the ICC model:
- Returns M, w in matrix form where rows of M are i = t + k.T
- Excludes all (t,k) after node infection time; w = 1_{infected}
- """
- #TODO: you need to remove the variable corresponding to the node
- #you are solving for!!!!
- if node is None:
- return np.vstack(cascades), None
- else:
- w = []
- M = []
- for cascade in cascades:
- t_i = cascade.infection_time(node)[0]
- if t_i != 0:
- indicator = np.zeros(len(cascade[:t_i]))
- if t_i > 0:
- indicator[-1] = 1
- w.append(indicator)
- M.append(np.array(cascade[:t_i]))
- M = np.vstack(M)
- w = np.hstack(w)
- return M, w
-
-
-def normalize_matrix(M):
- """
- returns M with normalized (L_1 norm) columns
- """
- return normalize(M.astype("float32"), axis=0, norm="l2")
-
-
-def add_edges_from_proba_vector(G, p_node, node, floor_cstt):
- """
- Takes proba vector, node and adds edges to G by flooring very small
- probabilities
- Also updates G's mat matrix
- """
- floor_parent = np.nonzero(p_node*(p_node > floor_cstt))
- for parent in floor_parent[0]:
- G.add_edge(parent, node)
- #TODO: update G's mat matrix
- return G
-
-
-def test():
- """
- unit test
- """
- G = InfluenceGraph(max_proba = .3)
- G.erdos_init(n = 10, p = 1)
- import time
- t0 = time.time()
- A = generate_cascades(G, .1, 2)
- M, w = icc_matrixvector_for_node(A, 0)
- t1 = time.time()
- print t1 - t0
-
-if __name__ == "__main__":
- test()