-rw-r--r-- | paper/paper.tex                |  1 |
-rw-r--r-- | paper/sections/experiments.tex | 19 |
-rw-r--r-- | paper/sparse.bib               | 27 |
-rw-r--r-- | src/algorithms.py              |  5 |
-rw-r--r-- | src/cascade_creation.py        | 16 |
-rw-r--r-- | src/convex_optimization.py     |  6 |
-rw-r--r-- | src/make_plots.py              | 40 |
7 files changed, 96 insertions, 18 deletions
diff --git a/paper/paper.tex b/paper/paper.tex
index f922b3b..4627890 100644
--- a/paper/paper.tex
+++ b/paper/paper.tex
@@ -102,6 +102,7 @@
 \input{sections/model.tex}
 
 \section{Results}
+\label{sec:results}
 \input{sections/results.tex}
 
 \section{A lower bound}
diff --git a/paper/sections/experiments.tex b/paper/sections/experiments.tex
index efcf754..4876927 100644
--- a/paper/sections/experiments.tex
+++ b/paper/sections/experiments.tex
@@ -8,6 +8,25 @@
 \caption{Watts-Strogatz Model. 200 nodes, 20000 edges.}
 \end{figure}
 
+In this section, we empirically validate the results and assumptions of Section~\ref{sec:results} for different parameter initializations ($n$, $m$, $\lambda$) and for varying levels of sparsity. We compare our algorithm to two state-of-the-art algorithms, \textsc{Greedy} and \textsc{MLE} from \cite{Netrapalli:2012}, and we introduce an approximate \textsc{Lasso} algorithm whose effectiveness we validate empirically.
+
+\paragraph{Experimental setup}
+
+We restrict our analysis to synthetic graphs, which are known to exhibit the properties of real social networks. We consider a Watts-Strogatz graph ($300$ nodes, $4500$ edges), a Barabasi-Albert graph ($300$ nodes, $16200$ edges), the Holme-Kim power-law graph ($200$ nodes, $9772$ edges) \cite{Holme:2002}, and the recently introduced Kronecker graph ($256$ nodes, $10000$ edges) \cite{Leskovec:2010}. For every reported data point, we generate $n$ cascades from the Independent Cascade model for $n \in \{100, 500, 1000, 2000, 5000\}$ and compute the estimated graph $\hat{\cal G}$ for each algorithm.
+
+In the case of the \textsc{Lasso}, \textsc{MLE}, and \textsc{Sparse MLE} algorithms, we construct the edge set of $\hat{\cal G}$ as $\bigcup_{j \in V} \{(i, j) : \Theta_{ij} > 0.1\}$. We report the F1-score $= 2\,\frac{\text{precision}\cdot\text{recall}}{\text{precision}+\text{recall}}$. For each graph, the initial probability of a node being a source is fixed at $0.05$, i.e.\ an average of $15$ source nodes per cascade. Except when testing for approximately sparse graphs, all edge weights are chosen uniformly in the interval $[0.2, 0.7]$.
+
+\paragraph{Comparison with other algorithms}
+\begin{itemize}
+\item \textsc{Netrate} assumes continuous-time cascades with a single source per cascade, so a direct comparison with it is not meaningful in our setting.
+\item This is a modeling decision: in practice, we do not have access to the cascade's ``patient 0'', which makes learning the graph harder.
+\item \textsc{Lasso}
+\end{itemize}
+
+\paragraph{Quantifying robustness}
+
 \begin{itemize}
 \item Compare edge recovery to First Edge and Greedy as number of cascades grows on different types of graphs...
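For concreteness, the edge-construction rule and F1-score described in the experimental setup above reduce to a few lines of NumPy. This is a minimal sketch, not code from the commit: theta_hat stands for the estimated influence matrix, and both helper names are ours.

    import numpy as np

    def edges_from_estimate(theta_hat, threshold=0.1):
        """Edge set of G_hat: all pairs (i, j) with Theta_ij > threshold."""
        parents, children = np.nonzero(theta_hat > threshold)
        return set(zip(parents.tolist(), children.tolist()))

    def f1_score(true_edges, estimated_edges):
        """F1 = 2 * precision * recall / (precision + recall),
        guarded against empty edge sets."""
        tp = len(true_edges & estimated_edges)
        precision = tp / max(len(estimated_edges), 1)
        recall = tp / max(len(true_edges), 1)
        return 2 * precision * recall / max(precision + recall, 1e-12)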
diff --git a/paper/sparse.bib b/paper/sparse.bib
index c05d318..48017cc 100644
--- a/paper/sparse.bib
+++ b/paper/sparse.bib
@@ -344,3 +344,30 @@
   year = "2009"
   bibsource = {dblp computer science bibliography, http://dblp.org}
 }
+@article{Leskovec:2010,
+  author    = {Jure Leskovec and
+               Deepayan Chakrabarti and
+               Jon M. Kleinberg and
+               Christos Faloutsos and
+               Zoubin Ghahramani},
+  title     = {Kronecker Graphs: An Approach to Modeling Networks},
+  journal   = {Journal of Machine Learning Research},
+  volume    = {11},
+  pages     = {985--1042},
+  year      = {2010},
+  url       = {http://doi.acm.org/10.1145/1756006.1756039},
+  doi       = {10.1145/1756006.1756039},
+  timestamp = {Thu, 22 Apr 2010 13:26:26 +0200},
+  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/LeskovecCKFG10},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{Holme:2002,
+  author  = {Petter Holme and Beom Jun Kim},
+  title   = {Growing scale-free networks with tunable clustering},
+  journal = {Physical Review E},
+  volume  = {65},
+  issue   = {2},
+  pages   = {026107},
+  year    = {2002}
+}
diff --git a/src/algorithms.py b/src/algorithms.py
index 51f57bf..24a63f3 100644
--- a/src/algorithms.py
+++ b/src/algorithms.py
@@ -72,6 +72,8 @@ def correctness_measure(G, G_hat, print_values=True):
     f1_score = 2. * tp / (2 * tp + fp + fn)
     fall_out = 1. * fp / (fp + tn)
 
+    norm = np.linalg.norm(G.mat - G_hat.mat)
+
     if print_values:
         print("False Positives: {}".format(fp))
         print("False Negatives: {}".format(fn))
@@ -81,7 +83,8 @@ def correctness_measure(G, G_hat, print_values=True):
         print("Precision: {}".format(precision))
         print("Recall: {}".format(recall))
         print("F1 score: {}".format(f1_score))
-        print("Fall Out: {}".format(fall_out))
+        print("Fall Out: {}".format(fall_out))
+        print("l2_norm: {}".format(norm))
 
     return fp, fn, tp, tn
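The new norm line above adds the Frobenius norm of the difference between the true and estimated weight matrices to the thresholded confusion-matrix metrics already printed by correctness_measure. A sketch of how those counts can be derived from the two matrices; the function name and the 0.1 threshold default are our assumptions, not the repository's API.

    import numpy as np

    def confusion_counts(G_mat, G_hat_mat, threshold=0.1):
        """fp, fn, tp, tn between true and thresholded estimated adjacency."""
        true_adj = G_mat > 0
        est_adj = G_hat_mat > threshold
        tp = int(np.sum(true_adj & est_adj))
        fp = int(np.sum(~true_adj & est_adj))
        fn = int(np.sum(true_adj & ~est_adj))
        tn = int(np.sum(~true_adj & ~est_adj))
        return fp, fn, tp, tn

    # Fall-out and the new l2 metric then follow directly:
    # fall_out = 1. * fp / (fp + tn); l2 = np.linalg.norm(G_mat - G_hat_mat)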
diff --git a/src/cascade_creation.py b/src/cascade_creation.py
index 1c365b9..a71fce7 100644
--- a/src/cascade_creation.py
+++ b/src/cascade_creation.py
@@ -9,9 +9,11 @@ class InfluenceGraph(nx.DiGraph):
     """
     networkX graph with mat and logmat attributes
     """
-    def __init__(self, max_proba=None, min_proba=None, *args, **kwargs):
+    def __init__(self, max_proba=None, min_proba=None, sparse_edges=False,
+                 *args, **kwargs):
         self.max_proba = max_proba
         self.min_proba = min_proba
+        self.sparse_edges = sparse_edges
         super(InfluenceGraph, self).__init__(*args, **kwargs)
 
     def erdos_init(self, n, p):
@@ -61,8 +63,16 @@ class InfluenceGraph(nx.DiGraph):
     @property
     def mat(self):
         if not hasattr(self, '_mat'):
-            self._mat = self.max_proba * np.random.rand(len(self), len(self)
+            if self.min_proba is None or self.max_proba is None:
+                print("You forgot to initialize the min and max proba")
+                self._mat = np.zeros((self.number_of_nodes(), self.number_of_nodes()))
+            else:
+                self._mat = self.max_proba * np.random.rand(len(self), len(self)
                 ) * np.asarray(nx.adjacency_matrix(self).todense().T)
+            if self.sparse_edges:
+                # Adding sparse non-edges to the graph
+                self._mat += .1 * np.random.rand(len(self), len(self)
+                    ) * np.random.binomial(1, p=.33, size=(len(self), len(self)))
         return self._mat
 
     @property
@@ -175,7 +185,7 @@ def add_edges_from_proba_vector(G, p_node, node, floor_cstt):
     floor_parent = np.nonzero(p_node * (p_node > floor_cstt))
     for parent in floor_parent[0]:
         G.add_edge(parent, node)
-    #TODO: update G's mat matrix
+    G.mat[node] = p_node.T
     return G
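Note that the mat property above draws weights as max_proba * U(0, 1) on true edges, i.e. uniform on [0, max_proba] rather than on the [min_proba, max_proba] interval quoted in the experiments section. A minimal sketch of the sampling scheme as stated in the paper, with the sparse_edges option reproduced; the helper name and the rng argument are ours.

    import numpy as np

    def influence_matrix(adj, min_proba=0.2, max_proba=0.7,
                         sparse_edges=False, rng=np.random):
        """Weights uniform in [min_proba, max_proba] on true edges; with
        sparse_edges, weak spurious edges (weight < 0.1) are added to
        roughly a third of all pairs, mirroring the diff above."""
        n = adj.shape[0]
        mat = (min_proba + (max_proba - min_proba) * rng.rand(n, n)) * adj.T
        if sparse_edges:
            mat += .1 * rng.rand(n, n) * rng.binomial(1, p=.33, size=(n, n))
        return mat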
diff --git a/src/convex_optimization.py b/src/convex_optimization.py
index e355bc6..fce89b8 100644
--- a/src/convex_optimization.py
+++ b/src/convex_optimization.py
@@ -80,7 +80,7 @@ def type_lasso(lbda, n_cascades):
     return f_x, f_xz
 
 
-@timeout.timeout(10)
+@timeout.timeout(70)
 def diff_and_opt(M_val, w_val, f_x, f_xz):
 
     if M_val.dtype == bool:
@@ -90,7 +90,7 @@ def diff_and_opt(M_val, w_val, f_x, f_xz):
 
     def F(x=None, z=None):
         if x is None:
-            return 0, cvxopt.matrix(-.001, (n, 1))
+            return 0, cvxopt.matrix(-.7, (n, 1))
         elif z is None:
             y, y_diff = f_x(x, M_val, w_val)
             return cvxopt.matrix(float(y), (1, 1)),\
@@ -116,7 +116,7 @@ def diff_and_opt(M_val, w_val, f_x, f_xz):
                   " given to the solver")
     except ValueError:
         print("Domain Error, skipping to next node")
-        theta = np.zeros(len(w_val))
+        theta = np.zeros(M_val.shape[1])
 
     if cvxopt.solvers.options['show_progress']:
         print(1 - np.exp(theta))
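Two of these changes deserve a note. The solver works in the reparametrization theta = log(1 - p) (hence the final print of 1 - np.exp(theta)), so moving the starting point from -.001 to -.7 starts at p = 1 - exp(-.7), roughly 0.5 and well inside the [0.2, 0.7] weight range, instead of at p close to 0. The fallback theta = np.zeros(M_val.shape[1]) now matches the dimension of the optimization variable rather than the length of w_val. Below is a sketch of the cvxopt.solvers.cp calling convention that F implements; f_x, f_xz, and their return shapes are assumptions, not the repository's exact signatures.

    import numpy as np
    import cvxopt

    def solve_node_problem(n, f_x, f_xz, x0=-.7):
        """Skeleton of the F(x, z) interface expected by cvxopt.solvers.cp."""
        def F(x=None, z=None):
            if x is None:
                # (number of nonlinear constraints, starting point)
                return 0, cvxopt.matrix(x0, (n, 1))
            if z is None:
                y, y_diff = f_x(x)           # objective value and gradient
                return cvxopt.matrix(float(y), (1, 1)), cvxopt.matrix(y_diff).T
            y, y_diff, y_hess = f_xz(x)      # value, gradient and Hessian
            return (cvxopt.matrix(float(y), (1, 1)),
                    cvxopt.matrix(y_diff).T,
                    z[0] * cvxopt.matrix(y_hess))
        # Linear constraint theta <= 0 keeps p = 1 - exp(theta) inside [0, 1)
        G = cvxopt.matrix(np.eye(n))
        h = cvxopt.matrix(0., (n, 1))
        return cvxopt.solvers.cp(F, G=G, h=h)['x']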
diff --git a/src/make_plots.py b/src/make_plots.py
index 28d89e2..10fd657 100644
--- a/src/make_plots.py
+++ b/src/make_plots.py
@@ -27,11 +27,14 @@ def compare_greedy_and_lagrange_cs284r():
         algorithms.correctness_measure(G, G_hat, print_values=True)
 
 
-def compute_graph(graph_name, n_cascades, lbda, passed_function):
+def compute_graph(graph_name, n_cascades, lbda, passed_function, min_proba,
+                  max_proba, sparse_edges=False):
     """
     Test running time on different algorithms
     """
-    G = cascade_creation.InfluenceGraph(max_proba=.7, min_proba=.2)
+    G = cascade_creation.InfluenceGraph(max_proba=max_proba,
+                                        min_proba=min_proba,
+                                        sparse_edges=sparse_edges)
     G.import_from_file(graph_name)
     A = cascade_creation.generate_cascades(G, p_init=.05,
                                            n_cascades=n_cascades)
@@ -51,7 +54,8 @@ def plot_watts_strogatz_graph():
     plt.clf()
     fig = plt.figure(1)
     labels = [50, 100, 500, 1000, 2000, 5000]
-    x = [np.log(50), np.log(100), np.log(500), np.log(1000), np.log(2000), np.log(5000)]
+    x = [np.log(50), np.log(100), np.log(500),
+         np.log(1000), np.log(2000), np.log(5000)]
     sparse_recov = [.25, .32, .7, .82, .89, .92]
     max_likel = [.21, .29, .67, .8, .87, .9]
     lasso = [.07, .30, .46, .65, .86, .89]
@@ -97,10 +101,14 @@ def plot_ROC_curve(figure_name):
     plt.xlabel("Recall")
     plt.ylabel("Precision")
     plt.grid(color="lightgrey")
-    ax.plot(recall_lasso_200, precision_lasso_200, 'ko-', color="lightseagreen", label="Lasso-200 cascades")
-    ax.plot(recall_sparse_200, precision_sparse_200, 'ko-', color="k", label="Our Method-200 cascades")
-    ax.plot(recall_lasso_50, precision_lasso_50, 'ko-', color="orange", label="Lasso-50 cascades")
-    ax.plot(recall_sparse_50, precision_sparse_50, 'ko-', color="cornflowerblue", label="Our Method-50 cascades")
+    ax.plot(recall_lasso_200, precision_lasso_200, 'ko-',
+            color="lightseagreen", label="Lasso-200 cascades")
+    ax.plot(recall_sparse_200, precision_sparse_200, 'ko-',
+            color="k", label="Our Method-200 cascades")
+    ax.plot(recall_lasso_50, precision_lasso_50, 'ko-',
+            color="orange", label="Lasso-50 cascades")
+    ax.plot(recall_sparse_50, precision_sparse_50, 'ko-',
+            color="cornflowerblue", label="Our Method-50 cascades")
     plt.legend(loc="upper right")
     plt.savefig("../paper/figures/"+"ROC_curve.pdf")
 
@@ -108,19 +116,29 @@ def plot_ROC_curve(figure_name):
 if __name__=="__main__":
     if 0:
         compute_graph("../datasets/watts_strogatz_300_30_point3.txt",
-                      n_cascades=100, lbda=.01, passed_function=
+                      n_cascades=100, lbda=.01, min_proba=.2, max_proba=.7,
+                      passed_function=
                       #convex_optimization.sparse_recovery)
                       #algorithms.greedy_prediction)
                       convex_optimization.sparse_recovery)
     if 0:
         compute_graph("../datasets/powerlaw_200_30_point3.txt",
-                      n_cascades=200, lbda=.01, passed_function=
+                      n_cascades=200, lbda=.01, min_proba=.2, max_proba=.7,
+                      passed_function=
                       #convex_optimization.sparse_recovery)
                       #algorithms.greedy_prediction)
                       convex_optimization.type_lasso)
     if 0:
         compute_graph("../datasets/barabasi_albert_300_30.txt",
-                      n_cascades=100, lbda=.002, passed_function=
+                      n_cascades=100, lbda=.002, min_proba=.2,
+                      max_proba=.7, passed_function=
                       convex_optimization.sparse_recovery)
                       #algorithms.greedy_prediction)
-                      #convex_optimization.type_lasso)
\ No newline at end of file
+                      #convex_optimization.type_lasso)
+    if 1:
+        compute_graph("../datasets/kronecker_graph_256_cross.txt",
+                      n_cascades=1000, lbda=.001, min_proba=.2, max_proba=.7,
+                      passed_function=
+                      convex_optimization.sparse_recovery,
+                      #convex_optimization.type_lasso,
+                      sparse_edges=False)
\ No newline at end of file
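For reference, compute_graph's pipeline is: build an InfluenceGraph, draw cascades with cascade_creation.generate_cascades (not shown in this diff), then hand them to the selected recovery routine. A minimal sketch of the Independent Cascade process being simulated, under the p_init=.05 convention above; simulate_cascade and the matrix orientation (mat[i, j] is the probability that an infected j activates i, matching the transposed adjacency in InfluenceGraph.mat) are our assumptions.

    import numpy as np

    def simulate_cascade(mat, p_init=.05, rng=np.random):
        """One Independent Cascade run over the weight matrix mat."""
        n = mat.shape[0]
        infected = rng.rand(n) < p_init   # each node is a source w.p. p_init
        frontier = infected.copy()
        while frontier.any():
            # every frontier node gets a single activation attempt per child
            attempts = rng.rand(n, n) < mat
            newly = attempts[:, frontier].any(axis=1) & ~infected
            infected |= newly
            frontier = newly
        return infected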