-rw-r--r--  paper/paper.tex                  1
-rw-r--r--  paper/sections/experiments.tex  19
-rw-r--r--  paper/sparse.bib                27
-rw-r--r--  src/algorithms.py                5
-rw-r--r--  src/cascade_creation.py         16
-rw-r--r--  src/convex_optimization.py       6
-rw-r--r--  src/make_plots.py               40
7 files changed, 96 insertions, 18 deletions
diff --git a/paper/paper.tex b/paper/paper.tex
index f922b3b..4627890 100644
--- a/paper/paper.tex
+++ b/paper/paper.tex
@@ -102,6 +102,7 @@
\input{sections/model.tex}
\section{Results}
+\label{sec:results}
\input{sections/results.tex}
\section{A lower bound}
diff --git a/paper/sections/experiments.tex b/paper/sections/experiments.tex
index efcf754..4876927 100644
--- a/paper/sections/experiments.tex
+++ b/paper/sections/experiments.tex
@@ -8,6 +8,25 @@
\caption{Watts-Strogatz Model. 200 nodes, 20000 edges.}
\end{figure}
+In this section, we empirically validate the results and assumptions of Section~\ref{sec:results} for different initializations of the parameters ($n$, $m$, $\lambda$) and for varying levels of sparsity. We compare our algorithm against two state-of-the-art algorithms, \textsc{Greedy} and \textsc{MLE} from \cite{Netrapalli:2012}, and introduce an approximate \textsc{Lasso} algorithm whose effectiveness we validate empirically.
+
+\paragraph{Experimental setup}
+
+We focus our analysis on synthetic graphs, which are known to exhibit the structural properties of real social networks. We consider a Watts-Strogatz graph ($300$ nodes, $4500$ edges), a Barab\'asi-Albert graph ($300$ nodes, $16200$ edges), the Holme-Kim power-law graph ($200$ nodes, $9772$ edges) \cite{Holme:2002}, and the recently introduced Kronecker graph ($256$ nodes, $10000$ edges) \cite{Leskovec:2010}. For every reported data point, we generate $n$ cascades from the Independent Cascade model for $n \in \{100, 500, 1000, 2000, 5000\}$ and compute the estimated graph $\hat{\cal G}$ for each algorithm.
+
+In the case of the \textsc{Lasso}, \textsc{MLE}, and \textsc{Sparse MLE} algorithms, we construct the edge set of $\hat{\cal G}$ as $\bigcup_{j \in V} \{(i, j) : \Theta_{ij} > 0.1\}$. We report the F1 score, $\mathrm{F1} = \frac{2 \cdot \text{precision} \cdot \text{recall}}{\text{precision} + \text{recall}}$. For each graph, the initial probability of a node being a source is fixed to $0.05$, i.e., an average of $15$ source nodes per cascade. Except when testing on approximately sparse graphs, all edge weights are chosen uniformly in the interval $[0.2, 0.7]$.
+
+\paragraph{Comparison with other algorithms}
+\begin{itemize}
+\item \textsc{NetRate} assumes continuous-time cascades with a single known source, so we do not compare against it...
+\item This is a modeling decision: in practice, we do not have access to the cascade's ``patient 0'', which makes learning the graph harder.
+\item \textsc{Lasso}
+\end{itemize}
+
+
+\paragraph{Quantifying robustness}
+
+
\begin{itemize}
\item Compare edge recovery to First Edge and Greedy as number of cascades grows on different types of graphs...
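As a concrete companion to the protocol described in the new experiments text, here is a minimal Python sketch (not part of this patch; `edges_from_theta`, `f1_score`, and `tau` are illustrative names) of the thresholding and F1 computation, using only the $0.1$ cutoff and the F1 formula stated above:

```python
import numpy as np

def edges_from_theta(theta, tau=0.1):
    # Estimated edge set {(i, j) : Theta_ij > tau}, with tau = 0.1 as in the text.
    return set(zip(*np.nonzero(theta > tau)))

def f1_score(true_edges, est_edges):
    # F1 = 2 * precision * recall / (precision + recall)
    tp = len(true_edges & est_edges)
    if tp == 0:
        return 0.0
    precision = tp / len(est_edges)
    recall = tp / len(true_edges)
    return 2 * precision * recall / (precision + recall)
```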
diff --git a/paper/sparse.bib b/paper/sparse.bib
index c05d318..48017cc 100644
--- a/paper/sparse.bib
+++ b/paper/sparse.bib
@@ -344,3 +344,30 @@ year = "2009"
bibsource = {dblp computer science bibliography, http://dblp.org}
}
+@article{Leskovec:2010,
+ author = {Jure Leskovec and
+ Deepayan Chakrabarti and
+ Jon M. Kleinberg and
+ Christos Faloutsos and
+ Zoubin Ghahramani},
+ title = {Kronecker Graphs: An Approach to Modeling Networks},
+ journal = {Journal of Machine Learning Research},
+ volume = {11},
+ pages = {985--1042},
+ year = {2010},
+ url = {http://doi.acm.org/10.1145/1756006.1756039},
+ doi = {10.1145/1756006.1756039},
+ timestamp = {Thu, 22 Apr 2010 13:26:26 +0200},
+ biburl = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/LeskovecCKFG10},
+ bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{Holme:2002,
+  author = {Petter Holme and Beom Jun Kim},
+  title = {Growing scale-free networks with tunable clustering},
+  journal = {Physical Review E},
+  volume = {65},
+  number = {2},
+  pages = {026107},
+  year = {2002}
+}
diff --git a/src/algorithms.py b/src/algorithms.py
index 51f57bf..24a63f3 100644
--- a/src/algorithms.py
+++ b/src/algorithms.py
@@ -72,6 +72,8 @@ def correctness_measure(G, G_hat, print_values=True):
f1_score = 2.* tp / (2 * tp + fp + fn)
fall_out = 1. * fp / (fp + tn)
+ norm = np.linalg.norm(G.mat - G_hat.mat)  # l2 distance between weight matrices
+
if print_values:
print("False Positives: {}".format(fp))
print("False Negatives: {}".format(fn))
@@ -81,7 +83,8 @@ def correctness_measure(G, G_hat, print_values=True):
print("Precision: {}".format(precision))
print("Recall: {}".format(recall))
print("F1 score: {}".format(f1_score))
- print("Fall Out: {}".format(fall_out))
+ print("Fall Out: {}".format(fall_out))
+ print("l2_norm: {}".format(norm))
return fp, fn, tp, tn
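For reference, the quantities printed by correctness_measure (including the newly added l2 norm) reduce to entry-wise confusion counts over the two weight matrices; the following is a hypothetical standalone equivalent, not code from the repository:

```python
import numpy as np

def confusion_counts(true_mat, est_mat, tau=0.0):
    # Entry-wise confusion counts between the true and estimated matrices.
    true_e, est_e = true_mat > tau, est_mat > tau
    tp = int(np.sum(true_e & est_e))
    fp = int(np.sum(~true_e & est_e))
    fn = int(np.sum(true_e & ~est_e))
    tn = int(np.sum(~true_e & ~est_e))
    l2 = np.linalg.norm(true_mat - est_mat)  # the norm added by this commit
    return fp, fn, tp, tn, l2
```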
diff --git a/src/cascade_creation.py b/src/cascade_creation.py
index 1c365b9..a71fce7 100644
--- a/src/cascade_creation.py
+++ b/src/cascade_creation.py
@@ -9,9 +9,11 @@ class InfluenceGraph(nx.DiGraph):
"""
networkX graph with mat and logmat attributes
"""
- def __init__(self, max_proba=None, min_proba=None, *args, **kwargs):
+ def __init__(self, max_proba=None, min_proba=None, sparse_edges=False,
+ *args, **kwargs):
self.max_proba = max_proba
self.min_proba = min_proba
+ self.sparse_edges = sparse_edges
super(InfluenceGraph, self).__init__(*args, **kwargs)
def erdos_init(self, n, p):
@@ -61,8 +63,16 @@ class InfluenceGraph(nx.DiGraph):
@property
def mat(self):
if not hasattr(self, '_mat'):
- self._mat = self.max_proba * np.random.rand(len(self), len(self)
+ if self.min_proba is None or self.max_proba is None:
+ print("You forgot to initialize the min and max proba")
+ self._mat = np.zeros((self.number_of_nodes(), self.number_of_nodes()))
+ else:
+ self._mat = self.max_proba * np.random.rand(len(self), len(self)
) * np.asarray(nx.adjacency_matrix(self).todense().T)
+ if self.sparse_edges:
+ # Add small spurious weights on a random ~33% of entries (approximately sparse regime)
+ self._mat += .1 * np.random.rand(len(self), len(self)
+ ) * np.random.binomial(1, p=.33, size=(len(self), len(self)))
return self._mat
@property
@@ -175,7 +185,7 @@ def add_edges_from_proba_vector(G, p_node, node, floor_cstt):
floor_parent = np.nonzero(p_node*(p_node > floor_cstt))
for parent in floor_parent[0]:
G.add_edge(parent, node)
- #TODO: update G's mat matrix
+ G.mat[node] = p_node.T  # update G's weight matrix for this node
return G
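The revised mat property draws uniform weights on true edges and, when sparse_edges is set, superimposes small spurious weights. A standalone sketch of that construction (assuming a dense 0/1 adjacency array `adj`; the function name is illustrative):

```python
import numpy as np

def weight_matrix(adj, max_proba, sparse_edges=False, rng=np.random):
    # Uniform weights in [0, max_proba] on the edges of adj (transposed, as above).
    n = adj.shape[0]
    mat = max_proba * rng.rand(n, n) * adj.T
    if sparse_edges:
        # Small weights (< 0.1) on a random ~33% of all entries: the
        # approximately sparse regime exercised in the experiments.
        mat += 0.1 * rng.rand(n, n) * rng.binomial(1, p=0.33, size=(n, n))
    return mat
```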
diff --git a/src/convex_optimization.py b/src/convex_optimization.py
index e355bc6..fce89b8 100644
--- a/src/convex_optimization.py
+++ b/src/convex_optimization.py
@@ -80,7 +80,7 @@ def type_lasso(lbda, n_cascades):
return f_x, f_xz
-@timeout.timeout(10)
+@timeout.timeout(70)
def diff_and_opt(M_val, w_val, f_x, f_xz):
if M_val.dtype == bool:
@@ -90,7 +90,7 @@ def diff_and_opt(M_val, w_val, f_x, f_xz):
def F(x=None, z=None):
if x is None:
- return 0, cvxopt.matrix(-.001, (n,1))
+ return 0, cvxopt.matrix(-.7, (n,1))
elif z is None:
y, y_diff = f_x(x, M_val, w_val)
return cvxopt.matrix(float(y), (1, 1)),\
@@ -116,7 +116,7 @@ def diff_and_opt(M_val, w_val, f_x, f_xz):
" given to the solver")
except ValueError:
print("Domain Error, skipping to next node")
- theta = np.zeros(len(w_val))
+ theta = np.zeros(M_val.shape[1])
if cvxopt.solvers.options['show_progress']:
print(1 - np.exp(theta))
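The two changes above (starting point and fallback dimension) both live inside diff_and_opt's cvxopt.solvers.cp callback, which follows cvxopt's three-way protocol: F() returns (m, x0), F(x) returns (f, Df), and F(x, z) returns (f, Df, H). A self-contained toy instance of that protocol, with f(x) = ||x||^2 standing in for the actual likelihood:

```python
import numpy as np
import cvxopt

def make_F(n, x0=-0.7):
    # Toy cvxopt.solvers.cp callback; x0 = -0.7 matches the new starting point.
    def F(x=None, z=None):
        if x is None:
            return 0, cvxopt.matrix(x0, (n, 1))  # 0 nonlinear constraints
        xa = np.array(list(x))
        f = cvxopt.matrix(float(xa @ xa), (1, 1))
        Df = cvxopt.matrix((2.0 * xa).tolist(), (1, n))  # gradient as 1 x n row
        if z is None:
            return f, Df
        H = cvxopt.matrix(2.0 * z[0] * np.eye(n))  # z-weighted Hessian
        return f, Df, H
    return F

# sol = cvxopt.solvers.cp(make_F(5))  # minimizer at x = 0
```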
diff --git a/src/make_plots.py b/src/make_plots.py
index 28d89e2..10fd657 100644
--- a/src/make_plots.py
+++ b/src/make_plots.py
@@ -27,11 +27,14 @@ def compare_greedy_and_lagrange_cs284r():
algorithms.correctness_measure(G, G_hat, print_values=True)
-def compute_graph(graph_name, n_cascades, lbda, passed_function):
+def compute_graph(graph_name, n_cascades, lbda, passed_function, min_proba,
+ max_proba, sparse_edges=False):
"""
Test running time on different algorithms
"""
- G = cascade_creation.InfluenceGraph(max_proba=.7, min_proba=.2)
+ G = cascade_creation.InfluenceGraph(max_proba=max_proba,
+ min_proba=min_proba,
+ sparse_edges=sparse_edges)
G.import_from_file(graph_name)
A = cascade_creation.generate_cascades(G, p_init=.05, n_cascades=n_cascades)
@@ -51,7 +54,8 @@ def plot_watts_strogatz_graph():
plt.clf()
fig = plt.figure(1)
labels = [50, 100, 500, 1000, 2000, 5000]
- x = [np.log(50), np.log(100), np.log(500), np.log(1000), np.log(2000), np.log(5000)]
+ x = [np.log(50), np.log(100), np.log(500),
+ np.log(1000), np.log(2000), np.log(5000)]
sparse_recov = [.25, .32, .7, .82, .89, .92]
max_likel = [.21, .29, .67, .8, .87, .9]
lasso = [.07, .30, .46, .65, .86, .89]
@@ -97,10 +101,14 @@ def plot_ROC_curve(figure_name):
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.grid(color="lightgrey")
- ax.plot(recall_lasso_200, precision_lasso_200, 'ko-', color="lightseagreen", label="Lasso-200 cascades")
- ax.plot(recall_sparse_200, precision_sparse_200, 'ko-', color="k", label="Our Method-200 cascades")
- ax.plot(recall_lasso_50, precision_lasso_50, 'ko-', color="orange", label="Lasso-50 cascades")
- ax.plot(recall_sparse_50, precision_sparse_50, 'ko-', color="cornflowerblue", label="Our Method-50 cascades")
+ ax.plot(recall_lasso_200, precision_lasso_200, 'ko-',
+ color="lightseagreen", label="Lasso-200 cascades")
+ ax.plot(recall_sparse_200, precision_sparse_200, 'ko-',
+ color="k", label="Our Method-200 cascades")
+ ax.plot(recall_lasso_50, precision_lasso_50, 'ko-',
+ color="orange", label="Lasso-50 cascades")
+ ax.plot(recall_sparse_50, precision_sparse_50, 'ko-',
+ color="cornflowerblue", label="Our Method-50 cascades")
plt.legend(loc="upper right")
plt.savefig("../paper/figures/"+"ROC_curve.pdf")
@@ -108,19 +116,29 @@ def plot_ROC_curve(figure_name):
if __name__=="__main__":
if 0:
compute_graph("../datasets/watts_strogatz_300_30_point3.txt",
- n_cascades=100, lbda=.01, passed_function=
+ n_cascades=100, lbda=.01, min_proba=.2, max_proba=.7,
+ passed_function=
#convex_optimization.sparse_recovery)
#algorithms.greedy_prediction)
convex_optimization.sparse_recovery)
if 0:
compute_graph("../datasets/powerlaw_200_30_point3.txt",
- n_cascades=200, lbda=.01, passed_function=
+ n_cascades=200, lbda=.01, min_proba=.2, max_proba=.7,
+ passed_function=
#convex_optimization.sparse_recovery)
#algorithms.greedy_prediction)
convex_optimization.type_lasso)
if 0:
compute_graph("../datasets/barabasi_albert_300_30.txt",
- n_cascades=100, lbda=.002, passed_function=
+ n_cascades=100, lbda=.002, min_proba=.2,
+ max_proba=.7, passed_function=
convex_optimization.sparse_recovery)
#algorithms.greedy_prediction)
- #convex_optimization.type_lasso)
\ No newline at end of file
+ #convex_optimization.type_lasso)
+ if 1:
+ compute_graph("../datasets/kronecker_graph_256_cross.txt",
+ n_cascades=1000, lbda=.001, min_proba=.2, max_proba=.7,
+ passed_function=
+ convex_optimization.sparse_recovery,
+ #convex_optimization.type_lasso,
+ sparse_edges=False)
\ No newline at end of file