-rw-r--r-- | paper/paper.tex                |  1 |
-rw-r--r-- | paper/sections/experiments.tex | 19 |
-rw-r--r-- | paper/sparse.bib               | 27 |
-rw-r--r-- | src/algorithms.py              |  5 |
-rw-r--r-- | src/cascade_creation.py        | 16 |
-rw-r--r-- | src/convex_optimization.py     |  6 |
-rw-r--r-- | src/make_plots.py              | 40 |
7 files changed, 96 insertions, 18 deletions
diff --git a/paper/paper.tex b/paper/paper.tex
index f922b3b..4627890 100644
--- a/paper/paper.tex
+++ b/paper/paper.tex
@@ -102,6 +102,7 @@
 \input{sections/model.tex}
 
 \section{Results}
+\label{sec:results}
 \input{sections/results.tex}
 
 \section{A lower bound}
diff --git a/paper/sections/experiments.tex b/paper/sections/experiments.tex
index efcf754..4876927 100644
--- a/paper/sections/experiments.tex
+++ b/paper/sections/experiments.tex
@@ -8,6 +8,25 @@
 \caption{Watts-Strogatz Model. 200 nodes, 20000 edges.}
 \end{figure}
 
+In this section, we empirically validate the results and assumptions of Section~\ref{sec:results} for different parameter initializations ($n$, $m$, $\lambda$) and for varying levels of sparsity. We compare our algorithm to two state-of-the-art algorithms, \textsc{Greedy} and \textsc{MLE} from \cite{Netrapalli:2012}, and we introduce an approximate \textsc{Lasso} algorithm whose effectiveness we validate empirically.
+
+\paragraph{Experimental setup}
+
+We restrict our analysis to synthetic graphs, which are known to exhibit the properties of real social networks. We consider a Watts-Strogatz graph ($300$ nodes, $4500$ edges), a Barabasi-Albert graph ($300$ nodes, $16200$ edges), the Holme-Kim power-law graph ($200$ nodes, $9772$ edges) \cite{Holme:2002}, and the recently introduced Kronecker graph ($256$ nodes, $10000$ edges) \cite{Leskovec:2010}. For every reported data point, we generate $n$ cascades from the Independent Cascade model for $n \in \{100, 500, 1000, 2000, 5000\}$ and compute the estimated graph $\hat{\cal G}$ for each algorithm.
+
+In the case of the \textsc{Lasso}, \textsc{MLE}, and \textsc{Sparse MLE} algorithms, we construct the edge set of $\hat{\cal G}$ as $\bigcup_{j \in V} \{(i, j) : \Theta_{ij} > 0.1\}$. We report the F1-score $= 2\,\frac{\text{precision}\cdot\text{recall}}{\text{precision}+\text{recall}}$. For each graph, the initial probability of a node being a source is fixed at $0.05$, i.e.\ an average of $15$ source nodes per cascade. Except when testing for approximately sparse graphs, all edge weights are chosen uniformly in the interval $[0.2, 0.7]$.
+
+\paragraph{Comparison with other algorithms}
+\begin{itemize}
+\item \textsc{Netrate} assumes continuous-time cascades with a single source per cascade, so a direct comparison with it is not meaningful in our setting.
+\item This is a modeling decision: in practice, we do not have access to the cascade's ``patient 0'', which makes learning the graph harder.
+\item \textsc{Lasso}
+\end{itemize}
+
+\paragraph{Quantifying robustness}
+
 \begin{itemize}
 \item Compare edge recovery to First Edge and Greedy as number of cascades grows on different types of graphs...
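For concreteness, the edge-construction rule and F1-score described in the experimental setup above reduce to a few lines of NumPy. This is a minimal sketch, not code from the commit: theta_hat stands for the estimated influence matrix, and both helper names are ours.

    import numpy as np

    def edges_from_estimate(theta_hat, threshold=0.1):
        """Edge set of G_hat: all pairs (i, j) with Theta_ij > threshold."""
        parents, children = np.nonzero(theta_hat > threshold)
        return set(zip(parents.tolist(), children.tolist()))

    def f1_score(true_edges, estimated_edges):
        """F1 = 2 * precision * recall / (precision + recall),
        guarded against empty edge sets."""
        tp = len(true_edges & estimated_edges)
        precision = tp / max(len(estimated_edges), 1)
        recall = tp / max(len(true_edges), 1)
        return 2 * precision * recall / max(precision + recall, 1e-12)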
diff --git a/paper/sparse.bib b/paper/sparse.bib
index c05d318..48017cc 100644
--- a/paper/sparse.bib
+++ b/paper/sparse.bib
@@ -344,3 +344,30 @@
   year = "2009"
   bibsource = {dblp computer science bibliography, http://dblp.org}
 }
+@article{Leskovec:2010,
+  author    = {Jure Leskovec and
+               Deepayan Chakrabarti and
+               Jon M. Kleinberg and
+               Christos Faloutsos and
+               Zoubin Ghahramani},
+  title     = {Kronecker Graphs: An Approach to Modeling Networks},
+  journal   = {Journal of Machine Learning Research},
+  volume    = {11},
+  pages     = {985--1042},
+  year      = {2010},
+  url       = {http://doi.acm.org/10.1145/1756006.1756039},
+  doi       = {10.1145/1756006.1756039},
+  timestamp = {Thu, 22 Apr 2010 13:26:26 +0200},
+  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/jmlr/LeskovecCKFG10},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+@article{Holme:2002,
+  author  = {Petter Holme and Beom Jun Kim},
+  title   = {Growing scale-free networks with tunable clustering},
+  journal = {Physical Review E},
+  volume  = {65},
+  issue   = {2},
+  pages   = {026107},
+  year    = {2002}
+}
diff --git a/src/algorithms.py b/src/algorithms.py
index 51f57bf..24a63f3 100644
--- a/src/algorithms.py
+++ b/src/algorithms.py
@@ -72,6 +72,8 @@ def correctness_measure(G, G_hat, print_values=True):
     f1_score = 2. * tp / (2 * tp + fp + fn)
     fall_out = 1. * fp / (fp + tn)
 
+    norm = np.linalg.norm(G.mat - G_hat.mat)
+
     if print_values:
         print("False Positives: {}".format(fp))
         print("False Negatives: {}".format(fn))
@@ -81,7 +83,8 @@ def correctness_measure(G, G_hat, print_values=True):
         print("Precision: {}".format(precision))
         print("Recall: {}".format(recall))
         print("F1 score: {}".format(f1_score))
-        print("Fall Out: {}".format(fall_out))
+        print("Fall Out: {}".format(fall_out))
+        print("l2_norm: {}".format(norm))
 
     return fp, fn, tp, tn
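The new norm line above adds the Frobenius norm of the difference between the true and estimated weight matrices to the thresholded confusion-matrix metrics already printed by correctness_measure. A sketch of how those counts can be derived from the two matrices; the function name and the 0.1 threshold default are our assumptions, not the repository's API.

    import numpy as np

    def confusion_counts(G_mat, G_hat_mat, threshold=0.1):
        """fp, fn, tp, tn between true and thresholded estimated adjacency."""
        true_adj = G_mat > 0
        est_adj = G_hat_mat > threshold
        tp = int(np.sum(true_adj & est_adj))
        fp = int(np.sum(~true_adj & est_adj))
        fn = int(np.sum(true_adj & ~est_adj))
        tn = int(np.sum(~true_adj & ~est_adj))
        return fp, fn, tp, tn

    # Fall-out and the new l2 metric then follow directly:
    # fall_out = 1. * fp / (fp + tn); l2 = np.linalg.norm(G_mat - G_hat_mat)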
diff --git a/src/cascade_creation.py b/src/cascade_creation.py
index 1c365b9..a71fce7 100644
--- a/src/cascade_creation.py
+++ b/src/cascade_creation.py
@@ -9,9 +9,11 @@ class InfluenceGraph(nx.DiGraph):
     """
     networkX graph with mat and logmat attributes
     """
-    def __init__(self, max_proba=None, min_proba=None, *args, **kwargs):
+    def __init__(self, max_proba=None, min_proba=None, sparse_edges=False,
+                 *args, **kwargs):
         self.max_proba = max_proba
         self.min_proba = min_proba
+        self.sparse_edges = sparse_edges
         super(InfluenceGraph, self).__init__(*args, **kwargs)
 
     def erdos_init(self, n, p):
@@ -61,8 +63,16 @@ class InfluenceGraph(nx.DiGraph):
     @property
     def mat(self):
         if not hasattr(self, '_mat'):
-            self._mat = self.max_proba * np.random.rand(len(self), len(self)
+            if self.min_proba is None or self.max_proba is None:
+                print("You forgot to initialize the min and max proba")
+                self._mat = np.zeros((self.number_of_nodes(), self.number_of_nodes()))
+            else:
+                self._mat = self.max_proba * np.random.rand(len(self), len(self)
                 ) * np.asarray(nx.adjacency_matrix(self).todense().T)
+            if self.sparse_edges:
+                # Adding sparse non-edges to the graph
+                self._mat += .1 * np.random.rand(len(self), len(self)
+                    ) * np.random.binomial(1, p=.33, size=(len(self), len(self)))
         return self._mat
 
     @property
@@ -175,7 +185,7 @@ def add_edges_from_proba_vector(G, p_node, node, floor_cstt):
     floor_parent = np.nonzero(p_node * (p_node > floor_cstt))
     for parent in floor_parent[0]:
         G.add_edge(parent, node)
-    #TODO: update G's mat matrix
+    G.mat[node] = p_node.T
     return G
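Note that the mat property above draws weights as max_proba * U(0, 1) on true edges, i.e. uniform on [0, max_proba] rather than on the [min_proba, max_proba] interval quoted in the experiments section. A minimal sketch of the sampling scheme as stated in the paper, with the sparse_edges option reproduced; the helper name and the rng argument are ours.

    import numpy as np

    def influence_matrix(adj, min_proba=0.2, max_proba=0.7,
                         sparse_edges=False, rng=np.random):
        """Weights uniform in [min_proba, max_proba] on true edges; with
        sparse_edges, weak spurious edges (weight < 0.1) are added to
        roughly a third of all pairs, mirroring the diff above."""
        n = adj.shape[0]
        mat = (min_proba + (max_proba - min_proba) * rng.rand(n, n)) * adj.T
        if sparse_edges:
            mat += .1 * rng.rand(n, n) * rng.binomial(1, p=.33, size=(n, n))
        return mat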
diff --git a/src/convex_optimization.py b/src/convex_optimization.py
index e355bc6..fce89b8 100644
--- a/src/convex_optimization.py
+++ b/src/convex_optimization.py
@@ -80,7 +80,7 @@ def type_lasso(lbda, n_cascades):
     return f_x, f_xz
 
 
-@timeout.timeout(10)
+@timeout.timeout(70)
 def diff_and_opt(M_val, w_val, f_x, f_xz):
 
     if M_val.dtype == bool:
@@ -90,7 +90,7 @@ def diff_and_opt(M_val, w_val, f_x, f_xz):
 
     def F(x=None, z=None):
         if x is None:
-            return 0, cvxopt.matrix(-.001, (n, 1))
+            return 0, cvxopt.matrix(-.7, (n, 1))
         elif z is None:
             y, y_diff = f_x(x, M_val, w_val)
             return cvxopt.matrix(float(y), (1, 1)),\
@@ -116,7 +116,7 @@ def diff_and_opt(M_val, w_val, f_x, f_xz):
                   " given to the solver")
     except ValueError:
         print("Domain Error, skipping to next node")
-        theta = np.zeros(len(w_val))
+        theta = np.zeros(M_val.shape[1])
 
     if cvxopt.solvers.options['show_progress']:
         print(1 - np.exp(theta))
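Two of these changes deserve a note. The solver works in the reparametrization theta = log(1 - p) (hence the final print of 1 - np.exp(theta)), so moving the starting point from -.001 to -.7 starts at p = 1 - exp(-.7), roughly 0.5 and well inside the [0.2, 0.7] weight range, instead of at p close to 0. The fallback theta = np.zeros(M_val.shape[1]) now matches the dimension of the optimization variable rather than the length of w_val. Below is a sketch of the cvxopt.solvers.cp calling convention that F implements; f_x, f_xz, and their return shapes are assumptions, not the repository's exact signatures.

    import numpy as np
    import cvxopt

    def solve_node_problem(n, f_x, f_xz, x0=-.7):
        """Skeleton of the F(x, z) interface expected by cvxopt.solvers.cp."""
        def F(x=None, z=None):
            if x is None:
                # (number of nonlinear constraints, starting point)
                return 0, cvxopt.matrix(x0, (n, 1))
            if z is None:
                y, y_diff = f_x(x)           # objective value and gradient
                return cvxopt.matrix(float(y), (1, 1)), cvxopt.matrix(y_diff).T
            y, y_diff, y_hess = f_xz(x)      # value, gradient and Hessian
            return (cvxopt.matrix(float(y), (1, 1)),
                    cvxopt.matrix(y_diff).T,
                    z[0] * cvxopt.matrix(y_hess))
        # Linear constraint theta <= 0 keeps p = 1 - exp(theta) inside [0, 1)
        G = cvxopt.matrix(np.eye(n))
        h = cvxopt.matrix(0., (n, 1))
        return cvxopt.solvers.cp(F, G=G, h=h)['x']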
diff --git a/src/make_plots.py b/src/make_plots.py
index 28d89e2..10fd657 100644
--- a/src/make_plots.py
+++ b/src/make_plots.py
@@ -27,11 +27,14 @@ def compare_greedy_and_lagrange_cs284r():
         algorithms.correctness_measure(G, G_hat, print_values=True)
 
 
-def compute_graph(graph_name, n_cascades, lbda, passed_function):
+def compute_graph(graph_name, n_cascades, lbda, passed_function, min_proba,
+                  max_proba, sparse_edges=False):
     """
     Test running time on different algorithms
     """
-    G = cascade_creation.InfluenceGraph(max_proba=.7, min_proba=.2)
+    G = cascade_creation.InfluenceGraph(max_proba=max_proba,
+                                        min_proba=min_proba,
+                                        sparse_edges=sparse_edges)
     G.import_from_file(graph_name)
     A = cascade_creation.generate_cascades(G, p_init=.05,
                                            n_cascades=n_cascades)
@@ -51,7 +54,8 @@ def plot_watts_strogatz_graph():
     plt.clf()
     fig = plt.figure(1)
     labels = [50, 100, 500, 1000, 2000, 5000]
-    x = [np.log(50), np.log(100), np.log(500), np.log(1000), np.log(2000), np.log(5000)]
+    x = [np.log(50), np.log(100), np.log(500),
+         np.log(1000), np.log(2000), np.log(5000)]
     sparse_recov = [.25, .32, .7, .82, .89, .92]
     max_likel = [.21, .29, .67, .8, .87, .9]
     lasso = [.07, .30, .46, .65, .86, .89]
@@ -97,10 +101,14 @@ def plot_ROC_curve(figure_name):
     plt.xlabel("Recall")
     plt.ylabel("Precision")
     plt.grid(color="lightgrey")
-    ax.plot(recall_lasso_200, precision_lasso_200, 'ko-', color="lightseagreen", label="Lasso-200 cascades")
-    ax.plot(recall_sparse_200, precision_sparse_200, 'ko-', color="k", label="Our Method-200 cascades")
-    ax.plot(recall_lasso_50, precision_lasso_50, 'ko-', color="orange", label="Lasso-50 cascades")
-    ax.plot(recall_sparse_50, precision_sparse_50, 'ko-', color="cornflowerblue", label="Our Method-50 cascades")
+    ax.plot(recall_lasso_200, precision_lasso_200, 'ko-',
+            color="lightseagreen", label="Lasso-200 cascades")
+    ax.plot(recall_sparse_200, precision_sparse_200, 'ko-',
+            color="k", label="Our Method-200 cascades")
+    ax.plot(recall_lasso_50, precision_lasso_50, 'ko-',
+            color="orange", label="Lasso-50 cascades")
+    ax.plot(recall_sparse_50, precision_sparse_50, 'ko-',
+            color="cornflowerblue", label="Our Method-50 cascades")
     plt.legend(loc="upper right")
     plt.savefig("../paper/figures/"+"ROC_curve.pdf")
 
@@ -108,19 +116,29 @@ def plot_ROC_curve(figure_name):
 if __name__=="__main__":
     if 0:
         compute_graph("../datasets/watts_strogatz_300_30_point3.txt",
-                      n_cascades=100, lbda=.01, passed_function=
+                      n_cascades=100, lbda=.01, min_proba=.2, max_proba=.7,
+                      passed_function=
                       #convex_optimization.sparse_recovery)
                       #algorithms.greedy_prediction)
                       convex_optimization.sparse_recovery)
     if 0:
         compute_graph("../datasets/powerlaw_200_30_point3.txt",
-                      n_cascades=200, lbda=.01, passed_function=
+                      n_cascades=200, lbda=.01, min_proba=.2, max_proba=.7,
+                      passed_function=
                       #convex_optimization.sparse_recovery)
                       #algorithms.greedy_prediction)
                       convex_optimization.type_lasso)
     if 0:
         compute_graph("../datasets/barabasi_albert_300_30.txt",
-                      n_cascades=100, lbda=.002, passed_function=
+                      n_cascades=100, lbda=.002, min_proba=.2,
+                      max_proba=.7, passed_function=
                       convex_optimization.sparse_recovery)
                       #algorithms.greedy_prediction)
-                      #convex_optimization.type_lasso)
\ No newline at end of file
+                      #convex_optimization.type_lasso)
+    if 1:
+        compute_graph("../datasets/kronecker_graph_256_cross.txt",
+                      n_cascades=1000, lbda=.001, min_proba=.2, max_proba=.7,
+                      passed_function=
+                      convex_optimization.sparse_recovery,
+                      #convex_optimization.type_lasso,
+                      sparse_edges=False)
\ No newline at end of file
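For reference, compute_graph's pipeline is: build an InfluenceGraph, draw cascades with cascade_creation.generate_cascades (not shown in this diff), then hand them to the selected recovery routine. A minimal sketch of the Independent Cascade process being simulated, under the p_init=.05 convention above; simulate_cascade and the matrix orientation (mat[i, j] is the probability that an infected j activates i, matching the transposed adjacency in InfluenceGraph.mat) are our assumptions.

    import numpy as np

    def simulate_cascade(mat, p_init=.05, rng=np.random):
        """One Independent Cascade run over the weight matrix mat."""
        n = mat.shape[0]
        infected = rng.rand(n) < p_init   # each node is a source w.p. p_init
        frontier = infected.copy()
        while frontier.any():
            # every frontier node gets a single activation attempt per child
            attempts = rng.rand(n, n) < mat
            newly = attempts[:, frontier].any(axis=1) & ~infected
            infected |= newly
            frontier = newly
        return infected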