From 6f876aa0fa8596eec71eec640ca9556b48034b6f Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Sat, 12 Dec 2015 00:28:23 -0500
Subject: Abandoned by my project partner... finishing up experiment section

---
 finale/finale.png               | Bin 0 -> 45728 bytes
 finale/sections/appendix.tex    | 22 ++++++++++++++
 finale/sections/experiments.tex | 69 +++++++++++++++++++++++-----------------
 3 files changed, 61 insertions(+), 30 deletions(-)
 create mode 100644 finale/finale.png

diff --git a/finale/finale.png b/finale/finale.png
new file mode 100644
index 0000000..59378f9
Binary files /dev/null and b/finale/finale.png differ
diff --git a/finale/sections/appendix.tex b/finale/sections/appendix.tex
index e69de29..b53de70 100644
--- a/finale/sections/appendix.tex
+++ b/finale/sections/appendix.tex
@@ -0,0 +1,22 @@
+\begin{figure*}
+\centering
+\subfigure[][50 cascades]{
+\includegraphics[scale=.4]{../simulation/plots/2015-11-05_22:52:30.pdf}}\hspace{1em}%
+\subfigure[][100 cascades]{
+\includegraphics[scale=.4]{../simulation/plots/2015-11-05_22:52:47.pdf}}\\
+\subfigure[][150 cascades]{
+\includegraphics[scale=.4]{../simulation/plots/2015-11-05_22:53:24.pdf}}\hspace{1em}%
+\subfigure[][200 cascades]{
+\includegraphics[scale=.4]{../simulation/plots/2015-11-05_22:55:39.pdf}}\\
+\subfigure[][250 cascades]{
+\includegraphics[scale=.4]{../simulation/plots/2015-11-05_22:57:26.pdf}}\hspace{1em}%
+\subfigure[][1000 cascades]{
+\includegraphics[scale=.4]{../simulation/plots/2015-11-05_22:58:29.pdf}}
+\caption{Bayesian inference of $\Theta$ with MCMC, using a $\mathrm{Beta}(1, 1)$
+prior on each edge. In each figure, the plot in the $i^{th}$ row and $j^{th}$
+column is a histogram of samples drawn from the posterior of the corresponding
+edge weight $\Theta_{i, j}$. The red line indicates the true value of the edge
+weight; if an edge does not exist (has weight $0$), the red line coincides
+with the $y$-axis.}
+\label{betapriorbayeslearning}
+\end{figure*}
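For concreteness, here is a minimal sketch of the per-edge Beta-prior model
from the figure above, written against the PyMC 2 API of that era (pymc.Beta,
pymc.Lambda, pymc.Bernoulli, pymc.MCMC). The 4-node setup, the hard-coded
cascade step, and the independent-cascade form of the step likelihood are
illustrative assumptions, not the project's actual code:

    import numpy as np
    import pymc

    n = 4
    # One Beta(1, 1) prior per potential edge weight Theta_{i,j}.
    theta = pymc.Beta('theta', alpha=1.0, beta=1.0, size=(n, n))

    # One observed cascade step: infected nodes try to infect the others.
    infected = np.array([1.0, 0.0, 0.0, 0.0])  # indicator of infected nodes
    outcome = np.array([0.0, 1.0, 1.0, 0.0])   # nodes infected at the next step

    # Independent cascade: P(j infected) = 1 - prod_i (1 - Theta_{i,j})^{x_i}.
    # (A full model would only score susceptible nodes; omitted for brevity.)
    p_infect = pymc.Lambda(
        'p_infect',
        lambda t=theta: 1.0 - np.exp(np.dot(infected, np.log(1.0 - t))))

    obs = pymc.Bernoulli('obs', p=p_infect, value=outcome, observed=True)

    mcmc = pymc.MCMC([theta, p_infect, obs])
    mcmc.sample(iter=20000, burn=5000)
    samples = mcmc.trace('theta')[:]  # one posterior histogram per edge

A sampler like this is cheap for a handful of nodes, but the number of edge
parameters grows quadratically with the graph size, which is consistent with
the scaling limits reported in the diff below.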
diff --git a/finale/sections/experiments.tex b/finale/sections/experiments.tex
index 74025f2..534ea7b 100644
--- a/finale/sections/experiments.tex
+++ b/finale/sections/experiments.tex
@@ -2,31 +2,35 @@
 In this section, we apply the framework from Sections~\ref{sec:bayes}
 and~\ref{sec:active} to synthetic graphs and cascades to validate the Bayesian
 approach as well as the effectiveness of the active learning heuristics.
 
-We started with using the library PyMC to sample from the posterior distribution
-directly. This method was shown to scale poorly with the number of nodes in the
-graph, such that graphs of size $\geq 100$ could not be reasonably be learned
-quickly. In Section~\ref{sec:appendix}, we show the progressive convergence of
-the posterior around the true values of the edge weights of the graph for a
-graph of size $4$.
+We started by using the PyMC library~\cite{pymc}, which implements MCMC
+algorithms, to sample directly from the posterior distribution. We found that
+this method scales poorly with the number of nodes in the graph: graphs of
+size $\geq 100$ could not reasonably be learned in the time available. In the
+appendix, we show the progressive convergence of the posterior around the true
+values of the edge weights for a graph of size $4$.
 
-In order to show the effect of the active learning policies, we needed to scale
-the experiments to graphs of size $\geq 1000$, which required the use of the
-variational inference procedure. A graph of size $1000$ has $1M$ parameters to
-be learned ($2M$ in the product-prior in Eq.~\ref{eq:gaussianprior}). The
-maximum-likelihood estimator converges to an $l_\infty$-error of $.05$ for most
-graphs after having observed at least $100M$ distinct cascade-steps.
+\paragraph{VI implementation.} In order to show the effect of the active
+learning policies, we needed to scale the experiments to graphs of size
+$\geq 1000$. A graph of size $1000$ has $1M$ parameters to be learned ($2M$ in
+the product-prior of Eq.~\ref{eq:gaussianprior}), and the maximum-likelihood
+estimator converges to an $\ell_\infty$-error of $.05$ for most graphs only
+after observing at least $100M$ distinct cascade-steps. Given the number of
+parameters to learn and the number of cascades to process, we had to resort to
+variational inference.
 
 We therefore used the Blocks~\cite{blocks} framework, written on top of Theano
-for highly efficient SGD routines with minimal memory overhead. By encoding a
-cascade step as a tuple of two binary vectors, one for the infected nodes and
-one for the susceptible nodes, the variational inference objective can be
-written as a sum of two matrix multiplications, which Theano optimizes for
-on GPU.
+for automatic differentiation and highly efficient SGD routines with minimal
+memory overhead. By encoding a cascade step as a tuple of two binary vectors,
+one for the infected nodes and one for the susceptible nodes, the variational
+inference objective can be written as a simple computation graph on matrices,
+which Theano compiles to machine code optimized for GPU execution.
 
-Since intuitively if nodes are exchangeable in our graph, the active learning
-policy will have little impact over the uniform-source policy, we decided to
-test our algorithms on an unbalanced graph $\mathcal{G}_A$ whose adjacency
-matrix $A$ is as follows:
+\paragraph{Synthetic graph.} Intuitively, if the nodes of the graph are
+exchangeable (\emph{i.e.}, their topological properties, such as their
+degree, are identical), the active learning policy will make little difference
+compared to the uniform-source policy. We therefore test our algorithms on an
+unbalanced graph $\mathcal{G}_A$ whose adjacency matrix $A$ is as follows:
 \begin{equation*}
 A = \left( \begin{array}{cccccc}
 0 & 1 & 1 & 1 & \dots & 1 \\
@@ -37,15 +41,20 @@ A = \left( \begin{array}{cccccc}
 \end{array} \right)
 \end{equation*}
+In other words, graph $\mathcal{G}_A$ is a wheel graph in which the center
+node points to every other node and every other node points to its (clockwise)
+neighbor.
 
-In other words, graph $\mathcal{G}_A$ is a star graph where every node, except
-for the center node, points to its (clock-wise) neighbor. In order for the
-baseline to be fair, we choose to create cascades starting from the source node
-on the fly both in the case of the uniform source and for the active learning
-policy. Each cascade is therefore `observed' only once. We plot the RMSE of the
-graph i.e. $RMSE^2 = \frac{1}{n^2} \|\hat{\mathbf{\Theta}} -
-\mathbf{\Theta}\|^2_2$.
+\paragraph{Baseline.} In order for the baseline to be fair, we generate
+cascades from the selected source node on the fly, both for the uniform-source
+policy and for the active learning policy. Each cascade is therefore
+`observed' only once. We plot the RMSE of the estimated graph, \emph{i.e.},
+$\mathrm{RMSE}^2 = \frac{1}{n^2} \|\hat{\mathbf{\Theta}}
+- \mathbf{\Theta}\|^2_2$. The results are presented in
+Figure~\ref{fig:graphical}.
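To make the binary encoding of cascade steps concrete, here is a hedged numpy
sketch, not the project's Blocks/Theano code: it builds the adjacency matrix
of the graph G_A above and evaluates the likelihood term of the objective for
a batch of encoded cascade steps with matrix products. The independent-cascade
form of the step likelihood and all names are this sketch's assumptions:

    import numpy as np

    def wheel_graph(n):
        # Adjacency matrix of G_A: node 0 points to every outer node, and
        # outer node i points to its clockwise neighbor.
        A = np.zeros((n, n))
        A[0, 1:] = 1.0
        outer = np.arange(1, n)
        A[outer, outer % (n - 1) + 1] = 1.0   # 1->2, ..., n-2->n-1, n-1->1
        return A

    def step_log_likelihood(theta, X, S, Y):
        # X, S, Y: (batch, n) binary matrices marking, for each cascade step,
        # the infected, the susceptible, and the newly infected nodes.
        # log P(j survives | infected x) = sum_i x_i * log(1 - theta[i, j]).
        log_surv = X.dot(np.log1p(-theta))   # a single matrix product
        p_inf = -np.expm1(log_surv)          # 1 - exp(log_surv)
        eps = 1e-12                          # guards log(0) for unreachable nodes
        return (S * Y * np.log(p_inf + eps)
                + S * (1 - Y) * log_surv).sum()

    theta_true = 0.3 * wheel_graph(1000)     # illustrative edge weights on G_A

In a Theano implementation the same expression would be built symbolically, so
the gradient of the full variational objective (this likelihood term plus the
prior and entropy terms) comes from automatic differentiation, and the batched
matrix products run on the GPU.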
 
-graphs/datasets
+\begin{figure}
+\centering
+\includegraphics[scale=.4]{finale.png}
+\caption{RMSE of the estimated graph $\hat{\mathbf{\Theta}}$ for the
+uniform-source and active learning policies.}
+\label{fig:graphical}
+\end{figure}
 
-bullshit
--
cgit v1.2.3-70-g09d2
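Finally, the error metric defined in the patch can be computed in a few lines
of numpy; a minimal sketch (the function name is ours):

    import numpy as np

    def rmse(theta_hat, theta):
        # RMSE^2 = ||theta_hat - theta||_2^2 / n^2; numpy's default matrix
        # norm is the Frobenius norm, matching the definition above.
        n = theta.shape[0]
        return np.linalg.norm(theta_hat - theta) / n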