Diffstat (limited to 'finale')
-rw-r--r--   finale/mid_report.tex   49
-rw-r--r--   finale/sparse.bib       12
2 files changed, 58 insertions, 3 deletions
diff --git a/finale/mid_report.tex b/finale/mid_report.tex
index e831fcc..684d0a8 100644
--- a/finale/mid_report.tex
+++ b/finale/mid_report.tex
@@ -5,8 +5,9 @@
 \usepackage[pagebackref=false,breaklinks=true,
 colorlinks=true,citecolor=blue]{hyperref}
 \usepackage[capitalize, noabbrev]{cleveref}
-\usepackage{graphicx}
+\usepackage{graphicx, subfig}
 \usepackage{bbm}
+\usepackage{fullpage}
 
 \DeclareMathOperator*{\argmax}{arg\,max}
 \DeclareMathOperator{\E}{\mathbb{E}}
@@ -241,10 +242,50 @@
 $$P(G | \Theta) \propto \exp \left( s(G)\cdot \Theta \right)$$
 We can sample from the posterior by MCMC\@. This might not be the fastest
 solution however. We could greatly benefit from using an alternative method:
 \begin{itemize}
-\item EM~\cite{}
-\item Variational Inference~\cite{}
+\item EM\@. This approach was used in \cite{linderman2014discovering} to learn
+the parameters of a Hawkes process, a closely related inference problem.
+\item Variational Inference. This approach was used
+in~\cite{linderman2015scalable} as an extension of the paper cited in the
+previous bullet point.
 \end{itemize}
+
+
+\begin{figure}
+\subfloat[][50 cascades]{
+\includegraphics[scale=.4]{../simulation/plots/2015-11-05_22:52:30.pdf}}
+\subfloat[][100 cascades]{
+\includegraphics[scale=.4]{../simulation/plots/2015-11-05_22:52:47.pdf}}\\
+\subfloat[][150 cascades]{
+\includegraphics[scale=.4]{../simulation/plots/2015-11-05_22:53:24.pdf}}
+\subfloat[][200 cascades]{
+\includegraphics[scale=.4]{../simulation/plots/2015-11-05_22:55:39.pdf}}\\
+\subfloat[][250 cascades]{
+\includegraphics[scale=.4]{../simulation/plots/2015-11-05_22:57:26.pdf}}
+\subfloat[][1000 cascades]{
+\includegraphics[scale=.4]{../simulation/plots/2015-11-05_22:58:29.pdf}}
+\caption{Bayesian inference of $\Theta$ with MCMC using a $Beta(1, 1)$ prior.
+For each figure, the plot $(i, j)$ on the $i^{th}$ row and $j^{th}$ column
+represents a histogram of samples taken from the posterior of the corresponding
+edge $\Theta_{i, j}$. The red line indicates the true value of the edge weight.
+If an edge does not exist (has weight $0$), the red line coincides with the
+y axis.}
+\label{betapriorbayeslearning}
+\end{figure}
+
+\paragraph{Experiments}
+
+We ran experiments on a simple network with $4$ nodes, hence $\binom{4}{2}=6$
+parameters to learn, using the MCMC package PyMC\@. We plot in
+Figure~\ref{betapriorbayeslearning} the progressive learning of $\Theta$ for
+increasing numbers of observations. Of note, since the IC model does not
+include self-loops, the diagonal terms are never properly learned, which is
+expected and not undesirable. We notice that whether or not an edge exists is
+learned (relatively) quickly, with the posterior on zero-weight edges
+concentrating at $0$ after $100$ cascades. Obtaining a posterior concentrated
+around the true non-zero edge weights requires $1000$ cascades, which is
+unreasonably high given the small number of parameters in this experiment.
+
 \subsection{Active Learning}
 
 In this setup, $S$ is no longer drawn from a random distribution $p_s$ but is
@@ -277,5 +318,7 @@
 given $(x_t)_{t\geq1}$}$
 \Comment{Update posterior on $\theta$}
 \end{algorithmic}
 \end{algorithm}
+\bibliography{sparse}{}
+\bibliographystyle{plain}
 \end{document}
diff --git a/finale/sparse.bib b/finale/sparse.bib
index f50a0d2..9fc56df 100644
--- a/finale/sparse.bib
+++ b/finale/sparse.bib
@@ -504,4 +504,16 @@
 year = "2009"
 publisher={Springer}
 }
+@article{linderman2015scalable,
+  title={Scalable Bayesian Inference for Excitatory Point Process Networks},
+  author={Linderman, Scott W and Adams, Ryan P},
+  journal={arXiv preprint arXiv:1507.03228},
+  year={2015}
+}
+@article{linderman2014discovering,
+  title={Discovering latent network structure in point process data},
+  author={Linderman, Scott W and Adams, Ryan P},
+  journal={arXiv preprint arXiv:1402.0914},
+  year={2014}
+}
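Editor's note on the Experiments paragraph added above: the report samples the posterior over the IC edge weights $\Theta$ with MCMC under a $Beta(1, 1)$ prior using PyMC, but the PyMC model itself is not part of this commit. The following is a minimal independent sketch of the same idea, not the report's code: it simulates discrete-time IC cascades on a 4-node network and runs a hand-rolled component-wise Metropolis sampler over the 6 edge parameters. The true weights, the single-random-seed rule, the proposal scale and the iteration counts are illustrative assumptions.

# Sketch (NOT the report's PyMC code): Bayesian inference of IC edge weights
# Theta on a 4-node network from simulated cascades, with a flat Beta(1, 1)
# prior and a component-wise random-walk Metropolis sampler. True weights,
# seeding rule, proposal scale and iteration counts are illustrative.
import itertools
import numpy as np

rng = np.random.default_rng(0)

N = 4                                                  # number of nodes
EDGES = list(itertools.combinations(range(N), 2))      # 6 undirected edge parameters
TRUE_THETA = {(0, 1): 0.4, (1, 2): 0.7, (2, 3): 0.3}   # absent edges have weight 0


def edge_prob(theta, u, v):
    """Activation probability along the undirected edge {u, v}."""
    return theta.get((min(u, v), max(u, v)), 0.0)


def simulate_cascade(theta, seed):
    """Discrete-time IC cascade; returns the list of newly-active sets per step."""
    active, frontier = {int(seed)}, {int(seed)}
    steps = [set(frontier)]
    while frontier:
        new = set()
        for v in set(range(N)) - active:
            # v stays inactive with probability prod_u (1 - theta_uv) over the frontier
            p_fail = np.prod([1.0 - edge_prob(theta, u, v) for u in frontier])
            if rng.random() > p_fail:
                new.add(v)
        active |= new
        if new:
            steps.append(new)
        frontier = new
    return steps


def log_likelihood(theta, cascades):
    """IC log-likelihood of observed activation steps under edge weights theta."""
    ll = 0.0
    for steps in cascades:
        active = set(steps[0])
        for t, frontier in enumerate(steps):
            nxt = steps[t + 1] if t + 1 < len(steps) else set()
            for v in set(range(N)) - active:
                p_fail = np.prod([1.0 - edge_prob(theta, u, v) for u in frontier])
                ll += np.log(1.0 - p_fail) if v in nxt else np.log(p_fail)
            active |= nxt
    return ll


def log_posterior(theta_vec, cascades):
    # Beta(1, 1) is uniform on (0, 1), so inside the unit cube the log-posterior
    # is the log-likelihood up to an additive constant.
    if np.any(theta_vec <= 0.0) or np.any(theta_vec >= 1.0):
        return -np.inf
    return log_likelihood(dict(zip(EDGES, theta_vec)), cascades)


def metropolis(cascades, n_iter=1000, step=0.05):
    """Component-wise random-walk Metropolis over the edge weights."""
    theta = np.full(len(EDGES), 0.5)                   # start at the prior mean
    logp = log_posterior(theta, cascades)
    samples = []
    for _ in range(n_iter):
        for k in range(len(EDGES)):
            prop = theta.copy()
            prop[k] += rng.normal(0.0, step)
            logp_prop = log_posterior(prop, cascades)
            if np.log(rng.random()) < logp_prop - logp:
                theta, logp = prop, logp_prop
        samples.append(theta.copy())
    return np.array(samples)


# Simulate cascades from uniformly random single seeds, then sample the posterior.
cascades = [simulate_cascade(TRUE_THETA, rng.integers(N)) for _ in range(100)]
samples = metropolis(cascades)
for (u, v), mean in zip(EDGES, samples[200:].mean(axis=0)):   # discard burn-in
    print(f"theta[{u},{v}]: posterior mean {mean:.2f}, true {edge_prob(TRUE_THETA, u, v):.2f}")

As in Figure betapriorbayeslearning of the report, the posterior means for the zero-weight edges collapse quickly, while the non-zero weights need many more cascades to concentrate.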

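The Active Learning subsection is only partially visible in this diff: the seed set $S$ is chosen by the learner rather than drawn from $p_s$, and each round of the algorithm ends by updating the posterior on $\theta$. Building on the sketch above (it reuses N, EDGES, rng, TRUE_THETA, simulate_cascade and metropolis), the loop below illustrates that structure only; the seed-selection rule used here (seed the node whose incident edge posteriors have the largest variance) is purely a placeholder assumption, not the criterion from the report.

# Illustrative active-learning loop around the sampler above. The selection
# rule below is a placeholder assumption, not the report's criterion.
def pick_seed(samples):
    """Seed the node whose incident edge posteriors are most uncertain."""
    edge_var = samples.var(axis=0)              # posterior variance per edge parameter
    score = np.zeros(N)
    for k, (u, v) in enumerate(EDGES):
        score[u] += edge_var[k]
        score[v] += edge_var[k]
    return int(score.argmax())


observed = []
for t in range(20):
    seed = rng.integers(N) if t == 0 else pick_seed(posterior_samples)
    observed.append(simulate_cascade(TRUE_THETA, seed))     # run and record one cascade
    posterior_samples = metropolis(observed, n_iter=500)    # update posterior on theta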