From 4f673d21722aba9afa87af633c33e83cfd6a802f Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Fri, 11 Dec 2015 21:39:50 -0500
Subject: Some bullshit on using SGD for online learning (however I think the
 intuition is correct and could be used to obtain a formal guarantee)

---
 finale/sections/active.tex | 17 ++++++++++++++---
 finale/sparse.bib          |  9 +++++++++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/finale/sections/active.tex b/finale/sections/active.tex
index 7b9b390..3e130aa 100644
--- a/finale/sections/active.tex
+++ b/finale/sections/active.tex
@@ -101,7 +101,18 @@ a Bernouilli variable of parameter $\Theta_{i,j}$.
 proposed approximation of $U$.
 \end{remark}
 
-\paragraph{Online Bayesian Updates} bullshit on SGD on data streams. Cite
-"SGD as an online algorithm for data streams". Should tie this with our VI
-algorithm.
+\emph{Computational Considerations.} Given the online nature of the active
+learning scenario described above, it is crucial that the algorithm used to
+perform Bayesian inference supports online updates. This is the case when
+stochastic gradient descent (SGD) is used to optimize the variational
+inference objective, as described in Section 3.2 and as used in the
+experiments in Section 5. However, contrary to the standard application of
+SGD, each data point is processed only once. It has been noted in prior work
+(see for example \cite{bottou}) that when SGD is run on an infinite data
+stream, with each data point processed only once, it directly optimizes the
+expected loss under the distribution of the input data stream (as opposed to
+the empirical distribution of a fixed data set, as in standard offline
+learning). In our case, since we learn the graph actively, the distribution
+of the input data stream converges to the uniform distribution, which shows
+the consistency of the resulting inference method.
 
diff --git a/finale/sparse.bib b/finale/sparse.bib
index 11e7c27..1690ee2 100644
--- a/finale/sparse.bib
+++ b/finale/sparse.bib
@@ -1,3 +1,12 @@
+@incollection{bottou,
+  title={Large-scale machine learning with stochastic gradient descent},
+  author={Bottou, L{\'e}on},
+  booktitle={Proceedings of COMPSTAT'2010},
+  pages={177--186},
+  year={2010},
+  publisher={Springer}
+}
+
 @inproceedings{shababo,
   title={Bayesian inference and online experimental design for mapping neural microcircuits},
   author={Shababo, Ben and Paige, Brooks and Pakman, Ari and Paninski, Liam},
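
Aside, not part of the patch: a minimal LaTeX sketch of the single-pass SGD
interpretation the new paragraph borrows from \cite{bottou}. The notation
($\theta_t$, $\eta_t$, $\ell$, $\mathcal{D}$) is generic and assumed for
illustration, not taken from the paper.

% Sketch only: offline SGD targets the empirical risk of a fixed data set,
% \(\hat{R}_n(\theta) = \tfrac{1}{n}\sum_{i=1}^n \ell(\theta; x_i)\).
% On a data stream, each point $x_t$ is drawn fresh from the stream
% distribution $\mathcal{D}$ and used exactly once:
\[
  \theta_{t+1} = \theta_t - \eta_t\, \nabla_\theta \ell(\theta_t; x_t),
  \qquad x_t \sim \mathcal{D},
\]
% so each stochastic gradient is an unbiased estimate of
% \(\nabla_\theta R(\theta_t)\), where
% \(R(\theta) = \mathbb{E}_{x \sim \mathcal{D}}[\ell(\theta; x)]\); that is,
% single-pass SGD descends the expected loss under \(\mathcal{D}\) rather than
% the empirical loss of a fixed sample.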