From 4f673d21722aba9afa87af633c33e83cfd6a802f Mon Sep 17 00:00:00 2001
From: Thibaut Horel
Date: Fri, 11 Dec 2015 21:39:50 -0500
Subject: Some bullshit on using SGD for online learning (however I think the
 intuition is correct and could be used to obtain a formal guarantee)

---
 finale/sections/active.tex | 17 ++++++++++++++---
 finale/sparse.bib          |  9 +++++++++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/finale/sections/active.tex b/finale/sections/active.tex
index 7b9b390..3e130aa 100644
--- a/finale/sections/active.tex
+++ b/finale/sections/active.tex
@@ -101,7 +101,18 @@ a Bernouilli variable of parameter $\Theta_{i,j}$.
 proposed approximation of $U$.
 \end{remark}
 
-\paragraph{Online Bayesian Updates} bullshit on SGD on data streams. Cite
-"SGD as an online algorithm for data streams". Should tie this with our VI
-algorithm.
+\emph{Computational Considerations.} Given the online nature of the active
+learning scenario described above, it is crucial that the algorithm used to
+perform Bayesian inference supports online updates. This is the case when
+stochastic gradient descent (SGD) is used to optimize the variational
+inference objective, as described in Section 3.2 and as used in the
+experiments in Section 5. However, contrary to the standard application of
+SGD, each data point is processed only once. It has been noted in prior work
+(see for example \cite{bottou}) that when SGD is run on an infinite data
+stream, with each data point processed only once, it directly optimizes the
+expected loss under the distribution of the input data stream (as opposed to
+the empirical distribution of a fixed data set, as in standard offline
+learning). In our case, since we learn the graph actively, the distribution
+of the input data stream converges to the uniform distribution, which shows
+the consistency of the resulting inference method.
 
diff --git a/finale/sparse.bib b/finale/sparse.bib
index 11e7c27..1690ee2 100644
--- a/finale/sparse.bib
+++ b/finale/sparse.bib
@@ -1,3 +1,12 @@
+@incollection{bottou,
+  title={Large-scale machine learning with stochastic gradient descent},
+  author={Bottou, L{\'e}on},
+  booktitle={Proceedings of COMPSTAT'2010},
+  pages={177--186},
+  year={2010},
+  publisher={Springer}
+}
+
 @inproceedings{shababo,
   title={Bayesian inference and online experimental design for mapping neural microcircuits},
   author={Shababo, Ben and Paige, Brooks and Pakman, Ari and Paninski, Liam},
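
Aside, not part of the patch: a minimal LaTeX sketch of the single-pass SGD
interpretation the new paragraph borrows from \cite{bottou}. The notation
($\theta_t$, $\eta_t$, $\ell$, $\mathcal{D}$) is generic and assumed for
illustration, not taken from the paper.

% Sketch only: offline SGD targets the empirical risk of a fixed data set,
% \(\hat{R}_n(\theta) = \tfrac{1}{n}\sum_{i=1}^n \ell(\theta; x_i)\).
% On a data stream, each point $x_t$ is drawn fresh from the stream
% distribution $\mathcal{D}$ and used exactly once:
\[
  \theta_{t+1} = \theta_t - \eta_t\, \nabla_\theta \ell(\theta_t; x_t),
  \qquad x_t \sim \mathcal{D},
\]
% so each stochastic gradient is an unbiased estimate of
% \(\nabla_\theta R(\theta_t)\), where
% \(R(\theta) = \mathbb{E}_{x \sim \mathcal{D}}[\ell(\theta; x)]\); that is,
% single-pass SGD descends the expected loss under \(\mathcal{D}\) rather than
% the empirical loss of a fixed sample.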