Diffstat (limited to 'finale/mid_report.tex')
| -rw-r--r-- | finale/mid_report.tex | 52 |
1 file changed, 31 insertions, 21 deletions
diff --git a/finale/mid_report.tex b/finale/mid_report.tex
index bd7f475..4abcd47 100644
--- a/finale/mid_report.tex
+++ b/finale/mid_report.tex
@@ -8,8 +8,9 @@
 \usepackage{graphicx, subfig}
 \usepackage{bbm}
 \usepackage{fullpage}
-
+\input{def}
 \DeclareMathOperator*{\argmax}{arg\,max}
+\DeclareMathOperator*{\argmin}{arg\,min}
 \DeclareMathOperator{\E}{\mathbb{E}}
 \let\P\relax
 \DeclareMathOperator{\P}{\mathbb{P}}
@@ -457,42 +458,47 @@ with $\binom{4}{2}=6$ parameters to learn
 with the MCMC package PyMC\@. We plot in Figure~\ref{betapriorbayeslearning}
 the progressive learning of $\Theta$ for increasing numbers of observations.
 Of note, since the GLMC model does not include self-loops, the diagonal terms
 are never properly learned, which is
-expected but not undesirable. We notice that the existence or not of an edge is
-(relatively) quickly learned, with the posterior on edges with no weight
-converging to $0$ after $100$ cascades. To get a concentrated posterior around
-the true non-zero edge weigth requires $1000$ cascades, which is unreasonably
-high considering the small number of parameters that we are learning in this
-experiment.
+expected but not undesirable. We notice that the absence of an edge is
+(relatively) quickly learned: the posterior on edges with no weight converges
+to $0$ after $100$ cascades. For existing edges though, the posterior visually
+concentrates only after $1000$ cascades, which is unreasonably high considering
+the small number of parameters that we are learning in this experiment.
 
 \subsection{Active Learning}
-Finally, we propose an Active Learning formulation of Network Inference
+Finally, we propose an Active Learning formulation of the Network Inference
 problem. In this setup, the source $x^0$ is no longer drawn from a random
 distribution $p_s$ but a single source $i$ is chosen by the designer of the
 experiment. Once a source is drawn, a cascade is drawn from the Markovian
-process, $\mathcal{D'}$. We wish to choose the source which maximises the
-information gain on the parameter $\Theta$, so as to speed up learning. We
-suggest to choose the source which maximises the mutual information between
-$\theta$ and $X$ at each time step. The mutual information can be computed
-node-by-node by sampling:
+process $\mathcal{D'}$ described in \eqref{eq:markov}.
+
+Our suggested selection strategy is to select, at each time step, the source
+node which maximises the information gain (mutual information) on the
+parameter $\Theta$ resulting from observing the cascade $\bx$. The mutual
+information can be computed node-by-node by sampling:
 \begin{equation*}
-I((x_t)_{t\geq 1} ,\Theta | x_0 = i) = \mathbb{E}_{\substack{\Theta \sim D_t \\
+I(\bx ,\Theta | x^0 = i) = \mathbb{E}_{\substack{\Theta \sim D_t \\
 x | \Theta, i \sim D'}}\log p(x|\Theta) - \mathbb{E}_{\substack{\Theta \sim D_t \\
 x' | \Theta, i \sim D'}} \log p(x')
 \end{equation*}
-The second term involves marginalizing over $\Theta$: $p(x') =
-\mathbb{E}_{\Theta \sim D_t} p(x'| \Theta)$.
+where $D_t$ denotes the posterior distribution on $\Theta$ after the first $t$
+cascades. Note that the second term involves marginalizing over $\Theta$:
+$p(x') = \mathbb{E}_{\Theta \sim D_t} p(x'| \Theta)$. The full specification of
+our Active Learning procedure is described in Algorithm 1. For the final
+version of this project, we plan to test experimentally the learning speedup
+induced by using Active Learning compared to independent observations from
+a random source.
 
 \begin{algorithm}
 \caption{Active Bayesian Learning through Cascades (ABC)}
 \begin{algorithmic}[1]
-\State $\theta \sim \mathcal{D}_0 = \mathcal{D}$ \Comment{Initial prior on
-$\theta$}
+\State $\Theta \sim \mathcal{D}_0 = \mathcal{D}$ \Comment{Initial prior on
+$\Theta$}
 \While{True}
-\State $i \leftarrow \arg\min_{i} I(\theta, (x_t)_{t \geq 1} | x_0 = i)$
+\State $i \leftarrow \argmax_{i} I(\Theta, \bx | x_0 = i)$
 \Comment{Maximize mutual information}
 \State Observe $(x_t)_{t\geq1} |x_0 = i$ \Comment{Observe cascade from source}
-\State $\mathcal{D}_{t+1} \gets \text{posterior of $\theta \sim \mathcal{D}_t$
+\State $\mathcal{D}_{t+1} \gets \text{posterior of $\Theta \sim \mathcal{D}_t$
 given $(x_t)_{t\geq1}$}$ \Comment{Update posterior on $\theta$}
 \EndWhile
 \end{algorithmic}
@@ -500,5 +506,9 @@ given $(x_t)_{t\geq1}$}$ \Comment{Update posterior on $\theta$}
 
 \bibliography{sparse}{}
 \bibliographystyle{abbrv}
-
+\appendix
+\section{\textsf{mle.py}}
+\input{mle}
+\section{\textsf{bayes.py}}
+\input{bayes}
 \end{document}
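
The added text says the mutual information is computed node-by-node by sampling. Below is a minimal Python sketch of that Monte Carlo estimate, not code from this repository: estimate_mutual_information, simulate_cascade (a draw from the Markovian process D' given Theta and a source) and cascade_loglik (log p(x | Theta)) are hypothetical names standing in for whatever the project's mle.py / bayes.py actually provide.

import numpy as np

def estimate_mutual_information(source, posterior_samples, simulate_cascade,
                                cascade_loglik, n_cascades_per_theta=10):
    """Monte Carlo estimate of I(x; Theta | x^0 = source).

    First term:  E_{Theta ~ D_t, x ~ D'(Theta, source)} log p(x | Theta)
    Second term: E_{x ~ marginal} log p(x), with p(x) = E_{Theta ~ D_t} p(x | Theta)
    `posterior_samples` is a list of Theta matrices drawn from the current
    posterior D_t; the two callables are assumed helpers for the cascade model.
    """
    conditional_terms, marginal_terms = [], []
    for theta in posterior_samples:
        for _ in range(n_cascades_per_theta):
            x = simulate_cascade(theta, source)
            # log p(x | Theta) under the parameter that generated this cascade
            conditional_terms.append(cascade_loglik(x, theta))
            # log p(x): average p(x | Theta') over posterior samples Theta',
            # computed via log-sum-exp for numerical stability
            logliks = np.array([cascade_loglik(x, theta_prime)
                                for theta_prime in posterior_samples])
            m = logliks.max()
            marginal_terms.append(m + np.log(np.exp(logliks - m).mean()))
    return np.mean(conditional_terms) - np.mean(marginal_terms)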
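
Similarly, a rough sketch of the ABC loop of Algorithm 1, reusing the estimator above. The sample_posterior and observe_cascade interfaces are assumptions (e.g. re-running the PyMC sampler on all cascades observed so far, and running a real cascade from the chosen source), not part of the report's code.

def run_abc(nodes, sample_posterior, observe_cascade, simulate_cascade,
            cascade_loglik, n_rounds=100):
    """Sketch of Algorithm 1 (ABC): pick the source maximising the estimated
    information gain, observe a cascade, update the posterior, repeat."""
    cascades = []                                # (source, cascade) pairs so far
    theta_samples = sample_posterior(cascades)   # t = 0: samples from the prior
    for t in range(n_rounds):
        # choose the source with the largest estimated mutual information
        scores = {i: estimate_mutual_information(i, theta_samples,
                                                 simulate_cascade, cascade_loglik)
                  for i in nodes}
        source = max(scores, key=scores.get)
        # observe a cascade started at the chosen source
        cascades.append((source, observe_cascade(source)))
        # update the posterior D_{t+1} (e.g. by re-running MCMC on all cascades)
        theta_samples = sample_posterior(cascades)
    return theta_samples

Note that each round scores every candidate source, so the cost per round grows with the number of nodes times the number of posterior samples; in practice the estimator would likely be run with a small number of sampled cascades per Theta.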
