aboutsummaryrefslogtreecommitdiffstats
path: root/finale/mid_report.tex
diff options
context:
space:
mode:
Diffstat (limited to 'finale/mid_report.tex')
-rw-r--r--finale/mid_report.tex52
1 files changed, 31 insertions, 21 deletions
diff --git a/finale/mid_report.tex b/finale/mid_report.tex
index bd7f475..4abcd47 100644
--- a/finale/mid_report.tex
+++ b/finale/mid_report.tex
@@ -8,8 +8,9 @@
\usepackage{graphicx, subfig}
\usepackage{bbm}
\usepackage{fullpage}
-
+\input{def}
\DeclareMathOperator*{\argmax}{arg\,max}
+\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator{\E}{\mathbb{E}}
\let\P\relax
\DeclareMathOperator{\P}{\mathbb{P}}
@@ -457,42 +458,47 @@ with $\binom{4}{2}=6$ parameters to learn with the MCMC package PyMC\@. We plot
in Figure~\ref{betapriorbayeslearning} the progressive learning of $\Theta$ for
increasing numbers of observations. Of note, since the GLMC model does not
include self-loops, the diagonal terms are never properly learned, which is
-expected but not undesirable. We notice that the existence or not of an edge is
-(relatively) quickly learned, with the posterior on edges with no weight
-converging to $0$ after $100$ cascades. To get a concentrated posterior around
-the true non-zero edge weigth requires $1000$ cascades, which is unreasonably
-high considering the small number of parameters that we are learning in this
-experiment.
+expected but not undesirable. We notice that the absence of an edge is
+(relatively) quickly learned: the posterior on edges with no weight converges
+to $0$ after $100$ cascades. For existing edges though, the posterior visually
+concentrates only after $1000$ cascades, which is unreasonably high considering
+the small number of parameters that we are learning in this experiment.
\subsection{Active Learning}
-Finally, we propose an Active Learning formulation of Network Inference
+Finally, we propose an Active Learning formulation of the Network Inference
problem. In this setup, the source $x^0$ is no longer drawn from a random
distribution $p_s$ but a single source $i$ is chosen by the designer of the
experiment. Once a source is drawn, a cascade is drawn from the Markovian
-process, $\mathcal{D'}$. We wish to choose the source which maximises the
-information gain on the parameter $\Theta$, so as to speed up learning. We
-suggest to choose the source which maximises the mutual information between
-$\theta$ and $X$ at each time step. The mutual information can be computed
-node-by-node by sampling:
+process $\mathcal{D'}$ described in \eqref{eq:markov}.
+
+Our suggested selection strategy is to select, at each time step, the
+source node which maximises the information gain (mutual information) on
+the parameter $\Theta$ resulting from observing the cascade $\bx$. The mutual
+information can be computed node-by-node by sampling:
\begin{equation*}
-I((x_t)_{t\geq 1} ,\Theta | x_0 = i) = \mathbb{E}_{\substack{\Theta \sim D_t \\
+I(\bx ,\Theta | x^0 = i) = \mathbb{E}_{\substack{\Theta \sim D_t \\
x | \Theta, i \sim D'}}\log p(x|\Theta) - \mathbb{E}_{\substack{\Theta \sim D_t
\\ x' | \Theta, i \sim D'}} \log p(x')
\end{equation*}
-The second term involves marginalizing over $\Theta$: $p(x') =
-\mathbb{E}_{\Theta \sim D_t} p(x'| \Theta)$.
+where $D_t$ denotes the posterior distribution on $\Theta$ after the first $t$
+cascades. Note that the second term involves marginalizing over $\Theta$:
+$p(x') = \mathbb{E}_{\Theta \sim D_t} p(x'| \Theta)$. The full specification of
+our Active Learning procedure is described in Algorithm 1. For the final
+version of this project, we plan to test experimentally the learning speedup
+induced by using Active Learning compared to independent observations from
+a random source.
\begin{algorithm}
\caption{Active Bayesian Learning through Cascades (ABC)}
\begin{algorithmic}[1]
-\State $\theta \sim \mathcal{D}_0 = \mathcal{D}$ \Comment{Initial prior on
-$\theta$}
+\State $\Theta \sim \mathcal{D}_0 = \mathcal{D}$ \Comment{Initial prior on
+$\Theta$}
\While{True}
-\State $i \leftarrow \arg\min_{i} I(\theta, (x_t)_{t \geq 1} | x_0 = i)$
+\State $i \leftarrow \argmax_{i} I(\Theta, \bx | x_0 = i)$
\Comment{Maximize mutual information}
\State Observe $(x_t)_{t\geq1} |x_0 = i$ \Comment{Observe cascade from source}
-\State $\mathcal{D}_{t+1} \gets \text{posterior of $\theta \sim \mathcal{D}_t$
+\State $\mathcal{D}_{t+1} \gets \text{posterior of $\Theta \sim \mathcal{D}_t$
given $(x_t)_{t\geq1}$}$ \Comment{Update posterior on $\theta$}
\EndWhile
\end{algorithmic}
@@ -500,5 +506,9 @@ given $(x_t)_{t\geq1}$}$ \Comment{Update posterior on $\theta$}
\bibliography{sparse}{}
\bibliographystyle{abbrv}
-
+\appendix
+\section{\textsf{mle.py}}
+\input{mle}
+\section{\textsf{bayes.py}}
+\input{bayes}
\end{document}