From f106e9fdf13f4f4c2dd5f5411f5e6fd5e1559c9d Mon Sep 17 00:00:00 2001
From: jeanpouget-abadie
Date: Fri, 6 Nov 2015 10:45:45 -0500
Subject: adding to section Bayes

---
 finale/mid_report.tex | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 simulation/bayes.py   |  4 +--
 2 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/finale/mid_report.tex b/finale/mid_report.tex
index 42a28c3..e831fcc 100644
--- a/finale/mid_report.tex
+++ b/finale/mid_report.tex
@@ -1,7 +1,9 @@
 \documentclass[10pt]{article}
 \usepackage[utf8x]{inputenc}
 \usepackage{amsmath, amssymb, amsthm, microtype}
-\usepackage[pagebackref=false,breaklinks=true,colorlinks=true,citecolor=blue]{hyperref}
+\usepackage{algpseudocode, algorithm, algorithmicx}
+\usepackage[pagebackref=false,breaklinks=true,
+  colorlinks=true,citecolor=blue]{hyperref}
 \usepackage[capitalize, noabbrev]{cleveref}
 \usepackage{graphicx}
 \usepackage{bbm}
@@ -201,5 +203,79 @@
 MLE estimator.
 
 \subsection{Bayesian Approach}
 
+\paragraph{Notation}
+We adopt a Bayesian approach to the previous problem. To fix the notation
+introduced above, we let $G=(V, \Theta)$ be a weighted directed graph with
+$\Theta \in \R_+^{k \times k}$, and we place a prior $\mathcal{D}$ on $\Theta$.
+Let $x_0$ be a source node, sampled from a distribution $p_s$ over the vertices
+$V$ of the graph $G$. Since we assume for now that the state of the source node
+is entirely observed, the choice of $p_s$ is not relevant to the inference of
+$\Theta$. Finally, we let $x_t$ (resp.~$\mathbbm{1}_{S_t}$) be the binary
+vectors encoding the infected (resp.~susceptible) nodes at time $t$:
+\begin{alignat*}{2}
+    \Theta & \sim \mathcal{D}\qquad &&\text{(prior on $\Theta$)} \\
+    x_0 & \sim p_s &&\text{(random source distribution)}\\
+    \forall t\geq 0,\; x_{t+1} \mid x_t, \Theta &\sim \mathrm{Ber}\bigl(f(\Theta\cdot x_t)\cdot
+\mathbbm{1}_{S_t}\bigr)
+    &&\text{(cascade process)}
+\end{alignat*}
+
+\paragraph{Graph prior}
+Prior work has focused on MLE estimation regularized with the lasso, as
+summarized in the section above. This is equivalent to choosing an independent
+Laplace prior for $\Theta$:
+$$(\Theta_{i,j})_{i,j}\sim \prod_{i,j} \mathrm{Laplace}(0,1)$$
+In the context of social network learning, however, we can place more
+informative priors. In particular, we can:
+
+\begin{itemize}
+\item take into account prior knowledge of the existence (or absence) of
+specific edges;
+\item take into account common graph structures, such as triangles and
+clustering.
+\end{itemize}
+
+A common prior over graphs is the exponential random graph model (ERGM)~\cite{},
+defined by a feature vector $s(G)$ and the probability distribution:
+$$P(G \mid \Theta) \propto \exp \left( s(G)\cdot \Theta \right)$$
+
+\paragraph{Inference}
+We can sample from the posterior by MCMC\@. However, this might not be the
+fastest approach, and we could benefit greatly from an alternative method:
+\begin{itemize}
+\item EM~\cite{}
+\item Variational Inference~\cite{}
+\end{itemize}
+
+\subsection{Active Learning}
+
+In this setup, the source $x_0$ is no longer drawn from a random distribution
+$p_s$ but is chosen by the designer of the experiment. Once a source is chosen,
+a cascade is drawn from the Markovian process $\mathcal{D}'$ defined above. We
+wish to choose the source that maximises the information gain on the parameter
+$\Theta$, so as to speed up learning. Concretely, we choose the source that
+maximises the mutual information between $\Theta$ and the observed cascade at
+each step.
+The mutual information can be estimated for each candidate source node by
+sampling:
+\begin{equation*}
+I\bigl((x_t)_{t\geq 1}, \Theta \mid x_0 = i\bigr) =
+\mathbb{E}_{\substack{\Theta \sim \mathcal{D}_n \\ x \mid \Theta, i \sim \mathcal{D}'}}\log p(x\mid\Theta)
+- \mathbb{E}_{\substack{\Theta \sim \mathcal{D}_n \\ x' \mid \Theta, i \sim \mathcal{D}'}} \log p(x')
+\end{equation*}
+where $\mathcal{D}_n$ denotes the current posterior on $\Theta$ after $n$
+observed cascades, with $\mathcal{D}_0 = \mathcal{D}$. The second term involves
+marginalizing over $\Theta$: $p(x') = \mathbb{E}_{\Theta \sim \mathcal{D}_n}\, p(x'\mid \Theta)$.
+
+\begin{algorithm}
+\caption{Active Bayesian Learning through Cascades (ABC)}
+\begin{algorithmic}[1]
+\State $\mathcal{D}_0 \gets \mathcal{D}$ \Comment{Initial prior on $\Theta$}
+\For{$n = 0, 1, 2, \dots$}
+\State $i \leftarrow \arg\max_{i} I\bigl((x_t)_{t \geq 1}, \Theta \mid x_0 = i\bigr)$
+\Comment{Maximize mutual information under $\mathcal{D}_n$}
+\State Observe $(x_t)_{t\geq1} \mid x_0 = i$ \Comment{Observe cascade from source $i$}
+\State $\mathcal{D}_{n+1} \gets \text{posterior of $\Theta \sim \mathcal{D}_n$
+given $(x_t)_{t\geq1}$}$ \Comment{Update posterior on $\Theta$}
+\EndFor
+\end{algorithmic}
+\end{algorithm}
+
 \end{document}
diff --git a/simulation/bayes.py b/simulation/bayes.py
index 5114f70..7d8567b 100644
--- a/simulation/bayes.py
+++ b/simulation/bayes.py
@@ -78,11 +78,11 @@ if __name__=="__main__":
     g = np.log(1. / (1 - p * g))
 
     print('running the graph-level MC set-up')
-    cascades = main.simulate_cascades(100, g)
+    cascades = main.simulate_cascades(1000, g)
     infected, susc = main.build_cascade_list(cascades)
     model = mc_graph_setup(infected, susc)
     mcmc = pymc.MCMC(model)
-    mcmc.sample(10**3, 100)
+    mcmc.sample(10**4, 1000)
     fig, ax = plt.subplots(len(g), len(g))
     for i in xrange(len(g)):
         for j in xrange(len(g)):
-- 
cgit v1.2.3-70-g09d2
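
Note (not part of the patch): the following is a minimal Python sketch of the
cascade process defined in the Notation paragraph added to mid_report.tex,
x_{t+1} | x_t, Theta ~ Ber(f(Theta . x_t) * 1_{S_t}), together with its
log-likelihood. It is written for Python 3 / NumPy rather than the Python 2 /
pymc code in simulation/bayes.py; the link function f(z) = 1 - exp(-z) (chosen
to match the reparameterization g = np.log(1. / (1 - p * g)) in bayes.py), the
edge convention theta[i, j] = weight of edge i -> j, and the helper names
simulate_cascade / cascade_loglik are illustrative assumptions.

import numpy as np

def simulate_cascade(theta, source, T=10, rng=None):
    """Draw one cascade (x_0, ..., x_T) from a single source node.

    theta  : (k, k) non-negative weight matrix, theta[i, j] = weight of edge i -> j
    source : index of the seed node x_0
    Returns a (T+1, k) 0/1 array; row t marks the nodes newly infected at step t.
    """
    rng = np.random.default_rng() if rng is None else rng
    k = theta.shape[0]
    x0 = np.zeros(k, dtype=int)
    x0[source] = 1
    susceptible = np.ones(k, dtype=bool)
    susceptible[source] = False
    cascade = [x0]
    for _ in range(T):
        # Infection probabilities f(Theta . x_t), with f(z) = 1 - exp(-z),
        # masked by the susceptible indicator 1_{S_t}.
        p_infect = 1.0 - np.exp(-(cascade[-1] @ theta))
        x_next = ((rng.random(k) < p_infect) & susceptible).astype(int)
        susceptible &= ~x_next.astype(bool)
        cascade.append(x_next)
    return np.asarray(cascade)

def cascade_loglik(cascade, theta, eps=1e-12):
    """log p(x_1, ..., x_T | x_0, theta) under the same Bernoulli model."""
    ll = 0.0
    susceptible = cascade[0] == 0
    for x_t, x_next in zip(cascade[:-1], cascade[1:]):
        p = np.clip(1.0 - np.exp(-(x_t @ theta)), eps, 1.0 - eps)
        # Only nodes still susceptible at time t contribute a Bernoulli term.
        terms = x_next * np.log(p) + (1 - x_next) * np.log(1.0 - p)
        ll += terms[susceptible].sum()
        susceptible &= x_next == 0
    return ll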
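
In the same spirit, here is a sketch of the source-selection step of the ABC
algorithm: the mutual information I((x_t)_{t>=1}, Theta | x_0 = i) is estimated
by Monte Carlo from posterior samples of Theta, and the source maximising it is
returned. It reuses simulate_cascade and cascade_loglik from the sketch above,
needs SciPy for logsumexp, and the function names and the reuse of the same
posterior draws for the marginal term are again assumptions made for
illustration, not a definitive implementation.

import numpy as np
from scipy.special import logsumexp  # requires SciPy

def estimate_mi(source, theta_samples, T=10, n_cascades=20, rng=None):
    """Monte Carlo estimate of I((x_t)_{t>=1}, Theta | x_0 = source).

    theta_samples : (m, k, k) array of draws from the current posterior D_n,
                    e.g. an MCMC trace.
    Uses I = E[log p(x | Theta)] - E[log p(x)], where the marginal
    p(x) = E_{Theta ~ D_n} p(x | Theta) is approximated over the same draws.
    """
    rng = np.random.default_rng() if rng is None else rng
    m = len(theta_samples)
    cond_terms, marg_terms = [], []
    for theta in theta_samples:
        for _ in range(n_cascades):
            cascade = simulate_cascade(theta, source, T=T, rng=rng)
            # First term: log p(x | Theta) for the Theta that generated x.
            cond_terms.append(cascade_loglik(cascade, theta))
            # Second term: log p(x), marginalized over the posterior draws.
            ll_all = np.array([cascade_loglik(cascade, th) for th in theta_samples])
            marg_terms.append(logsumexp(ll_all) - np.log(m))
    return np.mean(cond_terms) - np.mean(marg_terms)

def select_source(theta_samples, n_nodes, **kwargs):
    """Pick the source maximising the estimated MI (the arg max step of ABC)."""
    scores = [estimate_mi(i, theta_samples, **kwargs) for i in range(n_nodes)]
    return int(np.argmax(scores))

In practice theta_samples would come from the MCMC trace computed in
simulation/bayes.py; how to extract and reshape it depends on how mc_graph_setup
names its variables, so that step is not shown here.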