From f106e9fdf13f4f4c2dd5f5411f5e6fd5e1559c9d Mon Sep 17 00:00:00 2001
From: jeanpouget-abadie
Date: Fri, 6 Nov 2015 10:45:45 -0500
Subject: adding to section Bayes

---
 finale/mid_report.tex | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 simulation/bayes.py   |  4 +--
 2 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/finale/mid_report.tex b/finale/mid_report.tex
index 42a28c3..e831fcc 100644
--- a/finale/mid_report.tex
+++ b/finale/mid_report.tex
@@ -1,7 +1,9 @@
 \documentclass[10pt]{article}
 \usepackage[utf8x]{inputenc}
 \usepackage{amsmath, amssymb, amsthm, microtype}
-\usepackage[pagebackref=false,breaklinks=true,colorlinks=true,citecolor=blue]{hyperref}
+\usepackage{algpseudocode, algorithm, algorithmicx}
+\usepackage[pagebackref=false,breaklinks=true,
+  colorlinks=true,citecolor=blue]{hyperref}
 \usepackage[capitalize, noabbrev]{cleveref}
 \usepackage{graphicx}
 \usepackage{bbm}
@@ -201,5 +203,79 @@
 MLE estimator.
 
 \subsection{Bayesian Approach}
 
+\paragraph{Notation}
+We adopt a Bayesian approach to the previous problem. To fix the notation
+introduced above, we let $G=(V, \Theta)$ be a weighted directed graph with
+$\Theta \in \R_+^{k \times k}$, and we place a prior $\mathcal{D}$ on $\Theta$.
+Let $x_0$ be a source node, sampled from a distribution $p_s$ over the vertices
+$V$ of the graph $G$. Since we assume for now that the state of the source node
+is entirely observed, the choice of $p_s$ is not relevant to the inference of
+$\Theta$. Finally, we let $x_t$ (resp.~$\mathbbm{1}_{S_t}$) be the binary
+vectors encoding the infected (resp.~susceptible) nodes at time $t$:
+\begin{alignat*}{2}
+    \Theta & \sim \mathcal{D}\qquad &&\text{(prior on $\Theta$)} \\
+    x_0 & \sim p_s &&\text{(random source distribution)}\\
+    \forall t\geq 0,\; x_{t+1} \mid x_t, \Theta &\sim \mathrm{Ber}\bigl(f(\Theta\cdot x_t)\cdot
+\mathbbm{1}_{S_t}\bigr)
+    &&\text{(cascade process)}
+\end{alignat*}
+
+\paragraph{Graph prior}
+Prior work has focused on MLE estimation regularized with the lasso, as
+summarized in the section above. This is equivalent to choosing an independent
+Laplace prior for $\Theta$:
+$$(\Theta_{i,j})_{i,j}\sim \prod_{i,j} \mathrm{Laplace}(0,1)$$
+In the context of social network learning, however, we can place more
+informative priors. In particular, we can:
+
+\begin{itemize}
+\item take into account prior knowledge of the existence (or absence) of
+specific edges;
+\item take into account common graph structures, such as triangles and
+clustering.
+\end{itemize}
+
+A common prior over graphs is the exponential random graph model (ERGM)~\cite{},
+defined by a feature vector $s(G)$ and the probability distribution:
+$$P(G \mid \Theta) \propto \exp \left( s(G)\cdot \Theta \right)$$
+
+\paragraph{Inference}
+We can sample from the posterior by MCMC\@. However, this might not be the
+fastest approach, and we could benefit greatly from an alternative method:
+\begin{itemize}
+\item EM~\cite{}
+\item Variational Inference~\cite{}
+\end{itemize}
+
+\subsection{Active Learning}
+
+In this setup, the source $x_0$ is no longer drawn from a random distribution
+$p_s$ but is chosen by the designer of the experiment. Once a source is chosen,
+a cascade is drawn from the Markovian process $\mathcal{D}'$ defined above. We
+wish to choose the source that maximises the information gain on the parameter
+$\Theta$, so as to speed up learning. Concretely, we choose the source that
+maximises the mutual information between $\Theta$ and the observed cascade at
+each step.
+The mutual information can be estimated for each candidate source node by
+sampling:
+\begin{equation*}
+I\bigl((x_t)_{t\geq 1}, \Theta \mid x_0 = i\bigr) =
+\mathbb{E}_{\substack{\Theta \sim \mathcal{D}_n \\ x \mid \Theta, i \sim \mathcal{D}'}}\log p(x\mid\Theta)
+- \mathbb{E}_{\substack{\Theta \sim \mathcal{D}_n \\ x' \mid \Theta, i \sim \mathcal{D}'}} \log p(x')
+\end{equation*}
+where $\mathcal{D}_n$ denotes the current posterior on $\Theta$ after $n$
+observed cascades, with $\mathcal{D}_0 = \mathcal{D}$. The second term involves
+marginalizing over $\Theta$: $p(x') = \mathbb{E}_{\Theta \sim \mathcal{D}_n}\, p(x'\mid \Theta)$.
+
+\begin{algorithm}
+\caption{Active Bayesian Learning through Cascades (ABC)}
+\begin{algorithmic}[1]
+\State $\mathcal{D}_0 \gets \mathcal{D}$ \Comment{Initial prior on $\Theta$}
+\For{$n = 0, 1, 2, \dots$}
+\State $i \leftarrow \arg\max_{i} I\bigl((x_t)_{t \geq 1}, \Theta \mid x_0 = i\bigr)$
+\Comment{Maximize mutual information under $\mathcal{D}_n$}
+\State Observe $(x_t)_{t\geq1} \mid x_0 = i$ \Comment{Observe cascade from source $i$}
+\State $\mathcal{D}_{n+1} \gets \text{posterior of $\Theta \sim \mathcal{D}_n$
+given $(x_t)_{t\geq1}$}$ \Comment{Update posterior on $\Theta$}
+\EndFor
+\end{algorithmic}
+\end{algorithm}
+
 \end{document}
diff --git a/simulation/bayes.py b/simulation/bayes.py
index 5114f70..7d8567b 100644
--- a/simulation/bayes.py
+++ b/simulation/bayes.py
@@ -78,11 +78,11 @@ if __name__=="__main__":
     g = np.log(1. / (1 - p * g))
 
     print('running the graph-level MC set-up')
-    cascades = main.simulate_cascades(100, g)
+    cascades = main.simulate_cascades(1000, g)
     infected, susc = main.build_cascade_list(cascades)
     model = mc_graph_setup(infected, susc)
     mcmc = pymc.MCMC(model)
-    mcmc.sample(10**3, 100)
+    mcmc.sample(10**4, 1000)
     fig, ax = plt.subplots(len(g), len(g))
     for i in xrange(len(g)):
         for j in xrange(len(g)):
-- 
cgit v1.2.3-70-g09d2
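
Note (not part of the patch): the following is a minimal Python sketch of the
cascade process defined in the Notation paragraph added to mid_report.tex,
x_{t+1} | x_t, Theta ~ Ber(f(Theta . x_t) * 1_{S_t}), together with its
log-likelihood. It is written for Python 3 / NumPy rather than the Python 2 /
pymc code in simulation/bayes.py; the link function f(z) = 1 - exp(-z) (chosen
to match the reparameterization g = np.log(1. / (1 - p * g)) in bayes.py), the
edge convention theta[i, j] = weight of edge i -> j, and the helper names
simulate_cascade / cascade_loglik are illustrative assumptions.

import numpy as np

def simulate_cascade(theta, source, T=10, rng=None):
    """Draw one cascade (x_0, ..., x_T) from a single source node.

    theta  : (k, k) non-negative weight matrix, theta[i, j] = weight of edge i -> j
    source : index of the seed node x_0
    Returns a (T+1, k) 0/1 array; row t marks the nodes newly infected at step t.
    """
    rng = np.random.default_rng() if rng is None else rng
    k = theta.shape[0]
    x0 = np.zeros(k, dtype=int)
    x0[source] = 1
    susceptible = np.ones(k, dtype=bool)
    susceptible[source] = False
    cascade = [x0]
    for _ in range(T):
        # Infection probabilities f(Theta . x_t), with f(z) = 1 - exp(-z),
        # masked by the susceptible indicator 1_{S_t}.
        p_infect = 1.0 - np.exp(-(cascade[-1] @ theta))
        x_next = ((rng.random(k) < p_infect) & susceptible).astype(int)
        susceptible &= ~x_next.astype(bool)
        cascade.append(x_next)
    return np.asarray(cascade)

def cascade_loglik(cascade, theta, eps=1e-12):
    """log p(x_1, ..., x_T | x_0, theta) under the same Bernoulli model."""
    ll = 0.0
    susceptible = cascade[0] == 0
    for x_t, x_next in zip(cascade[:-1], cascade[1:]):
        p = np.clip(1.0 - np.exp(-(x_t @ theta)), eps, 1.0 - eps)
        # Only nodes still susceptible at time t contribute a Bernoulli term.
        terms = x_next * np.log(p) + (1 - x_next) * np.log(1.0 - p)
        ll += terms[susceptible].sum()
        susceptible &= x_next == 0
    return ll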
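
In the same spirit, here is a sketch of the source-selection step of the ABC
algorithm: the mutual information I((x_t)_{t>=1}, Theta | x_0 = i) is estimated
by Monte Carlo from posterior samples of Theta, and the source maximising it is
returned. It reuses simulate_cascade and cascade_loglik from the sketch above,
needs SciPy for logsumexp, and the function names and the reuse of the same
posterior draws for the marginal term are again assumptions made for
illustration, not a definitive implementation.

import numpy as np
from scipy.special import logsumexp  # requires SciPy

def estimate_mi(source, theta_samples, T=10, n_cascades=20, rng=None):
    """Monte Carlo estimate of I((x_t)_{t>=1}, Theta | x_0 = source).

    theta_samples : (m, k, k) array of draws from the current posterior D_n,
                    e.g. an MCMC trace.
    Uses I = E[log p(x | Theta)] - E[log p(x)], where the marginal
    p(x) = E_{Theta ~ D_n} p(x | Theta) is approximated over the same draws.
    """
    rng = np.random.default_rng() if rng is None else rng
    m = len(theta_samples)
    cond_terms, marg_terms = [], []
    for theta in theta_samples:
        for _ in range(n_cascades):
            cascade = simulate_cascade(theta, source, T=T, rng=rng)
            # First term: log p(x | Theta) for the Theta that generated x.
            cond_terms.append(cascade_loglik(cascade, theta))
            # Second term: log p(x), marginalized over the posterior draws.
            ll_all = np.array([cascade_loglik(cascade, th) for th in theta_samples])
            marg_terms.append(logsumexp(ll_all) - np.log(m))
    return np.mean(cond_terms) - np.mean(marg_terms)

def select_source(theta_samples, n_nodes, **kwargs):
    """Pick the source maximising the estimated MI (the arg max step of ABC)."""
    scores = [estimate_mi(i, theta_samples, **kwargs) for i in range(n_nodes)]
    return int(np.argmax(scores))

In practice theta_samples would come from the MCMC trace computed in
simulation/bayes.py; how to extract and reshape it depends on how mc_graph_setup
names its variables, so that step is not shown here.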