\documentclass[final]{beamer}

% --- Encoding and fonts ---
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}            % proper hyphenation and copyable PDF text

% --- Poster layout ---
\usepackage[scale=1.7]{beamerposter} % beamerposter handles the large-format page
\usetheme{confposter}                % confposter theme supplied with this template

% --- Math, graphics, tables ---
\usepackage{framed, amsmath, amsthm, amssymb}
\usepackage{graphicx}
\usepackage{booktabs}
\usepackage{color, bbm}

% Colors of the plain and alerted block titles/bodies
\setbeamercolor{block title}{fg=dblue,bg=white}
\setbeamercolor{block body}{fg=black,bg=white}
\setbeamercolor{block alerted title}{fg=white,bg=dblue!70}
\setbeamercolor{block alerted body}{fg=black,bg=dblue!10}

% Column geometry: all widths are fractions of the paper width
\newlength{\sepwid}
\newlength{\onecolwid}
\newlength{\twocolwid}
\newlength{\threecolwid}
\setlength{\paperwidth}{48in}             % A0 width: 46.8in
\setlength{\paperheight}{36in}            % A0 height: 33.1in
\setlength{\sepwid}{0.024\paperwidth}     % separation (white space) between columns
\setlength{\onecolwid}{0.29\paperwidth}   % width of one column
\setlength{\twocolwid}{0.464\paperwidth}  % width of two columns
\setlength{\threecolwid}{0.708\paperwidth} % width of three columns
\setlength{\topmargin}{-1in}              % reduce the top margin size

\title{Bayesian and Active Learning for Graph Inference} % Poster title
\author{Thibaut Horel\\[0.5em] Jean Pouget-Abadie}       % Author(s)

\begin{document}

\setlength{\belowcaptionskip}{2ex}       % white space under figures
\setlength\belowdisplayshortskip{2ex}    % white space under equations

\begin{frame}[t]
\begin{columns}[t]

\begin{column}{\sepwid}\end{column} % Empty spacer column

\begin{column}{\onecolwid} % The first column

\begin{block}{Problem}
\emph{How to recover an unknown network from the observation of contagion cascades?}
\vspace{1em}
\begin{itemize}
\item \textbf{Observe:} state (infected or not) of nodes over time.
\item \textbf{Objective:} learn $\Theta$, matrix of edge weights.
\end{itemize}
\end{block}

\vspace{2cm}

% Discrete-time contagion process generating the observed cascades.
% Title left unbolded for consistency with the other block titles
% (the confposter theme already styles block titles).
\begin{block}{Contagion Model~\cite{Pouget:2015}}
\begin{itemize}
\item $X^t\in\{0,1\}^N$: state of the network at time $t$
\item At $t=0$, $X^0$ drawn from \emph{source distribution}
\item For $t=1,2,\dots$:
\begin{itemize}
\item $X^t$ only depends on $X^{t-1}$
\item for each node $j$, new state drawn independently with:
\begin{displaymath}
\mathbb{P}(X^{t+1}_j = 1 \mid X^t) = f(\Theta_j \cdot X^t)
\end{displaymath}
($f$: link function of the cascade model)
\end{itemize}
\end{itemize}
\vspace{1em}
\begin{figure}
\centering % \centering, not center env: avoids spurious vertical space
\includegraphics[scale=2.5]{drawing.pdf}
\end{figure}
\end{block}

\vspace{2cm}

% Maximum-likelihood estimation of one row of Theta per node.
\begin{block}{MLE}
\begin{itemize}
\item Define for node $j$, $y^t = x^{t+1}_j$
\item Observations $\{(x^t, y^t)\}$ are drawn from a GLM\@. MLE problem:
\begin{equation*}
\begin{split}
\hat{\theta}\in \arg\max_\theta \sum_{t}~& y^t\log f(\Theta_j\cdot x^t) \\
& + (1-y^t) \log \big(1 - f(\Theta_j\cdot x^t)\big)
\end{split}
\end{equation*}
Can be solved efficiently by SGD on $\Theta$.
\vspace{1cm}
\item log-likelihood is concave for common contagion models (\emph{e.g.}~IC model) $\Rightarrow$ provable convergence guarantees~\cite{Netrapalli:2012}.
\end{itemize}
\end{block}

\end{column} % End of the first column

%-----------------------------------------------------------------------------

\begin{column}{\sepwid}\end{column} % Empty spacer column

%-----------------------------------------------------------------------------

\begin{column}{\onecolwid} % The second column

\begin{block}{Bayesian Framework}
\begin{figure}
\centering
\vspace{-1em}
\includegraphics[scale=3]{graphical.pdf}
\end{figure}
\textbf{Advantages:}
\begin{itemize}
\item prior encodes domain-specific knowledge of graph structure (e.g. ERGMs)
\item posterior expresses uncertainty over edge weights
\end{itemize}
\textbf{Disadvantages:}
\begin{itemize}
\item hard to scale when large number of observations.
\end{itemize}
\end{block}

\begin{block}{Active Learning}
\emph{Can we learn faster by choosing the source node? If so, how to best choose it?}
\begin{center}--~OR~--\end{center}
\emph{Can we cherry-pick the most relevant part of the dataset?}
\vspace{0.5em}

\textbf{Idea:} Focus on parts of the graph which are unexplored (high uncertainty),
i.e.~maximize information gain per observation.
\vspace{0.5em}

\textbf{Baseline heuristic:}
\begin{itemize}
\item Choose source w.p.~proportional to estimated out-degree $\implies$ wider cascades $\implies$ more data
\end{itemize}
\vspace{0.5em}

\textbf{Principled heuristic:}
\begin{itemize}
% Time indices are superscripts throughout the poster (X^t, X^0);
% previously this display used subscripts (X_t, X_0) inconsistently.
\item Choose source w.p.~proportional to mutual information
\begin{multline*}
p(i) \propto I\big(\{X^t\}_{t\geq 1}, \Theta \mid X^0 = \{i\}\big)\\
= H(\Theta) - H\big(\Theta \mid \{X^t\}_{t\geq 1}, X^0 = \{i\}\big)
\end{multline*}
\item Requires knowing true distribution of $\{X^t\}$ $\Rightarrow$ use current estimate of $\Theta$ instead.
\end{itemize}
\end{block}

\end{column}

%-----------------------------------------------------------------------------

\begin{column}{\sepwid}\end{column} % Empty spacer column

%-----------------------------------------------------------------------------

\begin{column}{\onecolwid} % The third column

\begin{block}{Implementation}
\textbf{Scalable Bayesian Inference}
\begin{itemize}
\item MCMC: implements the graphical model directly with PyMC~\cite{pymc}; does not scale beyond 10 nodes.
\item VI: implements variational inference with Blocks~\cite{blocks}. (wip: use Bohning bounds to avoid sampling).
\end{itemize}
\vspace{1em}

\textbf{Scalable Active Learning}

Mutual information is expensive to compute. Instead:
\begin{itemize}
% Data-processing inequality: post-processing X through f cannot increase
% mutual information with Y, so the truncated-cascade objective is a lower bound.
\item use mutual information between $\Theta$ and $X^1$. \textbf{Justification} for any $f$:
\begin{displaymath}
I(X, Y) \geq I\big(f(X), Y\big)
\end{displaymath}
in our case: $f$ truncates the cascade at $t=1$. (wip: obtain closed-form formula in this case).
% \operatorname (amsmath) gives correct upright-operator spacing, unlike \text.
\item variance is a lower-bound on mutual information $\implies$ pick node $i$ w.p.~proportional to $\sum_j \operatorname{Var}(\Theta_{i,j})$.
\end{itemize}
\end{block}

\begin{block}{Results}
\begin{center}
\includegraphics[scale=1.5]{../../simulation/plots/finale.png}
\end{center}
\end{block}

\begin{block}{References}
% Conventional order: declare the style before emitting the bibliography.
{\scriptsize
\bibliographystyle{abbrv}
\bibliography{sparse}}
\end{block}

%-----------------------------------------------------------------------------

\end{column} % End of the third column

\end{columns} % End of all the columns in the poster
\end{frame} % End of the enclosing frame
\end{document}