\documentclass[final]{beamer}
\usepackage[utf8]{inputenc}
\usepackage[scale=1.8]{beamerposter} % Use the beamerposter package for laying out the poster
\usetheme{confposter} % Use the confposter theme supplied with this template
\usepackage{framed,amsmath,amsthm,amssymb}
\usepackage{graphicx}
\usepackage{booktabs}
\usepackage{color,bbm}

\setbeamercolor{block title}{fg=dblue,bg=white} % Colors of the block titles
\setbeamercolor{block body}{fg=black,bg=white} % Colors of the body of blocks
\setbeamercolor{block alerted title}{fg=white,bg=dblue!70} % Colors of the highlighted block titles
\setbeamercolor{block alerted body}{fg=black,bg=dblue!10} % Colors of the body of highlighted blocks

\newlength{\sepwid}
\newlength{\onecolwid}
\newlength{\twocolwid}
\newlength{\threecolwid}
\setlength{\paperwidth}{48in}
\setlength{\paperheight}{40in}
\setlength{\sepwid}{0.024\paperwidth} % Separation width (white space) between columns
\setlength{\onecolwid}{0.29\paperwidth} % Width of one column
\setlength{\twocolwid}{0.464\paperwidth} % Width of two columns
\setlength{\threecolwid}{0.708\paperwidth} % Width of three columns
\setlength{\topmargin}{-1in} % Reduce the top margin size

\title{Bayesian and Active Learning for Graph Inference} % Poster title
\author{Thibaut Horel \and Jean Pouget-Abadie} % Author(s)

\begin{document}

\setlength{\belowcaptionskip}{2ex} % White space under figures
\setlength\belowdisplayshortskip{2ex} % White space under equations

\begin{frame}[t,fragile] % fragile: needed for the verbatim code sketch below

\begin{columns}[t]

\begin{column}{\sepwid}\end{column} % Empty spacer column

\begin{column}{\onecolwid} % The first column

\begin{block}{Problem}
\emph{How can we recover an unknown network from observations of contagion cascades?}
\vspace{1em}
\begin{itemize}
\item \textbf{Observed:} the state (infected or not) of each node over time.
\item \textbf{Objective:} learn $\Theta$, the matrix of edge weights.
\end{itemize}
\end{block}

\vspace{1cm}

\begin{block}{Contagion Model~\cite{}}
\begin{itemize}
\item $X^t\in\{0,1\}^N$: state of the network at time $t$
\item At $t=0$, $X^0$ is drawn from the \emph{source distribution}
\item For $t=0,1,\dots$:
\begin{itemize}
\item $X^{t+1}$ depends only on $X^t$ (Markov property)
\item the new state of each node $j$ is drawn independently with:
\begin{displaymath}
\mathbb{P}(X^{t+1}_j = 1 \mid X^t) = f(\Theta_j \cdot X^t)
\end{displaymath}
($f$: link function of the cascade model)
\end{itemize}
\end{itemize}
\vspace{1em}
\begin{figure}
\begin{center}
\includegraphics[scale=2.5]{drawing.pdf}
\end{center}
\end{figure}
\end{block}

\begin{block}{MLE}
\begin{itemize}
\item For node $i$, let $y^t = x^{t+1}_i$
\item The pairs $\{(x^t, y^t)\}_t$ then follow a generalized linear model (GLM)
\item Estimate the row $\theta_i = (\theta_{ij})_j$ by SGD:
\vspace{1cm}
\begin{equation*}
\begin{split}
\hat{\theta}\in \arg\max_\theta \sum_{t}~& y^t\log f(\theta\cdot x^t) \\
& + (1-y^t) \log \big(1 - f(\theta\cdot x^t)\big)
\end{split}
\end{equation*}
\item The log-likelihood is concave for common contagion models (e.g., the IC model)
\item Prior work~\cite{} gives convergence guarantees under $L_1$-regularization
\end{itemize}
\end{block}
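\vspace{1em}
% Illustrative sketch (not our implementation); assumes the IC-style link
% f(z) = 1 - e^{-z}, and the name fit_node is hypothetical.
A minimal sketch of this per-node estimator, assuming the IC-style link
$f(z) = 1 - e^{-z}$ (illustrative code, not our implementation):
\begin{verbatim}
import numpy as np

def fit_node(xs, ys, lr=0.05, epochs=100, eps=1e-9):
    """SGD on one row theta_i of Theta, from pairs (x^t, y^t)."""
    theta = np.full(xs.shape[1], 0.1)    # small positive start
    for _ in range(epochs):
        for x, y in zip(xs, ys):
            z = theta @ x                # theta . x^t
            ez = np.exp(-z)              # equals 1 - f(z)
            # d/dz of the log-likelihood: y*ez/(1-ez) - (1-y)
            g = (y * ez / max(1.0 - ez, eps) - (1.0 - y)) * x
            theta = np.maximum(theta + lr * g, 0.0)  # keep theta >= 0
    return theta
\end{verbatim}
Here \texttt{xs} is the $T\times N$ array of observed states $x^t$ and
\texttt{ys} holds the next states $y^t = x^{t+1}_i$ of node $i$; the
projection step keeps $\theta \ge 0$, the natural domain for this link.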
\end{column} % End of the first column

%-----------------------------------------------------------------------------

\begin{column}{\sepwid}\end{column} % Empty spacer column

%-----------------------------------------------------------------------------

\begin{column}{\onecolwid} % The second column

\begin{block}{Bayesian Framework}
\begin{figure}
\centering
\includegraphics[scale=3]{graphical.pdf}
\end{figure}
{\bf Advantages:}
\begin{itemize}
\item encode expressive graph priors (e.g.~ERGMs)
\item quantify the uncertainty over each edge
\end{itemize}
{\bf Disadvantages:}
\begin{itemize}
\item hard to scale to large volumes of data
\end{itemize}
\end{block}

\begin{block}{Active Learning}
\emph{Can we gain by choosing the source node? If so, how best to choose it?}
\begin{center}--OR--\end{center}
\emph{Can we choose which part of the dataset to process next?}
\end{block}

{\bf Idea:} focus on the parts of the graph that remain unexplored (high
uncertainty), i.e.~maximize the information gain per cascade.

Baseline heuristic:
\begin{itemize}
\item Choose the source with probability proportional to its estimated
out-degree $\implies$ wider cascades $\implies$ more data
\end{itemize}

Principled heuristic:
\begin{itemize}
\item Choose the source $i$ proportionally to the mutual information
\begin{equation*}
I\big((X^t), \Theta \mid X^0 = i\big) = H(\Theta) - H\big(\Theta \mid (X^t), X^0 = i\big)
\end{equation*}
\item The exact strategy requires knowing the true distribution of $(X^t)$
\item Use the current estimate of $\Theta$ to approximate $H\big(\Theta \mid (X^t), X^0 = i\big)$
\end{itemize}

\end{column}

%-----------------------------------------------------------------------------

\begin{column}{\sepwid}\end{column} % Empty spacer column

%-----------------------------------------------------------------------------

\begin{column}{\onecolwid} % The third column

\begin{block}{Implementation}
{\bf Scalable Bayesian Inference}
\begin{itemize}
\item MCMC (PyMC~\cite{})
\item Variational inference (BLOCKS~\cite{})
\end{itemize}
{\bf Scalable Active Learning Criterion}

Approximate heuristic:
\begin{itemize}
\item Choose the source proportionally to the mutual information between the
first step of the cascade and $\Theta$, hoping for a closed-form expression
\item Intuition:
\begin{itemize}
\item By the data-processing inequality, $I(X, Y) \geq I(f(X), g(Y))$ for
deterministic $f$, $g$; here $f$ truncates the cascade to its first step
\item The first step is the most informative~\cite{}
\end{itemize}
\item Use the sum of the posterior variances of the outgoing edges as a proxy
\end{itemize}
\end{block}

\begin{block}{Results}
\end{block}

\begin{block}{References}
{\scriptsize
\bibliographystyle{plain}
\bibliography{../../paper/sparse}}
\end{block}

%-----------------------------------------------------------------------------

\end{column} % End of the third column

\end{columns} % End of all the columns in the poster

\end{frame} % End of the enclosing frame

\end{document}