% NOTE: removed stray line-number artifacts introduced by text extraction
% (bare text before \documentclass is a compile error).
\documentclass[final]{beamer}
\usepackage[utf8]{inputenc}
\usepackage[scale=1.8]{beamerposter} % Use the beamerposter package for laying
\usetheme{confposter} % Use the confposter theme supplied with this template
\usepackage{framed, amsmath, amsthm, amssymb}
\usepackage{graphicx}
\usepackage{booktabs}
\usepackage{color, bbm}
\setbeamercolor{block title}{fg=dblue,bg=white} % Colors of the block titles
\setbeamercolor{block body}{fg=black,bg=white} % Colors of the body of blocks
\setbeamercolor{block alerted title}{fg=white,bg=dblue!70} % Colors of the
\setbeamercolor{block alerted body}{fg=black,bg=dblue!10} % Colors of the body
\newlength{\sepwid}
\newlength{\onecolwid}
\newlength{\twocolwid}
\newlength{\threecolwid}
\setlength{\paperwidth}{48in} % A0 width: 46.8in
\setlength{\paperheight}{40in} % A0 height: 33.1in
\setlength{\sepwid}{0.024\paperwidth} % Separation width (white space) between
\setlength{\onecolwid}{0.29\paperwidth} % Width of one column
\setlength{\twocolwid}{0.464\paperwidth} % Width of two columns
\setlength{\threecolwid}{0.708\paperwidth} % Width of three columns
\setlength{\topmargin}{-1in} % Reduce the top margin size
\title{Bayesian and Active Learning for Graph Inference} % Poster title
\author{Thibaut Horel\and Jean Pouget-Abadie} % Author(s)
\begin{document}
\setlength{\belowcaptionskip}{2ex} % White space under figures
\setlength\belowdisplayshortskip{2ex} % White space under equations
\begin{frame}[t]
\begin{columns}[t]
\begin{column}{\sepwid}\end{column}
\begin{column}{\onecolwid} % The first column
\begin{block}{Problem}
\emph{How to recover an unknown network from the observation of contagion
cascades?}
\vspace{1em}
\begin{itemize}
\item \textbf{Observe:} state (infected or not) of nodes over time.
\item \textbf{Objective:} learn $\Theta$, matrix of edge weights.
\end{itemize}
\end{block}
\vspace{1cm}
\begin{block}{Contagion Model} % TODO: add citation key; \cite in a block title is fragile -- cite in the body instead
\begin{itemize}
\item $X^t\in\{0,1\}^N$: state of the network at time $t$
\item At $t=0$, $X^0$ drawn from \emph{source distribution}
\item For $t=1,2,\dots$:
\begin{itemize}
\item $X^t$ only depends on $X^{t-1}$
\item for each node $j$, new state drawn independently with:
\begin{displaymath}
\mathbb{P}(X^{t+1}_j = 1 \mid X^t) = f(\Theta_j \cdot X^t)
\end{displaymath}
($f$: link function of the cascade model)
\end{itemize}
\end{itemize}
\vspace{1em}
\begin{figure}
\begin{center}
\includegraphics[scale=2.5]{drawing.pdf}
\end{center}
\end{figure}
\end{block}
\begin{block}{MLE}
\begin{itemize}
\item For node $i$, $y^t = x^{t+1}_i$
\item For node $i$, $\{(x^t, y^t)\}$ are drawn from a GLM
\item SGD on $\{\theta_{ij}\}$
\vspace{1cm}
\begin{equation*}
\begin{split}
\hat{\theta}\in \arg\max_\theta \sum_{t} \;& y^t\log f(\theta\cdot x^t)
\\ & + (1-y^t) \log \big(1 - f(\theta\cdot x^t)\big)
\end{split}
\end{equation*}
\item Log-likelihood is concave for common contagion models (IC model)
\item Prior work~\cite{} finds convergence guarantees for $L1$-regularization
\end{itemize}
\end{block}
\end{column} % End of the first column
%-----------------------------------------------------------------------------
\begin{column}{\sepwid}\end{column} % Empty spacer column
%-----------------------------------------------------------------------------
\begin{column}{\onecolwid} % The first column
\begin{block}{Bayesian Framework}
\begin{figure}
\centering
\includegraphics[scale=3]{graphical.pdf}
\end{figure}
\textbf{Advantages:}
\begin{itemize}
\item encode expressive graph priors (e.g. ERGMs)
\item quantify uncertainty over each edge
\end{itemize}
\textbf{Disadvantages:}
\begin{itemize}
\item Hard to scale in large data volume regime
\end{itemize}
\end{block}
\begin{block}{Active Learning}
\emph{Can we gain by choosing the source node? If so, how to best choose the
source node?}
\begin{center}--OR--\end{center}
\emph{Can we choose the parts of the dataset we compute next?}
\end{block}
\textbf{Idea:} Focus on parts of the graph which are unexplored (high
uncertainty), i.e.~maximize the information gain per cascade.
Baseline heuristic:
\begin{itemize}
\item Choose source proportional to estimated out-degree $\implies$ wider
cascades $\implies$ more data
\end{itemize}
Principled heuristic:
\begin{itemize}
\item Choose source proportional to mutual information
\begin{equation*}
I\bigl((X^t), \Theta \mid X^0 = i\bigr) = H(\Theta) - H\bigl(\Theta \mid (X^t), X^0 = i\bigr)
\end{equation*}
\item Exact strategy requires knowing the true distribution of $(X^t)$
\item Use estimated $\Theta$ to compute $H(\Theta \mid (X^t), X^0 = i)$
\end{itemize}
\end{column}
%-----------------------------------------------------------------------------
\begin{column}{\sepwid}\end{column}
%-----------------------------------------------------------------------------
\begin{column}{\onecolwid} % The third column
\begin{block}{Implementation}
\textbf{Scalable Bayesian Inference}
\begin{itemize}
\item MCMC (PyMC~\cite{})
\item VI (BLOCKS~\cite{})
\end{itemize}
\textbf{Scalable Active Learning Criterion}
Approx.~heuristic:
\begin{itemize}
\item Choose source proportional to mutual information of first step of
cascade and $\Theta$: Hope for closed-form formula
\item Intuition:
\begin{itemize}
\item Use the lower bound on mutual information $I(X, Y) \geq I(f(X),
g(Y))$ where $f$ is the truncation function
\item First step is most informative~\cite{}
\end{itemize}
\item Sum over outgoing-edges' variance as proxy
\end{itemize}
\end{block}
\begin{block}{Results}
\end{block}
\begin{block}{References}
{\scriptsize \bibliography{../../paper/sparse}
\bibliographystyle{plain}}
\end{block}
%-----------------------------------------------------------------------------
\end{column} % End of the third column
\end{columns} % End of all the columns in the poster
\end{frame} % End of the enclosing frame
\end{document}
% NOTE: removed stray artifact after \end{document}