From 4d4fa73eb0a549ee487e012acf02b97deb0f33a0 Mon Sep 17 00:00:00 2001 From: Thibaut Horel Date: Tue, 17 Feb 2015 15:18:08 -0500 Subject: Add notes on message passing --- message-passing/main.tex | 179 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 message-passing/main.tex (limited to 'message-passing/main.tex') diff --git a/message-passing/main.tex b/message-passing/main.tex new file mode 100644 index 0000000..a86c83f --- /dev/null +++ b/message-passing/main.tex @@ -0,0 +1,179 @@ +\documentclass{article} +\title{Message Passing on Trees: Analysis of the Sum-Product Algorithm} +\author{Thibaut Horel} +\usepackage[utf8]{inputenc} +\usepackage{amsmath, amsfonts, amsthm} +\newcommand{\set}[1]{\mathbf{#1}} +\newcommand{\gx}{\mathcal{X}} +\newcommand{\bx}{\boldsymbol{x}} +\newcommand{\fa}{\phi} +\newcommand*{\defeq}{\equiv} +\newtheorem*{lemma}{Lemma} +\newtheorem*{theorem}{Theorem} +\usepackage[pagebackref=true,breaklinks=true,colorlinks=true]{hyperref} +\begin{document} +\maketitle + +\section{Introduction} + +Let $\gx_1,\ldots,\gx_n$ be $n$ finite sets, $n\in\set{N}$. We consider, +a discrete probability distribution $p$ over +$\boldsymbol{\gx}\defeq\gx_1\times\ldots\times\gx_n$ which \emph{factorizes} +according to the undirected tree $T = (V, E)$ with $V = \{1,\ldots. n\}$. By +definition, this means that there exist functions +$\fa_i:\gx_i\rightarrow\set{R}^+$, $i\in V$, and functions +$\fa_{i,j}:\gx_i\times\gx_j\rightarrow\set{R}^+$, $(i,j)\in E$, such that: +\begin{equation}\label{eq:factor} + p(x_1,\ldots,x_n) = \kappa\prod_{i\in V}\fa_i(x_i) + \prod_{(i,j)\in E} \fa_{i,j}(x_i, x_j), + \quad + (x_1,\ldots,x_n)\in\boldsymbol\gx +\end{equation} +where $\kappa$ is a normalization constant which ensures that $p$ sums to one +over $\boldsymbol\gx$. + +From now on, vectors will be denoted by bold letters (\emph{e.g.} +$\bx\in\boldsymbol\gx$), while their coordinates will be denoted by regular +letters (\emph{e.g.} $(x_1\ldots, x_n)\in\boldsymbol\gx$). If $S$ is a subset +of $V$, and $\bx$ is a vector indexed by $V$, $\bx_S$ denotes the sub-vector +obtained by keeping from $\bx$ only its coordinates in $S$ (when the ordering +of the coordinates does not matter and there is no ambiguity). Finally, for +a node $i\in V$, $N(i)$ denotes the set of its neighbors: $N(i)\defeq \{j\in +V\,|\, (i,j)\in E\}$. + +Our goal is to understand the behavior of the \emph{sum-product message passing +algorithm} when applied to computing marginals of the probability distribution +$p$. Formally, we want to compute for all $i\in V$, the marginal $p_i$: +\begin{equation}\label{eq:marginal} + p_i(x) \defeq \sum_{\substack{\bx\in\gx\\ x_i = x}}p(\bx), + \quad + x\in\gx_i +\end{equation} + +We recall that a tree can be \emph{rooted} at any of its nodes. By definition, +this amounts to designate a node as the root of the tree, hence defining +a partial ordering on the nodes of the tree. Once a tree is rooted, the notions +of \emph{subtree}, \emph{descendant}, \emph{ascendant} and \emph{leaves} are +defined with respect to the corresponding partial ordering. + +Let us now choose $i\in V$ and root $T$ on $i$. Let $j\in N(i)$, we denote by +$T_{j,i} = (V_{j,i}, E_{j,i})$ the subtree rooted at $j$. Note that $i$ and +$T_{j,i}$ for $j\in N(i)$ form a subdivision of $T$ into disjoint subtrees. +Using this subdivision, and distributing the sum in \eqref{eq:marginal} over +the products in \eqref{eq:factor} accordingly: +\begin{equation}\label{eq:finish} + p_i(x) = \kappa \fa_i(x)\prod_{j\in N(i)} M_{j,i}(x) +\end{equation} +where: +\begin{equation}\label{eq:limit} + M_{j,i}(x) = \sum_{\bx\in\boldsymbol\gx_{V_{i,j}}}\fa_{i,j}(x,x_j) + \prod_{k\in V_{j,i}}\fa_k(x_k) + \prod_{(k,l)\in E_{j,i}}\fa_{k,l}(x_k, x_l) +\end{equation} +Note that by keeping upfront the summation over $\gx_j$ in \eqref{eq:limit} +while distributing the remaining summations over the products, we get: +\begin{equation}\label{eq:recurse} + M_{j,i}(x) = \sum_{x_j\in\gx_j}\fa_{i,j}(x, x_j)\fa_j(x_j)\prod_{k\in + N(j)\setminus\{i\}}M_{k,j}(x_j) +\end{equation} + +The form obtained in \eqref{eq:recurse} suggests a recursive algorithm to +compute $p_i$: by starting from the leaves of $T$ rooted at $i$, the quantities +$M_{l,k}$, $(k,l)\in E$ can be computed recursively, ultimately leading to the +computation of $p_i$ as given by \eqref{eq:finish}. + +\section{The sum-product algorithm} + +The recursion above is at the core of the sum-product algorithm. The notable +difference being that all the marginals are computed at the same time, thus +reestablishing the symmetry which was lost when rooting $T$ on $i$. As +a consequence, a node $j$ cannot rely on its neighbors having properly computed +the quantities $M_{k,j}$, $k\in N(j)\setminus\{i\}$ \emph{prior to} computing +$M_{j,i}$ and \eqref{eq:recurse} cannot be treated as an exact recursion. +Instead, the sum-product algorithm is described in terms of time steps: at +each time step, each node sends messages to its neighbors, the messages +being computed from the messages received at the previous time step in a way +paralleling \eqref{eq:recurse}. + +Formally, for all $(i,j)$ in $E$, we define $|\gx_i|$ messages that are sent +from $j$ to $i$ at step $t\in\set{N}^*$: +\begin{align} + \mu^0_{j,i}(x_i)&=\sum_{x_j\in\gx_j}\fa_{i,j}(x_i,x_j)\fa_j(x_j), + \quad x_i\in\gx_i\\ + \label{eq:message-t} + \mu_{j,i}^t(x_i)&= \sum_{x_j\in\gx_j}\fa_{i,j}(x_i,x_j)\fa_j(x_j) + \prod_{k\in N(j)\setminus\{i\}}\mu_{k,j}^{t-1}(x_j), + \quad x_i\in\gx_i +\end{align} + +Let us denote by $h_{j,i}$ the height of the subtree rooted at $j$ in $T$ +rooted at $i$: +\begin{displaymath} + h_{j,i} \defeq + \begin{cases} + 0 & \text{if $j$ is a leaf in $T$ rooted at $i$}\\ + 1 + \max_{k\in N(j)\setminus\{i\}} h_{k,j} & \text{otherwise} + \end{cases} +\end{displaymath} +Then, the following lemma holds. + +\begin{lemma} + Let $(i,j)\in E$ be any edge of $T$, then: + \begin{displaymath} + \forall t\geq h_{j,i},\;\forall x_i\in\gx_i,\quad + \mu_{j,i}^t(x_i) = M_{j,i}(x_i) + \end{displaymath} +\end{lemma} + +\begin{proof} + We will prove this lemma by induction on $h_{j,i}$. The initialization is + straightforward: if $h_{j,i} = 0$, then the products in \eqref{eq:message-t} + as well as in \eqref{eq:recurse} are empty, and for all $t\geq 0$, + $\mu_{j,i}^t(x_i) = \mu_{j,i}^0(x_i) = M_{j,i}(x_i)$. + + For the inductive step, let us consider an edge $(i,j)\in E$, with + associated height $h_{j,i}$ and a time $t\geq h_{j,i}$. By definition of + $h_{j,i}$, for all $k\in N(j)\setminus\{i\}$, $h_{k,j}