From 48df7f24114a224c4e8266813f5002cc791704f2 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 5 Mar 2012 11:48:48 -0800 Subject: Algorithms --- algorithm.tex | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/algorithm.tex b/algorithm.tex index 9bc0fd6..1747227 100644 --- a/algorithm.tex +++ b/algorithm.tex @@ -20,15 +20,7 @@ distribution of the model is given by: P(\bx, y) = \cN(\bx | \bar{\bx}_y, \Sigma) P(y), \label{eq:mixture of Gaussians} \end{align} -where $P(y)$ is the probability of class $y$ and $\cN(\bx | \bar{\bx}_y, -\Sigma)$ is a multivariate normal distribution, which models the density of -$\bx$ given $y$. The mean of the distribution is $\bar{\bx}_y$ and the variance -of $\bx$ is captured by the covariance matrix $\Sigma$. The decision boundary -between any two classes is known to be is linear when all conditionals $\cN(\bx -| \bar{\bx}_y, \Sigma)$ have the same covariance matrix \cite{bishop06pattern}. -In this setting, the mixture of Gaussians model can be viewed as a -probabilistic variant of the nearest-neighbor (NN) classifier in -Section~\ref{sec:uniqueness}. +where $\bx$ denotes an observation, $y$ is a class, $P(y)$ is the probability of the class, and $\cN(\bx | \bar{\bx}_y, \Sigma)$ is the conditional density of $\bx$ given $y$. The conditional is a multivariate normal distribution. The mean of the distribution is $\bar{\bx}_y$ and the variance of $\bx$ is captured by the covariance matrix $\Sigma$. The decision boundary between any two classes is linear when all conditionals $\cN(\bx | \bar{\bx}_y, \Sigma)$ have the same covariance matrix~\cite{bishop06pattern}. In this setting, the mixture of Gaussians model can be viewed as a probabilistic variant of the nearest-neighbor (NN) classifier in Section~\ref{sec:uniqueness}. The mixture of Gaussians model has many advantages. First, the model can be easily learned using maximum-likelihood (ML) estimation \cite{bishop06pattern}. 
@@ -38,14 +30,12 @@ is computed as $\Sigma = \sum_y P(y) \Sigma_y$, where $\Sigma_y$ represents the covariance of $\bx$ given $y$. Second, the inference in the model can be performed in a closed form. In particular, the model predicts $\hat{y} = \arg\max_y P(y | \bx)$, where: - \begin{align} P(y | \bx) = \frac{P(\bx | y) P(y)}{\sum_y P(\bx | y) P(y)} = \frac{\cN(\bx | \bar{\bx}_y, \Sigma) P(y)}{\sum_y \cN(\bx | \bar{\bx}_y, \Sigma) P(y)}. \label{eq:inference} \end{align} - In practice, the prediction $\hat{y}$ is accepted when the classifier is confident. In other words, $P(\hat{y} | \bx) \! > \! \delta$, where $\delta \in (0, 1)$ is a threshold that controls the precision and recall of the @@ -85,7 +75,6 @@ sequential hypothesis testing. Sequential hypothesis testing subject is sequentially tested for belonging to one of several classes. The probability that the sequence of data $\bx^{(1)}, \dots, \bx^{(t)}$ belongs to the class $y$ at time $t$ is given by: - \begin{align} P(y | \bx^{(1)}, \dots, \bx^{(t)}) = \frac{\prod_{i = 1}^t \cN(\bx^{(i)} | \bar{\bx}_y, \Sigma) P(y)} -- cgit v1.2.3-70-g09d2