//! Naive Bayes Classifiers
//!
//! The classifier supports Gaussian, Bernoulli and Multinomial distributions.
//!
//! A naive Bayes classifier assumes that the features of each input are
//! conditionally independent given its class. Under this assumption we apply Bayes'
//! rule to compute the probability that each input belongs to each class.
//!
//! # Examples
//!
//! ```
//! use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
//! use rusty_machine::linalg::Matrix;
//! use rusty_machine::learning::SupModel;
//!
//! let inputs = Matrix::new(6, 2, vec![1.0, 1.1,
//!                                     1.1, 0.9,
//!                                     2.2, 2.3,
//!                                     2.5, 2.7,
//!                                     5.2, 4.3,
//!                                     6.2, 7.3]);
//!
//! let targets = Matrix::new(6, 3, vec![1.0, 0.0, 0.0,
//!                                      1.0, 0.0, 0.0,
//!                                      0.0, 1.0, 0.0,
//!                                      0.0, 1.0, 0.0,
//!                                      0.0, 0.0, 1.0,
//!                                      0.0, 0.0, 1.0]);
//!
//! // Create a Gaussian Naive Bayes classifier.
//! let mut model = NaiveBayes::<Gaussian>::new();
//!
//! // Train the model.
//! model.train(&inputs, &targets).unwrap();
//!
//! // Predict the classes on the input data
//! let outputs = model.predict(&inputs).unwrap();
//!
//! // Will output the target classes - otherwise our classifier is bad!
//! println!("Final outputs --\n{}", outputs);
//! ```
use linalg::{Matrix, Axes, BaseMatrix, BaseMatrixMut};
use learning::{LearningResult, SupModel};
use learning::error::{Error, ErrorKind};
use rulinalg::utils;
use std::f64::consts::PI;
/// The Naive Bayes model.
#[derive(Debug, Default)]
pub struct NaiveBayes<T: Distribution> {
distr: Option<T>,
cluster_count: Option<usize>,
class_prior: Option<Vec<f64>>,
class_counts: Vec<usize>,
}
impl<T: Distribution> NaiveBayes<T> {
/// Create a new NaiveBayes model from a given
/// distribution.
///
/// # Examples
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
///
/// // Create a new Gaussian Naive Bayes model.
/// let _ = NaiveBayes::<Gaussian>::new();
/// ```
pub fn new() -> NaiveBayes<T> {
NaiveBayes {
distr: None,
cluster_count: None,
class_prior: None,
class_counts: Vec::new(),
}
}
/// Get the cluster count for this model.
///
/// Returns an option which is `None` until the model has been trained.
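///
/// # Examples
///
/// A minimal sketch; the data below is illustrative only (three classes,
/// two samples each):
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
/// use rusty_machine::learning::SupModel;
/// use rusty_machine::linalg::Matrix;
///
/// let mut model = NaiveBayes::<Gaussian>::new();
///
/// // No cluster count before training.
/// assert!(model.cluster_count().is_none());
///
/// let inputs = Matrix::new(6, 1, vec![1.0, 1.2, 5.0, 5.5, 9.0, 9.4]);
/// let targets = Matrix::new(6, 3, vec![1.0, 0.0, 0.0,
///                                      1.0, 0.0, 0.0,
///                                      0.0, 1.0, 0.0,
///                                      0.0, 1.0, 0.0,
///                                      0.0, 0.0, 1.0,
///                                      0.0, 0.0, 1.0]);
/// model.train(&inputs, &targets).unwrap();
///
/// // One cluster per target column after training.
/// assert_eq!(model.cluster_count(), Some(&3));
/// ```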
pub fn cluster_count(&self) -> Option<&usize> {
self.cluster_count.as_ref()
}
/// Get the class prior distribution for this model.
///
/// Returns an option which is `None` until the model has been trained.
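///
/// # Examples
///
/// A minimal sketch with illustrative binary data (one sample in the first
/// class, three in the second):
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Bernoulli};
/// use rusty_machine::learning::SupModel;
/// use rusty_machine::linalg::Matrix;
///
/// let inputs = Matrix::new(4, 2, vec![1.0, 0.0,
///                                     0.0, 1.0,
///                                     0.0, 1.0,
///                                     1.0, 1.0]);
/// let targets = Matrix::new(4, 2, vec![1.0, 0.0,
///                                      0.0, 1.0,
///                                      0.0, 1.0,
///                                      0.0, 1.0]);
///
/// let mut model = NaiveBayes::<Bernoulli>::new();
/// model.train(&inputs, &targets).unwrap();
///
/// // The prior is simply the proportion of samples in each class.
/// assert_eq!(model.class_prior(), Some(&vec![0.25, 0.75]));
/// ```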
pub fn class_prior(&self) -> Option<&Vec<f64>> {
self.class_prior.as_ref()
}
/// Get the distribution for this model.
///
/// Returns an option which is `None` until the model has been trained.
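///
/// # Examples
///
/// A sketch of the untrained case:
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Bernoulli};
///
/// let model = NaiveBayes::<Bernoulli>::new();
///
/// // No distribution parameters exist before training.
/// assert!(model.distr().is_none());
/// ```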
pub fn distr(&self) -> Option<&T> {
self.distr.as_ref()
}
}
/// Train and predict from the Naive Bayes model.
///
/// Each row of the input matrix is a single sample and each column a feature.
/// Each row of the target matrix is an indicator (one-hot) vector giving that
/// sample's class, e.g. [[1,0,0],[0,0,1]] encodes class 1 for the first sample
/// and class 3 for the second.
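///
/// For example, the `[[1,0,0],[0,0,1]]` targets above can be constructed as:
///
/// ```
/// use rusty_machine::linalg::Matrix;
///
/// // Two samples, three classes: the first sample is class 1,
/// // the second is class 3.
/// let targets = Matrix::new(2, 3, vec![1.0, 0.0, 0.0,
///                                      0.0, 0.0, 1.0]);
/// ```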
impl<T: Distribution> SupModel<Matrix<f64>, Matrix<f64>> for NaiveBayes<T> {
/// Train the model using inputs and targets.
fn train(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
self.distr = Some(T::from_model_params(targets.cols(), inputs.cols()));
self.update_params(inputs, targets)
}
/// Predict output from inputs.
fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
let log_probs = try!(self.get_log_probs(inputs));
let input_classes = NaiveBayes::<T>::get_classes(log_probs);
if let Some(cluster_count) = self.cluster_count {
let mut class_data = Vec::with_capacity(inputs.rows() * cluster_count);
for c in input_classes {
let mut row = vec![0f64; cluster_count];
row[c] = 1f64;
class_data.append(&mut row);
}
Ok(Matrix::new(inputs.rows(), cluster_count, class_data))
} else {
Err(Error::new(ErrorKind::UntrainedModel, "The model has not been trained."))
}
}
}
impl<T: Distribution> NaiveBayes<T> {
/// Get the log-probabilities per class for each input.
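///
/// The returned matrix has one row per input and one column per class.
///
/// # Examples
///
/// A minimal sketch with illustrative data (this assumes the `BaseMatrix`
/// trait re-export from `rusty_machine::linalg` for `rows`/`cols`):
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
/// use rusty_machine::learning::SupModel;
/// use rusty_machine::linalg::{Matrix, BaseMatrix};
///
/// let inputs = Matrix::new(4, 1, vec![1.0, 1.5, 8.0, 8.5]);
/// let targets = Matrix::new(4, 2, vec![1.0, 0.0,
///                                      1.0, 0.0,
///                                      0.0, 1.0,
///                                      0.0, 1.0]);
///
/// let mut model = NaiveBayes::<Gaussian>::new();
/// model.train(&inputs, &targets).unwrap();
///
/// let log_probs = model.get_log_probs(&inputs).unwrap();
///
/// // One row per input, one column per class.
/// assert_eq!(log_probs.rows(), 4);
/// assert_eq!(log_probs.cols(), 2);
/// ```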
pub fn get_log_probs(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
if let (&Some(ref distr), &Some(ref prior)) = (&self.distr, &self.class_prior) {
// Get the joint log likelihood from the distribution
distr.joint_log_lik(inputs, prior)
} else {
Err(Error::new_untrained())
}
}
fn update_params(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
let class_count = targets.cols();
let total_data = inputs.rows();
self.class_counts = vec![0; class_count];
let mut class_data = vec![Vec::new(); class_count];
for (idx, row) in targets.row_iter().enumerate() {
// Find the class of this input
let class = try!(NaiveBayes::<T>::find_class(row.raw_slice()));
// Note the class of the input
class_data[class].push(idx);
self.class_counts[class] += 1;
}
if let Some(ref mut distr) = self.distr {
for (idx, c) in class_data.into_iter().enumerate() {
// If this class' vector has not been populated, we can safely
// skip this iteration, since the user is clearly not interested
// in associating features with this class
if c.is_empty() {
continue;
}
// Update the parameters within this class
try!(distr.update_params(&inputs.select_rows(&c), idx));
}
}
let mut class_prior = Vec::with_capacity(class_count);
// Compute the prior as the proportion in each class
class_prior.extend(self.class_counts.iter().map(|c| *c as f64 / total_data as f64));
self.class_prior = Some(class_prior);
self.cluster_count = Some(class_count);
Ok(())
}
fn find_class(row: &[f64]) -> LearningResult<usize> {
// Find the `1` entry in the row
for (idx, r) in row.into_iter().enumerate() {
if *r == 1f64 {
return Ok(idx);
}
}
Err(Error::new(ErrorKind::InvalidState,
"No class found for entry in targets"))
}
fn get_classes(log_probs: Matrix<f64>) -> Vec<usize> {
let mut data_classes = Vec::with_capacity(log_probs.rows());
data_classes.extend(log_probs.row_iter().map(|row| {
// Argmax each class log-probability per input
let (class, _) = utils::argmax(row.raw_slice());
class
}));
data_classes
}
}
/// Naive Bayes Distribution.
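///
/// This trait encapsulates the class-conditional likelihood model used by
/// `NaiveBayes`. In normal use the methods below are called for you by
/// `NaiveBayes::train` and `NaiveBayes::predict`.
///
/// # Examples
///
/// The sketch below drives a `Gaussian` through the trait methods directly,
/// with illustrative data (it also assumes the `BaseMatrix` re-export from
/// `rusty_machine::linalg` for `rows`/`cols`):
///
/// ```
/// use rusty_machine::learning::naive_bayes::{Distribution, Gaussian};
/// use rusty_machine::linalg::{Matrix, BaseMatrix};
///
/// // Two classes, one feature.
/// let mut distr = Gaussian::from_model_params(2, 1);
///
/// // Fit each class on its own rows of training data.
/// distr.update_params(&Matrix::new(2, 1, vec![1.0, 2.0]), 0).unwrap();
/// distr.update_params(&Matrix::new(2, 1, vec![9.0, 11.0]), 1).unwrap();
///
/// // Joint log-likelihood of a new point under uniform class priors.
/// let log_lik = distr.joint_log_lik(&Matrix::new(1, 1, vec![1.5]), &[0.5, 0.5]).unwrap();
///
/// // One row per input, one column per class.
/// assert_eq!(log_lik.rows(), 1);
/// assert_eq!(log_lik.cols(), 2);
/// ```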
pub trait Distribution {
/// Initialize the distribution parameters.
fn from_model_params(class_count: usize, features: usize) -> Self;
/// Updates the distribution parameters.
fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()>;
/// Compute the joint log likelihood of the data.
///
/// Returns a matrix with one row per input, whose entries are the joint log
/// likelihood of that input under each class.
fn joint_log_lik(&self,
data: &Matrix<f64>,
class_prior: &[f64])
-> LearningResult<Matrix<f64>>;
}
/// The Gaussian Naive Bayes model distribution.
///
/// Defines:
///
/// p(x|C<sub>k</sub>) = ∏<sub>i</sub> N(x<sub>i</sub> ;
/// μ<sub>k</sub>, σ<sup>2</sup><sub>k</sub>)
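///
/// # Examples
///
/// A minimal sketch of the fitted parameters, using illustrative data
/// (one feature, two classes):
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
/// use rusty_machine::learning::SupModel;
/// use rusty_machine::linalg::Matrix;
///
/// let inputs = Matrix::new(4, 1, vec![1.0, 3.0, 10.0, 14.0]);
/// let targets = Matrix::new(4, 2, vec![1.0, 0.0,
///                                      1.0, 0.0,
///                                      0.0, 1.0,
///                                      0.0, 1.0]);
///
/// let mut model = NaiveBayes::<Gaussian>::new();
/// model.train(&inputs, &targets).unwrap();
///
/// let gaussian = model.distr().unwrap();
///
/// // Per-class feature means (theta) and sample variances (sigma).
/// assert!((gaussian.theta()[[0, 0]] - 2.0).abs() < 1e-12);
/// assert!((gaussian.sigma()[[1, 0]] - 8.0).abs() < 1e-12);
/// ```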
#[derive(Debug)]
pub struct Gaussian {
theta: Matrix<f64>,
sigma: Matrix<f64>,
}
impl Gaussian {
/// Returns the distribution means.
///
/// This is a matrix of class by feature means.
pub fn theta(&self) -> &Matrix<f64> {
&self.theta
}
/// Returns the distribution variances.
///
/// This is a matrix of class by feature variances.
pub fn sigma(&self) -> &Matrix<f64> {
&self.sigma
}
}
impl Distribution for Gaussian {
fn from_model_params(class_count: usize, features: usize) -> Gaussian {
Gaussian {
theta: Matrix::zeros(class_count, features),
sigma: Matrix::zeros(class_count, features),
}
}
fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
// Compute mean and sample variance
let mean = data.mean(Axes::Row).into_vec();
let var = try!(data.variance(Axes::Row).map_err(|_| {
Error::new(ErrorKind::InvalidData,
"Cannot compute variance for Gaussian distribution.")
}))
.into_vec();
let features = data.cols();
for (idx, (m, v)) in mean.into_iter().zip(var.into_iter()).enumerate() {
self.theta.mut_data()[class * features + idx] = m;
self.sigma.mut_data()[class * features + idx] = v;
}
Ok(())
}
fn joint_log_lik(&self,
data: &Matrix<f64>,
class_prior: &[f64])
-> LearningResult<Matrix<f64>> {
let class_count = class_prior.len();
let mut log_lik = Vec::with_capacity(class_count);
for (i, item) in class_prior.into_iter().enumerate() {
let joint_i = item.ln();
let n_ij = -0.5 * (self.sigma.select_rows(&[i]) * 2.0 * PI).apply(&|x| x.ln()).sum();
// NOTE: Here we are copying the row data which is inefficient
let r_ij = (data - self.theta.select_rows(&vec![i; data.rows()]))
.apply(&|x| x * x)
.elediv(&self.sigma.select_rows(&vec![i; data.rows()]))
.sum_cols();
let res = (-r_ij * 0.5) + n_ij;
log_lik.append(&mut (res + joint_i).into_vec());
}
Ok(Matrix::new(class_count, data.rows(), log_lik).transpose())
}
}
/// The Bernoulli Naive Bayes model distribution.
///
/// Defines:
///
/// p(x|C<sub>k</sub>) = ∏<sub>i</sub> p<sub>k</sub><sup>x<sub>i</sub></sup>
/// (1-p<sub>k</sub>)<sup>1-x<sub>i</sub></sup>
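///
/// The feature probabilities are estimated with additive (Laplace) smoothing
/// using a pseudo-count of one.
///
/// # Examples
///
/// A minimal sketch with illustrative binary data:
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Bernoulli};
/// use rusty_machine::learning::SupModel;
/// use rusty_machine::linalg::Matrix;
///
/// let inputs = Matrix::new(4, 2, vec![1.0, 0.0,
///                                     1.0, 0.0,
///                                     0.0, 1.0,
///                                     0.0, 1.0]);
/// let targets = Matrix::new(4, 2, vec![1.0, 0.0,
///                                      1.0, 0.0,
///                                      0.0, 1.0,
///                                      0.0, 1.0]);
///
/// let mut model = NaiveBayes::<Bernoulli>::new();
/// model.train(&inputs, &targets).unwrap();
///
/// // Feature 0 is on in both class-0 samples, so its smoothed
/// // probability is (2 + 1) / (2 + 2) = 0.75.
/// let p = model.distr().unwrap().log_probs()[[0, 0]].exp();
/// assert!((p - 0.75).abs() < 1e-12);
/// ```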
#[derive(Debug)]
pub struct Bernoulli {
log_probs: Matrix<f64>,
pseudo_count: f64,
}
impl Bernoulli {
/// The log probability matrix.
///
/// A matrix of class by feature model log-probabilities.
pub fn log_probs(&self) -> &Matrix<f64> {
&self.log_probs
}
}
impl Distribution for Bernoulli {
fn from_model_params(class_count: usize, features: usize) -> Bernoulli {
Bernoulli {
log_probs: Matrix::zeros(class_count, features),
pseudo_count: 1f64,
}
}
fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
let features = data.cols();
// Apply additive smoothing: add the pseudo count once to each feature count
// and twice to the class count (once per binary outcome)
let pseudo_cc = data.rows() as f64 + (2f64 * self.pseudo_count);
let pseudo_fc = data.sum_rows() + self.pseudo_count;
let log_probs = (pseudo_fc.apply(&|x| x.ln()) - pseudo_cc.ln()).into_vec();
for (i, item) in log_probs.iter().enumerate().take(features) {
self.log_probs[[class, i]] = *item;
}
Ok(())
}
fn joint_log_lik(&self,
data: &Matrix<f64>,
class_prior: &[f64])
-> LearningResult<Matrix<f64>> {
let class_count = class_prior.len();
let neg_prob = self.log_probs.clone().apply(&|x| (1f64 - x.exp()).ln());
let res = data * (&self.log_probs - &neg_prob).transpose();
// NOTE: Some messy stuff now to get the class row contribution.
// Really we want to add to each row the class log-priors and the
// neg_prob_sum contribution - the last term in
// x log(p) + (1-x)log(1-p) = x (log(p) - log(1-p)) + log(1-p)
let mut per_class_row = Vec::with_capacity(class_count);
let neg_prob_sum = neg_prob.sum_cols();
for (idx, p) in class_prior.into_iter().enumerate() {
per_class_row.push(p.ln() + neg_prob_sum[idx]);
}
let class_row_mat = Matrix::new(1, class_count, per_class_row);
Ok(res + class_row_mat.select_rows(&vec![0; data.rows()]))
}
}
/// The Multinomial Naive Bayes model distribution.
///
/// Defines:
///
/// p(x|C<sub>k</sub>) ∝ ∏<sub>i</sub> p<sub>k</sub><sup>x<sub>i</sub></sup>
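///
/// The smoothed feature probabilities for each class sum to one.
///
/// # Examples
///
/// A minimal sketch with illustrative count data:
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Multinomial};
/// use rusty_machine::learning::SupModel;
/// use rusty_machine::linalg::Matrix;
///
/// let inputs = Matrix::new(4, 3, vec![2.0, 1.0, 0.0,
///                                     3.0, 0.0, 1.0,
///                                     0.0, 4.0, 5.0,
///                                     1.0, 3.0, 6.0]);
/// let targets = Matrix::new(4, 2, vec![1.0, 0.0,
///                                      1.0, 0.0,
///                                      0.0, 1.0,
///                                      0.0, 1.0]);
///
/// let mut model = NaiveBayes::<Multinomial>::new();
/// model.train(&inputs, &targets).unwrap();
///
/// // Exponentiating the log-probabilities for one class recovers a
/// // distribution over the features.
/// let log_probs = model.distr().unwrap().log_probs();
/// let total: f64 = (0..3).map(|j| log_probs[[0, j]].exp()).sum();
/// assert!((total - 1.0).abs() < 1e-12);
/// ```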
#[derive(Debug)]
pub struct Multinomial {
log_probs: Matrix<f64>,
pseudo_count: f64,
}
impl Multinomial {
/// The log probability matrix.
///
/// A matrix of class by feature model log-probabilities.
pub fn log_probs(&self) -> &Matrix<f64> {
&self.log_probs
}
}
impl Distribution for Multinomial {
fn from_model_params(class_count: usize, features: usize) -> Multinomial {
Multinomial {
log_probs: Matrix::zeros(class_count, features),
pseudo_count: 1f64,
}
}
fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
let features = data.cols();
let pseudo_fc = data.sum_rows() + self.pseudo_count;
let pseudo_cc = pseudo_fc.sum();
let log_probs = (pseudo_fc.apply(&|x| x.ln()) - pseudo_cc.ln()).into_vec();
for (i, item) in log_probs.iter().enumerate().take(features) {
self.log_probs[[class, i]] = *item;
}
Ok(())
}
fn joint_log_lik(&self,
data: &Matrix<f64>,
class_prior: &[f64])
-> LearningResult<Matrix<f64>> {
let class_count = class_prior.len();
let res = data * self.log_probs.transpose();
let mut per_class_row = Vec::with_capacity(class_count);
for p in class_prior {
per_class_row.push(p.ln());
}
let class_row_mat = Matrix::new(1, class_count, per_class_row);
Ok(res + class_row_mat.select_rows(&vec![0; data.rows()]))
}
}
#[cfg(test)]
mod tests {
use super::NaiveBayes;
use super::Gaussian;
use super::Bernoulli;
use super::Multinomial;
use learning::SupModel;
use linalg::Matrix;
#[test]
fn test_gaussian() {
let inputs = Matrix::new(6,
2,
vec![1.0, 1.1, 1.1, 0.9, 2.2, 2.3, 2.5, 2.7, 5.2, 4.3, 6.2, 7.3]);
let targets = Matrix::new(6,
3,
vec![1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0,
0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0]);
let mut model = NaiveBayes::<Gaussian>::new();
model.train(&inputs, &targets).unwrap();
let outputs = model.predict(&inputs).unwrap();
assert_eq!(outputs.into_vec(), targets.into_vec());
}
#[test]
fn test_bernoulli() {
let inputs = Matrix::new(4,
3,
vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0]);
let targets = Matrix::new(4, 2, vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]);
let mut model = NaiveBayes::<Bernoulli>::new();
model.train(&inputs, &targets).unwrap();
let outputs = model.predict(&inputs).unwrap();
assert_eq!(outputs.into_vec(), targets.into_vec());
}
#[test]
fn test_multinomial() {
let inputs = Matrix::new(4,
3,
vec![1.0, 0.0, 5.0, 0.0, 0.0, 11.0, 13.0, 1.0, 0.0, 12.0, 3.0,
0.0]);
let targets = Matrix::new(4, 2, vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]);
let mut model = NaiveBayes::<Multinomial>::new();
model.train(&inputs, &targets).unwrap();
let outputs = model.predict(&inputs).unwrap();
assert_eq!(outputs.into_vec(), targets.into_vec());
}
}