//! Naive Bayes Classifiers
//!
//! The classifier supports Gaussian, Bernoulli and Multinomial distributions.
//!
//! A naive Bayes classifier assumes that the features of each input are
//! conditionally independent given its class. Under this assumption we apply Bayes'
//! rule to compute the probability that each input belongs to each class.
//!
//! # Examples
//!
//! ```
//! use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
//! use rusty_machine::linalg::Matrix;
//! use rusty_machine::learning::SupModel;
//!
//! let inputs = Matrix::new(6, 2, vec![1.0, 1.1,
//!                                     1.1, 0.9,
//!                                     2.2, 2.3,
//!                                     2.5, 2.7,
//!                                     5.2, 4.3,
//!                                     6.2, 7.3]);
//!
//! let targets = Matrix::new(6, 3, vec![1.0, 0.0, 0.0,
//!                                      1.0, 0.0, 0.0,
//!                                      0.0, 1.0, 0.0,
//!                                      0.0, 1.0, 0.0,
//!                                      0.0, 0.0, 1.0,
//!                                      0.0, 0.0, 1.0]);
//!
//! // Create a Gaussian Naive Bayes classifier.
//! let mut model = NaiveBayes::<Gaussian>::new();
//!
//! // Train the model.
//! model.train(&inputs, &targets).unwrap();
//!
//! // Predict the classes on the input data
//! let outputs = model.predict(&inputs).unwrap();
//!
//! // Will output the target classes - otherwise our classifier is bad!
//! println!("Final outputs --\n{}", outputs);
//! ```
use linalg::{Matrix, Axes, BaseMatrix, BaseMatrixMut};
use learning::{LearningResult, SupModel};
use learning::error::{Error, ErrorKind};
use rulinalg::utils;
use std::f64::consts::PI;
/// The Naive Bayes model.
#[derive(Debug, Default)]
pub struct NaiveBayes<T: Distribution> {
distr: Option<T>,
cluster_count: Option<usize>,
class_prior: Option<Vec<f64>>,
class_counts: Vec<usize>,
}
impl<T: Distribution> NaiveBayes<T> {
/// Create a new NaiveBayes model from a given
/// distribution.
///
/// # Examples
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
///
/// // Create a new Gaussian Naive Bayes model.
/// let _ = NaiveBayes::<Gaussian>::new();
/// ```
pub fn new() -> NaiveBayes<T> {
NaiveBayes {
distr: None,
cluster_count: None,
class_prior: None,
class_counts: Vec::new(),
}
}
/// Get the cluster count for this model.
///
/// Returns an option which is `None` until the model has been trained.
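///
/// # Examples
///
/// A minimal sketch; the data below is illustrative only (three classes,
/// two samples each):
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
/// use rusty_machine::learning::SupModel;
/// use rusty_machine::linalg::Matrix;
///
/// let mut model = NaiveBayes::<Gaussian>::new();
///
/// // No cluster count before training.
/// assert!(model.cluster_count().is_none());
///
/// let inputs = Matrix::new(6, 1, vec![1.0, 1.2, 5.0, 5.5, 9.0, 9.4]);
/// let targets = Matrix::new(6, 3, vec![1.0, 0.0, 0.0,
///                                      1.0, 0.0, 0.0,
///                                      0.0, 1.0, 0.0,
///                                      0.0, 1.0, 0.0,
///                                      0.0, 0.0, 1.0,
///                                      0.0, 0.0, 1.0]);
/// model.train(&inputs, &targets).unwrap();
///
/// // One cluster per target column after training.
/// assert_eq!(model.cluster_count(), Some(&3));
/// ```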
pub fn cluster_count(&self) -> Option<&usize> {
self.cluster_count.as_ref()
}
/// Get the class prior distribution for this model.
///
/// Returns an option which is `None` until the model has been trained.
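///
/// # Examples
///
/// A minimal sketch with illustrative binary data (one sample in the first
/// class, three in the second):
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Bernoulli};
/// use rusty_machine::learning::SupModel;
/// use rusty_machine::linalg::Matrix;
///
/// let inputs = Matrix::new(4, 2, vec![1.0, 0.0,
///                                     0.0, 1.0,
///                                     0.0, 1.0,
///                                     1.0, 1.0]);
/// let targets = Matrix::new(4, 2, vec![1.0, 0.0,
///                                      0.0, 1.0,
///                                      0.0, 1.0,
///                                      0.0, 1.0]);
///
/// let mut model = NaiveBayes::<Bernoulli>::new();
/// model.train(&inputs, &targets).unwrap();
///
/// // The prior is simply the proportion of samples in each class.
/// assert_eq!(model.class_prior(), Some(&vec![0.25, 0.75]));
/// ```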
pub fn class_prior(&self) -> Option<&Vec<f64>> {
self.class_prior.as_ref()
}
/// Get the distribution for this model.
///
/// Returns an option which is `None` until the model has been trained.
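///
/// # Examples
///
/// A sketch of the untrained case:
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Bernoulli};
///
/// let model = NaiveBayes::<Bernoulli>::new();
///
/// // No distribution parameters exist before training.
/// assert!(model.distr().is_none());
/// ```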
pub fn distr(&self) -> Option<&T> {
self.distr.as_ref()
}
}
/// Train and predict from the Naive Bayes model.
///
/// Each row of the input matrix is a single sample and each column a feature.
/// Each row of the target matrix is an indicator (one-hot) vector giving that
/// sample's class, e.g. [[1,0,0],[0,0,1]] encodes class 1 for the first sample
/// and class 3 for the second.
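///
/// For example, the `[[1,0,0],[0,0,1]]` targets above can be constructed as:
///
/// ```
/// use rusty_machine::linalg::Matrix;
///
/// // Two samples, three classes: the first sample is class 1,
/// // the second is class 3.
/// let targets = Matrix::new(2, 3, vec![1.0, 0.0, 0.0,
///                                      0.0, 0.0, 1.0]);
/// ```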
impl<T: Distribution> SupModel<Matrix<f64>, Matrix<f64>> for NaiveBayes<T> {
/// Train the model using inputs and targets.
fn train(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
self.distr = Some(T::from_model_params(targets.cols(), inputs.cols()));
self.update_params(inputs, targets)
}
/// Predict output from inputs.
fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
let log_probs = try!(self.get_log_probs(inputs));
let input_classes = NaiveBayes::<T>::get_classes(log_probs);
if let Some(cluster_count) = self.cluster_count {
let mut class_data = Vec::with_capacity(inputs.rows() * cluster_count);
for c in input_classes {
let mut row = vec![0f64; cluster_count];
row[c] = 1f64;
class_data.append(&mut row);
}
Ok(Matrix::new(inputs.rows(), cluster_count, class_data))
} else {
Err(Error::new(ErrorKind::UntrainedModel, "The model has not been trained."))
}
}
}
impl<T: Distribution> NaiveBayes<T> {
/// Get the log-probabilities per class for each input.
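///
/// The returned matrix has one row per input and one column per class.
///
/// # Examples
///
/// A minimal sketch with illustrative data (this assumes the `BaseMatrix`
/// trait re-export from `rusty_machine::linalg` for `rows`/`cols`):
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
/// use rusty_machine::learning::SupModel;
/// use rusty_machine::linalg::{Matrix, BaseMatrix};
///
/// let inputs = Matrix::new(4, 1, vec![1.0, 1.5, 8.0, 8.5]);
/// let targets = Matrix::new(4, 2, vec![1.0, 0.0,
///                                      1.0, 0.0,
///                                      0.0, 1.0,
///                                      0.0, 1.0]);
///
/// let mut model = NaiveBayes::<Gaussian>::new();
/// model.train(&inputs, &targets).unwrap();
///
/// let log_probs = model.get_log_probs(&inputs).unwrap();
///
/// // One row per input, one column per class.
/// assert_eq!(log_probs.rows(), 4);
/// assert_eq!(log_probs.cols(), 2);
/// ```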
pub fn get_log_probs(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
if let (&Some(ref distr), &Some(ref prior)) = (&self.distr, &self.class_prior) {
// Get the joint log likelihood from the distribution
distr.joint_log_lik(inputs, prior)
} else {
Err(Error::new_untrained())
}
}
fn update_params(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
let class_count = targets.cols();
let total_data = inputs.rows();
self.class_counts = vec![0; class_count];
let mut class_data = vec![Vec::new(); class_count];
for (idx, row) in targets.row_iter().enumerate() {
// Find the class of this input
let class = try!(NaiveBayes::<T>::find_class(row.raw_slice()));
// Note the class of the input
class_data[class].push(idx);
self.class_counts[class] += 1;
}
if let Some(ref mut distr) = self.distr {
for (idx, c) in class_data.into_iter().enumerate() {
// If this class' vector has not been populated, we can safely
// skip this iteration, since the user is clearly not interested
// in associating features with this class
if c.is_empty() {
continue;
}
// Update the parameters within this class
try!(distr.update_params(&inputs.select_rows(&c), idx));
}
}
let mut class_prior = Vec::with_capacity(class_count);
// Compute the prior as the proportion in each class
class_prior.extend(self.class_counts.iter().map(|c| *c as f64 / total_data as f64));
self.class_prior = Some(class_prior);
self.cluster_count = Some(class_count);
Ok(())
}
fn find_class(row: &[f64]) -> LearningResult<usize> {
// Find the `1` entry in the row
for (idx, r) in row.into_iter().enumerate() {
if *r == 1f64 {
return Ok(idx);
}
}
Err(Error::new(ErrorKind::InvalidState,
"No class found for entry in targets"))
}
fn get_classes(log_probs: Matrix<f64>) -> Vec<usize> {
let mut data_classes = Vec::with_capacity(log_probs.rows());
data_classes.extend(log_probs.row_iter().map(|row| {
// Argmax each class log-probability per input
let (class, _) = utils::argmax(row.raw_slice());
class
}));
data_classes
}
}
/// Naive Bayes Distribution.
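///
/// This trait encapsulates the class-conditional likelihood model used by
/// `NaiveBayes`. In normal use the methods below are called for you by
/// `NaiveBayes::train` and `NaiveBayes::predict`.
///
/// # Examples
///
/// The sketch below drives a `Gaussian` through the trait methods directly,
/// with illustrative data (it also assumes the `BaseMatrix` re-export from
/// `rusty_machine::linalg` for `rows`/`cols`):
///
/// ```
/// use rusty_machine::learning::naive_bayes::{Distribution, Gaussian};
/// use rusty_machine::linalg::{Matrix, BaseMatrix};
///
/// // Two classes, one feature.
/// let mut distr = Gaussian::from_model_params(2, 1);
///
/// // Fit each class on its own rows of training data.
/// distr.update_params(&Matrix::new(2, 1, vec![1.0, 2.0]), 0).unwrap();
/// distr.update_params(&Matrix::new(2, 1, vec![9.0, 11.0]), 1).unwrap();
///
/// // Joint log-likelihood of a new point under uniform class priors.
/// let log_lik = distr.joint_log_lik(&Matrix::new(1, 1, vec![1.5]), &[0.5, 0.5]).unwrap();
///
/// // One row per input, one column per class.
/// assert_eq!(log_lik.rows(), 1);
/// assert_eq!(log_lik.cols(), 2);
/// ```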
pub trait Distribution {
/// Initialize the distribution parameters.
fn from_model_params(class_count: usize, features: usize) -> Self;
/// Updates the distribution parameters.
fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()>;
/// Compute the joint log likelihood of the data.
///
/// Returns a matrix with one row per input, whose entries are the joint log
/// likelihood of that input under each class.
fn joint_log_lik(&self,
data: &Matrix<f64>,
class_prior: &[f64])
-> LearningResult<Matrix<f64>>;
}
/// The Gaussian Naive Bayes model distribution.
///
/// Defines:
///
/// p(x|C<sub>k</sub>) = ∏<sub>i</sub> N(x<sub>i</sub> ;
/// μ<sub>k</sub>, σ<sup>2</sup><sub>k</sub>)
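///
/// # Examples
///
/// A minimal sketch of the fitted parameters, using illustrative data
/// (one feature, two classes):
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
/// use rusty_machine::learning::SupModel;
/// use rusty_machine::linalg::Matrix;
///
/// let inputs = Matrix::new(4, 1, vec![1.0, 3.0, 10.0, 14.0]);
/// let targets = Matrix::new(4, 2, vec![1.0, 0.0,
///                                      1.0, 0.0,
///                                      0.0, 1.0,
///                                      0.0, 1.0]);
///
/// let mut model = NaiveBayes::<Gaussian>::new();
/// model.train(&inputs, &targets).unwrap();
///
/// let gaussian = model.distr().unwrap();
///
/// // Per-class feature means (theta) and sample variances (sigma).
/// assert!((gaussian.theta()[[0, 0]] - 2.0).abs() < 1e-12);
/// assert!((gaussian.sigma()[[1, 0]] - 8.0).abs() < 1e-12);
/// ```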
#[derive(Debug)]
pub struct Gaussian {
theta: Matrix<f64>,
sigma: Matrix<f64>,
}
impl Gaussian {
/// Returns the distribution means.
///
/// This is a matrix of class by feature means.
pub fn theta(&self) -> &Matrix<f64> {
&self.theta
}
/// Returns the distribution variances.
///
/// This is a matrix of class by feature variances.
pub fn sigma(&self) -> &Matrix<f64> {
&self.sigma
}
}
impl Distribution for Gaussian {
fn from_model_params(class_count: usize, features: usize) -> Gaussian {
Gaussian {
theta: Matrix::zeros(class_count, features),
sigma: Matrix::zeros(class_count, features),
}
}
fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
// Compute mean and sample variance
let mean = data.mean(Axes::Row).into_vec();
let var = try!(data.variance(Axes::Row).map_err(|_| {
Error::new(ErrorKind::InvalidData,
"Cannot compute variance for Gaussian distribution.")
}))
.into_vec();
let features = data.cols();
for (idx, (m, v)) in mean.into_iter().zip(var.into_iter()).enumerate() {
self.theta.mut_data()[class * features + idx] = m;
self.sigma.mut_data()[class * features + idx] = v;
}
Ok(())
}
fn joint_log_lik(&self,
data: &Matrix<f64>,
class_prior: &[f64])
-> LearningResult<Matrix<f64>> {
let class_count = class_prior.len();
let mut log_lik = Vec::with_capacity(class_count);
for (i, item) in class_prior.into_iter().enumerate() {
let joint_i = item.ln();
let n_ij = -0.5 * (self.sigma.select_rows(&[i]) * 2.0 * PI).apply(&|x| x.ln()).sum();
// NOTE: Here we are copying the row data which is inefficient
let r_ij = (data - self.theta.select_rows(&vec![i; data.rows()]))
.apply(&|x| x * x)
.elediv(&self.sigma.select_rows(&vec![i; data.rows()]))
.sum_cols();
let res = (-r_ij * 0.5) + n_ij;
log_lik.append(&mut (res + joint_i).into_vec());
}
Ok(Matrix::new(class_count, data.rows(), log_lik).transpose())
}
}
/// The Bernoulli Naive Bayes model distribution.
///
/// Defines:
///
/// p(x|C<sub>k</sub>) = ∏<sub>i</sub> p<sub>k</sub><sup>x<sub>i</sub></sup>
/// (1-p<sub>k</sub>)<sup>1-x<sub>i</sub></sup>
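///
/// The feature probabilities are estimated with additive (Laplace) smoothing
/// using a pseudo-count of one.
///
/// # Examples
///
/// A minimal sketch with illustrative binary data:
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Bernoulli};
/// use rusty_machine::learning::SupModel;
/// use rusty_machine::linalg::Matrix;
///
/// let inputs = Matrix::new(4, 2, vec![1.0, 0.0,
///                                     1.0, 0.0,
///                                     0.0, 1.0,
///                                     0.0, 1.0]);
/// let targets = Matrix::new(4, 2, vec![1.0, 0.0,
///                                      1.0, 0.0,
///                                      0.0, 1.0,
///                                      0.0, 1.0]);
///
/// let mut model = NaiveBayes::<Bernoulli>::new();
/// model.train(&inputs, &targets).unwrap();
///
/// // Feature 0 is on in both class-0 samples, so its smoothed
/// // probability is (2 + 1) / (2 + 2) = 0.75.
/// let p = model.distr().unwrap().log_probs()[[0, 0]].exp();
/// assert!((p - 0.75).abs() < 1e-12);
/// ```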
#[derive(Debug)]
pub struct Bernoulli {
log_probs: Matrix<f64>,
pseudo_count: f64,
}
impl Bernoulli {
/// The log probability matrix.
///
/// A matrix of class by feature model log-probabilities.
pub fn log_probs(&self) -> &Matrix<f64> {
&self.log_probs
}
}
impl Distribution for Bernoulli {
fn from_model_params(class_count: usize, features: usize) -> Bernoulli {
Bernoulli {
log_probs: Matrix::zeros(class_count, features),
pseudo_count: 1f64,
}
}
fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
let features = data.cols();
// Apply additive smoothing: add the pseudo count once to each feature count
// and twice to the class count (once per binary outcome)
let pseudo_cc = data.rows() as f64 + (2f64 * self.pseudo_count);
let pseudo_fc = data.sum_rows() + self.pseudo_count;
let log_probs = (pseudo_fc.apply(&|x| x.ln()) - pseudo_cc.ln()).into_vec();
for (i, item) in log_probs.iter().enumerate().take(features) {
self.log_probs[[class, i]] = *item;
}
Ok(())
}
fn joint_log_lik(&self,
data: &Matrix<f64>,
class_prior: &[f64])
-> LearningResult<Matrix<f64>> {
let class_count = class_prior.len();
let neg_prob = self.log_probs.clone().apply(&|x| (1f64 - x.exp()).ln());
let res = data * (&self.log_probs - &neg_prob).transpose();
// NOTE: Some messy stuff now to get the class row contribution.
// Really we want to add to each row the class log-priors and the
// neg_prob_sum contribution - the last term in
// x log(p) + (1-x)log(1-p) = x (log(p) - log(1-p)) + log(1-p)
let mut per_class_row = Vec::with_capacity(class_count);
let neg_prob_sum = neg_prob.sum_cols();
for (idx, p) in class_prior.into_iter().enumerate() {
per_class_row.push(p.ln() + neg_prob_sum[idx]);
}
let class_row_mat = Matrix::new(1, class_count, per_class_row);
Ok(res + class_row_mat.select_rows(&vec![0; data.rows()]))
}
}
/// The Multinomial Naive Bayes model distribution.
///
/// Defines:
///
/// p(x|C<sub>k</sub>) ∝ ∏<sub>i</sub> p<sub>k</sub><sup>x<sub>i</sub></sup>
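///
/// The smoothed feature probabilities for each class sum to one.
///
/// # Examples
///
/// A minimal sketch with illustrative count data:
///
/// ```
/// use rusty_machine::learning::naive_bayes::{NaiveBayes, Multinomial};
/// use rusty_machine::learning::SupModel;
/// use rusty_machine::linalg::Matrix;
///
/// let inputs = Matrix::new(4, 3, vec![2.0, 1.0, 0.0,
///                                     3.0, 0.0, 1.0,
///                                     0.0, 4.0, 5.0,
///                                     1.0, 3.0, 6.0]);
/// let targets = Matrix::new(4, 2, vec![1.0, 0.0,
///                                      1.0, 0.0,
///                                      0.0, 1.0,
///                                      0.0, 1.0]);
///
/// let mut model = NaiveBayes::<Multinomial>::new();
/// model.train(&inputs, &targets).unwrap();
///
/// // Exponentiating the log-probabilities for one class recovers a
/// // distribution over the features.
/// let log_probs = model.distr().unwrap().log_probs();
/// let total: f64 = (0..3).map(|j| log_probs[[0, j]].exp()).sum();
/// assert!((total - 1.0).abs() < 1e-12);
/// ```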
#[derive(Debug)]
pub struct Multinomial {
log_probs: Matrix<f64>,
pseudo_count: f64,
}
impl Multinomial {
/// The log probability matrix.
///
/// A matrix of class by feature model log-probabilities.
pub fn log_probs(&self) -> &Matrix<f64> {
&self.log_probs
}
}
impl Distribution for Multinomial {
fn from_model_params(class_count: usize, features: usize) -> Multinomial {
Multinomial {
log_probs: Matrix::zeros(class_count, features),
pseudo_count: 1f64,
}
}
fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
let features = data.cols();
let pseudo_fc = data.sum_rows() + self.pseudo_count;
let pseudo_cc = pseudo_fc.sum();
let log_probs = (pseudo_fc.apply(&|x| x.ln()) - pseudo_cc.ln()).into_vec();
for (i, item) in log_probs.iter().enumerate().take(features) {
self.log_probs[[class, i]] = *item;
}
Ok(())
}
fn joint_log_lik(&self,
data: &Matrix<f64>,
class_prior: &[f64])
-> LearningResult<Matrix<f64>> {
let class_count = class_prior.len();
let res = data * self.log_probs.transpose();
let mut per_class_row = Vec::with_capacity(class_count);
for p in class_prior {
per_class_row.push(p.ln());
}
let class_row_mat = Matrix::new(1, class_count, per_class_row);
Ok(res + class_row_mat.select_rows(&vec![0; data.rows()]))
}
}
#[cfg(test)]
mod tests {
use super::NaiveBayes;
use super::Gaussian;
use super::Bernoulli;
use super::Multinomial;
use learning::SupModel;
use linalg::Matrix;
#[test]
fn test_gaussian() {
let inputs = Matrix::new(6,
2,
vec![1.0, 1.1, 1.1, 0.9, 2.2, 2.3, 2.5, 2.7, 5.2, 4.3, 6.2, 7.3]);
let targets = Matrix::new(6,
3,
vec![1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0,
0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0]);
let mut model = NaiveBayes::<Gaussian>::new();
model.train(&inputs, &targets).unwrap();
let outputs = model.predict(&inputs).unwrap();
assert_eq!(outputs.into_vec(), targets.into_vec());
}
#[test]
fn test_bernoulli() {
let inputs = Matrix::new(4,
3,
vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0]);
let targets = Matrix::new(4, 2, vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]);
let mut model = NaiveBayes::<Bernoulli>::new();
model.train(&inputs, &targets).unwrap();
let outputs = model.predict(&inputs).unwrap();
assert_eq!(outputs.into_vec(), targets.into_vec());
}
#[test]
fn test_multinomial() {
let inputs = Matrix::new(4,
3,
vec![1.0, 0.0, 5.0, 0.0, 0.0, 11.0, 13.0, 1.0, 0.0, 12.0, 3.0,
0.0]);
let targets = Matrix::new(4, 2, vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]);
let mut model = NaiveBayes::<Multinomial>::new();
model.train(&inputs, &targets).unwrap();
let outputs = model.predict(&inputs).unwrap();
assert_eq!(outputs.into_vec(), targets.into_vec());
}
}