third_party/rusty-machine/src/data/transforms/normalize.rs - incubator-teaclave-sgx-sdk - Git at Google

 //! The Normalizing Transformer
 //!
 //! This module contains the `Normalizer` transformer.
 //!
 //! The `Normalizer` transformer is used to transform input data
 //! so that the norm of each row is equal to 1. By default the
 //! `Normalizer` uses the `Euclidean` norm.
 //!
 //! If input data has a row with all 0, `Normalizer` keeps the row as it is.
 //!
 //! Because transformation is performed per row independently,
 //! inverse transformation is not supported.
 //!
 //! # Examples
 //!
 //! ```
 //! use rusty_machine::data::transforms::{Transformer, Normalizer};
 //! use rusty_machine::linalg::Matrix;
 //!
 //! // Constructs a new `Normalizer`
 //! let mut transformer = Normalizer::default();
 //!
 //! let inputs = Matrix::new(2, 2, vec![-1.0, 2.0, 1.5, 3.0]);
 //!
 //! // Transform the inputs
 //! let transformed = transformer.transform(inputs).unwrap();
 //! ```
 use std::vec::*;
 use learning::error::{Error, ErrorKind};
 use linalg::{Matrix, MatrixSlice, BaseMatrix, BaseMatrixMut};
 use rulinalg::norm::{MatrixNorm, Euclidean};

 use super::Transformer;

 use libnum::Float;

 use std::marker::PhantomData;

 /// The Normalizer
 ///
 /// The Normalizer provides an implementation of `Transformer`
 /// which allows us to transform the all rows to have the same norm.
 ///
 /// The default `Normalizer` will use the `Euclidean` norm.
 ///
 /// See the module description for more information.
 #[derive(Debug)]
 pub struct Normalizer<T: Float, M>
     where for<'a> M: MatrixNorm<T, MatrixSlice<'a, T>>
 {
     norm: M,
     _marker: PhantomData<T>
 }

 /// Create a `Normalizer` with a Euclidean norm.
 impl<T: Float> Default for Normalizer<T, Euclidean> {
     fn default() -> Self {
         Normalizer {
             norm: Euclidean,
             _marker: PhantomData,
         }
     }
 }

 impl<T: Float, M> Normalizer<T, M>
     where for<'a> M: MatrixNorm<T, MatrixSlice<'a, T>>
 {
     /// Constructs a new `Normalizer` with given norm.
     ///
     /// # Examples
     ///
     /// ```
     /// use rusty_machine::data::transforms::Normalizer;
     /// use rusty_machine::linalg::norm::Euclidean;
     ///
     /// // Constructs a new `Normalizer`
     /// let _ = Normalizer::<f64, Euclidean>::new(Euclidean);
     /// ```
     pub fn new(norm: M) -> Self {
         Normalizer {
             norm: norm,
             _marker: PhantomData
         }
     }
 }

 impl<T: Float, M> Transformer<Matrix<T>> for Normalizer<T, M>
     where for<'a> M: MatrixNorm<T, MatrixSlice<'a, T>>
 {
     fn transform(&mut self, mut inputs: Matrix<T>) -> Result<Matrix<T>, Error> {
         let dists: Vec<T> = inputs.row_iter().map(|m| self.norm.norm(&*m)).collect();
         for (mut row, &d) in inputs.row_iter_mut().zip(dists.iter()) {

             if !d.is_finite() {
                 return Err(Error::new(ErrorKind::InvalidData,
                                       "Some data point is non-finite."));
             } else if d != T::zero() {
                 // no change if distance is 0
                 *row /= d;
             }
         }
         Ok(inputs)
     }
 }


 #[cfg(test)]
 mod tests {
     use super::*;
     use super::super::Transformer;
     use linalg::Matrix;

     use std::f64;

     #[test]
     fn nan_data_test() {
         let inputs = Matrix::new(2, 2, vec![f64::NAN; 4]);
         let mut normalizer = Normalizer::default();
         let res = normalizer.transform(inputs);
         assert!(res.is_err());
     }

     #[test]
     fn inf_data_test() {
         let inputs = Matrix::new(2, 2, vec![f64::INFINITY; 4]);
         let mut normalizer = Normalizer::default();
         let res = normalizer.transform(inputs);
         assert!(res.is_err());
     }

     #[test]
     fn single_row_test() {
         let inputs = matrix![1.0, 2.0];
         let mut normalizer = Normalizer::default();
         let transformed = normalizer.transform(inputs).unwrap();

         let exp = matrix![0.4472135954999579, 0.8944271909999159];
         assert_matrix_eq!(transformed, exp);
     }

     #[test]
     fn basic_normalizer_test() {
         let inputs = matrix![-1.0f32, 2.0;
                              0.0, 3.0];

         let mut normalizer = Normalizer::default();
         let transformed = normalizer.transform(inputs).unwrap();

         let exp = matrix![-0.4472135954999579, 0.8944271909999159;
                           0., 1.];
         assert_matrix_eq!(transformed, exp);

         let inputs = matrix![1., 2.;
                              10., 20.;
                              100., 200.];

         let transformed = normalizer.transform(inputs).unwrap();

         let exp = matrix![0.4472135954999579, 0.8944271909999159;
                           0.4472135954999579, 0.8944271909999159;
                           0.4472135954999579, 0.8944271909999159];
         assert_matrix_eq!(transformed, exp);

         let inputs = matrix![1., 2., 10.;
                              0., 10., 20.;
                              100., 10., 200.;
                              0., 0., 0.];
         let transformed = normalizer.transform(inputs).unwrap();

         let exp = matrix![0.09759000729485333, 0.19518001458970666, 0.9759000729485332;
                           0., 0.4472135954999579, 0.8944271909999159;
                           0.4467670516087703, 0.04467670516087703, 0.8935341032175406;
                           0., 0., 0.];
         assert_matrix_eq!(transformed, exp);
     }
 }
	//! The Normalizing Transformer
	//!
	//! This module contains the `Normalizer` transformer.
	//!
	//! The `Normalizer` transformer is used to transform input data
	//! so that the norm of each row is equal to 1. By default the
	//! `Normalizer` uses the `Euclidean` norm.
	//!
	//! If input data has a row with all 0, `Normalizer` keeps the row as it is.
	//!
	//! Because transformation is performed per row independently,
	//! inverse transformation is not supported.
	//!
	//! # Examples
	//!
	//! ```
	//! use rusty_machine::data::transforms::{Transformer, Normalizer};
	//! use rusty_machine::linalg::Matrix;
	//!
	//! // Constructs a new `Normalizer`
	//! let mut transformer = Normalizer::default();
	//!
	//! let inputs = Matrix::new(2, 2, vec![-1.0, 2.0, 1.5, 3.0]);
	//!
	//! // Transform the inputs
	//! let transformed = transformer.transform(inputs).unwrap();
	//! ```
	use std::vec::*;
	use learning::error::{Error, ErrorKind};
	use linalg::{Matrix, MatrixSlice, BaseMatrix, BaseMatrixMut};
	use rulinalg::norm::{MatrixNorm, Euclidean};

	use super::Transformer;

	use libnum::Float;

	use std::marker::PhantomData;

	/// The Normalizer
	///
	/// The Normalizer provides an implementation of `Transformer`
	/// which allows us to transform the all rows to have the same norm.
	///
	/// The default `Normalizer` will use the `Euclidean` norm.
	///
	/// See the module description for more information.
	#[derive(Debug)]
	pub struct Normalizer<T: Float, M>
	where for<'a> M: MatrixNorm<T, MatrixSlice<'a, T>>
	{
	norm: M,
	_marker: PhantomData<T>
	}

	/// Create a `Normalizer` with a Euclidean norm.
	impl<T: Float> Default for Normalizer<T, Euclidean> {
	fn default() -> Self {
	Normalizer {
	norm: Euclidean,
	_marker: PhantomData,
	}
	}
	}

	impl<T: Float, M> Normalizer<T, M>
	where for<'a> M: MatrixNorm<T, MatrixSlice<'a, T>>
	{
	/// Constructs a new `Normalizer` with given norm.
	///
	/// # Examples
	///
	/// ```
	/// use rusty_machine::data::transforms::Normalizer;
	/// use rusty_machine::linalg::norm::Euclidean;
	///
	/// // Constructs a new `Normalizer`
	/// let _ = Normalizer::<f64, Euclidean>::new(Euclidean);
	/// ```
	pub fn new(norm: M) -> Self {
	Normalizer {
	norm: norm,
	_marker: PhantomData
	}
	}
	}

	impl<T: Float, M> Transformer<Matrix<T>> for Normalizer<T, M>
	where for<'a> M: MatrixNorm<T, MatrixSlice<'a, T>>
	{
	fn transform(&mut self, mut inputs: Matrix<T>) -> Result<Matrix<T>, Error> {
	let dists: Vec<T> = inputs.row_iter().map(\|m\| self.norm.norm(&*m)).collect();
	for (mut row, &d) in inputs.row_iter_mut().zip(dists.iter()) {

	if !d.is_finite() {
	return Err(Error::new(ErrorKind::InvalidData,
	"Some data point is non-finite."));
	} else if d != T::zero() {
	// no change if distance is 0
	*row /= d;
	}
	}
	Ok(inputs)
	}
	}


	#[cfg(test)]
	mod tests {
	use super::*;
	use super::super::Transformer;
	use linalg::Matrix;

	use std::f64;

	#[test]
	fn nan_data_test() {
	let inputs = Matrix::new(2, 2, vec![f64::NAN; 4]);
	let mut normalizer = Normalizer::default();
	let res = normalizer.transform(inputs);
	assert!(res.is_err());
	}

	#[test]
	fn inf_data_test() {
	let inputs = Matrix::new(2, 2, vec![f64::INFINITY; 4]);
	let mut normalizer = Normalizer::default();
	let res = normalizer.transform(inputs);
	assert!(res.is_err());
	}

	#[test]
	fn single_row_test() {
	let inputs = matrix![1.0, 2.0];
	let mut normalizer = Normalizer::default();
	let transformed = normalizer.transform(inputs).unwrap();

	let exp = matrix![0.4472135954999579, 0.8944271909999159];
	assert_matrix_eq!(transformed, exp);
	}

	#[test]
	fn basic_normalizer_test() {
	let inputs = matrix![-1.0f32, 2.0;
	0.0, 3.0];

	let mut normalizer = Normalizer::default();
	let transformed = normalizer.transform(inputs).unwrap();

	let exp = matrix![-0.4472135954999579, 0.8944271909999159;
	0., 1.];
	assert_matrix_eq!(transformed, exp);

	let inputs = matrix![1., 2.;
	10., 20.;
	100., 200.];

	let transformed = normalizer.transform(inputs).unwrap();

	let exp = matrix![0.4472135954999579, 0.8944271909999159;
	0.4472135954999579, 0.8944271909999159;
	0.4472135954999579, 0.8944271909999159];
	assert_matrix_eq!(transformed, exp);

	let inputs = matrix![1., 2., 10.;
	0., 10., 20.;
	100., 10., 200.;
	0., 0., 0.];
	let transformed = normalizer.transform(inputs).unwrap();

	let exp = matrix![0.09759000729485333, 0.19518001458970666, 0.9759000729485332;
	0., 0.4472135954999579, 0.8944271909999159;
	0.4467670516087703, 0.04467670516087703, 0.8935341032175406;
	0., 0., 0.];
	assert_matrix_eq!(transformed, exp);
	}
	}