//! Neural Network Layers
use std::vec::*;
use linalg::{Matrix, MatrixSlice, BaseMatrix};
use learning::LearningResult;
use learning::error::{Error, ErrorKind};
use learning::toolkit::activ_fn::ActivationFunc;
use rand::thread_rng;
use rand::distributions::Sample;
use rand::distributions::normal::Normal;
use std::fmt::Debug;
use serde;
use erased_serde;
/// Trait for neural net layers
pub trait NetLayer: Debug + erased_serde::Serialize {
    /// The result of propagating data forward through this layer
    fn forward(&self, input: &Matrix<f64>, params: MatrixSlice<f64>) -> LearningResult<Matrix<f64>>;

    /// The gradient of the output of this layer with respect to its input
    fn back_input(&self, out_grad: &Matrix<f64>, input: &Matrix<f64>, output: &Matrix<f64>, params: MatrixSlice<f64>) -> Matrix<f64>;

    /// The gradient of the output of this layer with respect to its parameters
    fn back_params(&self, out_grad: &Matrix<f64>, input: &Matrix<f64>, output: &Matrix<f64>, params: MatrixSlice<f64>) -> Matrix<f64>;

    /// The default value of the parameters of this layer before training
    fn default_params(&self) -> Vec<f64>;

    /// The shape of the parameters used by this layer
    fn param_shape(&self) -> (usize, usize);

    /// The number of parameters used by this layer
    fn num_params(&self) -> usize {
        let shape = self.param_shape();
        shape.0 * shape.1
    }
}
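
// A `NetLayer` is driven by the network during training roughly as follows
// (an illustrative sketch in comments; `layer`, `input`, `params`, and `out_grad`
// are placeholder names, not items defined in this module):
//
//     let output = layer.forward(&input, params)?;
//     let input_grad = layer.back_input(&out_grad, &input, &output, params);
//     let param_grad = layer.back_params(&out_grad, &input, &output, params);
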
/// Linear network layer
///
/// Represents a fully connected layer with an optional bias term.
///
/// The parameters are a matrix of weights of size I x N,
/// where N is the dimensionality of the output and I the dimensionality of the input.
/// When the bias term is included, an extra row of bias weights is prepended,
/// giving a parameter matrix of size (I + 1) x N.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct Linear {
    /// The number of dimensions of the input (including the bias column when `has_bias` is true)
    input_size: usize,
    /// The number of dimensions of the output
    output_size: usize,
    /// Whether or not to include a bias term
    has_bias: bool,
}
impl Linear {
    /// Construct a new Linear layer with a bias term
    pub fn new(input_size: usize, output_size: usize) -> Linear {
        Linear {
            // One extra input dimension holds the bias weights
            input_size: input_size + 1,
            output_size: output_size,
            has_bias: true,
        }
    }

    /// Construct a Linear layer without a bias term
    pub fn without_bias(input_size: usize, output_size: usize) -> Linear {
        Linear {
            input_size: input_size,
            output_size: output_size,
            has_bias: false,
        }
    }
}
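
// For example (an illustrative sketch of the resulting parameter shapes):
//
//     let with_bias = Linear::new(3, 2);         // param_shape() == (4, 2)
//     let no_bias = Linear::without_bias(3, 2);  // param_shape() == (3, 2)
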
/// Removes the first column of a matrix by shifting its underlying data,
/// avoiding an extra allocation.
///
/// Used by `Linear::back_input` to drop the gradient of the bias column.
fn remove_first_col(mat: Matrix<f64>) -> Matrix<f64> {
    let rows = mat.rows();
    let cols = mat.cols();
    let mut data = mat.into_vec();
    let len = data.len();
    let mut del = 0;
    {
        let v = &mut *data;
        for i in 0..len {
            if i % cols == 0 {
                // Start of a row: skip this element and shift the rest left
                del += 1;
            } else if del > 0 {
                v[i - del] = v[i];
            }
        }
    }
    if del > 0 {
        data.truncate(len - del);
    }
    Matrix::new(rows, cols - 1, data)
}
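
// For a 2 x 3 matrix the effect is (illustrative):
//
//     [[1, 2, 3],        [[2, 3],
//      [4, 5, 6]]   ->    [5, 6]]
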
impl NetLayer for Linear {
    /// Computes a matrix product
    ///
    /// The input should have dimensions N x I,
    /// where N is the number of samples and I is the dimensionality of the input.
    fn forward(&self, input: &Matrix<f64>, params: MatrixSlice<f64>) -> LearningResult<Matrix<f64>> {
        if self.has_bias {
            if input.cols() + 1 != params.rows() {
                Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns"))
            } else {
                // Prepend a column of ones so the first row of `params` acts as the bias
                Ok(&Matrix::ones(input.rows(), 1).hcat(input) * &params)
            }
        } else {
            if input.cols() != params.rows() {
                Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns"))
            } else {
                Ok(input * &params)
            }
        }
    }
    fn back_input(&self, out_grad: &Matrix<f64>, _: &Matrix<f64>, _: &Matrix<f64>, params: MatrixSlice<f64>) -> Matrix<f64> {
        debug_assert_eq!(out_grad.cols(), params.cols());
        let gradient = out_grad * &params.transpose();

        if self.has_bias {
            // The bias column was prepended in `forward`, so its gradient is dropped here
            remove_first_col(gradient)
        } else {
            gradient
        }
    }
    fn back_params(&self, out_grad: &Matrix<f64>, input: &Matrix<f64>, _: &Matrix<f64>, _: MatrixSlice<f64>) -> Matrix<f64> {
        debug_assert_eq!(input.rows(), out_grad.rows());

        if self.has_bias {
            &Matrix::ones(input.rows(), 1).hcat(input).transpose() * out_grad
        } else {
            &input.transpose() * out_grad
        }
    }
    /// Initializes weights using Xavier initialization
    ///
    /// Weights are drawn from a Gaussian distribution with mean 0 and
    /// variance 2 / (input_size + output_size).
    fn default_params(&self) -> Vec<f64> {
        let mut distro = Normal::new(0.0, (2.0 / (self.input_size + self.output_size) as f64).sqrt());
        let mut rng = thread_rng();

        (0..self.input_size * self.output_size).map(|_| distro.sample(&mut rng))
                                               .collect()
    }

    fn param_shape(&self) -> (usize, usize) {
        (self.input_size, self.output_size)
    }
}
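
// Shape summary for `Linear` (illustrative; N = samples, I = inputs, O = outputs;
// with a bias term the parameter matrix is (I + 1) x O):
//
//     forward:     input (N x I)     -> output (N x O)
//     back_input:  out_grad (N x O)  -> input gradient (N x I)
//     back_params: out_grad (N x O)  -> parameter gradient ((I + 1) x O)
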
impl<T: ActivationFunc> NetLayer for T
    where T: serde::Serialize
{
    /// Applies the activation function to each element of the input
    fn forward(&self, input: &Matrix<f64>, _: MatrixSlice<f64>) -> LearningResult<Matrix<f64>> {
        let mut output = Vec::with_capacity(input.rows() * input.cols());
        for val in input.data() {
            output.push(T::func(*val));
        }
        Ok(Matrix::new(input.rows(), input.cols(), output))
    }

    fn back_input(&self, out_grad: &Matrix<f64>, _: &Matrix<f64>, output: &Matrix<f64>, _: MatrixSlice<f64>) -> Matrix<f64> {
        let mut in_grad = Vec::with_capacity(output.rows() * output.cols());
        for (y, g) in output.data().iter().zip(out_grad.data()) {
            // The gradient is computed from the layer's own output, so the
            // activation function does not have to be evaluated a second time
            in_grad.push(T::func_grad_from_output(*y) * g);
        }
        Matrix::new(output.rows(), output.cols(), in_grad)
    }

    /// Activation layers have no parameters, so there is nothing to differentiate
    fn back_params(&self, _: &Matrix<f64>, _: &Matrix<f64>, _: &Matrix<f64>, _: MatrixSlice<f64>) -> Matrix<f64> {
        Matrix::new(0, 0, Vec::new())
    }

    fn default_params(&self) -> Vec<f64> {
        Vec::new()
    }

    fn param_shape(&self) -> (usize, usize) {
        (0, 0)
    }
}
serialize_trait_object!(NetLayer);
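
// A few illustrative sanity checks for the layers above. These are a sketch
// rather than an exhaustive test suite; they assume the rulinalg
// `MatrixSlice::from_matrix` constructor and a `Sigmoid` activation function
// that satisfies the `Serialize` bound of the blanket impl.
#[cfg(test)]
mod tests {
    use super::*;
    use linalg::{BaseMatrix, Matrix, MatrixSlice};
    use learning::toolkit::activ_fn::Sigmoid;

    #[test]
    fn remove_first_col_drops_bias_column() {
        let mat = Matrix::new(2, 3, vec![1.0, 2.0, 3.0,
                                         4.0, 5.0, 6.0]);
        let trimmed = remove_first_col(mat);
        assert_eq!(trimmed.rows(), 2);
        assert_eq!(trimmed.cols(), 2);
        assert_eq!(trimmed.into_vec(), vec![2.0, 3.0, 5.0, 6.0]);
    }

    #[test]
    fn linear_forward_has_expected_shape() {
        let layer = Linear::new(3, 2);
        // param_shape is (input_size + 1, output_size) because of the bias column
        let (rows, cols) = layer.param_shape();
        assert_eq!((rows, cols), (4, 2));

        let params = Matrix::new(rows, cols, layer.default_params());
        let slice = MatrixSlice::from_matrix(&params, [0, 0], rows, cols);
        let input = Matrix::new(5, 3, vec![0.0; 15]);
        let output = layer.forward(&input, slice).unwrap();
        assert_eq!((output.rows(), output.cols()), (5, 2));
    }

    #[test]
    fn activation_layer_applies_function_elementwise() {
        // The parameter slice is ignored by activation layers; a dummy 1 x 1 slice is enough
        let dummy = Matrix::new(1, 1, vec![0.0]);
        let params = MatrixSlice::from_matrix(&dummy, [0, 0], 1, 1);
        let input = Matrix::new(1, 2, vec![0.0, 0.0]);
        let output = Sigmoid.forward(&input, params).unwrap();
        // Sigmoid(0) = 0.5 for each element
        assert_eq!(output.into_vec(), vec![0.5, 0.5]);
    }
}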