| //! Neural Network Layers |
| use linalg::{Matrix, MatrixSlice, BaseMatrix}; |
| |
| use learning::LearningResult; |
| use learning::error::{Error, ErrorKind}; |
| use learning::toolkit::activ_fn::ActivationFunc; |
| |
| use rand::thread_rng; |
| use rand::distributions::Sample; |
| use rand::distributions::normal::Normal; |
| |
| use std::fmt::Debug; |
| use serde; |
| use erased_serde; |
| |
/// Trait for neural net layers
///
/// A layer is defined by its forward pass together with the gradients
/// needed to propagate errors backward through it.
| pub trait NetLayer : Debug + erased_serde::Serialize { |
    /// The result of propagating data forward through this layer
| fn forward(&self, input: &Matrix<f64>, params: MatrixSlice<f64>) -> LearningResult<Matrix<f64>>; |
| |
    /// The gradient of the cost with respect to this layer's input, given the gradient with respect to its output
| fn back_input(&self, out_grad: &Matrix<f64>, input: &Matrix<f64>, output: &Matrix<f64>, params: MatrixSlice<f64>) -> Matrix<f64>; |
| |
    /// The gradient of the cost with respect to this layer's parameters, given the gradient with respect to its output
| fn back_params(&self, out_grad: &Matrix<f64>, input: &Matrix<f64>, output: &Matrix<f64>, params: MatrixSlice<f64>) -> Matrix<f64>; |
| |
    /// The default values of this layer's parameters, used before training
| fn default_params(&self) -> Vec<f64>; |
| |
| /// The shape of the parameters used by this layer |
| fn param_shape(&self) -> (usize, usize); |
| |
    /// The number of parameters used by this layer
    ///
    /// By default this is the product of the two dimensions of `param_shape`.
| fn num_params(&self) -> usize { |
| let shape = self.param_shape(); |
| shape.0 * shape.1 |
| } |
| } |
| |
/// Linear network layer
///
/// Represents a fully connected layer with an optional bias term.
///
/// The parameters are a matrix of weights of size I x N,
/// where N is the dimensionality of the output and I the dimensionality of the input,
/// including one extra row for the bias term when it is present.
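///
/// A minimal usage sketch (the `use` path below is an assumption about
/// how the crate exposes this module):
///
/// ```
/// use rusty_machine::learning::nnet::net_layer::{NetLayer, Linear};
///
/// // 3 inputs, 2 outputs; the bias adds one extra row of weights.
/// let layer = Linear::new(3, 2);
/// assert_eq!(layer.param_shape(), (4, 2));
/// assert_eq!(layer.num_params(), 8);
///
/// // Without a bias the weight matrix is exactly I x N.
/// let unbiased = Linear::without_bias(3, 2);
/// assert_eq!(unbiased.param_shape(), (3, 2));
/// ```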
| #[derive(Debug, Clone, Copy, Serialize, Deserialize)] |
| pub struct Linear { |
| /// The number of dimensions of the input |
| input_size: usize, |
| /// The number of dimensions of the output |
| output_size: usize, |
| /// Whether or not to include a bias term |
| has_bias: bool, |
| } |
| |
| impl Linear { |
    /// Construct a new Linear layer with a bias term
| pub fn new(input_size: usize, output_size: usize) -> Linear { |
| Linear { |
| input_size: input_size + 1, |
| output_size: output_size, |
| has_bias: true |
| } |
| } |
| |
| /// Construct a Linear layer without a bias term |
| pub fn without_bias(input_size: usize, output_size: usize) -> Linear { |
| Linear { |
| input_size: input_size, |
| output_size: output_size, |
| has_bias: false |
| } |
| } |
| } |
| |
/// Removes the first column of `mat`, reusing its buffer by shifting
/// the remaining elements left. `Linear::back_input` uses this to drop
/// the gradient of the prepended bias column.
fn remove_first_col(mat: Matrix<f64>) -> Matrix<f64> {
| let rows = mat.rows(); |
| let cols = mat.cols(); |
| let mut data = mat.into_vec(); |
| |
| let len = data.len(); |
| let mut del = 0; |
| { |
| let v = &mut *data; |
| |
        for i in 0..len {
            if i % cols == 0 {
                // First element of a row: count it as deleted.
                del += 1;
            } else if del > 0 {
                // Shift the element left over the deleted slots.
                v[i - del] = v[i];
            }
        }
| } |
| if del > 0 { |
| data.truncate(len - del); |
| } |
| Matrix::new(rows, cols - 1, data) |
| } |
| |
| impl NetLayer for Linear { |
    /// Computes the matrix product of the input with the parameters,
    /// prepending a column of ones to the input when the layer has a bias term
    ///
    /// input should have dimensions N x I
    /// where N is the number of samples and I is the dimensionality of the input
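    ///
    /// A minimal sketch (the `use` paths and the `MatrixSlice::from_matrix`
    /// constructor are assumptions about how the crate and rulinalg expose them):
    ///
    /// ```
    /// use rusty_machine::linalg::{Matrix, MatrixSlice};
    /// use rusty_machine::learning::nnet::net_layer::{NetLayer, Linear};
    ///
    /// let layer = Linear::new(2, 1);
    /// // Three weight rows: one for the bias, two for the inputs.
    /// let params = Matrix::new(3, 1, vec![0.5, 1.0, -1.0]);
    /// let slice = MatrixSlice::from_matrix(&params, [0, 0], 3, 1);
    ///
    /// let input = Matrix::new(1, 2, vec![2.0, 1.0]);
    /// let output = layer.forward(&input, slice).unwrap();
    /// // 0.5 * 1 (bias) + 1.0 * 2.0 - 1.0 * 1.0
    /// assert_eq!(output[[0, 0]], 1.5);
    /// ```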
| fn forward(&self, input: &Matrix<f64>, params: MatrixSlice<f64>) -> LearningResult<Matrix<f64>> { |
| if self.has_bias { |
        if input.cols() + 1 != params.rows() {
| Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) |
| } else { |
| Ok(&Matrix::ones(input.rows(), 1).hcat(input) * ¶ms) |
| } |
| } else { |
| if input.cols() != params.rows() { |
| Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) |
| } else { |
| Ok(input * ¶ms) |
| } |
| } |
| } |
| |
| fn back_input(&self, out_grad: &Matrix<f64>, _: &Matrix<f64>, _: &Matrix<f64>, params: MatrixSlice<f64>) -> Matrix<f64> { |
| debug_assert_eq!(out_grad.cols(), params.cols()); |
| let gradient = out_grad * ¶ms.transpose(); |
| if self.has_bias { |
| remove_first_col(gradient) |
| } else { |
| gradient |
| } |
| } |
| |
| fn back_params(&self, out_grad: &Matrix<f64>, input: &Matrix<f64>, _: &Matrix<f64>, _: MatrixSlice<f64>) -> Matrix<f64> { |
| debug_assert_eq!(input.rows(), out_grad.rows()); |
| if self.has_bias { |
| &Matrix::ones(input.rows(), 1).hcat(input).transpose() * out_grad |
| } else { |
| &input.transpose() * out_grad |
| } |
| } |
| |
    /// Initializes the weights using Xavier initialization
    ///
    /// Weights are drawn from a Gaussian distribution with mean 0 and
    /// variance 2 / (input_size + output_size).
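    ///
    /// A quick sketch checking only the number of values drawn (the
    /// `use` path is an assumption about how the crate exposes this module):
    ///
    /// ```
    /// use rusty_machine::learning::nnet::net_layer::{NetLayer, Linear};
    ///
    /// let layer = Linear::new(3, 2);
    /// assert_eq!(layer.default_params().len(), layer.num_params());
    /// ```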
| fn default_params(&self) -> Vec<f64> { |
        // `Normal::new` takes a standard deviation, hence the square root.
        let std_dev = (2.0 / (self.input_size + self.output_size) as f64).sqrt();
        let mut distro = Normal::new(0.0, std_dev);
        let mut rng = thread_rng();

        (0..self.input_size * self.output_size)
            .map(|_| distro.sample(&mut rng))
            .collect()
| } |
| |
| fn param_shape(&self) -> (usize, usize) { |
| (self.input_size, self.output_size) |
| } |
| } |
| |
// Any activation function can be used as a parameterless layer that
// applies itself element-wise to the input.
impl<T: ActivationFunc> NetLayer for T
    where T: serde::Serialize {
| /// Applies the activation function to each element of the input |
| fn forward(&self, input: &Matrix<f64>, _: MatrixSlice<f64>) -> LearningResult<Matrix<f64>> { |
| let mut output = Vec::with_capacity(input.rows()*input.cols()); |
| for val in input.data() { |
| output.push(T::func(*val)); |
| } |
| Ok(Matrix::new(input.rows(), input.cols(), output)) |
| } |
| |
    /// Scales the output gradient by the activation's derivative, computed from the layer's own output
    fn back_input(&self, out_grad: &Matrix<f64>, _: &Matrix<f64>, output: &Matrix<f64>, _: MatrixSlice<f64>) -> Matrix<f64> {
| let mut in_grad = Vec::with_capacity(output.rows()*output.cols()); |
| for (y, g) in output.data().iter().zip(out_grad.data()) { |
| in_grad.push(T::func_grad_from_output(*y) * g); |
| } |
| Matrix::new(output.rows(), output.cols(), in_grad) |
| } |
| |
    /// Activation functions have no parameters, so the gradient is an empty matrix
    fn back_params(&self, _: &Matrix<f64>, _: &Matrix<f64>, _: &Matrix<f64>, _: MatrixSlice<f64>) -> Matrix<f64> {
| Matrix::new(0, 0, Vec::new()) |
| } |
| |
| fn default_params(&self) -> Vec<f64> { |
| Vec::new() |
| } |
| |
| fn param_shape(&self) -> (usize, usize) { |
| (0, 0) |
| } |
| } |
| |
| serialize_trait_object!(NetLayer); |
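
#[cfg(test)]
mod tests {
    // Minimal sketches of the layers above. These assume `Sigmoid`
    // implements `ActivationFunc` (and `Serialize`, as the blanket
    // impl requires) elsewhere in this crate.
    use super::*;
    use learning::toolkit::activ_fn::Sigmoid;

    #[test]
    fn remove_first_col_drops_the_bias_column() {
        let mat = Matrix::new(2, 3, vec![1.0, 2.0, 3.0,
                                         4.0, 5.0, 6.0]);
        let trimmed = remove_first_col(mat);
        assert_eq!(trimmed.rows(), 2);
        assert_eq!(trimmed.cols(), 2);
        assert_eq!(trimmed.into_vec(), vec![2.0, 3.0, 5.0, 6.0]);
    }

    #[test]
    fn linear_layer_without_bias_is_a_matrix_product() {
        let layer = Linear::without_bias(2, 1);
        let params = Matrix::new(2, 1, vec![1.0, -1.0]);
        let slice = MatrixSlice::from_matrix(&params, [0, 0], 2, 1);

        let input = Matrix::new(1, 2, vec![3.0, 2.0]);
        let output = layer.forward(&input, slice).unwrap();
        assert_eq!(output[[0, 0]], 1.0);
    }

    #[test]
    fn sigmoid_layer_applies_the_activation_elementwise() {
        // Activation layers take no parameters, so an empty slice suffices.
        let params = Matrix::new(0, 0, Vec::new());
        let slice = MatrixSlice::from_matrix(&params, [0, 0], 0, 0);

        let input = Matrix::new(1, 2, vec![0.0, 0.0]);
        let output = Sigmoid.forward(&input, slice).unwrap();
        // The sigmoid of zero is exactly 0.5.
        assert_eq!(output[[0, 0]], 0.5);
        assert_eq!(output[[0, 1]], 0.5);
    }
}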