| //! Neural Network module |
| //! |
| //! Contains implementation of simple feed forward neural network. |
| //! |
| //! # Usage |
| //! |
| //! ``` |
| //! use rusty_machine::learning::nnet::{NeuralNet, BCECriterion}; |
| //! use rusty_machine::learning::toolkit::regularization::Regularization; |
| //! use rusty_machine::learning::toolkit::activ_fn::Sigmoid; |
| //! use rusty_machine::learning::optim::grad_desc::StochasticGD; |
| //! use rusty_machine::linalg::Matrix; |
| //! use rusty_machine::learning::SupModel; |
| //! |
| //! let inputs = Matrix::new(5,3, vec![1.,1.,1.,2.,2.,2.,3.,3.,3., |
| //! 4.,4.,4.,5.,5.,5.,]); |
| //! let targets = Matrix::new(5,3, vec![1.,0.,0.,0.,1.,0.,0.,0.,1., |
| //! 0.,0.,1.,0.,0.,1.]); |
| //! |
| //! // Set the layer sizes - from input to output |
| //! let layers = &[3,5,11,7,3]; |
| //! |
| //! // Choose the BCE criterion with L2 regularization (`lambda=0.1`). |
| //! let criterion = BCECriterion::new(Regularization::L2(0.1)); |
| //! |
| //! // We will create a multilayer perceptron and just use the default stochastic gradient descent. |
| //! let mut model = NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid); |
| //! |
| //! // Train the model! |
| //! model.train(&inputs, &targets).unwrap(); |
| //! |
| //! let test_inputs = Matrix::new(2,3, vec![1.5,1.5,1.5,5.1,5.1,5.1]); |
| //! |
| //! // And predict new output from the test inputs |
| //! let outputs = model.predict(&test_inputs).unwrap(); |
| //! ``` |
| //! |
| //! The neural networks are specified via a criterion - similar to |
| //! [Torch](https://github.com/torch/nn/blob/master/doc/criterion.md). |
//! Each criterion specifies a cost function and any regularization.
| //! |
| //! You can define your own criterion by implementing the `Criterion` |
| //! trait with a concrete `CostFunc`. |
| |
| |
| pub mod net_layer; |
| use std::vec::*; |
| use std::boxed::*; |
| use linalg::{Matrix, MatrixSlice}; |
| use rulinalg::utils; |
| |
| use learning::{LearningResult, SupModel}; |
| use learning::error::{Error, ErrorKind}; |
| use learning::toolkit::activ_fn; |
| use learning::toolkit::activ_fn::ActivationFunc; |
| use learning::toolkit::cost_fn; |
| use learning::toolkit::cost_fn::CostFunc; |
| use learning::toolkit::regularization::Regularization; |
| use learning::optim::{Optimizable, OptimAlgorithm}; |
| use learning::optim::grad_desc::StochasticGD; |
| |
| use self::net_layer::NetLayer; |
| |
/// Neural Network Model
///
/// The Neural Network struct specifies a `Criterion` and
/// a gradient descent algorithm.
#[derive(Debug)]
pub struct NeuralNet<T, A>
    where T: Criterion,
          A: OptimAlgorithm<BaseNeuralNet<T>>
{
    /// The underlying network: layers, flattened weights, and criterion.
    base: BaseNeuralNet<T>,
    /// The optimization algorithm run during training.
    alg: A,
}
| |
/// Supervised learning for the Neural Network.
///
/// The model is trained using back propagation.
impl<T, A> SupModel<Matrix<f64>, Matrix<f64>> for NeuralNet<T, A>
    where T: Criterion,
          A: OptimAlgorithm<BaseNeuralNet<T>>
{
    /// Predict neural network output using forward propagation.
    ///
    /// Returns an error if the layer shapes do not line up with the input.
    fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
        self.base.forward_prop(inputs)
    }

    /// Train the model using gradient optimization and back propagation.
    ///
    /// The optimizer is run over the flat weight vector of the base network
    /// and its result replaces the current weights.
    fn train(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
        // NOTE(review): any failure inside `optimize` is not surfaced here;
        // the returned weights are assumed valid.
        let optimal_w = self.alg.optimize(&self.base, &self.base.weights, inputs, targets);
        self.base.weights = optimal_w;
        Ok(())
    }
}
| |
| impl NeuralNet<BCECriterion, StochasticGD> { |
| /// Creates a neural network with the specified layer sizes. |
| /// |
| /// The layer sizes slice should include the input, hidden layers, and output layer sizes. |
| /// The type of activation function must be specified. |
| /// |
| /// Uses the default settings (stochastic gradient descent and sigmoid activation function). |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use rusty_machine::learning::nnet::NeuralNet; |
| /// |
| /// // Create a neural net with 4 layers, 3 neurons in each. |
| /// let layers = &[3; 4]; |
| /// let mut net = NeuralNet::default(layers); |
| /// ``` |
| pub fn default(layer_sizes: &[usize]) -> NeuralNet<BCECriterion, StochasticGD> { |
| NeuralNet { |
| base: BaseNeuralNet::default(layer_sizes, activ_fn::Sigmoid), |
| alg: StochasticGD::default(), |
| } |
| } |
| } |
| |
| impl<T, A> NeuralNet<T, A> |
| where T: Criterion, |
| A: OptimAlgorithm<BaseNeuralNet<T>> |
| { |
| /// Create a new neural network with no layers |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use rusty_machine::learning::nnet::BCECriterion; |
| /// use rusty_machine::learning::nnet::NeuralNet; |
| /// use rusty_machine::learning::optim::grad_desc::StochasticGD; |
| /// |
| /// // Create a an empty neural net |
| /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); |
| /// ``` |
| pub fn new(criterion: T, alg: A) -> NeuralNet<T, A> { |
| NeuralNet { |
| base: BaseNeuralNet::new(criterion), |
| alg: alg, |
| } |
| } |
| |
| /// Create a multilayer perceptron with the specified layer sizes. |
| /// |
| /// The layer sizes slice should include the input, hidden layers, and output layer sizes. |
| /// The type of activation function must be specified. |
| /// |
| /// Currently defaults to simple batch Gradient Descent for optimization. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use rusty_machine::learning::nnet::BCECriterion; |
| /// use rusty_machine::learning::nnet::NeuralNet; |
| /// use rusty_machine::learning::toolkit::activ_fn::Sigmoid; |
| /// use rusty_machine::learning::optim::grad_desc::StochasticGD; |
| /// |
| /// // Create a neural net with 4 layers, 3 neurons in each. |
| /// let layers = &[3; 4]; |
| /// let mut net = NeuralNet::mlp(layers, BCECriterion::default(), StochasticGD::default(), Sigmoid); |
| /// ``` |
| pub fn mlp<U>(layer_sizes: &[usize], criterion: T, alg: A, activ_fn: U) -> NeuralNet<T, A> |
| where U: ActivationFunc + 'static { |
| NeuralNet { |
| base: BaseNeuralNet::mlp(layer_sizes, criterion, activ_fn), |
| alg: alg, |
| } |
| } |
| |
| /// Adds the specified layer to the end of the network |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use rusty_machine::linalg::BaseMatrix; |
| /// use rusty_machine::learning::nnet::BCECriterion; |
| /// use rusty_machine::learning::nnet::NeuralNet; |
| /// use rusty_machine::learning::nnet::net_layer::Linear; |
| /// use rusty_machine::learning::optim::grad_desc::StochasticGD; |
| /// |
| /// // Create a new neural net |
| /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); |
| /// |
| /// // Give net an input layer of size 3, hidden layer of size 4, and output layer of size 5 |
| /// // This net will not apply any activation function to the Linear layer outputs |
| /// net.add(Box::new(Linear::new(3, 4))) |
| /// .add(Box::new(Linear::new(4, 5))); |
| /// ``` |
| pub fn add<'a>(&'a mut self, layer: Box<NetLayer>) -> &'a mut NeuralNet<T, A> { |
| self.base.add(layer); |
| self |
| } |
| |
| /// Adds multiple layers to the end of the network |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use rusty_machine::linalg::BaseMatrix; |
| /// use rusty_machine::learning::nnet::BCECriterion; |
| /// use rusty_machine::learning::nnet::NeuralNet; |
| /// use rusty_machine::learning::nnet::net_layer::{NetLayer, Linear}; |
| /// use rusty_machine::learning::toolkit::activ_fn::Sigmoid; |
| /// use rusty_machine::learning::optim::grad_desc::StochasticGD; |
| /// |
| /// // Create a new neural net |
| /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); |
| /// |
| /// let linear_sig: Vec<Box<NetLayer>> = vec![Box::new(Linear::new(5, 5)), Box::new(Sigmoid)]; |
| /// |
| /// // Give net a layer of size 5, followed by a Sigmoid activation function |
| /// net.add_layers(linear_sig); |
| /// ``` |
| pub fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut NeuralNet<T, A> |
| where U: IntoIterator<Item = Box<NetLayer>> { |
| self.base.add_layers(layers); |
| self |
| } |
| |
| /// Gets matrix of weights between specified layer and forward layer. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use rusty_machine::linalg::BaseMatrix; |
| /// use rusty_machine::learning::nnet::NeuralNet; |
| /// |
| /// // Create a neural net with 4 layers, 3 neurons in each. |
| /// let layers = &[3; 4]; |
| /// let mut net = NeuralNet::default(layers); |
| /// |
| /// let w = &net.get_net_weights(2); |
| /// |
| /// // We add a bias term to the weight matrix |
| /// assert_eq!(w.rows(), 4); |
| /// assert_eq!(w.cols(), 3); |
| /// ``` |
| pub fn get_net_weights(&self, idx: usize) -> MatrixSlice<f64> { |
| self.base.get_layer_weights(&self.base.weights[..], idx) |
| } |
| } |
| |
/// Base Neural Network struct
///
/// This struct cannot be instantiated and is used internally only.
#[derive(Debug)]
pub struct BaseNeuralNet<T: Criterion> {
    /// The layers of the network, applied in order.
    layers: Vec<Box<NetLayer>>,
    /// All layer parameters flattened into one vector, concatenated
    /// in layer order (kept in sync by `add`).
    weights: Vec<f64>,
    /// The criterion supplying the cost function and regularization.
    criterion: T,
}
| |
| |
impl BaseNeuralNet<BCECriterion> {
    /// Creates a base neural network with the specified layer sizes.
    ///
    /// Uses the BCE criterion without regularization, applying the given
    /// activation function after each linear layer.
    fn default<U>(layer_sizes: &[usize], activ_fn: U) -> BaseNeuralNet<BCECriterion>
        where U: ActivationFunc + 'static {
        BaseNeuralNet::mlp(layer_sizes, BCECriterion::default(), activ_fn)
    }
}
| |
| |
| impl<T: Criterion> BaseNeuralNet<T> { |
| /// Create a base neural network with no layers |
| fn new(criterion: T) -> BaseNeuralNet<T> { |
| BaseNeuralNet { |
| layers: Vec::new(), |
| weights: Vec::new(), |
| criterion: criterion |
| } |
| } |
| |
| /// Create a multilayer perceptron with the specified layer sizes. |
| fn mlp<U>(layer_sizes: &[usize], criterion: T, activ_fn: U) -> BaseNeuralNet<T> |
| where U: ActivationFunc + 'static { |
| let mut mlp = BaseNeuralNet { |
| layers: Vec::with_capacity(2*(layer_sizes.len()-1)), |
| weights: Vec::new(), |
| criterion: criterion |
| }; |
| for shape in layer_sizes.windows(2) { |
| mlp.add(Box::new(net_layer::Linear::new(shape[0], shape[1]))); |
| mlp.add(Box::new(activ_fn.clone())); |
| } |
| mlp |
| } |
| |
| /// Adds the specified layer to the end of the network |
| fn add<'a>(&'a mut self, layer: Box<NetLayer>) -> &'a mut BaseNeuralNet<T> { |
| self.weights.extend_from_slice(&layer.default_params()); |
| self.layers.push(layer); |
| self |
| } |
| |
| /// Adds multiple layers to the end of the network |
| fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut BaseNeuralNet<T> |
| where U: IntoIterator<Item = Box<NetLayer>> |
| { |
| for layer in layers { |
| self.add(layer); |
| } |
| self |
| } |
| |
| /// Gets matrix of weights for the specified layer for the weights. |
| fn get_layer_weights(&self, weights: &[f64], idx: usize) -> MatrixSlice<f64> { |
| debug_assert!(idx < self.layers.len()); |
| |
| // Check that the weights are the right size. |
| let full_size: usize = self.layers.iter().map(|l| l.num_params()).sum(); |
| |
| debug_assert_eq!(full_size, weights.len()); |
| |
| let start: usize = self.layers.iter().take(idx).map(|l| l.num_params()).sum(); |
| |
| let shape = self.layers[idx].param_shape(); |
| unsafe { |
| MatrixSlice::from_raw_parts(weights.as_ptr().offset(start as isize), |
| shape.0, |
| shape.1, |
| shape.1) |
| } |
| } |
| |
| /// Compute the gradient using the back propagation algorithm. |
| fn compute_grad(&self, |
| weights: &[f64], |
| inputs: &Matrix<f64>, |
| targets: &Matrix<f64>) |
| -> (f64, Vec<f64>) { |
| let mut gradients = Vec::with_capacity(weights.len()); |
| unsafe { |
| gradients.set_len(weights.len()); |
| } |
| // activations[i] is the output of layer[i] |
| let mut activations = Vec::with_capacity(self.layers.len()); |
| // params[i] is the weights for layer[i] |
| let mut params = Vec::with_capacity(self.layers.len()); |
| |
| // Forward propagation |
| |
| let mut index = 0; |
| for (i, layer) in self.layers.iter().enumerate() { |
| let shape = layer.param_shape(); |
| |
| let slice = unsafe { |
| MatrixSlice::from_raw_parts(weights.as_ptr().offset(index as isize), |
| shape.0, |
| shape.1, |
| shape.1) |
| }; |
| |
| let output = if i == 0 { |
| layer.forward(inputs, slice).unwrap() |
| } else { |
| layer.forward(activations.last().unwrap(), slice).unwrap() |
| }; |
| |
| activations.push(output); |
| params.push(slice); |
| index += layer.num_params(); |
| } |
| let output = activations.last().unwrap(); |
| |
| // Backward propagation |
| |
| // The gradient with respect to the current layer's output |
| let mut out_grad = self.criterion.cost_grad(output, targets); |
| // at this point index == weights.len() |
| for (i, layer) in self.layers.iter().enumerate().rev() { |
| let activation = if i == 0 {inputs} else {&activations[i-1]}; |
| let result = &activations[i]; |
| index -= layer.num_params(); |
| |
| let grad_params = &mut gradients[index..index+layer.num_params()]; |
| grad_params.copy_from_slice(layer.back_params(&out_grad, activation, result, params[i]).data()); |
| |
| out_grad = layer.back_input(&out_grad, activation, result, params[i]); |
| } |
| |
| let mut cost = self.criterion.cost(output, targets); |
| if self.criterion.is_regularized() { |
| let all_params = unsafe { |
| MatrixSlice::from_raw_parts(weights.as_ptr(), weights.len(), 1, 1) |
| }; |
| utils::in_place_vec_bin_op(&mut gradients, |
| self.criterion.reg_cost_grad(all_params).data(), |
| |x, &y| *x = *x + y); |
| cost += self.criterion.reg_cost(all_params); |
| } |
| (cost, gradients) |
| } |
| |
| /// Forward propagation of the model weights to get the outputs. |
| fn forward_prop(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> { |
| if self.layers.is_empty() { |
| return Ok(inputs.clone()); |
| } |
| |
| let mut outputs = unsafe { |
| let shape = self.layers[0].param_shape(); |
| let slice = MatrixSlice::from_raw_parts(self.weights.as_ptr(), |
| shape.0, |
| shape.1, |
| shape.1); |
| try!(self.layers[0].forward(inputs, slice)) |
| }; |
| |
| let mut index = self.layers[0].num_params(); |
| for layer in self.layers.iter().skip(1) { |
| let shape = layer.param_shape(); |
| |
| let slice = unsafe { |
| MatrixSlice::from_raw_parts(self.weights.as_ptr().offset(index as isize), |
| shape.0, |
| shape.1, |
| shape.1) |
| }; |
| |
| outputs = match layer.forward(&outputs, slice) { |
| Ok(act) => act, |
| Err(_) => {return Err(Error::new(ErrorKind::InvalidParameters, |
| "The network's layers do not line up correctly."))} |
| }; |
| |
| index += layer.num_params(); |
| } |
| Ok(outputs) |
| } |
| } |
| |
/// Compute the gradient of the Neural Network using the
/// back propagation algorithm.
impl<T: Criterion> Optimizable for BaseNeuralNet<T> {
    type Inputs = Matrix<f64>;
    type Targets = Matrix<f64>;

    /// Compute the gradient of the neural network.
    ///
    /// Delegates to the inherent `compute_grad`; inherent methods take
    /// precedence over trait methods, so this call does not recurse.
    fn compute_grad(&self,
                    params: &[f64],
                    inputs: &Matrix<f64>,
                    targets: &Matrix<f64>)
                    -> (f64, Vec<f64>) {
        self.compute_grad(params, inputs, targets)
    }
}
| |
| /// Criterion for Neural Networks |
| /// |
| /// Specifies an activation function and a cost function. |
| pub trait Criterion { |
| /// The cost function for the criterion. |
| type Cost: CostFunc<Matrix<f64>>; |
| |
| /// The cost function. |
| /// |
| /// Returns a scalar cost. |
| fn cost(&self, outputs: &Matrix<f64>, targets: &Matrix<f64>) -> f64 { |
| Self::Cost::cost(outputs, targets) |
| } |
| |
| /// The gradient of the cost function. |
| /// |
| /// Returns a matrix of cost gradients. |
| fn cost_grad(&self, outputs: &Matrix<f64>, targets: &Matrix<f64>) -> Matrix<f64> { |
| Self::Cost::grad_cost(outputs, targets) |
| } |
| |
| /// Returns the regularization for this criterion. |
| /// |
| /// Will return `Regularization::None` by default. |
| fn regularization(&self) -> Regularization<f64> { |
| Regularization::None |
| } |
| |
| /// Checks if the current criterion includes regularization. |
| /// |
| /// Will return `false` by default. |
| fn is_regularized(&self) -> bool { |
| match self.regularization() { |
| Regularization::None => false, |
| _ => true, |
| } |
| } |
| |
| /// Returns the regularization cost for the criterion. |
| /// |
| /// Will return `0` by default. |
| /// |
| /// This method will not be invoked by the neural network |
| /// if there is explicitly no regularization. |
| fn reg_cost(&self, reg_weights: MatrixSlice<f64>) -> f64 { |
| self.regularization().reg_cost(reg_weights) |
| } |
| |
| /// Returns the regularization gradient for the criterion. |
| /// |
| /// Will return a matrix of zeros by default. |
| /// |
| /// This method will not be invoked by the neural network |
| /// if there is explicitly no regularization. |
| fn reg_cost_grad(&self, reg_weights: MatrixSlice<f64>) -> Matrix<f64> { |
| self.regularization().reg_grad(reg_weights) |
| } |
| } |
| |
/// The binary cross entropy criterion.
///
/// Uses the cross entropy error as its cost function.
#[derive(Clone, Copy, Debug)]
pub struct BCECriterion {
    /// The regularization applied to the network weights.
    regularization: Regularization<f64>,
}
| |
impl Criterion for BCECriterion {
    /// The cross entropy cost function.
    type Cost = cost_fn::CrossEntropyError;

    /// Returns the regularization stored in this criterion.
    fn regularization(&self) -> Regularization<f64> {
        self.regularization
    }
}
| |
/// Creates a BCE Criterion without any regularization.
impl Default for BCECriterion {
    fn default() -> Self {
        BCECriterion { regularization: Regularization::None }
    }
}
| |
| impl BCECriterion { |
| /// Constructs a new BCECriterion with the given regularization. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use rusty_machine::learning::nnet::BCECriterion; |
| /// use rusty_machine::learning::toolkit::regularization::Regularization; |
| /// |
| /// // Create a new BCE criterion with L2 regularization of 0.3. |
| /// let criterion = BCECriterion::new(Regularization::L2(0.3f64)); |
| /// ``` |
| pub fn new(regularization: Regularization<f64>) -> Self { |
| BCECriterion { regularization: regularization } |
| } |
| } |
| |
/// The mean squared error criterion.
///
/// Uses the mean squared error as its cost function.
#[derive(Clone, Copy, Debug)]
pub struct MSECriterion {
    /// The regularization applied to the network weights.
    regularization: Regularization<f64>,
}
| |
impl Criterion for MSECriterion {
    /// The mean squared error cost function.
    type Cost = cost_fn::MeanSqError;

    /// Returns the regularization stored in this criterion.
    fn regularization(&self) -> Regularization<f64> {
        self.regularization
    }
}
| |
/// Creates an MSE Criterion without any regularization.
impl Default for MSECriterion {
    fn default() -> Self {
        MSECriterion { regularization: Regularization::None }
    }
}
| |
impl MSECriterion {
    /// Constructs a new MSECriterion with the given regularization.
    ///
    /// # Examples
    ///
    /// ```
    /// use rusty_machine::learning::nnet::MSECriterion;
    /// use rusty_machine::learning::toolkit::regularization::Regularization;
    ///
    /// // Create a new MSE criterion with L2 regularization of 0.3.
    /// let criterion = MSECriterion::new(Regularization::L2(0.3f64));
    /// ```
    pub fn new(regularization: Regularization<f64>) -> Self {
        MSECriterion { regularization: regularization }
    }
}