//! Neural Network module
//!
//! Contains the implementation of a simple feed-forward neural network.
//!
//! # Usage
//!
//! ```
//! use rusty_machine::learning::nnet::{NeuralNet, BCECriterion};
//! use rusty_machine::learning::toolkit::regularization::Regularization;
//! use rusty_machine::learning::toolkit::activ_fn::Sigmoid;
//! use rusty_machine::learning::optim::grad_desc::StochasticGD;
//! use rusty_machine::linalg::Matrix;
//! use rusty_machine::learning::SupModel;
//!
//! let inputs = Matrix::new(5,3, vec![1.,1.,1.,2.,2.,2.,3.,3.,3.,
//!                                    4.,4.,4.,5.,5.,5.]);
//! let targets = Matrix::new(5,3, vec![1.,0.,0.,0.,1.,0.,0.,0.,1.,
//!                                     0.,0.,1.,0.,0.,1.]);
//!
//! // Set the layer sizes - from input to output
//! let layers = &[3,5,11,7,3];
//!
//! // Choose the BCE criterion with L2 regularization (`lambda=0.1`).
//! let criterion = BCECriterion::new(Regularization::L2(0.1));
//!
//! // We will create a multilayer perceptron and just use the default stochastic gradient descent.
//! let mut model = NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid);
//!
//! // Train the model!
//! model.train(&inputs, &targets).unwrap();
//!
//! let test_inputs = Matrix::new(2,3, vec![1.5,1.5,1.5,5.1,5.1,5.1]);
//!
//! // And predict new output from the test inputs
//! let outputs = model.predict(&test_inputs).unwrap();
//! ```
//!
//! Neural networks are specified via a criterion - similar to
//! [Torch](https://github.com/torch/nn/blob/master/doc/criterion.md).
//! A criterion specifies a cost function and any regularization.
//!
//! You can define your own criterion by implementing the `Criterion`
//! trait with a concrete `CostFunc`.
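//!
//! For example, a minimal custom criterion that reuses an existing
//! cost function could look like the sketch below (`MyCriterion` is
//! a hypothetical name, not part of this library):
//!
//! ```
//! use rusty_machine::learning::nnet::Criterion;
//! use rusty_machine::learning::toolkit::cost_fn;
//!
//! struct MyCriterion;
//!
//! impl Criterion for MyCriterion {
//!     // Only the cost function is required; `regularization`
//!     // defaults to `Regularization::None`.
//!     type Cost = cost_fn::MeanSqError;
//! }
//! ```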
pub mod net_layer;
use std::vec::*;
use std::boxed::*;
use linalg::{Matrix, MatrixSlice};
use rulinalg::utils;
use learning::{LearningResult, SupModel};
use learning::error::{Error, ErrorKind};
use learning::toolkit::activ_fn;
use learning::toolkit::activ_fn::ActivationFunc;
use learning::toolkit::cost_fn;
use learning::toolkit::cost_fn::CostFunc;
use learning::toolkit::regularization::Regularization;
use learning::optim::{Optimizable, OptimAlgorithm};
use learning::optim::grad_desc::StochasticGD;
use self::net_layer::NetLayer;
/// Neural Network Model
///
/// The Neural Network struct specifies a `Criterion` and
/// a gradient descent algorithm.
#[derive(Debug)]
pub struct NeuralNet<T, A>
where T: Criterion,
A: OptimAlgorithm<BaseNeuralNet<T>>
{
base: BaseNeuralNet<T>,
alg: A,
}
/// Supervised learning for the Neural Network.
///
/// The model is trained using back propagation.
impl<T, A> SupModel<Matrix<f64>, Matrix<f64>> for NeuralNet<T, A>
where T: Criterion,
A: OptimAlgorithm<BaseNeuralNet<T>>
{
/// Predict neural network output using forward propagation.
fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
self.base.forward_prop(inputs)
}
/// Train the model using gradient optimization and back propagation.
fn train(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
let optimal_w = self.alg.optimize(&self.base, &self.base.weights, inputs, targets);
self.base.weights = optimal_w;
Ok(())
}
}
impl NeuralNet<BCECriterion, StochasticGD> {
/// Creates a neural network with the specified layer sizes.
///
/// The layer sizes slice should include the input, hidden layers, and output layer sizes.
///
/// Uses the default settings: stochastic gradient descent and the sigmoid activation function.
///
/// # Examples
///
/// ```
/// use rusty_machine::learning::nnet::NeuralNet;
///
/// // Create a neural net with 4 layers, 3 neurons in each.
/// let layers = &[3; 4];
/// let mut net = NeuralNet::default(layers);
/// ```
pub fn default(layer_sizes: &[usize]) -> NeuralNet<BCECriterion, StochasticGD> {
NeuralNet {
base: BaseNeuralNet::default(layer_sizes, activ_fn::Sigmoid),
alg: StochasticGD::default(),
}
}
}
impl<T, A> NeuralNet<T, A>
where T: Criterion,
A: OptimAlgorithm<BaseNeuralNet<T>>
{
/// Create a new neural network with no layers
///
/// # Examples
///
/// ```
/// use rusty_machine::learning::nnet::BCECriterion;
/// use rusty_machine::learning::nnet::NeuralNet;
/// use rusty_machine::learning::optim::grad_desc::StochasticGD;
///
/// // Create an empty neural net
/// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default());
/// ```
pub fn new(criterion: T, alg: A) -> NeuralNet<T, A> {
NeuralNet {
base: BaseNeuralNet::new(criterion),
alg: alg,
}
}
/// Create a multilayer perceptron with the specified layer sizes.
///
/// The layer sizes slice should include the input, hidden layers, and output layer sizes.
/// The type of activation function must be specified.
///
/// The gradient optimization algorithm is specified by the `alg` argument.
///
/// # Examples
///
/// ```
/// use rusty_machine::learning::nnet::BCECriterion;
/// use rusty_machine::learning::nnet::NeuralNet;
/// use rusty_machine::learning::toolkit::activ_fn::Sigmoid;
/// use rusty_machine::learning::optim::grad_desc::StochasticGD;
///
/// // Create a neural net with 4 layers, 3 neurons in each.
/// let layers = &[3; 4];
/// let mut net = NeuralNet::mlp(layers, BCECriterion::default(), StochasticGD::default(), Sigmoid);
/// ```
pub fn mlp<U>(layer_sizes: &[usize], criterion: T, alg: A, activ_fn: U) -> NeuralNet<T, A>
where U: ActivationFunc + 'static {
NeuralNet {
base: BaseNeuralNet::mlp(layer_sizes, criterion, activ_fn),
alg: alg,
}
}
/// Adds the specified layer to the end of the network
///
/// # Examples
///
/// ```
/// use rusty_machine::linalg::BaseMatrix;
/// use rusty_machine::learning::nnet::BCECriterion;
/// use rusty_machine::learning::nnet::NeuralNet;
/// use rusty_machine::learning::nnet::net_layer::Linear;
/// use rusty_machine::learning::optim::grad_desc::StochasticGD;
///
/// // Create a new neural net
/// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default());
///
/// // Give net an input layer of size 3, hidden layer of size 4, and output layer of size 5
/// // This net will not apply any activation function to the Linear layer outputs
/// net.add(Box::new(Linear::new(3, 4)))
/// .add(Box::new(Linear::new(4, 5)));
/// ```
pub fn add<'a>(&'a mut self, layer: Box<NetLayer>) -> &'a mut NeuralNet<T, A> {
self.base.add(layer);
self
}
/// Adds multiple layers to the end of the network
///
/// # Examples
///
/// ```
/// use rusty_machine::linalg::BaseMatrix;
/// use rusty_machine::learning::nnet::BCECriterion;
/// use rusty_machine::learning::nnet::NeuralNet;
/// use rusty_machine::learning::nnet::net_layer::{NetLayer, Linear};
/// use rusty_machine::learning::toolkit::activ_fn::Sigmoid;
/// use rusty_machine::learning::optim::grad_desc::StochasticGD;
///
/// // Create a new neural net
/// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default());
///
/// let linear_sig: Vec<Box<NetLayer>> = vec![Box::new(Linear::new(5, 5)), Box::new(Sigmoid)];
///
/// // Give net a layer of size 5, followed by a Sigmoid activation function
/// net.add_layers(linear_sig);
/// ```
pub fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut NeuralNet<T, A>
where U: IntoIterator<Item = Box<NetLayer>> {
self.base.add_layers(layers);
self
}
/// Gets the matrix of weights for the layer at the specified index.
///
/// # Examples
///
/// ```
/// use rusty_machine::linalg::BaseMatrix;
/// use rusty_machine::learning::nnet::NeuralNet;
///
/// // Create a neural net with 4 layers, 3 neurons in each.
/// let layers = &[3; 4];
/// let mut net = NeuralNet::default(layers);
///
/// let w = &net.get_net_weights(2);
///
/// // The weight matrix has an extra row for the bias terms, hence 4 rows for 3 inputs
/// assert_eq!(w.rows(), 4);
/// assert_eq!(w.cols(), 3);
/// ```
pub fn get_net_weights(&self, idx: usize) -> MatrixSlice<f64> {
self.base.get_layer_weights(&self.base.weights[..], idx)
}
}
/// Base Neural Network struct
///
/// This struct cannot be instantiated directly and is used internally by `NeuralNet`.
#[derive(Debug)]
pub struct BaseNeuralNet<T: Criterion> {
layers: Vec<Box<NetLayer>>,
weights: Vec<f64>,
criterion: T,
}
impl BaseNeuralNet<BCECriterion> {
/// Creates a base neural network with the specified layer sizes.
fn default<U>(layer_sizes: &[usize], activ_fn: U) -> BaseNeuralNet<BCECriterion>
where U: ActivationFunc + 'static {
BaseNeuralNet::mlp(layer_sizes, BCECriterion::default(), activ_fn)
}
}
impl<T: Criterion> BaseNeuralNet<T> {
/// Create a base neural network with no layers
fn new(criterion: T) -> BaseNeuralNet<T> {
BaseNeuralNet {
layers: Vec::new(),
weights: Vec::new(),
criterion: criterion
}
}
/// Create a multilayer perceptron with the specified layer sizes.
fn mlp<U>(layer_sizes: &[usize], criterion: T, activ_fn: U) -> BaseNeuralNet<T>
where U: ActivationFunc + 'static {
let mut mlp = BaseNeuralNet {
layers: Vec::with_capacity(2*(layer_sizes.len()-1)),
weights: Vec::new(),
criterion: criterion
};
for shape in layer_sizes.windows(2) {
mlp.add(Box::new(net_layer::Linear::new(shape[0], shape[1])));
mlp.add(Box::new(activ_fn.clone()));
}
mlp
}
/// Adds the specified layer to the end of the network
fn add<'a>(&'a mut self, layer: Box<NetLayer>) -> &'a mut BaseNeuralNet<T> {
self.weights.extend_from_slice(&layer.default_params());
self.layers.push(layer);
self
}
/// Adds multiple layers to the end of the network
fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut BaseNeuralNet<T>
where U: IntoIterator<Item = Box<NetLayer>>
{
for layer in layers {
self.add(layer);
}
self
}
/// Gets the matrix of weights for the specified layer from the given flat weights slice.
fn get_layer_weights(&self, weights: &[f64], idx: usize) -> MatrixSlice<f64> {
debug_assert!(idx < self.layers.len());
// Check that the weights are the right size.
let full_size: usize = self.layers.iter().map(|l| l.num_params()).sum();
debug_assert_eq!(full_size, weights.len());
let start: usize = self.layers.iter().take(idx).map(|l| l.num_params()).sum();
let shape = self.layers[idx].param_shape();
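        // The parameters of all layers live back to back in one flat
        // buffer, so this view covers exactly the `idx`-th layer's block.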
unsafe {
MatrixSlice::from_raw_parts(weights.as_ptr().offset(start as isize),
shape.0,
shape.1,
shape.1)
}
}
/// Compute the gradient using the back propagation algorithm.
fn compute_grad(&self,
weights: &[f64],
inputs: &Matrix<f64>,
targets: &Matrix<f64>)
-> (f64, Vec<f64>) {
let mut gradients = Vec::with_capacity(weights.len());
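        // Every entry of `gradients` is overwritten during the backward
        // pass below, so the uninitialized contents are never read.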
unsafe {
gradients.set_len(weights.len());
}
// activations[i] is the output of layer[i]
let mut activations = Vec::with_capacity(self.layers.len());
// params[i] is the weights for layer[i]
let mut params = Vec::with_capacity(self.layers.len());
// Forward propagation
let mut index = 0;
for (i, layer) in self.layers.iter().enumerate() {
let shape = layer.param_shape();
let slice = unsafe {
MatrixSlice::from_raw_parts(weights.as_ptr().offset(index as isize),
shape.0,
shape.1,
shape.1)
};
let output = if i == 0 {
layer.forward(inputs, slice).unwrap()
} else {
layer.forward(activations.last().unwrap(), slice).unwrap()
};
activations.push(output);
params.push(slice);
index += layer.num_params();
}
let output = activations.last().unwrap();
// Backward propagation
// The gradient with respect to the current layer's output
let mut out_grad = self.criterion.cost_grad(output, targets);
// at this point index == weights.len()
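        // Walk the layers in reverse, peeling each layer's parameter
        // gradient off the end of the buffer and propagating the output
        // gradient back to its inputs.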
for (i, layer) in self.layers.iter().enumerate().rev() {
let activation = if i == 0 {inputs} else {&activations[i-1]};
let result = &activations[i];
index -= layer.num_params();
let grad_params = &mut gradients[index..index+layer.num_params()];
grad_params.copy_from_slice(layer.back_params(&out_grad, activation, result, params[i]).data());
out_grad = layer.back_input(&out_grad, activation, result, params[i]);
}
let mut cost = self.criterion.cost(output, targets);
if self.criterion.is_regularized() {
let all_params = unsafe {
MatrixSlice::from_raw_parts(weights.as_ptr(), weights.len(), 1, 1)
};
utils::in_place_vec_bin_op(&mut gradients,
self.criterion.reg_cost_grad(all_params).data(),
|x, &y| *x = *x + y);
cost += self.criterion.reg_cost(all_params);
}
(cost, gradients)
}
/// Forward propagation of the model weights to get the outputs.
fn forward_prop(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
if self.layers.is_empty() {
return Ok(inputs.clone());
}
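        // Feed the inputs through the first layer, then thread each
        // layer's output into the next, advancing through the flat
        // weights buffer as we go.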
let mut outputs = unsafe {
let shape = self.layers[0].param_shape();
let slice = MatrixSlice::from_raw_parts(self.weights.as_ptr(),
shape.0,
shape.1,
shape.1);
try!(self.layers[0].forward(inputs, slice))
};
let mut index = self.layers[0].num_params();
for layer in self.layers.iter().skip(1) {
let shape = layer.param_shape();
let slice = unsafe {
MatrixSlice::from_raw_parts(self.weights.as_ptr().offset(index as isize),
shape.0,
shape.1,
shape.1)
};
outputs = match layer.forward(&outputs, slice) {
Ok(act) => act,
Err(_) => {return Err(Error::new(ErrorKind::InvalidParameters,
"The network's layers do not line up correctly."))}
};
index += layer.num_params();
}
Ok(outputs)
}
}
/// Compute the gradient of the Neural Network using the
/// back propagation algorithm.
impl<T: Criterion> Optimizable for BaseNeuralNet<T> {
type Inputs = Matrix<f64>;
type Targets = Matrix<f64>;
/// Compute the gradient of the neural network.
fn compute_grad(&self,
params: &[f64],
inputs: &Matrix<f64>,
targets: &Matrix<f64>)
-> (f64, Vec<f64>) {
self.compute_grad(params, inputs, targets)
}
}
/// Criterion for Neural Networks
///
/// Specifies a cost function and an optional regularization scheme.
pub trait Criterion {
/// The cost function for the criterion.
type Cost: CostFunc<Matrix<f64>>;
/// The cost function.
///
/// Returns a scalar cost.
fn cost(&self, outputs: &Matrix<f64>, targets: &Matrix<f64>) -> f64 {
Self::Cost::cost(outputs, targets)
}
/// The gradient of the cost function.
///
/// Returns a matrix of cost gradients.
fn cost_grad(&self, outputs: &Matrix<f64>, targets: &Matrix<f64>) -> Matrix<f64> {
Self::Cost::grad_cost(outputs, targets)
}
/// Returns the regularization for this criterion.
///
/// Will return `Regularization::None` by default.
fn regularization(&self) -> Regularization<f64> {
Regularization::None
}
/// Checks if the current criterion includes regularization.
///
/// Will return `false` by default.
fn is_regularized(&self) -> bool {
match self.regularization() {
Regularization::None => false,
_ => true,
}
}
/// Returns the regularization cost for the criterion.
///
/// Will return `0` by default.
///
/// This method will not be invoked by the neural network
/// if there is explicitly no regularization.
fn reg_cost(&self, reg_weights: MatrixSlice<f64>) -> f64 {
self.regularization().reg_cost(reg_weights)
}
/// Returns the regularization gradient for the criterion.
///
/// Will return a matrix of zeros by default.
///
/// This method will not be invoked by the neural network
/// if there is explicitly no regularization.
fn reg_cost_grad(&self, reg_weights: MatrixSlice<f64>) -> Matrix<f64> {
self.regularization().reg_grad(reg_weights)
}
}
/// The binary cross entropy criterion.
///
/// Uses the cross entropy error as its cost function;
/// typically paired with sigmoid output activations.
#[derive(Clone, Copy, Debug)]
pub struct BCECriterion {
regularization: Regularization<f64>,
}
impl Criterion for BCECriterion {
type Cost = cost_fn::CrossEntropyError;
fn regularization(&self) -> Regularization<f64> {
self.regularization
}
}
/// Creates a BCE Criterion without any regularization.
impl Default for BCECriterion {
fn default() -> Self {
BCECriterion { regularization: Regularization::None }
}
}
impl BCECriterion {
/// Constructs a new BCECriterion with the given regularization.
///
/// # Examples
///
/// ```
/// use rusty_machine::learning::nnet::BCECriterion;
/// use rusty_machine::learning::toolkit::regularization::Regularization;
///
/// // Create a new BCE criterion with L2 regularization of 0.3.
/// let criterion = BCECriterion::new(Regularization::L2(0.3f64));
/// ```
pub fn new(regularization: Regularization<f64>) -> Self {
BCECriterion { regularization: regularization }
}
}
/// The mean squared error criterion.
///
/// Uses the mean squared error as its cost function;
/// typically paired with linear output activations.
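///
/// # Examples
///
/// A small network using this criterion - a sketch built with the
/// `mlp` constructor (the layer sizes here are arbitrary):
///
/// ```
/// use rusty_machine::learning::nnet::{NeuralNet, MSECriterion};
/// use rusty_machine::learning::toolkit::activ_fn::Sigmoid;
/// use rusty_machine::learning::optim::grad_desc::StochasticGD;
///
/// // A 3-input, 1-output network trained under mean squared error.
/// let net = NeuralNet::mlp(&[3, 5, 1], MSECriterion::default(), StochasticGD::default(), Sigmoid);
/// ```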
#[derive(Clone, Copy, Debug)]
pub struct MSECriterion {
regularization: Regularization<f64>,
}
impl Criterion for MSECriterion {
type Cost = cost_fn::MeanSqError;
fn regularization(&self) -> Regularization<f64> {
self.regularization
}
}
/// Creates an MSE Criterion without any regularization.
impl Default for MSECriterion {
fn default() -> Self {
MSECriterion { regularization: Regularization::None }
}
}
impl MSECriterion {
/// Constructs a new MSECriterion with the given regularization.
///
/// # Examples
///
/// ```
/// use rusty_machine::learning::nnet::MSECriterion;
/// use rusty_machine::learning::toolkit::regularization::Regularization;
///
/// // Create a new MSE criterion with L2 regularization of 0.3.
/// let criterion = MSECriterion::new(Regularization::L2(0.3f64));
/// ```
pub fn new(regularization: Regularization<f64>) -> Self {
MSECriterion { regularization: regularization }
}
}