//! Module for the fmincg optimization algorithm.
//!
//! This algorithm was taken from Andrew Ng's Coursera machine
//! learning course. The function was translated from MATLAB into Rust.
//! Original source code can be found [here](http://www.mathworks.com/matlabcentral/fileexchange/42770-logistic-regression-with-regularization-used-to-classify-hand-written-digits/content/Logistic%20Regression%20with%20regularisation/fmincg.m).
//!
//! The attached license permits use and modification for research
//! and education only.
//!
//! Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13
//!
//! (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen
//!
//! Permission is granted for anyone to copy, use, or modify these
//! programs and accompanying documents for purposes of research or
//! education, provided this copyright notice is retained, and note is
//! made of any changes that have been made.
//!
//! These programs and documents are distributed without any warranty,
//! express or implied. As the programs were written for research
//! purposes only, they have not been tested to the degree that would be
//! advisable in any important application. All use of these programs is
//! entirely at the user's own risk.
//!
//! [rusty-machine] Changes made:
//!
//! - Conversion to Rust.
//! - The `length` parameter is hard-coded to the maximum number of iterations (`iters`).

use std::vec::*;

use learning::optim::{Optimizable, OptimAlgorithm};
use linalg::Vector;

use std::cmp;
use std::f64;

/// Conjugate Gradient Descent algorithm
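///
/// Minimizes a model's cost function using conjugate gradient descent with
/// the fmincg line search.
///
/// # Examples
///
/// A minimal sketch of running the optimizer through the `OptimAlgorithm`
/// trait. The import paths assume the `rusty-machine` crate layout, and
/// `model`, `start_params`, `inputs` and `targets` are hypothetical values
/// belonging to a type that implements `Optimizable`:
///
/// ```ignore
/// use rusty_machine::learning::optim::OptimAlgorithm;
/// use rusty_machine::learning::optim::fmincg::ConjugateGD;
///
/// let cg = ConjugateGD::default();
/// // Optimize the model parameters starting from `start_params`.
/// let optimal = cg.optimize(&model, &start_params, &inputs, &targets);
/// ```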
#[derive(Clone, Copy, Debug)]
pub struct ConjugateGD {
    /// Constant in the Wolfe-Powell conditions.
    pub rho: f64,
    /// Constant in the Wolfe-Powell conditions.
    pub sig: f64,
    /// Don't reevaluate within `int` of the limit of the current bracket.
    pub int: f64,
    /// Extrapolate a maximum of `ext` times the current bracket.
    pub ext: f64,
    /// Maximum of `max` function evaluations per line search.
    pub max: usize,
    /// The maximum allowed slope ratio.
    pub ratio: f64,
    /// The maximum number of iterations.
    pub iters: usize,
}

/// The default Conjugate GD algorithm.
///
/// The defaults are:
///
/// - rho = 0.01
/// - sig = 0.5
/// - int = 0.1
/// - ext = 3.0
/// - max = 20
/// - ratio = 100.0
/// - iters = 100
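///
/// # Examples
///
/// A minimal sketch, assuming the `rusty-machine` crate layout in which this
/// module is exported as `learning::optim::fmincg`:
///
/// ```ignore
/// use rusty_machine::learning::optim::fmincg::ConjugateGD;
///
/// // Take the defaults but raise the iteration cap.
/// let cg = ConjugateGD { iters: 500, ..ConjugateGD::default() };
/// assert_eq!(cg.iters, 500);
/// ```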
impl Default for ConjugateGD {
    fn default() -> ConjugateGD {
        ConjugateGD {
            rho: 0.01,
            sig: 0.5,
            int: 0.1,
            ext: 3.0,
            max: 20,
            ratio: 100.0,
            iters: 100,
        }
    }
}

impl<M: Optimizable> OptimAlgorithm<M> for ConjugateGD {
    fn optimize(&self,
                model: &M,
                start: &[f64],
                inputs: &M::Inputs,
                targets: &M::Targets)
                -> Vec<f64> {
        let mut i = 0usize;
        let mut ls_failed = false;

        // Cost and gradient at the starting point.
        let (mut f1, vec_df1) = model.compute_grad(start, inputs, targets);
        let mut df1 = Vector::new(vec_df1);

        // The reduction in the function. Can also be specified as part of `length`.
        let red = 1f64;
        let length = self.iters as i32;

        // The initial search direction is steepest descent; `d1` is the slope
        // along it and `z1` the initial step size.
        let mut s = -df1.clone();
        let mut d1 = -s.dot(&s);
        let mut z1 = red / (1f64 - d1);

        let mut x = Vector::new(start.to_vec());
        let (mut f2, mut df2): (f64, Vector<f64>);

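        // Main loop: Polack-Ribiere conjugate gradient directions with a line
        // search that uses quadratic and cubic interpolation/extrapolation and
        // the Wolfe-Powell stopping conditions, guessing initial step sizes by
        // the slope-ratio method (see the original fmincg.m).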
        while (i as i32) < length.abs() {
            // When `length` is positive we count iterations here, otherwise
            // we count function evaluations further down.
            if length > 0 {
                i += 1;
            }

            // Keep a copy of the current values and begin the line search
            // along `s` with step `z1`.
            let (x0, f0) = (x.clone(), f1);
            x += &s * z1;

            let cost = model.compute_grad(x.data(), inputs, targets);
            f2 = cost.0;
            df2 = Vector::new(cost.1);

            if length < 0 {
                i += 1;
            }

            // Slope at the trial point; point 3 is initialized to point 1.
            let mut d2 = df2.dot(&s);
            let (mut f3, mut d3, mut z3) = (f1, d1, -z1);

            // Budget of function evaluations for this line search.
            let mut m = if length > 0 {
                self.max as i32
            } else {
                cmp::min(self.max as i32, -length - (i as i32))
            };

            let mut success = false;
            let mut limit = -1f64;

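            // Line search: alternate between tightening the bracket and
            // extrapolating until we either succeed or give up.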
            loop {
                let mut z2: f64;

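                // Tighten the bracket while the Wolfe-Powell conditions are
                // not satisfied and we still have evaluations left.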
                while ((f2 > (f1 + z1 * self.rho * d1)) || (d2 > -self.sig * d1)) && (m > 0i32) {
                    limit = z1;

                    if f2 > f1 {
                        // Quadratic fit.
                        z2 = z3 - (0.5 * d3 * z3 * z3) / (d3 * z3 + f2 - f3);
                    } else {
                        // Cubic fit.
                        let a = 6f64 * (f2 - f3) / z3 + 3f64 * (d2 + d3);
                        let b = 3f64 * (f3 - f2) - z3 * (2f64 * d2 + d3);
                        z2 = ((b * b - a * d2 * z3 * z3).sqrt() - b) / a;
                    }

                    // Bisect if we had a numerical problem.
                    if z2.is_nan() || z2.is_infinite() {
                        z2 = z3 / 2f64;
                    }

                    // Clamp to max(min(z2, int * z3), (1 - int) * z3) so we
                    // don't accept a point too close to the bracket limits.
                    if z2 <= self.int * z3 {
                        if z2 <= (1f64 - self.int) * z3 {
                            z2 = (1f64 - self.int) * z3;
                        }
                    } else if self.int * z3 <= (1f64 - self.int) * z3 {
                        z2 = (1f64 - self.int) * z3;
                    } else {
                        z2 = self.int * z3;
                    }

                    // Update the step and re-evaluate.
                    z1 += z2;
                    x += &s * z2;

                    let cost_grad = model.compute_grad(x.data(), inputs, targets);
                    f2 = cost_grad.0;
                    df2 = Vector::new(cost_grad.1);

                    m -= 1i32;
                    if length < 0 {
                        i += 1;
                    }

                    d2 = df2.dot(&s);
                    z3 -= z2;
                }

                // Check the Wolfe-Powell conditions: the first and third
                // branches are failures, the second is a success.
                if f2 > f1 + z1 * self.rho * d1 || d2 > -self.sig * d1 {
                    break;
                } else if d2 > self.sig * d1 {
                    success = true;
                    break;
                } else if m == 0i32 {
                    break;
                }

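                // Make a cubic extrapolation to propose the next step.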
                let a = 6f64 * (f2 - f3) / z3 + 3f64 * (d2 + d3);
                let b = 3f64 * (f3 - f2) - z3 * (2f64 * d2 + d3);
                z2 = -d2 * z3 * z3 / (b + (b * b - a * d2 * z3 * z3).sqrt());

                if z2.is_nan() || z2.is_infinite() || z2 < 0f64 {
                    // Numerical problem or wrong sign.
                    if limit < -0.5 {
                        // No upper limit: extrapolate the maximum amount.
                        z2 = z1 * (self.ext - 1f64);
                    } else {
                        // Otherwise bisect towards the limit.
                        z2 = (limit - z1) / 2f64;
                    }
                } else if (limit > -0.5) && (z2 + z1 > limit) {
                    // Extrapolation beyond the limit: bisect.
                    z2 = (limit - z1) / 2f64;
                } else if (limit < -0.5) && (z2 + z1 > z1 * self.ext) {
                    // Extrapolation beyond the allowed maximum.
                    z2 = z1 * (self.ext - 1f64);
                } else if z2 < -z3 * self.int {
                    z2 = -z3 * self.int;
                } else if (limit > -0.5) && (z2 < (limit - z1) * (1f64 - self.int)) {
                    // Too close to the limit.
                    z2 = (limit - z1) * (1f64 - self.int);
                }

                // Set point 3 equal to point 2 and update the current estimates.
                f3 = f2;
                d3 = d2;
                z3 = -z2;
                z1 += z2;
                x += &s * z2;

                let cost_grad = model.compute_grad(x.data(), inputs, targets);
                f2 = cost_grad.0;
                df2 = Vector::new(cost_grad.1);

                m -= 1;
                if length < 0 {
                    i += 1;
                }

                d2 = df2.dot(&s);
            }

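            // If the line search succeeded, take a conjugate gradient step;
            // otherwise restore the previous point and fall back to steepest
            // descent.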
            if success {
                f1 = f2;

                // Polack-Ribiere direction.
                s = s * (&df2 - &df1).dot(&df2) / df1.dot(&df1) - &df2;
                df1 = df2;

                // The new slope must be negative; otherwise use the steepest
                // descent direction.
                d2 = df1.dot(&s);
                if d2 > 0f64 {
                    s = -&df1;
                    d2 = -s.dot(&s);
                }

                // Scale the step by the slope ratio, capped at `ratio`.
                let ratio = d1 / (d2 - f64::MIN_POSITIVE);
                if self.ratio < ratio {
                    z1 *= self.ratio;
                } else {
                    z1 *= ratio;
                }

                d1 = d2;
                ls_failed = false;
            } else {
                // Restore the point from before the failed line search.
                x = x0;
                f1 = f0;

                // Give up if the line search has failed twice in a row or we
                // have run out of iterations.
                if ls_failed || i as i32 > length.abs() {
                    break;
                }

                // Try steepest descent from the most recent gradient.
                df1 = df2;
                s = -&df1;
                d1 = -s.dot(&s);
                z1 = 1f64 / (1f64 - d1);
                ls_failed = true;
            }
        }
        x.into_vec()
    }
}