//! Module for the fmincg optimization algorithm.
//!
//! This algorithm was taken from Andrew Ng's Coursera machine
//! learning course. The function was translated from MATLAB into Rust.
//! Original source code can be found [here](http://www.mathworks.com/matlabcentral/fileexchange/42770-logistic-regression-with-regularization-used-to-classify-hand-written-digits/content/Logistic%20Regression%20with%20regularisation/fmincg.m).
//!
//! The attached license permits use and modification for research
//! and education only.
//!
//! Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13
//!
//! (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen
//!
//! Permission is granted for anyone to copy, use, or modify these
//! programs and accompanying documents for purposes of research or
//! education, provided this copyright notice is retained, and note is
//! made of any changes that have been made.
//!
//! These programs and documents are distributed without any warranty,
//! express or implied. As the programs were written for research
//! purposes only, they have not been tested to the degree that would be
//! advisable in any important application. All use of these programs is
//! entirely at the user's own risk.
//!
//! [rusty-machine] Changes made:
//!
//! - Conversion to Rust.
//! - The `length` parameter is hard-coded to the maximum number of iterations (`iters`).

use std::vec::*;

use learning::optim::{Optimizable, OptimAlgorithm};
use linalg::Vector;

use std::cmp;
use std::f64;

/// Conjugate Gradient Descent algorithm
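///
/// Minimizes a model's cost function using conjugate gradient descent with
/// the fmincg line search.
///
/// # Examples
///
/// A minimal sketch of running the optimizer through the `OptimAlgorithm`
/// trait. The import paths assume the `rusty-machine` crate layout, and
/// `model`, `start_params`, `inputs` and `targets` are hypothetical values
/// belonging to a type that implements `Optimizable`:
///
/// ```ignore
/// use rusty_machine::learning::optim::OptimAlgorithm;
/// use rusty_machine::learning::optim::fmincg::ConjugateGD;
///
/// let cg = ConjugateGD::default();
/// // Optimize the model parameters starting from `start_params`.
/// let optimal = cg.optimize(&model, &start_params, &inputs, &targets);
/// ```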
#[derive(Clone, Copy, Debug)]
pub struct ConjugateGD {
    /// Constant in the Wolfe-Powell conditions.
    pub rho: f64,
    /// Constant in the Wolfe-Powell conditions.
    pub sig: f64,
    /// Don't reevaluate within `int` of the limit of the current bracket.
    pub int: f64,
    /// Extrapolate a maximum of `ext` times the current bracket.
    pub ext: f64,
    /// Maximum of `max` function evaluations per line search.
    pub max: usize,
    /// The maximum allowed slope ratio.
    pub ratio: f64,
    /// The maximum number of iterations.
    pub iters: usize,
}

/// The default Conjugate GD algorithm.
///
/// The defaults are:
///
/// - rho = 0.01
/// - sig = 0.5
/// - int = 0.1
/// - ext = 3.0
/// - max = 20
/// - ratio = 100.0
/// - iters = 100
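///
/// # Examples
///
/// A minimal sketch, assuming the `rusty-machine` crate layout in which this
/// module is exported as `learning::optim::fmincg`:
///
/// ```ignore
/// use rusty_machine::learning::optim::fmincg::ConjugateGD;
///
/// // Take the defaults but raise the iteration cap.
/// let cg = ConjugateGD { iters: 500, ..ConjugateGD::default() };
/// assert_eq!(cg.iters, 500);
/// ```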
impl Default for ConjugateGD {
    fn default() -> ConjugateGD {
        ConjugateGD {
            rho: 0.01,
            sig: 0.5,
            int: 0.1,
            ext: 3.0,
            max: 20,
            ratio: 100.0,
            iters: 100,
        }
    }
}

impl<M: Optimizable> OptimAlgorithm<M> for ConjugateGD {
    fn optimize(&self,
                model: &M,
                start: &[f64],
                inputs: &M::Inputs,
                targets: &M::Targets)
                -> Vec<f64> {
        let mut i = 0usize;
        let mut ls_failed = false;

        // Cost and gradient at the starting point.
        let (mut f1, vec_df1) = model.compute_grad(start, inputs, targets);
        let mut df1 = Vector::new(vec_df1);

        // The reduction in the function. Can also be specified as part of `length`.
        let red = 1f64;
        let length = self.iters as i32;

        // The initial search direction is steepest descent; `d1` is the slope
        // along it and `z1` the initial step size.
        let mut s = -df1.clone();
        let mut d1 = -s.dot(&s);
        let mut z1 = red / (1f64 - d1);

        let mut x = Vector::new(start.to_vec());
        let (mut f2, mut df2): (f64, Vector<f64>);

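        // Main loop: Polack-Ribiere conjugate gradient directions with a line
        // search that uses quadratic and cubic interpolation/extrapolation and
        // the Wolfe-Powell stopping conditions, guessing initial step sizes by
        // the slope-ratio method (see the original fmincg.m).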
        while (i as i32) < length.abs() {
            // When `length` is positive we count iterations here, otherwise
            // we count function evaluations further down.
            if length > 0 {
                i += 1;
            }

            // Keep a copy of the current values and begin the line search
            // along `s` with step `z1`.
            let (x0, f0) = (x.clone(), f1);
            x += &s * z1;

            let cost = model.compute_grad(x.data(), inputs, targets);
            f2 = cost.0;
            df2 = Vector::new(cost.1);

            if length < 0 {
                i += 1;
            }

            // Slope at the trial point; point 3 is initialized to point 1.
            let mut d2 = df2.dot(&s);
            let (mut f3, mut d3, mut z3) = (f1, d1, -z1);

            // Budget of function evaluations for this line search.
            let mut m = if length > 0 {
                self.max as i32
            } else {
                cmp::min(self.max as i32, -length - (i as i32))
            };

            let mut success = false;
            let mut limit = -1f64;

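            // Line search: alternate between tightening the bracket and
            // extrapolating until we either succeed or give up.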
            loop {
                let mut z2: f64;

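                // Tighten the bracket while the Wolfe-Powell conditions are
                // not satisfied and we still have evaluations left.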
                while ((f2 > (f1 + z1 * self.rho * d1)) || (d2 > -self.sig * d1)) && (m > 0i32) {
                    limit = z1;

                    if f2 > f1 {
                        // Quadratic fit.
                        z2 = z3 - (0.5 * d3 * z3 * z3) / (d3 * z3 + f2 - f3);
                    } else {
                        // Cubic fit.
                        let a = 6f64 * (f2 - f3) / z3 + 3f64 * (d2 + d3);
                        let b = 3f64 * (f3 - f2) - z3 * (2f64 * d2 + d3);
                        z2 = ((b * b - a * d2 * z3 * z3).sqrt() - b) / a;
                    }

                    // Bisect if we had a numerical problem.
                    if z2.is_nan() || z2.is_infinite() {
                        z2 = z3 / 2f64;
                    }

                    // Clamp to max(min(z2, int * z3), (1 - int) * z3) so we
                    // don't accept a point too close to the bracket limits.
                    if z2 <= self.int * z3 {
                        if z2 <= (1f64 - self.int) * z3 {
                            z2 = (1f64 - self.int) * z3;
                        }
                    } else if self.int * z3 <= (1f64 - self.int) * z3 {
                        z2 = (1f64 - self.int) * z3;
                    } else {
                        z2 = self.int * z3;
                    }

                    // Update the step and re-evaluate.
                    z1 += z2;
                    x += &s * z2;

                    let cost_grad = model.compute_grad(x.data(), inputs, targets);
                    f2 = cost_grad.0;
                    df2 = Vector::new(cost_grad.1);

                    m -= 1i32;
                    if length < 0 {
                        i += 1;
                    }

                    d2 = df2.dot(&s);
                    z3 -= z2;
                }

                // Check the Wolfe-Powell conditions: the first and third
                // branches are failures, the second is a success.
                if f2 > f1 + z1 * self.rho * d1 || d2 > -self.sig * d1 {
                    break;
                } else if d2 > self.sig * d1 {
                    success = true;
                    break;
                } else if m == 0i32 {
                    break;
                }

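                // Make a cubic extrapolation to propose the next step.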
                let a = 6f64 * (f2 - f3) / z3 + 3f64 * (d2 + d3);
                let b = 3f64 * (f3 - f2) - z3 * (2f64 * d2 + d3);
                z2 = -d2 * z3 * z3 / (b + (b * b - a * d2 * z3 * z3).sqrt());

                if z2.is_nan() || z2.is_infinite() || z2 < 0f64 {
                    // Numerical problem or wrong sign.
                    if limit < -0.5 {
                        // No upper limit: extrapolate the maximum amount.
                        z2 = z1 * (self.ext - 1f64);
                    } else {
                        // Otherwise bisect towards the limit.
                        z2 = (limit - z1) / 2f64;
                    }
                } else if (limit > -0.5) && (z2 + z1 > limit) {
                    // Extrapolation beyond the limit: bisect.
                    z2 = (limit - z1) / 2f64;
                } else if (limit < -0.5) && (z2 + z1 > z1 * self.ext) {
                    // Extrapolation beyond the allowed maximum.
                    z2 = z1 * (self.ext - 1f64);
                } else if z2 < -z3 * self.int {
                    z2 = -z3 * self.int;
                } else if (limit > -0.5) && (z2 < (limit - z1) * (1f64 - self.int)) {
                    // Too close to the limit.
                    z2 = (limit - z1) * (1f64 - self.int);
                }

                // Set point 3 equal to point 2 and update the current estimates.
                f3 = f2;
                d3 = d2;
                z3 = -z2;
                z1 += z2;
                x += &s * z2;

                let cost_grad = model.compute_grad(x.data(), inputs, targets);
                f2 = cost_grad.0;
                df2 = Vector::new(cost_grad.1);

                m -= 1;
                if length < 0 {
                    i += 1;
                }

                d2 = df2.dot(&s);
            }

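            // If the line search succeeded, take a conjugate gradient step;
            // otherwise restore the previous point and fall back to steepest
            // descent.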
            if success {
                f1 = f2;

                // Polack-Ribiere direction.
                s = s * (&df2 - &df1).dot(&df2) / df1.dot(&df1) - &df2;
                df1 = df2;

                // The new slope must be negative; otherwise use the steepest
                // descent direction.
                d2 = df1.dot(&s);
                if d2 > 0f64 {
                    s = -&df1;
                    d2 = -s.dot(&s);
                }

                // Scale the step by the slope ratio, capped at `ratio`.
                let ratio = d1 / (d2 - f64::MIN_POSITIVE);
                if self.ratio < ratio {
                    z1 *= self.ratio;
                } else {
                    z1 *= ratio;
                }

                d1 = d2;
                ls_failed = false;
            } else {
                // Restore the point from before the failed line search.
                x = x0;
                f1 = f0;

                // Give up if the line search has failed twice in a row or we
                // have run out of iterations.
                if ls_failed || i as i32 > length.abs() {
                    break;
                }

                // Try steepest descent from the most recent gradient.
                df1 = df2;
                s = -&df1;
                d1 = -s.dot(&s);
                z1 = 1f64 / (1f64 - d1);
                ls_failed = true;
            }
        }
        x.into_vec()
    }
}