nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java - opennlp-sandbox - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.apache.opennlp.utils.regression;

 import org.apache.opennlp.utils.TrainingSet;

 /**
  * A utility class for calculating gradient descent.
  */
 public class GradientDescentUtils {

   private static final double THRESHOLD = 0.5;
   private static final int MAX_ITERATIONS = 100000;

   /**
    * Calculates batch gradient descent on a {@link Hypothesis}, {@link TrainingSet} and
    * learning rate {@code alpha}. The algorithms iteratively adjusts the hypothesis parameters
    *
    * <p>
    * Note: This implementation uses {@link LinearCombinationHypothesis} as hypothesis.
    *
    * @param trainingSet the {@link TrainingSet} used to fit the parameters
    * @param alpha       the learning rate alpha used to define how big the descent steps are
    */
   public static void batchGradientDescent(TrainingSet trainingSet, double alpha) {
     // set initial random weights
     double[] parameters = initializeRandomWeights(trainingSet.iterator().next().getInputs().length);
     Hypothesis hypothesis = new LinearCombinationHypothesis(parameters);

     int iterations = 0;

     double cost = Double.MAX_VALUE;
     while (true) {
       // calculate cost
       double newCost = RegressionModelUtils.ordinaryLeastSquares(trainingSet, hypothesis);

       if (newCost > cost) {
         throw new RuntimeException("failed to converge at iteration " + iterations + " with cost going from " + cost + " to " + newCost);
       } else if (cost == newCost || newCost < THRESHOLD || iterations > MAX_ITERATIONS) {
         // System.out.println(cost + " with parameters " + Arrays.toString(parameters) + "(" + iterations + " iterations)");
         break;
       }

       // update registered cost
       cost = newCost;

       // calculate the updated parameters
       parameters = RegressionModelUtils.batchLeastMeanSquareUpdate(parameters, alpha, trainingSet, hypothesis);

       // update weights in the hypothesis
       hypothesis = new LinearCombinationHypothesis(parameters);

       iterations++;
     }
   }

   private static double[] initializeRandomWeights(int size) {
     double[] doubles = new double[size];
     for (int i = 0; i < doubles.length; i++) {
       doubles[i] = Math.random() * 0.1d;
     }
     return doubles;
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/
	package org.apache.opennlp.utils.regression;

	import org.apache.opennlp.utils.TrainingSet;

	/**
	* A utility class for calculating gradient descent.
	*/
	public class GradientDescentUtils {

	private static final double THRESHOLD = 0.5;
	private static final int MAX_ITERATIONS = 100000;

	/**
	* Calculates batch gradient descent on a {@link Hypothesis}, {@link TrainingSet} and
	* learning rate {@code alpha}. The algorithms iteratively adjusts the hypothesis parameters
	*
	* <p>
	* Note: This implementation uses {@link LinearCombinationHypothesis} as hypothesis.
	*
	* @param trainingSet the {@link TrainingSet} used to fit the parameters
	* @param alpha the learning rate alpha used to define how big the descent steps are
	*/
	public static void batchGradientDescent(TrainingSet trainingSet, double alpha) {
	// set initial random weights
	double[] parameters = initializeRandomWeights(trainingSet.iterator().next().getInputs().length);
	Hypothesis hypothesis = new LinearCombinationHypothesis(parameters);

	int iterations = 0;

	double cost = Double.MAX_VALUE;
	while (true) {
	// calculate cost
	double newCost = RegressionModelUtils.ordinaryLeastSquares(trainingSet, hypothesis);

	if (newCost > cost) {
	throw new RuntimeException("failed to converge at iteration " + iterations + " with cost going from " + cost + " to " + newCost);
	} else if (cost == newCost \|\| newCost < THRESHOLD \|\| iterations > MAX_ITERATIONS) {
	// System.out.println(cost + " with parameters " + Arrays.toString(parameters) + "(" + iterations + " iterations)");
	break;
	}

	// update registered cost
	cost = newCost;

	// calculate the updated parameters
	parameters = RegressionModelUtils.batchLeastMeanSquareUpdate(parameters, alpha, trainingSet, hypothesis);

	// update weights in the hypothesis
	hypothesis = new LinearCombinationHypothesis(parameters);

	iterations++;
	}
	}

	private static double[] initializeRandomWeights(int size) {
	double[] doubles = new double[size];
	for (int i = 0; i < doubles.length; i++) {
	doubles[i] = Math.random() * 0.1d;
	}
	return doubles;
	}

	}