opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java - opennlp - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package opennlp.tools.ml.maxent.quasinewton;

 import java.lang.reflect.Constructor;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;

 import opennlp.tools.ml.ArrayMath;
 import opennlp.tools.ml.model.DataIndexer;

 /**
  * Evaluate negative log-likelihood and its gradient in parallel
  */
 public class ParallelNegLogLikelihood extends NegLogLikelihood {

   // Number of threads
   private int threads;

   // Partial value of negative log-likelihood to be computed by each thread
   private double[] negLogLikelihoodThread;

   // Partial gradient
   private double[][] gradientThread;

   public ParallelNegLogLikelihood(DataIndexer indexer, int threads) {
     super(indexer);

     if (threads <= 0)
       throw new IllegalArgumentException(
           "Number of threads must 1 or larger");

     this.threads                = threads;
     this.negLogLikelihoodThread = new double[threads];
     this.gradientThread         = new double[threads][dimension];
   }

   /**
    * Negative log-likelihood
    */
   @Override
   public double valueAt(double[] x) {

     if (x.length != dimension)
       throw new IllegalArgumentException(
           "x is invalid, its dimension is not equal to domain dimension.");

     // Compute partial value of negative log-likelihood in each thread
     computeInParallel(x, NegLLComputeTask.class);

     double negLogLikelihood = 0;
     for (int t = 0; t < threads; t++) {
       negLogLikelihood += negLogLikelihoodThread[t];
     }

     return negLogLikelihood;
   }

   /**
    * Compute gradient
    */
   @Override
   public double[] gradientAt(double[] x) {

     if (x.length != dimension)
       throw new IllegalArgumentException(
           "x is invalid, its dimension is not equal to the function.");

     // Compute partial gradient in each thread
     computeInParallel(x, GradientComputeTask.class);

     // Accumulate gradient
     for (int i = 0; i < dimension; i++) {
       gradient[i] = 0;
       for (int t = 0; t < threads; t++) {
         gradient[i] += gradientThread[t][i];
       }
     }

     return gradient;
   }

   /**
    * Compute tasks in parallel
    */
   private void computeInParallel(double[] x, Class<? extends ComputeTask> taskClass) {

     ExecutorService executor = Executors.newFixedThreadPool(threads, runnable -> {
       Thread thread = new Thread(runnable);
       thread.setName(
           "opennlp.tools.ml.maxent.quasinewton.ParallelNegLogLikelihood.computeInParallel()");
       thread.setDaemon(true);
       return thread;
     });

     int taskSize = numContexts / threads;
     int leftOver = numContexts % threads;

     try {
       Constructor<? extends ComputeTask> cons = taskClass.getConstructor(
           ParallelNegLogLikelihood.class,
           int.class, int.class, int.class, double[].class);

       List<Future<?>> futures = new ArrayList<>();
       for (int i = 0; i < threads; i++) {
         if (i != threads - 1)
           futures.add(executor.submit(
               cons.newInstance(this, i, i * taskSize, taskSize, x)));
         else
           futures.add(executor.submit(
               cons.newInstance(this, i, i * taskSize, taskSize + leftOver, x)));
       }

       for (Future<?> future: futures)
         future.get();

     } catch (Exception e) {
       e.printStackTrace();
     }

     executor.shutdown();
   }

   /**
    * Task that is computed in parallel
    */
   abstract class ComputeTask implements Callable<ComputeTask> {

     final int threadIndex;

     // Start index of contexts to compute
     final int startIndex;

     // Number of contexts to compute
     final int length;

     final double[] x;

     public ComputeTask(int threadIndex, int startIndex, int length, double[] x) {
       this.threadIndex = threadIndex;
       this.startIndex  = startIndex;
       this.length      = length;
       this.x           = x;
     }
   }

   /**
    * Task for computing partial value of negative log-likelihood
    */
   class NegLLComputeTask extends ComputeTask {

     final double[] tempSums;

     public NegLLComputeTask(int threadIndex, int startIndex, int length, double[] x) {
       super(threadIndex, startIndex, length, x);
       this.tempSums = new double[numOutcomes];
     }

     @Override
     public NegLLComputeTask call() {
       int ci, oi, ai, vectorIndex, outcome;
       double predValue, logSumOfExps;
       negLogLikelihoodThread[threadIndex] = 0;

       for (ci = startIndex; ci < startIndex + length; ci++) {
         for (oi = 0; oi < numOutcomes; oi++) {
           tempSums[oi] = 0;
           for (ai = 0; ai < contexts[ci].length; ai++) {
             vectorIndex = indexOf(oi, contexts[ci][ai]);
             predValue = values != null ? values[ci][ai] : 1.0;
             tempSums[oi] += predValue * x[vectorIndex];
           }
         }

         logSumOfExps = ArrayMath.logSumOfExps(tempSums);

         outcome = outcomeList[ci];
         negLogLikelihoodThread[threadIndex] -=
             (tempSums[outcome] - logSumOfExps) * numTimesEventsSeen[ci];
       }

       return this;
     }
   }

   /**
    * Task for computing partial gradient
    */
   class GradientComputeTask extends ComputeTask {

     final double[] expectation;

     public GradientComputeTask(int threadIndex, int startIndex, int length, double[] x) {
       super(threadIndex, startIndex, length, x);
       this.expectation = new double[numOutcomes];
     }

     @Override
     public GradientComputeTask call() {
       int ci, oi, ai, vectorIndex;
       double predValue, logSumOfExps;
       int empirical;

       // Reset gradientThread
       Arrays.fill(gradientThread[threadIndex], 0);

       for (ci = startIndex; ci < startIndex + length; ci++) {
         for (oi = 0; oi < numOutcomes; oi++) {
           expectation[oi] = 0;
           for (ai = 0; ai < contexts[ci].length; ai++) {
             vectorIndex = indexOf(oi, contexts[ci][ai]);
             predValue = values != null ? values[ci][ai] : 1.0;
             expectation[oi] += predValue * x[vectorIndex];
           }
         }

         logSumOfExps = ArrayMath.logSumOfExps(expectation);

         for (oi = 0; oi < numOutcomes; oi++) {
           expectation[oi] = StrictMath.exp(expectation[oi] - logSumOfExps);
         }

         for (oi = 0; oi < numOutcomes; oi++) {
           empirical = outcomeList[ci] == oi ? 1 : 0;
           for (ai = 0; ai < contexts[ci].length; ai++) {
             vectorIndex = indexOf(oi, contexts[ci][ai]);
             predValue = values != null ? values[ci][ai] : 1.0;
             gradientThread[threadIndex][vectorIndex] +=
                 predValue * (expectation[oi] - empirical) * numTimesEventsSeen[ci];
           }
         }
       }

       return this;
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package opennlp.tools.ml.maxent.quasinewton;

	import java.lang.reflect.Constructor;
	import java.util.ArrayList;
	import java.util.Arrays;
	import java.util.List;
	import java.util.concurrent.Callable;
	import java.util.concurrent.ExecutorService;
	import java.util.concurrent.Executors;
	import java.util.concurrent.Future;

	import opennlp.tools.ml.ArrayMath;
	import opennlp.tools.ml.model.DataIndexer;

	/**
	* Evaluate negative log-likelihood and its gradient in parallel
	*/
	public class ParallelNegLogLikelihood extends NegLogLikelihood {

	// Number of threads
	private int threads;

	// Partial value of negative log-likelihood to be computed by each thread
	private double[] negLogLikelihoodThread;

	// Partial gradient
	private double[][] gradientThread;

	public ParallelNegLogLikelihood(DataIndexer indexer, int threads) {
	super(indexer);

	if (threads <= 0)
	throw new IllegalArgumentException(
	"Number of threads must 1 or larger");

	this.threads = threads;
	this.negLogLikelihoodThread = new double[threads];
	this.gradientThread = new double[threads][dimension];
	}

	/**
	* Negative log-likelihood
	*/
	@Override
	public double valueAt(double[] x) {

	if (x.length != dimension)
	throw new IllegalArgumentException(
	"x is invalid, its dimension is not equal to domain dimension.");

	// Compute partial value of negative log-likelihood in each thread
	computeInParallel(x, NegLLComputeTask.class);

	double negLogLikelihood = 0;
	for (int t = 0; t < threads; t++) {
	negLogLikelihood += negLogLikelihoodThread[t];
	}

	return negLogLikelihood;
	}

	/**
	* Compute gradient
	*/
	@Override
	public double[] gradientAt(double[] x) {

	if (x.length != dimension)
	throw new IllegalArgumentException(
	"x is invalid, its dimension is not equal to the function.");

	// Compute partial gradient in each thread
	computeInParallel(x, GradientComputeTask.class);

	// Accumulate gradient
	for (int i = 0; i < dimension; i++) {
	gradient[i] = 0;
	for (int t = 0; t < threads; t++) {
	gradient[i] += gradientThread[t][i];
	}
	}

	return gradient;
	}

	/**
	* Compute tasks in parallel
	*/
	private void computeInParallel(double[] x, Class<? extends ComputeTask> taskClass) {

	ExecutorService executor = Executors.newFixedThreadPool(threads, runnable -> {
	Thread thread = new Thread(runnable);
	thread.setName(
	"opennlp.tools.ml.maxent.quasinewton.ParallelNegLogLikelihood.computeInParallel()");
	thread.setDaemon(true);
	return thread;
	});

	int taskSize = numContexts / threads;
	int leftOver = numContexts % threads;

	try {
	Constructor<? extends ComputeTask> cons = taskClass.getConstructor(
	ParallelNegLogLikelihood.class,
	int.class, int.class, int.class, double[].class);

	List<Future<?>> futures = new ArrayList<>();
	for (int i = 0; i < threads; i++) {
	if (i != threads - 1)
	futures.add(executor.submit(
	cons.newInstance(this, i, i * taskSize, taskSize, x)));
	else
	futures.add(executor.submit(
	cons.newInstance(this, i, i * taskSize, taskSize + leftOver, x)));
	}

	for (Future<?> future: futures)
	future.get();

	} catch (Exception e) {
	e.printStackTrace();
	}

	executor.shutdown();
	}

	/**
	* Task that is computed in parallel
	*/
	abstract class ComputeTask implements Callable<ComputeTask> {

	final int threadIndex;

	// Start index of contexts to compute
	final int startIndex;

	// Number of contexts to compute
	final int length;

	final double[] x;

	public ComputeTask(int threadIndex, int startIndex, int length, double[] x) {
	this.threadIndex = threadIndex;
	this.startIndex = startIndex;
	this.length = length;
	this.x = x;
	}
	}

	/**
	* Task for computing partial value of negative log-likelihood
	*/
	class NegLLComputeTask extends ComputeTask {

	final double[] tempSums;

	public NegLLComputeTask(int threadIndex, int startIndex, int length, double[] x) {
	super(threadIndex, startIndex, length, x);
	this.tempSums = new double[numOutcomes];
	}

	@Override
	public NegLLComputeTask call() {
	int ci, oi, ai, vectorIndex, outcome;
	double predValue, logSumOfExps;
	negLogLikelihoodThread[threadIndex] = 0;

	for (ci = startIndex; ci < startIndex + length; ci++) {
	for (oi = 0; oi < numOutcomes; oi++) {
	tempSums[oi] = 0;
	for (ai = 0; ai < contexts[ci].length; ai++) {
	vectorIndex = indexOf(oi, contexts[ci][ai]);
	predValue = values != null ? values[ci][ai] : 1.0;
	tempSums[oi] += predValue * x[vectorIndex];
	}
	}

	logSumOfExps = ArrayMath.logSumOfExps(tempSums);

	outcome = outcomeList[ci];
	negLogLikelihoodThread[threadIndex] -=
	(tempSums[outcome] - logSumOfExps) * numTimesEventsSeen[ci];
	}

	return this;
	}
	}

	/**
	* Task for computing partial gradient
	*/
	class GradientComputeTask extends ComputeTask {

	final double[] expectation;

	public GradientComputeTask(int threadIndex, int startIndex, int length, double[] x) {
	super(threadIndex, startIndex, length, x);
	this.expectation = new double[numOutcomes];
	}

	@Override
	public GradientComputeTask call() {
	int ci, oi, ai, vectorIndex;
	double predValue, logSumOfExps;
	int empirical;

	// Reset gradientThread
	Arrays.fill(gradientThread[threadIndex], 0);

	for (ci = startIndex; ci < startIndex + length; ci++) {
	for (oi = 0; oi < numOutcomes; oi++) {
	expectation[oi] = 0;
	for (ai = 0; ai < contexts[ci].length; ai++) {
	vectorIndex = indexOf(oi, contexts[ci][ai]);
	predValue = values != null ? values[ci][ai] : 1.0;
	expectation[oi] += predValue * x[vectorIndex];
	}
	}

	logSumOfExps = ArrayMath.logSumOfExps(expectation);

	for (oi = 0; oi < numOutcomes; oi++) {
	expectation[oi] = StrictMath.exp(expectation[oi] - logSumOfExps);
	}

	for (oi = 0; oi < numOutcomes; oi++) {
	empirical = outcomeList[ci] == oi ? 1 : 0;
	for (ai = 0; ai < contexts[ci].length; ai++) {
	vectorIndex = indexOf(oi, contexts[ci][ai]);
	predValue = values != null ? values[ci][ai] : 1.0;
	gradientThread[threadIndex][vectorIndex] +=
	predValue * (expectation[oi] - empirical) * numTimesEventsSeen[ci];
	}
	}
	}

	return this;
	}
	}
	}