| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.opennlp.ml.maxent; |
| |
| import java.io.IOException; |
| |
| import org.apache.opennlp.ml.model.DataIndexer; |
| import org.apache.opennlp.ml.model.EventStream; |
| import org.apache.opennlp.ml.model.Prior; |
| import org.apache.opennlp.ml.model.UniformPrior; |
| |
| /** |
| * A factory class which uses instances of GISTrainer to create and train |
| * GISModels. All entry points are static; the public static fields below |
| * supply shared defaults for the overloads that do not take the |
| * corresponding setting as an explicit parameter. |
| */ |
| public class GIS { |
| /** |
| * Set this to false if you don't want messages about the progress of model |
| * training displayed. Every trainModel() overload without an explicit |
| * printMessagesWhileTraining parameter reads this flag at call time. |
| * Alternatively, use an overload that takes printMessagesWhileTraining to |
| * enable or disable progress messages for a single call. |
| */ |
| public static boolean PRINT_MESSAGES = true; |
| |
| /** |
| * If we are using smoothing, this is used as the "number" of times we want |
| * the trainer to imagine that it saw a feature that it actually didn't see. |
| * Defaulted to 0.1. The value is handed to the trainer by the overloads that |
| * take a smoothing flag. |
| */ |
| public static double SMOOTHING_OBSERVATION = 0.1; |
| |
| /** Iteration count used by the overloads that do not take one. */ |
| private static final int DEFAULT_ITERATIONS = 100; |
| |
| /** Cutoff value used by the overloads that do not take one. */ |
| private static final int NO_CUTOFF = 0; |
| |
| /** |
| * Train a model using the GIS algorithm, assuming 100 iterations, no |
| * cutoff and no smoothing. Progress messages follow PRINT_MESSAGES. |
| * |
| * @param eventStream |
| * The EventStream holding the data on which this model will be |
| * trained. |
| * @return The newly trained model, which can be used immediately or saved to |
| * disk using an opennlp.maxent.io.GISModelWriter object. |
| * @throws IOException |
| * propagated from the underlying GISTrainer. |
| */ |
| public static GISModel trainModel(EventStream eventStream) throws IOException { |
| return trainModel(eventStream, DEFAULT_ITERATIONS, NO_CUTOFF, false, |
| PRINT_MESSAGES); |
| } |
| |
| /** |
| * Train a model using the GIS algorithm, assuming 100 iterations and no |
| * cutoff. Progress messages follow PRINT_MESSAGES. |
| * |
| * @param eventStream |
| * The EventStream holding the data on which this model will be |
| * trained. |
| * @param smoothing |
| * Defines whether the created trainer will use smoothing while |
| * training the model. |
| * @return The newly trained model, which can be used immediately or saved to |
| * disk using an opennlp.maxent.io.GISModelWriter object. |
| * @throws IOException |
| * propagated from the underlying GISTrainer. |
| */ |
| public static GISModel trainModel(EventStream eventStream, boolean smoothing) |
| throws IOException { |
| return trainModel(eventStream, DEFAULT_ITERATIONS, NO_CUTOFF, smoothing, |
| PRINT_MESSAGES); |
| } |
| |
| /** |
| * Train a model using the GIS algorithm without smoothing. Progress |
| * messages follow PRINT_MESSAGES. |
| * |
| * @param eventStream |
| * The EventStream holding the data on which this model will be |
| * trained. |
| * @param iterations |
| * The number of GIS iterations to perform. |
| * @param cutoff |
| * The number of times a feature must be seen in order to be relevant |
| * for training. |
| * @return The newly trained model, which can be used immediately or saved to |
| * disk using an opennlp.maxent.io.GISModelWriter object. |
| * @throws IOException |
| * propagated from the underlying GISTrainer. |
| */ |
| public static GISModel trainModel(EventStream eventStream, int iterations, |
| int cutoff) throws IOException { |
| return trainModel(eventStream, iterations, cutoff, false, PRINT_MESSAGES); |
| } |
| |
| /** |
| * Train a model using the GIS algorithm. The trainer is also given the |
| * current value of SMOOTHING_OBSERVATION. |
| * |
| * @param eventStream |
| * The EventStream holding the data on which this model will be |
| * trained. |
| * @param iterations |
| * The number of GIS iterations to perform. |
| * @param cutoff |
| * The number of times a feature must be seen in order to be relevant |
| * for training. |
| * @param smoothing |
| * Defines whether the created trainer will use smoothing while |
| * training the model. |
| * @param printMessagesWhileTraining |
| * Determines whether training status messages are written to STDOUT. |
| * @return The newly trained model, which can be used immediately or saved to |
| * disk using an opennlp.maxent.io.GISModelWriter object. |
| * @throws IOException |
| * propagated from the underlying GISTrainer. |
| */ |
| public static GISModel trainModel(EventStream eventStream, int iterations, |
| int cutoff, boolean smoothing, boolean printMessagesWhileTraining) |
| throws IOException { |
| GISTrainer trainer = new GISTrainer(printMessagesWhileTraining); |
| trainer.setSmoothing(smoothing); |
| trainer.setSmoothingObservation(SMOOTHING_OBSERVATION); |
| return trainer.trainModel(eventStream, iterations, cutoff); |
| } |
| |
| /** |
| * Train a model using the GIS algorithm with an optional gaussian |
| * smoother. Progress messages follow PRINT_MESSAGES. |
| * |
| * @param eventStream |
| * The EventStream holding the data on which this model will be |
| * trained. |
| * @param iterations |
| * The number of GIS iterations to perform. |
| * @param cutoff |
| * The number of times a feature must be seen in order to be relevant |
| * for training. |
| * @param sigma |
| * The standard deviation for the gaussian smoother. A value that is |
| * not greater than zero leaves the trainer's gaussian sigma unset. |
| * @return The newly trained model, which can be used immediately or saved to |
| * disk using an opennlp.maxent.io.GISModelWriter object. |
| * @throws IOException |
| * propagated from the underlying GISTrainer. |
| */ |
| public static GISModel trainModel(EventStream eventStream, int iterations, |
| int cutoff, double sigma) throws IOException { |
| GISTrainer trainer = new GISTrainer(PRINT_MESSAGES); |
| // Only configure the gaussian smoother for a strictly positive sigma. |
| if (sigma > 0) { |
| trainer.setGaussianSigma(sigma); |
| } |
| return trainer.trainModel(eventStream, iterations, cutoff); |
| } |
| |
| /** |
| * Train a model using the GIS algorithm with no cutoff and the default |
| * (uniform) prior. Progress messages follow PRINT_MESSAGES. |
| * |
| * @param iterations |
| * The number of GIS iterations to perform. |
| * @param indexer |
| * The object which will be used for event compilation. |
| * @param smoothing |
| * Defines whether the created trainer will use smoothing while |
| * training the model. |
| * @return The newly trained model, which can be used immediately or saved to |
| * disk using an opennlp.maxent.io.GISModelWriter object. |
| */ |
| public static GISModel trainModel(int iterations, DataIndexer indexer, |
| boolean smoothing) { |
| // Honour PRINT_MESSAGES like the EventStream overloads do. |
| return trainModel(iterations, indexer, PRINT_MESSAGES, smoothing, null, |
| NO_CUTOFF); |
| } |
| |
| /** |
| * Train a model using the GIS algorithm with no smoothing, no cutoff and |
| * the default (uniform) prior. Progress messages follow PRINT_MESSAGES. |
| * |
| * @param iterations |
| * The number of GIS iterations to perform. |
| * @param indexer |
| * The object which will be used for event compilation. |
| * @return The newly trained model, which can be used immediately or saved to |
| * disk using an opennlp.maxent.io.GISModelWriter object. |
| */ |
| public static GISModel trainModel(int iterations, DataIndexer indexer) { |
| // Honour PRINT_MESSAGES like the EventStream overloads do. |
| return trainModel(iterations, indexer, PRINT_MESSAGES, false, null, |
| NO_CUTOFF); |
| } |
| |
| /** |
| * Train a model using the GIS algorithm with the specified number of |
| * iterations, data indexer, prior and cutoff, without smoothing. Progress |
| * messages follow PRINT_MESSAGES. |
| * |
| * @param iterations |
| * The number of GIS iterations to perform. |
| * @param indexer |
| * The object which will be used for event compilation. |
| * @param modelPrior |
| * The prior distribution for the model. If null, a UniformPrior is |
| * used. |
| * @param cutoff |
| * The number of times a predicate must occur to be used in a model. |
| * @return The newly trained model, which can be used immediately or saved to |
| * disk using an opennlp.maxent.io.GISModelWriter object. |
| */ |
| public static GISModel trainModel(int iterations, DataIndexer indexer, |
| Prior modelPrior, int cutoff) { |
| // Honour PRINT_MESSAGES like the EventStream overloads do. |
| return trainModel(iterations, indexer, PRINT_MESSAGES, false, modelPrior, |
| cutoff); |
| } |
| |
| /** |
| * Train a model using the GIS algorithm. Delegates to the overload that |
| * takes a threads argument, passing 1. |
| * |
| * @param iterations |
| * The number of GIS iterations to perform. |
| * @param indexer |
| * The object which will be used for event compilation. |
| * @param printMessagesWhileTraining |
| * Determines whether training status messages are written to STDOUT. |
| * @param smoothing |
| * Defines whether the created trainer will use smoothing while |
| * training the model. |
| * @param modelPrior |
| * The prior distribution for the model. If null, a UniformPrior is |
| * used. |
| * @param cutoff |
| * The number of times a predicate must occur to be used in a model. |
| * @return The newly trained model, which can be used immediately or saved to |
| * disk using an opennlp.maxent.io.GISModelWriter object. |
| */ |
| public static GISModel trainModel(int iterations, DataIndexer indexer, |
| boolean printMessagesWhileTraining, boolean smoothing, Prior modelPrior, |
| int cutoff) { |
| return trainModel(iterations, indexer, printMessagesWhileTraining, |
| smoothing, modelPrior, cutoff, 1); |
| } |
| |
| /** |
| * Train a model using the GIS algorithm. The trainer is also given the |
| * current value of SMOOTHING_OBSERVATION. |
| * |
| * @param iterations |
| * The number of GIS iterations to perform. |
| * @param indexer |
| * The object which will be used for event compilation. |
| * @param printMessagesWhileTraining |
| * Determines whether training status messages are written to STDOUT. |
| * @param smoothing |
| * Defines whether the created trainer will use smoothing while |
| * training the model. |
| * @param modelPrior |
| * The prior distribution for the model. If null, a UniformPrior is |
| * used. |
| * @param cutoff |
| * The number of times a predicate must occur to be used in a model. |
| * @param threads |
| * Passed unchanged to the trainer; presumably the number of threads |
| * used for training (confirm against GISTrainer). |
| * @return The newly trained model, which can be used immediately or saved to |
| * disk using an opennlp.maxent.io.GISModelWriter object. |
| */ |
| public static GISModel trainModel(int iterations, DataIndexer indexer, |
| boolean printMessagesWhileTraining, boolean smoothing, Prior modelPrior, |
| int cutoff, int threads) { |
| GISTrainer trainer = new GISTrainer(printMessagesWhileTraining); |
| trainer.setSmoothing(smoothing); |
| trainer.setSmoothingObservation(SMOOTHING_OBSERVATION); |
| // Callers may pass null to request the default prior. |
| if (modelPrior == null) { |
| modelPrior = new UniformPrior(); |
| } |
| |
| return trainer.trainModel(iterations, indexer, modelPrior, cutoff, threads); |
| } |
| } |
| |
| |
| |