OPENNLP-659 - added missing javadocs, minor tweaks

git-svn-id: https://svn.apache.org/repos/asf/opennlp/trunk@1734210 13f79535-47bb-0310-9956-ffa450edef68

commit: 0236fe90909c002d5e3fe5538e23caa0aca0853d [log] [tgz]
author: Tommaso Teofili <tommaso@apache.org> Wed Mar 09 09:58:42 2016 +0000
committer: Tommaso Teofili <tommaso@apache.org> Wed Mar 09 09:58:42 2016 +0000
tree: 6f9687f5df7fdad7f1dbaeffa9984cbdc1e2bd33
parent: 0526adb2016ae3e6cd57f5bf9932221c3be6ca2a [diff]
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java
index 1a1096a..f7b5a6f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java

@@ -19,6 +19,9 @@
 
 import opennlp.tools.util.eval.EvaluationMonitor;
 
+/**
+ * {@link EvaluationMonitor} for doccat.
+ */
 public interface DoccatEvaluationMonitor extends
     EvaluationMonitor<DocumentSample> {
 

diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
index fbe2477..9b30d95 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java

@@ -50,8 +50,8 @@
    * Creates a {@link DoccatFactory}. Use this constructor to programmatically
    * create a factory.
    *
-   * @param tokenizer
-   * @param featureGenerators
+   * @param tokenizer         the tokenizer
+   * @param featureGenerators the feature generators
    */
   public DoccatFactory(Tokenizer tokenizer, FeatureGenerator[] featureGenerators) {
     this.init(tokenizer, featureGenerators);
@@ -98,7 +98,7 @@
   }
 
   public static DoccatFactory create(String subclassName, Tokenizer tokenizer,
-      FeatureGenerator[] featureGenerators) throws InvalidFormatException {
+                                     FeatureGenerator[] featureGenerators) throws InvalidFormatException {
     if (subclassName == null) {
       // will create the default factory
       return new DoccatFactory(tokenizer, featureGenerators);
@@ -140,7 +140,7 @@
       }
       if (featureGenerators == null) { // could not load using artifact provider
         // load bag of words as default
-        FeatureGenerator[] bow = { new BagOfWordsFeatureGenerator() };
+        FeatureGenerator[] bow = {new BagOfWordsFeatureGenerator()};
         this.featureGenerators = bow;
       }
     }

diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
index a4c7db3..b62d8eb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java

@@ -35,9 +35,9 @@
 
     Collection<String> context = new LinkedList<String>();
 
-    for (int i = 0; i < mFeatureGenerators.length; i++) {
+    for (FeatureGenerator mFeatureGenerator : mFeatureGenerators) {
       Collection<String> extractedFeatures =
-          mFeatureGenerators[i].extractFeatures(text, extraInformation);
+          mFeatureGenerator.extractFeatures(text, extraInformation);
       context.addAll(extractedFeatures);
     }
 

diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java
index 89ea768..18084c0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java

@@ -30,11 +30,11 @@
   private DocumentCategorizerContextGenerator mContextGenerator;
 
   /**
-   * Initializes the current instance.
+   * Initializes the current instance via samples and feature generators.
    *
    * @param data {@link ObjectStream} of {@link DocumentSample}s
    *
-   * @param featureGenerators
+   * @param featureGenerators the feature generators
    */
   public DocumentCategorizerEventStream(ObjectStream<DocumentSample> data, FeatureGenerator... featureGenerators) {
     super(data);

diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
index 447232c..b1b9e6e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java

@@ -48,13 +48,12 @@
   private DocumentCategorizerContextGenerator mContextGenerator;
 
   /**
-   * Initializes a the current instance with a doccat model and custom feature
+   * Initializes the current instance with a doccat model and custom feature
    * generation. The feature generation must be identical to the configuration
    * at training time.
    *
-   * @param model
-   * @param featureGenerators
-   *
+   * @param model             the doccat model
+   * @param featureGenerators the feature generators
    * @deprecated train a {@link DoccatModel} with a specific
    * {@link DoccatFactory} to customize the {@link FeatureGenerator}s
    */
@@ -67,12 +66,12 @@
    * Initializes the current instance with a doccat model. Default feature
    * generation is used.
    *
-   * @param model
+   * @param model the doccat model
    */
   public DocumentCategorizerME(DoccatModel model) {
     this.model = model;
     this.mContextGenerator = new DocumentCategorizerContextGenerator(this.model
-            .getFactory().getFeatureGenerators());
+        .getFactory().getFeatureGenerators());
   }
 
   @Override
@@ -84,7 +83,7 @@
   /**
    * Categorizes the given text.
    *
-   * @param text
+   * @param text the text to categorize
    */
   public double[] categorize(String text[]) {
     return this.categorize(text, Collections.<String, Object>emptyMap());
@@ -97,7 +96,7 @@
    */
   @Override
   public double[] categorize(String documentText,
-      Map<String, Object> extraInformation) {
+                             Map<String, Object> extraInformation) {
     Tokenizer tokenizer = model.getFactory().getTokenizer();
     return categorize(tokenizer.tokenize(documentText), extraInformation);
   }
@@ -109,14 +108,15 @@
   public double[] categorize(String documentText) {
     Tokenizer tokenizer = model.getFactory().getTokenizer();
     return categorize(tokenizer.tokenize(documentText),
-        Collections.<String, Object> emptyMap());
+        Collections.<String, Object>emptyMap());
   }
 
-/**
- * Returns a map in which the key is the category name and the value is the score
- * @param text the input text to classify
- * @return
- */
+  /**
+   * Returns a map in which the key is the category name and the value is the score
+   *
+   * @param text the input text to classify
+   * @return the score map
+   */
   public Map<String, Double> scoreMap(String text) {
     Map<String, Double> probDist = new HashMap<String, Double>();
 
@@ -129,12 +129,14 @@
     return probDist;
 
   }
-/**
- * Returns a map with the score as a key in ascendng order. The value is a Set of categories with the score.
- * Many categories can have the same score, hence the Set as value
- * @param text the input text to classify
- * @return
- */
+
+  /**
+   * Returns a map with the score as a key in ascending order. The value is a Set of categories with the score.
+   * Many categories can have the same score, hence the Set as value
+   *
+   * @param text the input text to classify
+   * @return the sorted score map
+   */
   public SortedMap<Double, Set<String>> sortedScoreMap(String text) {
     SortedMap<Double, Set<String>> descendingMap = new TreeMap<Double, Set<String>>();
     double[] categorize = categorize(text);
@@ -179,8 +181,8 @@
    * instead.
    */
   public static DoccatModel train(String languageCode, ObjectStream<DocumentSample> samples,
-          TrainingParameters mlParams, FeatureGenerator... featureGenerators)
-          throws IOException {
+                                  TrainingParameters mlParams, FeatureGenerator... featureGenerators)
+      throws IOException {
 
     if (featureGenerators.length == 0) {
       featureGenerators = new FeatureGenerator[]{defaultFeatureGenerator};
@@ -189,21 +191,21 @@
     Map<String, String> manifestInfoEntries = new HashMap<String, String>();
 
     MaxentModel model = TrainUtil.train(
-            new DocumentCategorizerEventStream(samples, featureGenerators),
-            mlParams.getSettings(), manifestInfoEntries);
+        new DocumentCategorizerEventStream(samples, featureGenerators),
+        mlParams.getSettings(), manifestInfoEntries);
 
     return new DoccatModel(languageCode, model, manifestInfoEntries);
   }
 
   public static DoccatModel train(String languageCode, ObjectStream<DocumentSample> samples,
-          TrainingParameters mlParams, DoccatFactory factory)
-          throws IOException {
+                                  TrainingParameters mlParams, DoccatFactory factory)
+      throws IOException {
 
     Map<String, String> manifestInfoEntries = new HashMap<String, String>();
 
     MaxentModel model = TrainUtil.train(
-            new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators()),
-            mlParams.getSettings(), manifestInfoEntries);
+        new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators()),
+        mlParams.getSettings(), manifestInfoEntries);
 
     return new DoccatModel(languageCode, model, manifestInfoEntries, factory);
   }
@@ -211,14 +213,11 @@
   /**
    * Trains a doccat model with default feature generation.
    *
-   * @param languageCode
-   * @param samples
-   *
+   * @param languageCode the language code
+   * @param samples      the samples
    * @return the trained doccat model
-   *
    * @throws IOException
    * @throws ObjectStreamException
-   *
    * @deprecated Use
    * {@link #train(String, ObjectStream, TrainingParameters, DoccatFactory)}
    * instead.

diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/FeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/FeatureGenerator.java
index 0df09b3..2ed5a30 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/FeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/FeatureGenerator.java

@@ -25,5 +25,13 @@
  * Interface for generating features for document categorization.
  */
 public interface FeatureGenerator {
-  public Collection<String> extractFeatures(String[] text, Map<String, Object> extraInformation);
+
+  /**
+   * Extract features from given text fragments
+   *
+   * @param text             the text fragments to extract features from
+   * @param extraInformation optional extra information to be used by the feature generator
+   * @return a collection of features
+   */
+  Collection<String> extractFeatures(String[] text, Map<String, Object> extraInformation);
 }
commit	0236fe90909c002d5e3fe5538e23caa0aca0853d	[log] [tgz]
author	Tommaso Teofili <tommaso@apache.org>	Wed Mar 09 09:58:42 2016 +0000
committer	Tommaso Teofili <tommaso@apache.org>	Wed Mar 09 09:58:42 2016 +0000
tree	6f9687f5df7fdad7f1dbaeffa9984cbdc1e2bd33
parent	0526adb2016ae3e6cd57f5bf9932221c3be6ca2a [diff]