OPENNLP-659 - added missing javadocs, minor tweaks
git-svn-id: https://svn.apache.org/repos/asf/opennlp/trunk@1734210 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java
index 1a1096a..f7b5a6f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java
@@ -19,6 +19,9 @@
import opennlp.tools.util.eval.EvaluationMonitor;
+/**
+ * {@link EvaluationMonitor} for doccat.
+ */
public interface DoccatEvaluationMonitor extends
EvaluationMonitor<DocumentSample> {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
index fbe2477..9b30d95 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
@@ -50,8 +50,8 @@
* Creates a {@link DoccatFactory}. Use this constructor to programmatically
* create a factory.
*
- * @param tokenizer
- * @param featureGenerators
+ * @param tokenizer the tokenizer
+ * @param featureGenerators the feature generators
*/
public DoccatFactory(Tokenizer tokenizer, FeatureGenerator[] featureGenerators) {
this.init(tokenizer, featureGenerators);
@@ -98,7 +98,7 @@
}
public static DoccatFactory create(String subclassName, Tokenizer tokenizer,
- FeatureGenerator[] featureGenerators) throws InvalidFormatException {
+ FeatureGenerator[] featureGenerators) throws InvalidFormatException {
if (subclassName == null) {
// will create the default factory
return new DoccatFactory(tokenizer, featureGenerators);
@@ -140,7 +140,7 @@
}
if (featureGenerators == null) { // could not load using artifact provider
// load bag of words as default
- FeatureGenerator[] bow = { new BagOfWordsFeatureGenerator() };
+ FeatureGenerator[] bow = {new BagOfWordsFeatureGenerator()};
this.featureGenerators = bow;
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
index a4c7db3..b62d8eb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
@@ -35,9 +35,9 @@
Collection<String> context = new LinkedList<String>();
- for (int i = 0; i < mFeatureGenerators.length; i++) {
+ for (FeatureGenerator mFeatureGenerator : mFeatureGenerators) {
Collection<String> extractedFeatures =
- mFeatureGenerators[i].extractFeatures(text, extraInformation);
+ mFeatureGenerator.extractFeatures(text, extraInformation);
context.addAll(extractedFeatures);
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java
index 89ea768..18084c0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java
@@ -30,11 +30,11 @@
private DocumentCategorizerContextGenerator mContextGenerator;
/**
- * Initializes the current instance.
+ * Initializes the current instance via samples and feature generators.
*
* @param data {@link ObjectStream} of {@link DocumentSample}s
*
- * @param featureGenerators
+ * @param featureGenerators the feature generators
*/
public DocumentCategorizerEventStream(ObjectStream<DocumentSample> data, FeatureGenerator... featureGenerators) {
super(data);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
index 447232c..b1b9e6e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
@@ -48,13 +48,12 @@
private DocumentCategorizerContextGenerator mContextGenerator;
/**
- * Initializes a the current instance with a doccat model and custom feature
+ * Initializes the current instance with a doccat model and custom feature
* generation. The feature generation must be identical to the configuration
* at training time.
*
- * @param model
- * @param featureGenerators
- *
+ * @param model the doccat model
+ * @param featureGenerators the feature generators
* @deprecated train a {@link DoccatModel} with a specific
* {@link DoccatFactory} to customize the {@link FeatureGenerator}s
*/
@@ -67,12 +66,12 @@
* Initializes the current instance with a doccat model. Default feature
* generation is used.
*
- * @param model
+ * @param model the doccat model
*/
public DocumentCategorizerME(DoccatModel model) {
this.model = model;
this.mContextGenerator = new DocumentCategorizerContextGenerator(this.model
- .getFactory().getFeatureGenerators());
+ .getFactory().getFeatureGenerators());
}
@Override
@@ -84,7 +83,7 @@
/**
* Categorizes the given text.
*
- * @param text
+ * @param text the text to categorize
*/
public double[] categorize(String text[]) {
return this.categorize(text, Collections.<String, Object>emptyMap());
@@ -97,7 +96,7 @@
*/
@Override
public double[] categorize(String documentText,
- Map<String, Object> extraInformation) {
+ Map<String, Object> extraInformation) {
Tokenizer tokenizer = model.getFactory().getTokenizer();
return categorize(tokenizer.tokenize(documentText), extraInformation);
}
@@ -109,14 +108,15 @@
public double[] categorize(String documentText) {
Tokenizer tokenizer = model.getFactory().getTokenizer();
return categorize(tokenizer.tokenize(documentText),
- Collections.<String, Object> emptyMap());
+ Collections.<String, Object>emptyMap());
}
-/**
- * Returns a map in which the key is the category name and the value is the score
- * @param text the input text to classify
- * @return
- */
+ /**
+   * Returns a map in which the key is the category name and the value is the score.
+ *
+ * @param text the input text to classify
+ * @return the score map
+ */
public Map<String, Double> scoreMap(String text) {
Map<String, Double> probDist = new HashMap<String, Double>();
@@ -129,12 +129,14 @@
return probDist;
}
-/**
- * Returns a map with the score as a key in ascendng order. The value is a Set of categories with the score.
- * Many categories can have the same score, hence the Set as value
- * @param text the input text to classify
- * @return
- */
+
+ /**
+   * Returns a map with the score as a key in ascending order. The value is a Set of categories with the score.
+   * Many categories can have the same score, hence the Set as value.
+ *
+ * @param text the input text to classify
+ * @return the sorted score map
+ */
public SortedMap<Double, Set<String>> sortedScoreMap(String text) {
SortedMap<Double, Set<String>> descendingMap = new TreeMap<Double, Set<String>>();
double[] categorize = categorize(text);
@@ -179,8 +181,8 @@
* instead.
*/
public static DoccatModel train(String languageCode, ObjectStream<DocumentSample> samples,
- TrainingParameters mlParams, FeatureGenerator... featureGenerators)
- throws IOException {
+ TrainingParameters mlParams, FeatureGenerator... featureGenerators)
+ throws IOException {
if (featureGenerators.length == 0) {
featureGenerators = new FeatureGenerator[]{defaultFeatureGenerator};
@@ -189,21 +191,21 @@
Map<String, String> manifestInfoEntries = new HashMap<String, String>();
MaxentModel model = TrainUtil.train(
- new DocumentCategorizerEventStream(samples, featureGenerators),
- mlParams.getSettings(), manifestInfoEntries);
+ new DocumentCategorizerEventStream(samples, featureGenerators),
+ mlParams.getSettings(), manifestInfoEntries);
return new DoccatModel(languageCode, model, manifestInfoEntries);
}
public static DoccatModel train(String languageCode, ObjectStream<DocumentSample> samples,
- TrainingParameters mlParams, DoccatFactory factory)
- throws IOException {
+ TrainingParameters mlParams, DoccatFactory factory)
+ throws IOException {
Map<String, String> manifestInfoEntries = new HashMap<String, String>();
MaxentModel model = TrainUtil.train(
- new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators()),
- mlParams.getSettings(), manifestInfoEntries);
+ new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators()),
+ mlParams.getSettings(), manifestInfoEntries);
return new DoccatModel(languageCode, model, manifestInfoEntries, factory);
}
@@ -211,14 +213,11 @@
/**
* Trains a doccat model with default feature generation.
*
- * @param languageCode
- * @param samples
- *
+ * @param languageCode the language code
+ * @param samples the samples
* @return the trained doccat model
- *
* @throws IOException
* @throws ObjectStreamException
- *
* @deprecated Use
* {@link #train(String, ObjectStream, TrainingParameters, DoccatFactory)}
* instead.
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/FeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/FeatureGenerator.java
index 0df09b3..2ed5a30 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/FeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/FeatureGenerator.java
@@ -25,5 +25,13 @@
* Interface for generating features for document categorization.
*/
public interface FeatureGenerator {
- public Collection<String> extractFeatures(String[] text, Map<String, Object> extraInformation);
+
+ /**
+   * Extracts features from the given text fragments.
+ *
+ * @param text the text fragments to extract features from
+ * @param extraInformation optional extra information to be used by the feature generator
+ * @return a collection of features
+ */
+ Collection<String> extractFeatures(String[] text, Map<String, Object> extraInformation);
}