// blob: edd7b1385c5bc2e712e5ba220e4a5838ae83447e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.doccat;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
/**
 * Interface for classes which categorize documents.
 * <p>
 * A document can be supplied either as an array of tokens or as a single
 * string. The result of a categorization is an array holding one probability
 * per category; the remaining methods translate between category names,
 * category indices and such probability arrays.
 */
public interface DocumentCategorizer {

  /**
   * Categorizes the given text, provided in separate tokens.
   *
   * @param text the tokens of text to categorize
   * @return per category probabilities
   */
  double[] categorize(String[] text);

  /**
   * Categorizes the given text, provided in separate tokens.
   *
   * @param text the tokens of text to categorize
   * @param extraInformation optional extra information to pass for evaluation
   * @return per category probabilities
   */
  double[] categorize(String[] text, Map<String, Object> extraInformation);

  /**
   * Gets the best category from previously generated outcome probabilities.
   *
   * @param outcome a vector of outcome probabilities
   * @return the best category String
   */
  String getBestCategory(double[] outcome);

  /**
   * Gets the index of a certain category.
   *
   * @param category the category
   * @return an index
   */
  int getIndex(String category);

  /**
   * Gets the category at a given index.
   *
   * @param index the index
   * @return a category
   */
  String getCategory(int index);

  /**
   * Gets the number of categories.
   *
   * @return the no. of categories
   */
  int getNumberOfCategories();

  /**
   * Categorizes a piece of text.
   *
   * @param documentText the text to categorize
   * @return the probabilities of each category (sum up to 1)
   */
  double[] categorize(String documentText);

  /**
   * Categorizes a piece of text, providing extra metadata.
   *
   * @param documentText the text to categorize
   * @param extraInformation extra metadata
   * @return the probabilities of each category (sum up to 1)
   */
  double[] categorize(String documentText, Map<String, Object> extraInformation);

  /**
   * Gets the name of the category associated with the given probabilities.
   * <p>
   * NOTE(review): the method name suggests a textual summary of all results
   * rather than a single category name; confirm the exact format against the
   * implementing classes.
   *
   * @param results the probabilities of each category
   * @return the name of the outcome
   */
  String getAllResults(double[] results);

  /**
   * Returns a map in which the key is the category name and the value is the score.
   *
   * @param text the input text to classify
   * @return a map with the category name as a key. The value is the score of that category.
   */
  Map<String, Double> scoreMap(String text);

  /**
   * Gets a map of the scores sorted in ascending order together with their associated categories.
   * Many categories can have the same score, hence the Set as value.
   *
   * @param text the input text to classify
   * @return a map with the score as a key. The value is a Set of categories with the score.
   */
  SortedMap<Double, Set<String>> sortedScoreMap(String text);
}