| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| |
| package opennlp.tools.cmdline; |
| |
| import java.util.Collections; |
| import java.util.LinkedHashMap; |
| import java.util.LinkedList; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| |
| import opennlp.tools.cmdline.chunker.ChunkerConverterTool; |
| import opennlp.tools.cmdline.chunker.ChunkerCrossValidatorTool; |
| import opennlp.tools.cmdline.chunker.ChunkerEvaluatorTool; |
| import opennlp.tools.cmdline.chunker.ChunkerMETool; |
| import opennlp.tools.cmdline.chunker.ChunkerTrainerTool; |
| import opennlp.tools.cmdline.dictionary.DictionaryBuilderTool; |
| import opennlp.tools.cmdline.doccat.DoccatConverterTool; |
| import opennlp.tools.cmdline.doccat.DoccatCrossValidatorTool; |
| import opennlp.tools.cmdline.doccat.DoccatEvaluatorTool; |
| import opennlp.tools.cmdline.doccat.DoccatTool; |
| import opennlp.tools.cmdline.doccat.DoccatTrainerTool; |
| import opennlp.tools.cmdline.entitylinker.EntityLinkerTool; |
| import opennlp.tools.cmdline.langdetect.LanguageDetectorConverterTool; |
| import opennlp.tools.cmdline.langdetect.LanguageDetectorCrossValidatorTool; |
| import opennlp.tools.cmdline.langdetect.LanguageDetectorEvaluatorTool; |
| import opennlp.tools.cmdline.langdetect.LanguageDetectorTool; |
| import opennlp.tools.cmdline.langdetect.LanguageDetectorTrainerTool; |
| import opennlp.tools.cmdline.languagemodel.NGramLanguageModelTool; |
| import opennlp.tools.cmdline.lemmatizer.LemmatizerEvaluatorTool; |
| import opennlp.tools.cmdline.lemmatizer.LemmatizerMETool; |
| import opennlp.tools.cmdline.lemmatizer.LemmatizerTrainerTool; |
| import opennlp.tools.cmdline.namefind.CensusDictionaryCreatorTool; |
| import opennlp.tools.cmdline.namefind.TokenNameFinderConverterTool; |
| import opennlp.tools.cmdline.namefind.TokenNameFinderCrossValidatorTool; |
| import opennlp.tools.cmdline.namefind.TokenNameFinderEvaluatorTool; |
| import opennlp.tools.cmdline.namefind.TokenNameFinderTool; |
| import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool; |
| import opennlp.tools.cmdline.parser.BuildModelUpdaterTool; |
| import opennlp.tools.cmdline.parser.CheckModelUpdaterTool; |
| import opennlp.tools.cmdline.parser.ParserConverterTool; |
| import opennlp.tools.cmdline.parser.ParserEvaluatorTool; |
| import opennlp.tools.cmdline.parser.ParserTool; |
| import opennlp.tools.cmdline.parser.ParserTrainerTool; |
| import opennlp.tools.cmdline.parser.TaggerModelReplacerTool; |
| import opennlp.tools.cmdline.postag.POSTaggerConverterTool; |
| import opennlp.tools.cmdline.postag.POSTaggerCrossValidatorTool; |
| import opennlp.tools.cmdline.postag.POSTaggerEvaluatorTool; |
| import opennlp.tools.cmdline.postag.POSTaggerTrainerTool; |
| import opennlp.tools.cmdline.sentdetect.SentenceDetectorConverterTool; |
| import opennlp.tools.cmdline.sentdetect.SentenceDetectorCrossValidatorTool; |
| import opennlp.tools.cmdline.sentdetect.SentenceDetectorEvaluatorTool; |
| import opennlp.tools.cmdline.sentdetect.SentenceDetectorTool; |
| import opennlp.tools.cmdline.sentdetect.SentenceDetectorTrainerTool; |
| import opennlp.tools.cmdline.tokenizer.DictionaryDetokenizerTool; |
| import opennlp.tools.cmdline.tokenizer.SimpleTokenizerTool; |
| import opennlp.tools.cmdline.tokenizer.TokenizerConverterTool; |
| import opennlp.tools.cmdline.tokenizer.TokenizerCrossValidatorTool; |
| import opennlp.tools.cmdline.tokenizer.TokenizerMEEvaluatorTool; |
| import opennlp.tools.cmdline.tokenizer.TokenizerMETool; |
| import opennlp.tools.cmdline.tokenizer.TokenizerTrainerTool; |
| import opennlp.tools.util.Version; |
| |
| public final class CLI { |
| |
| public static final String CMD = "opennlp"; |
| |
| private static Map<String, CmdLineTool> toolLookupMap; |
| |
| static { |
| toolLookupMap = new LinkedHashMap<>(); |
| |
| List<CmdLineTool> tools = new LinkedList<>(); |
| |
| // Document Categorizer |
| tools.add(new DoccatTool()); |
| tools.add(new DoccatTrainerTool()); |
| tools.add(new DoccatEvaluatorTool()); |
| tools.add(new DoccatCrossValidatorTool()); |
| tools.add(new DoccatConverterTool()); |
| |
| // Language Detector |
| tools.add(new LanguageDetectorTool()); |
| tools.add(new LanguageDetectorTrainerTool()); |
| tools.add(new LanguageDetectorConverterTool()); |
| tools.add(new LanguageDetectorCrossValidatorTool()); |
| tools.add(new LanguageDetectorEvaluatorTool()); |
| |
| // Dictionary Builder |
| tools.add(new DictionaryBuilderTool()); |
| |
| // Tokenizer |
| tools.add(new SimpleTokenizerTool()); |
| tools.add(new TokenizerMETool()); |
| tools.add(new TokenizerTrainerTool()); |
| tools.add(new TokenizerMEEvaluatorTool()); |
| tools.add(new TokenizerCrossValidatorTool()); |
| tools.add(new TokenizerConverterTool()); |
| tools.add(new DictionaryDetokenizerTool()); |
| |
| // Sentence detector |
| tools.add(new SentenceDetectorTool()); |
| tools.add(new SentenceDetectorTrainerTool()); |
| tools.add(new SentenceDetectorEvaluatorTool()); |
| tools.add(new SentenceDetectorCrossValidatorTool()); |
| tools.add(new SentenceDetectorConverterTool()); |
| |
| // Name Finder |
| tools.add(new TokenNameFinderTool()); |
| tools.add(new TokenNameFinderTrainerTool()); |
| tools.add(new TokenNameFinderEvaluatorTool()); |
| tools.add(new TokenNameFinderCrossValidatorTool()); |
| tools.add(new TokenNameFinderConverterTool()); |
| tools.add(new CensusDictionaryCreatorTool()); |
| |
| |
| // POS Tagger |
| tools.add(new opennlp.tools.cmdline.postag.POSTaggerTool()); |
| tools.add(new POSTaggerTrainerTool()); |
| tools.add(new POSTaggerEvaluatorTool()); |
| tools.add(new POSTaggerCrossValidatorTool()); |
| tools.add(new POSTaggerConverterTool()); |
| |
| //Lemmatizer |
| tools.add(new LemmatizerMETool()); |
| tools.add(new LemmatizerTrainerTool()); |
| tools.add(new LemmatizerEvaluatorTool()); |
| |
| // Chunker |
| tools.add(new ChunkerMETool()); |
| tools.add(new ChunkerTrainerTool()); |
| tools.add(new ChunkerEvaluatorTool()); |
| tools.add(new ChunkerCrossValidatorTool()); |
| tools.add(new ChunkerConverterTool()); |
| |
| // Parser |
| tools.add(new ParserTool()); |
| tools.add(new ParserTrainerTool()); // trains everything |
| tools.add(new ParserEvaluatorTool()); |
| tools.add(new ParserConverterTool()); // trains everything |
| tools.add(new BuildModelUpdaterTool()); // re-trains build model |
| tools.add(new CheckModelUpdaterTool()); // re-trains build model |
| tools.add(new TaggerModelReplacerTool()); |
| |
| // Entity Linker |
| tools.add(new EntityLinkerTool()); |
| |
| // Language Model |
| tools.add(new NGramLanguageModelTool()); |
| |
| for (CmdLineTool tool : tools) { |
| toolLookupMap.put(tool.getName(), tool); |
| } |
| |
| toolLookupMap = Collections.unmodifiableMap(toolLookupMap); |
| } |
| |
| /** |
| * @return a set which contains all tool names |
| */ |
| public static Set<String> getToolNames() { |
| return toolLookupMap.keySet(); |
| } |
| |
| /** |
| * @return a read only map with tool names and instances |
| */ |
| public static Map<String, CmdLineTool> getToolLookupMap() { |
| return toolLookupMap; |
| } |
| |
| private static void usage() { |
| System.out.print("OpenNLP " + Version.currentVersion().toString() + ". "); |
| System.out.println("Usage: " + CMD + " TOOL"); |
| System.out.println("where TOOL is one of:"); |
| |
| // distance of tool name from line start |
| int numberOfSpaces = -1; |
| for (String toolName : toolLookupMap.keySet()) { |
| if (toolName.length() > numberOfSpaces) { |
| numberOfSpaces = toolName.length(); |
| } |
| } |
| numberOfSpaces = numberOfSpaces + 4; |
| |
| for (CmdLineTool tool : toolLookupMap.values()) { |
| |
| System.out.print(" " + tool.getName()); |
| |
| for (int i = 0; i < StrictMath.abs(tool.getName().length() - numberOfSpaces); i++) { |
| System.out.print(" "); |
| } |
| |
| System.out.println(tool.getShortDescription()); |
| } |
| |
| System.out.println("All tools print help when invoked with help parameter"); |
| System.out.println("Example: opennlp SimpleTokenizer help"); |
| } |
| |
| public static void main(String[] args) { |
| |
| if (args.length == 0) { |
| usage(); |
| System.exit(0); |
| } |
| |
| final long startTime = System.currentTimeMillis(); |
| String[] toolArguments = new String[args.length - 1]; |
| System.arraycopy(args, 1, toolArguments, 0, toolArguments.length); |
| |
| String toolName = args[0]; |
| |
| //check for format |
| String formatName = StreamFactoryRegistry.DEFAULT_FORMAT; |
| int idx = toolName.indexOf("."); |
| if (-1 < idx) { |
| formatName = toolName.substring(idx + 1); |
| toolName = toolName.substring(0, idx); |
| } |
| CmdLineTool tool = toolLookupMap.get(toolName); |
| |
| try { |
| if (null == tool) { |
| throw new TerminateToolException(1, "Tool " + toolName + " is not found."); |
| } |
| |
| if ((0 == toolArguments.length && tool.hasParams()) || |
| 0 < toolArguments.length && "help".equals(toolArguments[0])) { |
| if (tool instanceof TypedCmdLineTool) { |
| System.out.println(((TypedCmdLineTool<?>) tool).getHelp(formatName)); |
| } else if (tool instanceof BasicCmdLineTool) { |
| System.out.println(tool.getHelp()); |
| } |
| |
| System.exit(0); |
| } |
| |
| if (tool instanceof TypedCmdLineTool) { |
| ((TypedCmdLineTool<?>) tool).run(formatName, toolArguments); |
| } else if (tool instanceof BasicCmdLineTool) { |
| if (-1 == idx) { |
| ((BasicCmdLineTool) tool).run(toolArguments); |
| } else { |
| throw new TerminateToolException(1, "Tool " + toolName + " does not support formats."); |
| } |
| } else { |
| throw new TerminateToolException(1, "Tool " + toolName + " is not supported."); |
| } |
| } |
| catch (TerminateToolException e) { |
| |
| if (e.getMessage() != null) { |
| System.err.println(e.getMessage()); |
| } |
| |
| if (e.getCause() != null) { |
| System.err.println(e.getCause().getMessage()); |
| e.getCause().printStackTrace(System.err); |
| } |
| |
| System.exit(e.getCode()); |
| } |
| |
| final long endTime = System.currentTimeMillis(); |
| System.err.format("Execution time: %.3f seconds\n", (endTime - startTime) / 1000.0); |
| } |
| } |