| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package opennlp.tools.coref; |
| |
| import java.io.BufferedReader; |
| import java.io.DataInputStream; |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.FileOutputStream; |
| import java.io.FileReader; |
| import java.io.IOException; |
| import java.util.zip.GZIPInputStream; |
| |
| import opennlp.tools.dictionary.Dictionary; |
| import opennlp.tools.ml.maxent.io.BinaryGISModelReader; |
| import opennlp.tools.ml.model.AbstractModel; |
| import opennlp.tools.util.StringList; |
| import opennlp.tools.util.model.BaseModel; |
| |
| public class CorefModel extends BaseModel { |
| |
| private static final String COMPONENT_NAME = "Coref"; |
| |
| private static final String MALE_NAMES_DICTIONARY_ENTRY_NAME = "maleNames.dictionary"; |
| |
| private static final String FEMALE_NAMES_DICTIONARY_ENTRY_NAME = "femaleNames.dictionary"; |
| |
| private static final String NUMBER_MODEL_ENTRY_NAME = "number.model"; |
| |
| // private Map<String, Set<String>> acronyms; |
| |
| private static final String COMMON_NOUN_RESOLVER_MODEL_ENTRY_NAME = |
| "commonNounResolver.model"; |
| |
| private static final String DEFINITE_NOUN_RESOLVER_MODEL_ENTRY_NAME = |
| "definiteNounResolver.model"; |
| |
| private static final String SPEECH_PRONOUN_RESOLVER_MODEL_ENTRY_NAME = |
| "speechPronounResolver.model"; |
| |
| // TODO: Add IModel |
| |
| private static final String PLURAL_NOUN_RESOLVER_MODEL_ENTRY_NAME = |
| "pluralNounResolver.model"; |
| |
| private static final String SINGULAR_PRONOUN_RESOLVER_MODEL_ENTRY_NAME = |
| "singularPronounResolver.model"; |
| |
| private static final String PROPER_NOUN_RESOLVER_MODEL_ENTRY_NAME = |
| "properNounResolver.model"; |
| |
| private static final String SIM_MODEL_ENTRY_NAME = "sim.model"; |
| |
| private static final String PLURAL_PRONOUN_RESOLVER_MODEL_ENTRY_NAME = |
| "pluralPronounResolver.model"; |
| |
| public CorefModel(String languageCode, String project) throws IOException { |
| super(COMPONENT_NAME, languageCode, null); |
| |
| artifactMap.put(MALE_NAMES_DICTIONARY_ENTRY_NAME, |
| readNames(project + File.separator + "gen.mas")); |
| |
| artifactMap.put(FEMALE_NAMES_DICTIONARY_ENTRY_NAME, |
| readNames(project + File.separator + "gen.fem")); |
| |
| // TODO: Create acronyms |
| |
| artifactMap.put(NUMBER_MODEL_ENTRY_NAME, |
| createModel(project + File.separator + "num.bin.gz")); |
| |
| artifactMap.put(COMMON_NOUN_RESOLVER_MODEL_ENTRY_NAME, |
| createModel(project + File.separator + "cmodel.bin.gz")); |
| |
| artifactMap.put(DEFINITE_NOUN_RESOLVER_MODEL_ENTRY_NAME, |
| createModel(project + File.separator + "defmodel.bin.gz")); |
| |
| |
| artifactMap.put(SPEECH_PRONOUN_RESOLVER_MODEL_ENTRY_NAME, |
| createModel(project + File.separator + "fmodel.bin.gz")); |
| |
| // TODO: IModel |
| |
| artifactMap.put(PLURAL_NOUN_RESOLVER_MODEL_ENTRY_NAME, |
| createModel(project + File.separator + "plmodel.bin.gz")); |
| |
| artifactMap.put(SINGULAR_PRONOUN_RESOLVER_MODEL_ENTRY_NAME, |
| createModel(project + File.separator + "pmodel.bin.gz")); |
| |
| artifactMap.put(PROPER_NOUN_RESOLVER_MODEL_ENTRY_NAME, |
| createModel(project + File.separator + "pnmodel.bin.gz")); |
| |
| artifactMap.put(SIM_MODEL_ENTRY_NAME, |
| createModel(project + File.separator + "sim.bin.gz")); |
| |
| artifactMap.put(PLURAL_PRONOUN_RESOLVER_MODEL_ENTRY_NAME, |
| createModel(project + File.separator + "tmodel.bin.gz")); |
| |
| checkArtifactMap(); |
| } |
| |
| private AbstractModel createModel(String fileName) throws IOException { |
| return new BinaryGISModelReader(new DataInputStream(new GZIPInputStream( |
| new FileInputStream(fileName)))).getModel(); |
| } |
| |
| private static Dictionary readNames(String nameFile) throws IOException { |
| Dictionary names = new Dictionary(); |
| |
| BufferedReader nameReader = new BufferedReader(new FileReader(nameFile)); |
| for (String line = nameReader.readLine(); line != null; line = nameReader.readLine()) { |
| names.put(new StringList(line)); |
| } |
| |
| return names; |
| } |
| |
| public Dictionary getMaleNames() { |
| return (Dictionary) artifactMap.get(MALE_NAMES_DICTIONARY_ENTRY_NAME); |
| } |
| |
| public Dictionary getFemaleNames() { |
| return (Dictionary) artifactMap.get(FEMALE_NAMES_DICTIONARY_ENTRY_NAME); |
| } |
| |
| public AbstractModel getNumberModel() { |
| return (AbstractModel) artifactMap.get(NUMBER_MODEL_ENTRY_NAME); |
| } |
| |
| // public AcronymDictionary getAcronyms() { |
| // return null; |
| // } |
| |
| public AbstractModel getCommonNounResolverModel() { |
| return (AbstractModel) artifactMap.get(COMMON_NOUN_RESOLVER_MODEL_ENTRY_NAME); |
| } |
| |
| public AbstractModel getDefiniteNounResolverModel() { |
| return (AbstractModel) artifactMap.get(DEFINITE_NOUN_RESOLVER_MODEL_ENTRY_NAME); |
| } |
| |
| public AbstractModel getSpeechPronounResolverModel() { |
| return (AbstractModel) artifactMap.get(SPEECH_PRONOUN_RESOLVER_MODEL_ENTRY_NAME); |
| } |
| |
| // TODO: Where is this model used ? |
| // public AbstractModel getIModel() { |
| // return null; |
| // } |
| |
| public AbstractModel getPluralNounResolverModel() { |
| return (AbstractModel) artifactMap.get(PLURAL_NOUN_RESOLVER_MODEL_ENTRY_NAME); |
| } |
| |
| public AbstractModel getSingularPronounResolverModel() { |
| return (AbstractModel) artifactMap.get(SINGULAR_PRONOUN_RESOLVER_MODEL_ENTRY_NAME); |
| } |
| |
| public AbstractModel getProperNounResolverModel() { |
| return (AbstractModel) artifactMap.get(PROPER_NOUN_RESOLVER_MODEL_ENTRY_NAME); |
| } |
| |
| public AbstractModel getSimModel() { |
| return (AbstractModel) artifactMap.get(SIM_MODEL_ENTRY_NAME); |
| } |
| |
| public AbstractModel getPluralPronounResolverModel() { |
| return (AbstractModel) artifactMap.get(PLURAL_PRONOUN_RESOLVER_MODEL_ENTRY_NAME); |
| } |
| |
| public static void main(String[] args) throws IOException { |
| |
| if (args.length != 1) { |
| System.err.println("Usage: CorefModel projectDirectory"); |
| System.exit(-1); |
| } |
| |
| String projectDirectory = args[0]; |
| |
| CorefModel model = new CorefModel("en", projectDirectory); |
| model.serialize(new FileOutputStream("coref.model")); |
| } |
| } |