// blob: 38af1f7a47d27f4198fbfeee30e4f175b611a20b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.coref;
import java.io.IOException;
import opennlp.tools.coref.mention.HeadFinder;
import opennlp.tools.coref.mention.Mention;
import opennlp.tools.coref.mention.MentionContext;
import opennlp.tools.coref.mention.MentionFinder;
import opennlp.tools.coref.mention.Parse;
import opennlp.tools.coref.resolver.AbstractResolver;
import opennlp.tools.coref.sim.Gender;
import opennlp.tools.coref.sim.Number;
/**
 * Provides a default implementation of many of the methods in {@link Linker} that
 * most implementations of {@link Linker} will want to extend.
 */
public abstract class AbstractLinker implements Linker {

  /** The mention finder used to find mentions. */
  protected MentionFinder mentionFinder;

  /** Specifies whether debug print is generated. */
  protected boolean debug = true;

  /** The mode in which this linker is running. */
  protected LinkerMode mode;

  /** Instance used for returning the same linker for subsequent getInstance requests. */
  protected static Linker linker;

  /** The resolvers used by this Linker. */
  protected AbstractResolver[] resolvers;

  /** The names of the resolvers used by this Linker. */
  protected String[] resolverNames;

  /**
   * Array used to store the results of each call made to the linker. Slot {@code i} holds the
   * entity produced by {@code resolvers[i]} for the mention currently being resolved. This
   * class never allocates the array; it only writes into it.
   */
  protected DiscourseEntity[] entities;

  /**
   * The index of resolver which is used for singular pronouns. The constructor sets it to
   * {@code -1}, which matches no resolver index and therefore disables the singular pronoun
   * check in {@link #resolve(MentionContext, DiscourseModel)}.
   */
  protected int SINGULAR_PRONOUN;

  /** The name of the project where the coreference models are stored. */
  protected String corefProject;

  /** The head finder used in this linker. */
  protected HeadFinder headFinder;

  /**
   * Specifies whether coreferent mentions should be combined into a single entity.
   * Set this to true to combine them, false otherwise.
   */
  protected boolean useDiscourseModel;

  /**
   * Specifies whether mentions for which no resolver can be used should be left out of the
   * discourse model. When true such mentions are not added; when false they are added.
   */
  protected boolean removeUnresolvedMentions;

  /**
   * Creates a new linker using the models in the specified project directory and using the
   * specified mode. Coreferent mentions are combined into a single entity.
   *
   * @param project The location of the models or other data needed by this linker.
   * @param mode The mode the linker should be run in: testing, training, or evaluation.
   */
  public AbstractLinker(String project, LinkerMode mode) {
    this(project, mode, true);
  }

  /**
   * Creates a new linker using the models in the specified project directory, using the
   * specified mode, and combining coreferent entities based on the specified value.
   *
   * @param project The location of the models or other data needed by this linker.
   * @param mode The mode the linker should be run in: testing, training, or evaluation.
   * @param useDiscourseModel Specifies whether coreferent mention should be combined or not.
   */
  public AbstractLinker(String project, LinkerMode mode, boolean useDiscourseModel) {
    this.corefProject = project;
    this.mode = mode;
    this.SINGULAR_PRONOUN = -1;
    this.useDiscourseModel = useDiscourseModel;
    this.removeUnresolvedMentions = true;
  }

  /**
   * Resolves the specified mention to an entity in the specified discourse model
   * or creates a new entity for the mention.
   *
   * <p>Every resolver that can handle the mention stores its result in the matching slot of
   * {@link #entities}; the other slots are cleared. The results are then merged and, if the
   * mention is considered valid, the discourse model is updated with it.
   *
   * <p>NOTE(review): in {@code EVAL} mode {@code canResolve} is never set, so while
   * {@link #removeUnresolvedMentions} is true no mention is added to the model in that mode.
   * In an unrecognised mode the slot keeps whatever value an earlier call left in it.
   * Both behaviours are kept as they were; confirm whether they are intended.
   *
   * @param mention The mention to resolve.
   * @param discourseModel The discourse model of existing entities.
   */
  protected void resolve(MentionContext mention, DiscourseModel discourseModel) {
    boolean validEntity = true; // true if we should add this entity to the dm
    boolean canResolve = false;

    for (int ri = 0; ri < resolvers.length; ri++) {
      if (!resolvers[ri].canResolve(mention)) {
        entities[ri] = null;
        continue;
      }

      if (mode == LinkerMode.TEST) {
        entities[ri] = resolvers[ri].resolve(mention, discourseModel);
        canResolve = true;
      }
      else if (mode == LinkerMode.TRAIN) {
        entities[ri] = resolvers[ri].retain(mention, discourseModel);
        // A match by the last resolver alone does not mark the mention as resolvable.
        if (ri + 1 != resolvers.length) {
          canResolve = true;
        }
      }
      else if (mode == LinkerMode.EVAL) {
        entities[ri] = resolvers[ri].retain(mention, discourseModel);
      }
      else {
        System.err.println("AbstractLinker.Unknown mode: " + mode);
      }

      // A singular pronoun without an antecedent is not added to the model.
      if (ri == SINGULAR_PRONOUN && entities[ri] == null) {
        validEntity = false;
      }
    }

    if (!canResolve && removeUnresolvedMentions) {
      validEntity = false;
    }

    // Merging happens even when the mention itself is not added to the model.
    DiscourseEntity de = checkForMerges(discourseModel, entities);
    if (validEntity) {
      updateExtent(discourseModel, mention, de, useDiscourseModel);
    }
  }

  /**
   * Returns the head finder used in this linker.
   *
   * @return The head finder; may be null if it has not been set.
   */
  public HeadFinder getHeadFinder() {
    return headFinder;
  }

  /**
   * Updates the specified discourse model with the specified mention as coreferent with the
   * specified entity.
   *
   * <p>When the discourse model is used and an entity is given, the mention is added to that
   * entity and the entity's gender and number are replaced by the mention's whenever the
   * mention carries the higher probability. In every other case a new entity is created for
   * the mention; if an entity was given, the new entity takes over its id so that the two
   * are co-indexed.
   *
   * @param dm The discourse model
   * @param mention The mention to be added to the specified entity.
   * @param entity The entity which is mentioned by the specified mention, or null if there is none.
   * @param useDiscourseModel Whether the mentions should be kept as an entity or simply co-indexed.
   */
  protected void updateExtent(DiscourseModel dm, MentionContext mention, DiscourseEntity entity,
      boolean useDiscourseModel) {
    if (useDiscourseModel && entity != null) {
      if (entity.getGenderProbability() < mention.getGenderProb()) {
        entity.setGender(mention.getGender());
        entity.setGenderProbability(mention.getGenderProb());
      }
      if (entity.getNumberProbability() < mention.getNumberProb()) {
        entity.setNumber(mention.getNumber());
        entity.setNumberProbability(mention.getNumberProb());
      }
      entity.addMention(mention);
      dm.mentionEntity(entity);
      return;
    }

    DiscourseEntity newEntity = new DiscourseEntity(mention, mention.getGender(),
        mention.getGenderProb(), mention.getNumber(), mention.getNumberProb());
    dm.addEntity(newEntity);
    // Only reachable with a non-null entity when the discourse model is not used:
    // the mention stays a separate entity but shares the id of its antecedent.
    if (entity != null) {
      newEntity.setId(entity.getId());
    }
  }

  /**
   * Merges the entities proposed by the individual resolvers into one.
   *
   * <p>The first non-null element of the array becomes the surviving entity and every later
   * non-null element that is a different object is merged into it through
   * {@code dm.mergeEntities(survivor, other, 1)}.
   *
   * @param dm The discourse model in which the merges are performed.
   * @param des The entities to merge; elements may be null.
   * @return The first non-null entity of the array, or null if the array is null, empty or
   *     contains only null elements.
   */
  protected DiscourseEntity checkForMerges(DiscourseModel dm, DiscourseEntity[] des) {
    // A linker without resolvers has nothing to merge; avoid indexing an empty array.
    if (des == null || des.length == 0) {
      return null;
    }

    DiscourseEntity merged = des[0];
    for (int di = 1; di < des.length; di++) {
      DiscourseEntity candidate = des[di];
      if (candidate == null) {
        continue;
      }
      if (merged != null && merged != candidate) {
        dm.mergeEntities(merged, candidate, 1);
      }
      else {
        merged = candidate;
      }
    }
    return merged;
  }

  /**
   * Resolves the specified mentions, in order, against a fresh discourse model and returns
   * the entities of that model.
   *
   * @param mentions The mentions to resolve.
   * @return The entities of the discourse model built from the mentions.
   */
  public DiscourseEntity[] getEntities(Mention[] mentions) {
    MentionContext[] extentContexts = constructMentionContexts(mentions);
    DiscourseModel dm = new DiscourseModel();
    for (MentionContext extentContext : extentContexts) {
      resolve(extentContext, dm);
    }
    return dm.getEntities();
  }

  /**
   * Runs the same resolution as {@link #getEntities(Mention[])} and discards the result.
   *
   * @param mentions The mentions to resolve.
   */
  public void setEntities(Mention[] mentions) {
    getEntities(mentions);
  }

  /**
   * Trains every resolver of this linker, in array order.
   *
   * @throws IOException If one of the resolvers fails while training.
   */
  public void train() throws IOException {
    for (AbstractResolver resolver : resolvers) {
      resolver.train();
    }
  }

  /**
   * Returns the mention finder used to find mentions.
   *
   * @return The mention finder; may be null if it has not been set.
   */
  public MentionFinder getMentionFinder() {
    return mentionFinder;
  }

  /**
   * Creates a mention context for each of the specified mentions.
   *
   * <p>Each context is given the position of the mention within its sentence, the number of
   * mentions in that sentence, the position of the mention in the array and its sentence
   * number. The number of mentions in a sentence is the length of the run of consecutive
   * mentions sharing the same sentence number. Unless the linker runs in {@code SIM} mode,
   * gender and number are computed for every context.
   *
   * @param mentions The mentions to create contexts for.
   * @return One context per mention, in the same order.
   * @throws NullPointerException If a mention has no parse.
   */
  public MentionContext[] constructMentionContexts(Mention[] mentions) {
    int mentionInSentenceIndex = -1;
    int numMentionsInSentence = -1;
    int prevSentenceIndex = -1;
    MentionContext[] contexts = new MentionContext[mentions.length];

    for (int mi = 0, mn = mentions.length; mi < mn; mi++) {
      Parse mentionParse = mentions[mi].getParse();
      if (mentionParse == null) {
        // Fail with the offending mention in the message instead of a bare dereference.
        throw new NullPointerException("no parse for " + mentions[mi]);
      }

      int sentenceIndex = mentionParse.getSentenceNumber();
      if (sentenceIndex != prevSentenceIndex) {
        // First mention of a new sentence: restart the position and count the run.
        mentionInSentenceIndex = 0;
        prevSentenceIndex = sentenceIndex;
        numMentionsInSentence = 0;
        for (int msi = mi; msi < mn; msi++) {
          if (sentenceIndex != mentions[msi].getParse().getSentenceNumber()) {
            break;
          }
          numMentionsInSentence++;
        }
      }

      MentionContext context = new MentionContext(mentions[mi], mentionInSentenceIndex,
          numMentionsInSentence, mi, sentenceIndex, getHeadFinder());
      contexts[mi] = context;
      context.setId(mentions[mi].getId());
      mentionInSentenceIndex++;

      if (mode != LinkerMode.SIM) {
        Gender g = computeGender(context);
        context.setGender(g.getType(), g.getConfidence());
        Number n = computeNumber(context);
        context.setNumber(n.getType(), n.getConfidence());
      }
    }
    return contexts;
  }

  /**
   * Determines the gender of the specified mention.
   *
   * @param mention The mention whose gender is computed.
   * @return The gender, whose type and confidence are stored on the mention context;
   *     must not be null.
   */
  protected abstract Gender computeGender(MentionContext mention);

  /**
   * Determines the number of the specified mention.
   *
   * @param mention The mention whose number is computed.
   * @return The number, whose type and confidence are stored on the mention context;
   *     must not be null.
   */
  protected abstract Number computeNumber(MentionContext mention);
}