blob: 166b8ddd024e233b89ea6250d073335c65e47a28 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.coref.resolver;
import java.io.IOException;
import opennlp.tools.coref.DiscourseEntity;
import opennlp.tools.coref.DiscourseModel;
import opennlp.tools.coref.mention.MentionContext;
import opennlp.tools.coref.mention.Parse;
import opennlp.tools.util.CountedSet;
/**
* Default implementation of some methods in the {@link Resolver} interface.
*/
public abstract class AbstractResolver implements Resolver {
/**
* The number of previous entities that resolver should consider.
*/
protected int numEntitiesBack;
/**
* Debugging variable which specifies whether error output is generated
* if a class excludes as possibly coreferent mentions which are in-fact
* coreferent.
*/
protected boolean showExclusions;
/**
* Debugging variable which holds statistics about mention distances
* during training.
*/
protected CountedSet<Integer> distances;
/**
* The number of sentences back this resolver should look for a referent.
*/
protected int numSentencesBack;
public AbstractResolver(int neb) {
numEntitiesBack=neb;
showExclusions = true;
distances = new CountedSet<Integer>();
}
/**
* Returns the number of previous entities that resolver should consider.
*
* @return the number of previous entities that resolver should consider.
*/
protected int getNumEntities() {
return numEntitiesBack;
}
/**
* Specifies the number of sentences back this resolver should look for a referent.
*
* @param nsb the number of sentences back this resolver should look for a referent.
*/
public void setNumberSentencesBack(int nsb) {
numSentencesBack = nsb;
}
/**
* The number of entities that should be considered for resolution with the specified discourse model.
*
* @param dm The discourse model.
*
* @return number of entities that should be considered for resolution.
*/
protected int getNumEntities(DiscourseModel dm) {
return Math.min(dm.getNumEntities(),numEntitiesBack);
}
/**
* Returns the head parse for the specified mention.
*
* @param mention The mention.
*
* @return the head parse for the specified mention.
*/
protected Parse getHead(MentionContext mention) {
return mention.getHeadTokenParse();
}
/**
* Returns the index for the head word for the specified mention.
*
* @param mention The mention.
*
* @return the index for the head word for the specified mention.
*/
protected int getHeadIndex(MentionContext mention) {
Parse[] mtokens = mention.getTokenParses();
for (int ti=mtokens.length-1;ti>=0;ti--) {
Parse tok = mtokens[ti];
if (!tok.getSyntacticType().equals("POS") && !tok.getSyntacticType().equals(",") &&
!tok.getSyntacticType().equals(".")) {
return ti;
}
}
return mtokens.length-1;
}
/**
* Returns the text of the head word for the specified mention.
*
* @param mention The mention.
*
* @return The text of the head word for the specified mention.
*/
protected String getHeadString(MentionContext mention) {
return mention.getHeadTokenText().toLowerCase();
}
/**
* Determines if the specified entity is too far from the specified mention to be resolved to it.
* Once an entity has been determined to be out of range subsequent entities are not considered.
* To skip intermediate entities @see excluded.
*
* @param mention The mention which is being considered.
* @param entity The entity to which the mention is to be resolved.
*
* @return true is the entity is in range of the mention, false otherwise.
*/
protected boolean outOfRange(MentionContext mention, DiscourseEntity entity) {
return false;
}
/**
* Excludes entities which you are not compatible with the entity under consideration. The default
* implementation excludes entities whose last extent contains the extent under consideration.
* This prevents possessive pronouns from referring to the noun phrases they modify and other
* undesirable things.
*
* @param mention The mention which is being considered as referential.
* @param entity The entity to which the mention is to be resolved.
*
* @return true if the entity should be excluded, false otherwise.
*/
protected boolean excluded(MentionContext mention, DiscourseEntity entity) {
MentionContext cec = entity.getLastExtent();
return mention.getSentenceNumber() == cec.getSentenceNumber() &&
mention.getIndexSpan().getEnd() <= cec.getIndexSpan().getEnd();
}
public DiscourseEntity retain(MentionContext mention, DiscourseModel dm) {
int ei = 0;
if (mention.getId() == -1) {
return null;
}
for (; ei < dm.getNumEntities(); ei++) {
DiscourseEntity cde = dm.getEntity(ei);
MentionContext cec = cde.getLastExtent(); // candidate extent context
if (cec.getId() == mention.getId()) {
distances.add(ei);
return cde;
}
}
//System.err.println("AbstractResolver.retain: non-refering entity with id: "+ec.toText()+" id="+ec.id);
return null;
}
/**
* Returns the string of "_" delimited tokens for the specified mention.
*
* @param mention The mention.
*
* @return the string of "_" delimited tokens for the specified mention.
*/
protected String featureString(MentionContext mention){
StringBuilder fs = new StringBuilder();
Object[] mtokens =mention.getTokens();
fs.append(mtokens[0].toString());
for (int ti=1,tl=mtokens.length;ti<tl;ti++) {
fs.append("_").append(mtokens[ti].toString());
}
return fs.toString();
}
public void train() throws IOException {};
}