blob: 85c8c594302c404712dd68df0e24607ff5de5ff0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.coref.resolver;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import opennlp.tools.coref.DiscourseEntity;
import opennlp.tools.coref.mention.MentionContext;
/**
* Resolves coreference between plural pronouns and their referents.
*/
public class PluralPronounResolver extends MaxentResolver {
int NUM_SENTS_BACK_PRONOUNS = 2;
public PluralPronounResolver(String projectName, ResolverMode m) throws IOException {
super(projectName, "tmodel", m, 30);
}
public PluralPronounResolver(String projectName, ResolverMode m,NonReferentialResolver nrr) throws IOException {
super(projectName, "tmodel", m, 30,nrr);
}
@Override
protected List<String> getFeatures(MentionContext mention, DiscourseEntity entity) {
List<String> features = new ArrayList<String>();
features.addAll(super.getFeatures(mention,entity));
//features.add("eid="+pc.id);
if (entity != null) { //generate pronoun w/ referent features
features.addAll(ResolverUtils.getPronounMatchFeatures(mention,entity));
MentionContext cec = entity.getLastExtent();
features.addAll(ResolverUtils.getDistanceFeatures(mention,entity));
features.addAll(ResolverUtils.getContextFeatures(cec));
features.add(ResolverUtils.getMentionCountFeature(entity));
/*
//lexical features
Set featureSet = new HashSet();
for (Iterator ei = entity.getExtents(); ei.hasNext();) {
MentionContext ec = (MentionContext) ei.next();
int headIndex = PTBHeadFinder.getInstance().getHeadIndex(ec.tokens);
Parse tok = (Parse) ec.tokens.get(headIndex);
featureSet.add("hw=" + tok.toString().toLowerCase());
if (ec.parse.isCoordinatedNounPhrase()) {
featureSet.add("ht=CC");
}
else {
featureSet.add("ht=" + tok.getSyntacticType());
}
if (ec.neType != null){
featureSet.add("ne="+ec.neType);
}
}
Iterator fset = featureSet.iterator();
while (fset.hasNext()) {
String f = (String) fset.next();
features.add(f);
}
*/
}
return (features);
}
@Override
protected boolean outOfRange(MentionContext mention, DiscourseEntity entity) {
MentionContext cec = entity.getLastExtent();
//System.err.println("MaxentPluralPronounResolver.outOfRange: ["+ec.toText()+" ("+ec.id+")] ["+cec.toText()+" ("+cec.id+")] ec.sentenceNumber=("+ec.sentenceNumber+")-cec.sentenceNumber=("+cec.sentenceNumber+") > "+NUM_SENTS_BACK_PRONOUNS);
return (mention.getSentenceNumber() - cec.getSentenceNumber() > NUM_SENTS_BACK_PRONOUNS);
}
public boolean canResolve(MentionContext mention) {
String tag = mention.getHeadTokenTag();
return (tag != null && tag.startsWith("PRP") && ResolverUtils.pluralThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches());
}
}