opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java - opennlp-sandbox - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package opennlp.tools.coref.resolver;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;

 import opennlp.tools.coref.DiscourseEntity;
 import opennlp.tools.coref.mention.MentionContext;

 /**
  * Resolves coreference between plural pronouns and their referents.
  */
 public class PluralPronounResolver extends MaxentResolver {

   /**
    * Largest allowed difference between the sentence number of a mention and the sentence
    * number of a candidate entity's last extent; consulted by
    * {@link #outOfRange(MentionContext, DiscourseEntity)}.
    */
   int NUM_SENTS_BACK_PRONOUNS = 2;

   /**
    * Creates a resolver by delegating to the {@code MaxentResolver} constructor with the
    * fixed arguments {@code "tmodel"} and {@code 30}.
    *
    * @param projectName forwarded unchanged to the superclass constructor
    * @param m the resolver mode, forwarded unchanged to the superclass constructor
    * @throws IOException if the superclass constructor throws it
    */
   public PluralPronounResolver(String projectName, ResolverMode m) throws IOException {
     super(projectName, "tmodel", m, 30);
   }

   /**
    * Creates a resolver by delegating to the {@code MaxentResolver} constructor with the
    * fixed arguments {@code "tmodel"} and {@code 30}, plus a non-referential resolver.
    *
    * @param projectName forwarded unchanged to the superclass constructor
    * @param m the resolver mode, forwarded unchanged to the superclass constructor
    * @param nrr the non-referential resolver, forwarded unchanged to the superclass constructor
    * @throws IOException if the superclass constructor throws it
    */
   public PluralPronounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr)
       throws IOException {
     super(projectName, "tmodel", m, 30, nrr);
   }

   /**
    * Collects the features for pairing {@code mention} with {@code entity}.
    *
    * <p>The list always starts with the features produced by the superclass. When
    * {@code entity} is non-null it is extended, in this order, with the pronoun-match
    * features, the distance features, the context features of the entity's last extent and
    * the mention-count feature, all obtained from {@code ResolverUtils}.
    *
    * <p>A former experiment that also emitted per-extent lexical features (head word, head
    * type and named-entity type) is not part of the active feature set.
    *
    * @param mention the mention being resolved
    * @param entity the candidate entity, or {@code null} when there is no candidate
    * @return a new list holding the features described above
    */
   @Override
   protected List<String> getFeatures(MentionContext mention, DiscourseEntity entity) {
     List<String> collected = new ArrayList<String>(super.getFeatures(mention, entity));
     if (entity == null) {
       // No candidate referent: only the superclass features apply.
       return collected;
     }

     // Pronoun-with-referent features; the order of insertion is kept stable.
     collected.addAll(ResolverUtils.getPronounMatchFeatures(mention, entity));
     MentionContext lastExtent = entity.getLastExtent();
     collected.addAll(ResolverUtils.getDistanceFeatures(mention, entity));
     collected.addAll(ResolverUtils.getContextFeatures(lastExtent));
     collected.add(ResolverUtils.getMentionCountFeature(entity));
     return collected;
   }

   /**
    * Tells whether {@code entity} lies too far behind {@code mention} to be considered.
    *
    * @param mention the mention being resolved
    * @param entity the candidate entity; its last extent supplies the reference sentence number
    * @return {@code true} when the mention's sentence number exceeds that of the entity's
    *     last extent by more than {@code NUM_SENTS_BACK_PRONOUNS}
    */
   @Override
   protected boolean outOfRange(MentionContext mention, DiscourseEntity entity) {
     MentionContext lastExtent = entity.getLastExtent();
     boolean tooFarBack =
         mention.getSentenceNumber() - lastExtent.getSentenceNumber() > NUM_SENTS_BACK_PRONOUNS;
     return tooFarBack;
   }

   /**
    * Decides whether this resolver handles {@code mention}.
    *
    * @param mention the mention to inspect
    * @return {@code true} when the head token tag is non-null, starts with {@code "PRP"},
    *     and the head token text matches
    *     {@code ResolverUtils.pluralThirdPersonPronounPattern}; {@code false} otherwise
    */
   public boolean canResolve(MentionContext mention) {
     String headTag = mention.getHeadTokenTag();
     if (headTag == null || !headTag.startsWith("PRP")) {
       return false;
     }
     return ResolverUtils.pluralThirdPersonPronounPattern
         .matcher(mention.getHeadTokenText())
         .matches();
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package opennlp.tools.coref.resolver;

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.List;

	import opennlp.tools.coref.DiscourseEntity;
	import opennlp.tools.coref.mention.MentionContext;

	/**
	* Resolves coreference between plural pronouns and their referents.
	*/
	public class PluralPronounResolver extends MaxentResolver {

		/**
		 * Largest allowed difference between the sentence number of a mention and the sentence
		 * number of a candidate entity's last extent; consulted by
		 * {@link #outOfRange(MentionContext, DiscourseEntity)}.
		 */
		int NUM_SENTS_BACK_PRONOUNS = 2;

		/**
		 * Creates a resolver by delegating to the {@code MaxentResolver} constructor with the
		 * fixed arguments {@code "tmodel"} and {@code 30}.
		 *
		 * @param projectName forwarded unchanged to the superclass constructor
		 * @param m the resolver mode, forwarded unchanged to the superclass constructor
		 * @throws IOException if the superclass constructor throws it
		 */
		public PluralPronounResolver(String projectName, ResolverMode m) throws IOException {
			super(projectName, "tmodel", m, 30);
		}

		/**
		 * Creates a resolver by delegating to the {@code MaxentResolver} constructor with the
		 * fixed arguments {@code "tmodel"} and {@code 30}, plus a non-referential resolver.
		 *
		 * @param projectName forwarded unchanged to the superclass constructor
		 * @param m the resolver mode, forwarded unchanged to the superclass constructor
		 * @param nrr the non-referential resolver, forwarded unchanged to the superclass constructor
		 * @throws IOException if the superclass constructor throws it
		 */
		public PluralPronounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr)
				throws IOException {
			super(projectName, "tmodel", m, 30, nrr);
		}

		/**
		 * Collects the features for pairing {@code mention} with {@code entity}.
		 *
		 * <p>The list always starts with the features produced by the superclass. When
		 * {@code entity} is non-null it is extended, in this order, with the pronoun-match
		 * features, the distance features, the context features of the entity's last extent and
		 * the mention-count feature, all obtained from {@code ResolverUtils}.
		 *
		 * <p>A former experiment that also emitted per-extent lexical features (head word, head
		 * type and named-entity type) is not part of the active feature set.
		 *
		 * @param mention the mention being resolved
		 * @param entity the candidate entity, or {@code null} when there is no candidate
		 * @return a new list holding the features described above
		 */
		@Override
		protected List<String> getFeatures(MentionContext mention, DiscourseEntity entity) {
			List<String> collected = new ArrayList<String>(super.getFeatures(mention, entity));
			if (entity == null) {
				// No candidate referent: only the superclass features apply.
				return collected;
			}

			// Pronoun-with-referent features; the order of insertion is kept stable.
			collected.addAll(ResolverUtils.getPronounMatchFeatures(mention, entity));
			MentionContext lastExtent = entity.getLastExtent();
			collected.addAll(ResolverUtils.getDistanceFeatures(mention, entity));
			collected.addAll(ResolverUtils.getContextFeatures(lastExtent));
			collected.add(ResolverUtils.getMentionCountFeature(entity));
			return collected;
		}

		/**
		 * Tells whether {@code entity} lies too far behind {@code mention} to be considered.
		 *
		 * @param mention the mention being resolved
		 * @param entity the candidate entity; its last extent supplies the reference sentence number
		 * @return {@code true} when the mention's sentence number exceeds that of the entity's
		 *     last extent by more than {@code NUM_SENTS_BACK_PRONOUNS}
		 */
		@Override
		protected boolean outOfRange(MentionContext mention, DiscourseEntity entity) {
			MentionContext lastExtent = entity.getLastExtent();
			boolean tooFarBack =
					mention.getSentenceNumber() - lastExtent.getSentenceNumber() > NUM_SENTS_BACK_PRONOUNS;
			return tooFarBack;
		}

		/**
		 * Decides whether this resolver handles {@code mention}.
		 *
		 * @param mention the mention to inspect
		 * @return {@code true} when the head token tag is non-null, starts with {@code "PRP"},
		 *     and the head token text matches
		 *     {@code ResolverUtils.pluralThirdPersonPronounPattern}; {@code false} otherwise
		 */
		public boolean canResolve(MentionContext mention) {
			String headTag = mention.getHeadTokenTag();
			if (headTag == null || !headTag.startsWith("PRP")) {
				return false;
			}
			return ResolverUtils.pluralThirdPersonPronounPattern
					.matcher(mention.getHeadTokenText())
					.matches();
		}
	}