opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java - opennlp-sandbox - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package opennlp.tools.coref.resolver;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.regex.Pattern;

 import opennlp.tools.coref.DiscourseEntity;
 import opennlp.tools.coref.mention.MentionContext;

 /**
  * Resolves singular third-person pronouns such as "he", "she", "it" and their various forms.
  *
  * <p>A mention is handled by this resolver when its head token carries a tag starting with
  * {@code PRP} and its head token text matches
  * {@link ResolverUtils#singularThirdPersonPronounPattern}. Candidate entities whose last
  * extent lies more than {@code numSentencesBack} sentences before the mention are out of range.
  */
 public class SingularPronounResolver extends MaxentResolver {

   // NOTE(review): not read or written anywhere in this class; kept because the field is
   // package-visible and may be referenced elsewhere in the package.
   int mode;

   // NOTE(review): not read or written anywhere in this class; kept for the same reason as above.
   Pattern PronounPattern;

   /**
    * Creates a resolver backed by the "pmodel" model of the given project.
    *
    * @param projectName forwarded unchanged to the {@link MaxentResolver} constructor
    * @param m the resolver mode, forwarded unchanged to the {@link MaxentResolver} constructor
    * @throws IOException if the superclass constructor fails with an I/O error
    */
   public SingularPronounResolver(String projectName, ResolverMode m) throws IOException {
     // 30 is presumably the number of entities to look back over -- confirm in MaxentResolver.
     super(projectName, "pmodel", m, 30);
     this.numSentencesBack = 2;
   }

   /**
    * Creates a resolver backed by the "pmodel" model of the given project, using the supplied
    * non-referential resolver.
    *
    * @param projectName forwarded unchanged to the {@link MaxentResolver} constructor
    * @param m the resolver mode, forwarded unchanged to the {@link MaxentResolver} constructor
    * @param nonReferentialResolver forwarded unchanged to the {@link MaxentResolver} constructor
    * @throws IOException if the superclass constructor fails with an I/O error
    */
   public SingularPronounResolver(String projectName, ResolverMode m,
       NonReferentialResolver nonReferentialResolver) throws IOException {
     super(projectName, "pmodel", m, 30, nonReferentialResolver);
     this.numSentencesBack = 2;
   }

   /**
    * Tells whether this resolver handles the given mention.
    *
    * @param mention the mention to test
    * @return {@code true} if the mention's head token is tagged {@code PRP*} and its text matches
    *     the singular third-person pronoun pattern
    */
   public boolean canResolve(MentionContext mention) {
     return isSingularThirdPersonPronoun(mention);
   }

   /**
    * Extends the superclass features with pronoun-match, context, distance and mention-count
    * features when a candidate entity is supplied.
    *
    * @param mention the pronoun mention being resolved
    * @param entity the candidate entity, or {@code null}, in which case only the superclass
    *     features are returned
    * @return a new list holding the superclass features followed by the pronoun/referent features
    */
   @Override
   protected List<String> getFeatures(MentionContext mention, DiscourseEntity entity) {
     List<String> features = new ArrayList<String>(super.getFeatures(mention, entity));
     if (entity != null) {
       // Context features are taken from the entity's last extent.
       MentionContext lastExtent = entity.getLastExtent();
       features.addAll(ResolverUtils.getPronounMatchFeatures(mention, entity));
       features.addAll(ResolverUtils.getContextFeatures(lastExtent));
       features.addAll(ResolverUtils.getDistanceFeatures(mention, entity));
       features.add(ResolverUtils.getMentionCountFeature(entity));
     }
     return features;
   }

   /**
    * Excludes an entity when the superclass excludes it, or when one of the entity's mentions is
    * a singular third-person pronoun whose gender is not {@code "u"} and differs from the gender
    * of the mention being resolved.
    *
    * @param mention the pronoun mention being resolved
    * @param entity the candidate entity
    * @return {@code true} if the entity must not be considered as a referent for the mention
    */
   @Override
   public boolean excluded(MentionContext mention, DiscourseEntity entity) {
     if (super.excluded(mention, entity)) {
       return true;
     }
     String mentionGender = null;

     for (Iterator<MentionContext> ei = entity.getMentions(); ei.hasNext();) {
       MentionContext entityMention = ei.next();
       // The pronoun test applies to the entity's own mention: only the entity's singular
       // third-person pronouns take part in the gender comparison.
       if (!isSingularThirdPersonPronoun(entityMention)) {
         continue;
       }
       if (mentionGender == null) { // lazy initialization
         mentionGender = ResolverUtils.getPronounGender(mention.getHeadTokenText());
       }
       String entityGender = ResolverUtils.getPronounGender(entityMention.getHeadTokenText());
       // An entity gender of "u" never causes exclusion (presumably "unknown" -- confirm in
       // ResolverUtils.getPronounGender).
       if (!entityGender.equals("u") && !mentionGender.equals(entityGender)) {
         return true;
       }
     }
     return false;
   }

   /**
    * Tells whether the entity's last extent is too many sentences before the mention.
    *
    * @param mention the pronoun mention being resolved
    * @param entity the candidate entity
    * @return {@code true} if the mention's sentence number exceeds the sentence number of the
    *     entity's last extent by more than {@code numSentencesBack}
    */
   @Override
   protected boolean outOfRange(MentionContext mention, DiscourseEntity entity) {
     MentionContext lastExtent = entity.getLastExtent();
     return mention.getSentenceNumber() - lastExtent.getSentenceNumber() > numSentencesBack;
   }

   /** Returns whether the context's head token is a PRP*-tagged singular third-person pronoun. */
   private static boolean isSingularThirdPersonPronoun(MentionContext context) {
     String tag = context.getHeadTokenTag();
     return tag != null
         && tag.startsWith("PRP")
         && ResolverUtils.singularThirdPersonPronounPattern
             .matcher(context.getHeadTokenText()).matches();
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package opennlp.tools.coref.resolver;

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.Iterator;
	import java.util.List;
	import java.util.regex.Pattern;

	import opennlp.tools.coref.DiscourseEntity;
	import opennlp.tools.coref.mention.MentionContext;

	/**
	 * Resolves singular third-person pronouns such as "he", "she", "it" and their various forms.
	 *
	 * <p>A mention is handled by this resolver when its head token carries a tag starting with
	 * {@code PRP} and its head token text matches
	 * {@link ResolverUtils#singularThirdPersonPronounPattern}. Candidate entities whose last
	 * extent lies more than {@code numSentencesBack} sentences before the mention are out of range.
	 */
	public class SingularPronounResolver extends MaxentResolver {

		// NOTE(review): not read or written anywhere in this class; kept because the field is
		// package-visible and may be referenced elsewhere in the package.
		int mode;

		// NOTE(review): not read or written anywhere in this class; kept for the same reason as above.
		Pattern PronounPattern;

		/**
		 * Creates a resolver backed by the "pmodel" model of the given project.
		 *
		 * @param projectName forwarded unchanged to the {@link MaxentResolver} constructor
		 * @param m the resolver mode, forwarded unchanged to the {@link MaxentResolver} constructor
		 * @throws IOException if the superclass constructor fails with an I/O error
		 */
		public SingularPronounResolver(String projectName, ResolverMode m) throws IOException {
			// 30 is presumably the number of entities to look back over -- confirm in MaxentResolver.
			super(projectName, "pmodel", m, 30);
			this.numSentencesBack = 2;
		}

		/**
		 * Creates a resolver backed by the "pmodel" model of the given project, using the supplied
		 * non-referential resolver.
		 *
		 * @param projectName forwarded unchanged to the {@link MaxentResolver} constructor
		 * @param m the resolver mode, forwarded unchanged to the {@link MaxentResolver} constructor
		 * @param nonReferentialResolver forwarded unchanged to the {@link MaxentResolver} constructor
		 * @throws IOException if the superclass constructor fails with an I/O error
		 */
		public SingularPronounResolver(String projectName, ResolverMode m,
				NonReferentialResolver nonReferentialResolver) throws IOException {
			super(projectName, "pmodel", m, 30, nonReferentialResolver);
			this.numSentencesBack = 2;
		}

		/**
		 * Tells whether this resolver handles the given mention.
		 *
		 * @param mention the mention to test
		 * @return {@code true} if the mention's head token is tagged {@code PRP*} and its text
		 *     matches the singular third-person pronoun pattern
		 */
		public boolean canResolve(MentionContext mention) {
			return isSingularThirdPersonPronoun(mention);
		}

		/**
		 * Extends the superclass features with pronoun-match, context, distance and mention-count
		 * features when a candidate entity is supplied.
		 *
		 * @param mention the pronoun mention being resolved
		 * @param entity the candidate entity, or {@code null}, in which case only the superclass
		 *     features are returned
		 * @return a new list holding the superclass features followed by the pronoun/referent
		 *     features
		 */
		@Override
		protected List<String> getFeatures(MentionContext mention, DiscourseEntity entity) {
			List<String> features = new ArrayList<String>(super.getFeatures(mention, entity));
			if (entity != null) {
				// Context features are taken from the entity's last extent.
				MentionContext lastExtent = entity.getLastExtent();
				features.addAll(ResolverUtils.getPronounMatchFeatures(mention, entity));
				features.addAll(ResolverUtils.getContextFeatures(lastExtent));
				features.addAll(ResolverUtils.getDistanceFeatures(mention, entity));
				features.add(ResolverUtils.getMentionCountFeature(entity));
			}
			return features;
		}

		/**
		 * Excludes an entity when the superclass excludes it, or when one of the entity's mentions
		 * is a singular third-person pronoun whose gender is not {@code "u"} and differs from the
		 * gender of the mention being resolved.
		 *
		 * @param mention the pronoun mention being resolved
		 * @param entity the candidate entity
		 * @return {@code true} if the entity must not be considered as a referent for the mention
		 */
		@Override
		public boolean excluded(MentionContext mention, DiscourseEntity entity) {
			if (super.excluded(mention, entity)) {
				return true;
			}
			String mentionGender = null;

			for (Iterator<MentionContext> ei = entity.getMentions(); ei.hasNext();) {
				MentionContext entityMention = ei.next();
				// The pronoun test applies to the entity's own mention: only the entity's singular
				// third-person pronouns take part in the gender comparison.
				if (!isSingularThirdPersonPronoun(entityMention)) {
					continue;
				}
				if (mentionGender == null) { // lazy initialization
					mentionGender = ResolverUtils.getPronounGender(mention.getHeadTokenText());
				}
				String entityGender = ResolverUtils.getPronounGender(entityMention.getHeadTokenText());
				// An entity gender of "u" never causes exclusion (presumably "unknown" -- confirm in
				// ResolverUtils.getPronounGender).
				if (!entityGender.equals("u") && !mentionGender.equals(entityGender)) {
					return true;
				}
			}
			return false;
		}

		/**
		 * Tells whether the entity's last extent is too many sentences before the mention.
		 *
		 * @param mention the pronoun mention being resolved
		 * @param entity the candidate entity
		 * @return {@code true} if the mention's sentence number exceeds the sentence number of the
		 *     entity's last extent by more than {@code numSentencesBack}
		 */
		@Override
		protected boolean outOfRange(MentionContext mention, DiscourseEntity entity) {
			MentionContext lastExtent = entity.getLastExtent();
			return mention.getSentenceNumber() - lastExtent.getSentenceNumber() > numSentencesBack;
		}

		/** Returns whether the context's head token is a PRP*-tagged singular third-person pronoun. */
		private static boolean isSingularThirdPersonPronoun(MentionContext context) {
			String tag = context.getHeadTokenTag();
			return tag != null
					&& tag.startsWith("PRP")
					&& ResolverUtils.singularThirdPersonPronounPattern
							.matcher(context.getHeadTokenText()).matches();
		}
	}