opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java - opennlp-sandbox - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package opennlp.tools.coref.resolver;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;

 import opennlp.tools.coref.DiscourseEntity;
 import opennlp.tools.coref.mention.MentionContext;

 /**
  * Resolves pronouns specific to quoted speech such as "you", "me", and "I".
  * <p>
  * A mention is handled by this resolver when its head token is tagged as a
  * pronoun (tag starting with "PRP") and its text matches
  * {@code ResolverUtils.speechPronounPattern}, or when its head token is tagged
  * as a proper noun (tag starting with "NNP"). See {@link #canResolve}.
  */
 public class SpeechPronounResolver extends MaxentResolver {

   /**
    * Creates a resolver backed by the "fmodel" model of the given project.
    * Candidates are restricted to the mention's own sentence
    * ({@code numSentencesBack = 0}), exclusions are not shown and the first
    * referent is preferred.
    *
    * @param projectName passed through to the {@link MaxentResolver} constructor.
    * @param m the resolver mode, passed through to the superclass.
    * @throws IOException if the superclass constructor fails with an I/O error.
    */
   public SpeechPronounResolver(String projectName, ResolverMode m) throws IOException {
     super(projectName,"fmodel", m, 30);
     this.numSentencesBack = 0;
     showExclusions = false;
     preferFirstReferent = true;
   }

   /**
    * Creates a resolver backed by the "fmodel" model of the given project which
    * additionally uses the supplied non-referential resolver.
    *
    * @param projectName passed through to the {@link MaxentResolver} constructor.
    * @param m the resolver mode, passed through to the superclass.
    * @param nrr the non-referential resolver, passed through to the superclass.
    * @throws IOException if the superclass constructor fails with an I/O error.
    */
   public SpeechPronounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr) throws IOException {
     super(projectName,"fmodel", m, 30,nrr);
     // NOTE(review): unlike the two-argument constructor, numSentencesBack is not
     // assigned here; confirm that the inherited value is the intended one.
     showExclusions = false;
     preferFirstReferent = true;
   }


   /**
    * Builds the feature list for a mention/candidate-entity pair.
    * <p>
    * The superclass features are always included. When an entity is given, the
    * pronoun match features are added, followed by features that depend on the
    * part-of-speech tags of the mention and of the entity's last extent:
    * <ul>
    * <li>both pronouns: the pair of head token texts;</li>
    * <li>mention is a proper noun: the mention's context features plus its name
    * type paired with the candidate's head token text;</li>
    * <li>otherwise: the candidate's context features plus its name type paired
    * with the mention's head token text.</li>
    * </ul>
    *
    * @param mention the mention being resolved.
    * @param entity the candidate entity, or {@code null}.
    * @return the list of features for this pair.
    */
   @Override
   protected List<String> getFeatures(MentionContext mention, DiscourseEntity entity) {
     List<String> features = new ArrayList<String>();
     features.addAll(super.getFeatures(mention, entity));
     if (entity == null) {
       return features;
     }
     features.addAll(ResolverUtils.getPronounMatchFeatures(mention,entity));
     MentionContext cec = entity.getLastExtent();
     String mentionTag = mention.getHeadTokenTag();
     if (mentionTag.startsWith("PRP") && cec.getHeadTokenTag().startsWith("PRP")) {
       features.add(mention.getHeadTokenText() + "," + cec.getHeadTokenText());
     }
     // The proper-noun test must look at the POS tag. It previously inspected the
     // token text, so this branch was effectively never taken. Models trained on
     // the old feature set may need to be retrained.
     else if (mentionTag.startsWith("NNP")) {
       features.addAll(ResolverUtils.getContextFeatures(mention));
       features.add(mention.getNameType() + "," + cec.getHeadTokenText());
     }
     else {
       features.addAll(ResolverUtils.getContextFeatures(cec));
       features.add(cec.getNameType() + "," + mention.getHeadTokenText());
     }
     return features;
   }

   /**
    * Determines whether the candidate entity is too far from the mention.
    *
    * @param mention the mention being resolved.
    * @param entity the candidate entity.
    * @return {@code true} if the mention's sentence number exceeds the sentence
    *     number of the entity's last extent by more than {@code numSentencesBack}.
    */
   @Override
   protected boolean outOfRange(MentionContext mention, DiscourseEntity entity) {
     MentionContext cec = entity.getLastExtent();
     return (mention.getSentenceNumber() - cec.getSentenceNumber() > numSentencesBack);
   }

   /**
    * Determines whether this resolver handles the given mention.
    *
    * @param mention the mention to test.
    * @return {@code true} if the head token tag is non-null and either starts
    *     with "PRP" while the head token text matches
    *     {@code ResolverUtils.speechPronounPattern}, or starts with "NNP".
    */
   public boolean canResolve(MentionContext mention) {
     String tag = mention.getHeadTokenTag();
     boolean fpp = tag != null && tag.startsWith("PRP") && ResolverUtils.speechPronounPattern.matcher(mention.getHeadTokenText()).matches();
     boolean pn = tag != null && tag.startsWith("NNP");
     return (fpp || pn);
   }

   /**
    * Determines whether the candidate entity must not be considered as an
    * antecedent of the mention.
    * <p>
    * A candidate is excluded when the superclass excludes it or when its last
    * extent is not something this resolver can resolve. Beyond that, a proper
    * noun mention only accepts a pronoun candidate from an entity with a single
    * mention, and a pronoun mention accepts any pronoun candidate but only
    * proper noun candidates from the same sentence.
    *
    * @param mention the mention being resolved.
    * @param entity the candidate entity.
    * @return {@code true} if the entity is excluded, {@code false} otherwise.
    */
   @Override
   protected boolean excluded(MentionContext mention, DiscourseEntity entity) {
     if (super.excluded(mention, entity)) {
       return true;
     }
     MentionContext cec = entity.getLastExtent();
     if (!canResolve(cec)) {
       return true;
     }
     String mentionTag = mention.getHeadTokenTag();
     if (mentionTag.startsWith("NNP")) { // mention is a proper noun
       if (cec.getHeadTokenTag().startsWith("NNP")) {
         return true; // both NNP
       }
       if (entity.getNumMentions() > 1) {
         return true;
       }
       // canResolve(cec) already held above, so this normally yields false.
       return !canResolve(cec);
     }
     if (mentionTag.startsWith("PRP")) { // mention is a speech pronoun
       // cec can be either a speech pronoun or a proper noun
       String candidateTag = cec.getHeadTokenTag();
       if (candidateTag.startsWith("NNP")) {
         // exclude antecedents not in the same sentence when they are not pronouns
         return (mention.getSentenceNumber() - cec.getSentenceNumber() != 0);
       }
       if (candidateTag.startsWith("PRP")) {
         return false;
       }
       // Defensive: not expected after the canResolve(cec) check above.
       System.err.println("Unexpected candidate exluded: "+cec.toText());
       return true;
     }
     System.err.println("Unexpected mention exluded: "+mention.toText());
     return true;
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package opennlp.tools.coref.resolver;

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.List;

	import opennlp.tools.coref.DiscourseEntity;
	import opennlp.tools.coref.mention.MentionContext;

	/**
	* Resolves pronouns specific to quoted speech such as "you", "me", and "I".
	*/
	public class SpeechPronounResolver extends MaxentResolver {

	public SpeechPronounResolver(String projectName, ResolverMode m) throws IOException {
	super(projectName,"fmodel", m, 30);
	this.numSentencesBack = 0;
	showExclusions = false;
	preferFirstReferent = true;
	}

	public SpeechPronounResolver(String projectName, ResolverMode m, NonReferentialResolver nrr) throws IOException {
	super(projectName,"fmodel", m, 30,nrr);
	showExclusions = false;
	preferFirstReferent = true;
	}


	@Override
	protected List<String> getFeatures(MentionContext mention, DiscourseEntity entity) {
	List<String> features = new ArrayList<String>();
	features.addAll(super.getFeatures(mention, entity));
	if (entity != null) {
	features.addAll(ResolverUtils.getPronounMatchFeatures(mention,entity));
	List<String> contexts = ResolverUtils.getContextFeatures(mention);
	MentionContext cec = entity.getLastExtent();
	if (mention.getHeadTokenTag().startsWith("PRP") && cec.getHeadTokenTag().startsWith("PRP")) {
	features.add(mention.getHeadTokenText() + "," + cec.getHeadTokenText());
	}
	else if (mention.getHeadTokenText().startsWith("NNP")) {
	for (int ci = 0, cl = contexts.size(); ci < cl; ci++) {
	features.add(contexts.get(ci));
	}
	features.add(mention.getNameType() + "," + cec.getHeadTokenText());
	}
	else {
	List<String> ccontexts = ResolverUtils.getContextFeatures(cec);
	for (int ci = 0, cl = ccontexts.size(); ci < cl; ci++) {
	features.add(ccontexts.get(ci));
	}
	features.add(cec.getNameType() + "," + mention.getHeadTokenText());
	}
	}
	return (features);
	}

	@Override
	protected boolean outOfRange(MentionContext mention, DiscourseEntity entity) {
	MentionContext cec = entity.getLastExtent();
	return (mention.getSentenceNumber() - cec.getSentenceNumber() > numSentencesBack);
	}

	public boolean canResolve(MentionContext mention) {
	String tag = mention.getHeadTokenTag();
	boolean fpp = tag != null && tag.startsWith("PRP") && ResolverUtils.speechPronounPattern.matcher(mention.getHeadTokenText()).matches();
	boolean pn = tag != null && tag.startsWith("NNP");
	return (fpp \|\| pn);
	}

	@Override
	protected boolean excluded(MentionContext mention, DiscourseEntity entity) {
	if (super.excluded(mention, entity)) {
	return true;
	}
	MentionContext cec = entity.getLastExtent();
	if (!canResolve(cec)) {
	return true;
	}
	if (mention.getHeadTokenTag().startsWith("NNP")) { //mention is a propernoun
	if (cec.getHeadTokenTag().startsWith("NNP")) {
	return true; // both NNP
	}
	else {
	if (entity.getNumMentions() > 1) {
	return true;
	}
	return !canResolve(cec);
	}
	}
	else if (mention.getHeadTokenTag().startsWith("PRP")){ // mention is a speech pronoun
	// cec can be either a speech pronoun or a propernoun
	if (cec.getHeadTokenTag().startsWith("NNP")) {
	//exclude antecedents not in the same sentence when they are not pronoun
	return (mention.getSentenceNumber() - cec.getSentenceNumber() != 0);
	}
	else if (cec.getHeadTokenTag().startsWith("PRP")){
	return false;
	}
	else {
	System.err.println("Unexpected candidate exluded: "+cec.toText());
	return true;
	}
	}
	else {
	System.err.println("Unexpected mention exluded: "+mention.toText());
	return true;
	}
	}

	}