| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package opennlp.tools.coref.mention; |
| |
| import java.util.List; |
| |
| import opennlp.tools.coref.sim.Context; |
| import opennlp.tools.coref.sim.GenderEnum; |
| import opennlp.tools.coref.sim.NumberEnum; |
| import opennlp.tools.util.Span; |
| |
| /** |
| * Data structure representation of a mention with additional contextual information. |
| * The contextual information is used in performing coreference resolution. |
| */ |
| public class MentionContext extends Context { |
| |
| /** |
| * The index of first token which is not part of a descriptor. This is 0 if no descriptor is present. |
| */ |
| private int nonDescriptorStart; |
| |
| /** |
| * The Parse of the head constituent of this mention. |
| */ |
| private Parse head; |
| |
| /** |
| * Sentence-token-based span whose end is the last token of the mention. |
| */ |
| private Span indexSpan; |
| |
| /** |
| * Position of the NP in the sentence. |
| */ |
| private int nounLocation; |
| |
| /** |
| * Position of the NP in the document. |
| */ |
| private int nounNumber; |
| |
| /** |
| * Number of noun phrases in the sentence which contains this mention. |
| */ |
| private int maxNounLocation; |
| |
| /** |
| * Index of the sentence in the document which contains this mention. |
| */ |
| private int sentenceNumber; |
| |
| /** |
| * The token preceding this mention's maximal noun phrase. |
| */ |
| private Parse prevToken; |
| |
| /** |
| * The token following this mention's maximal noun phrase. |
| */ |
| private Parse nextToken; |
| |
| /** |
| * The token following this mention's basal noun phrase. |
| */ |
| private Parse basalNextToken; |
| |
| /** |
| * The parse of the mention's head word. |
| */ |
| private Parse headToken; |
| |
| /** |
| * The parse of the first word in the mention. |
| */ |
| private Parse firstToken; |
| |
| /** |
| * The text of the first word in the mention. |
| */ |
| private String firstTokenText; |
| |
| /** |
| * The pos-tag of the first word in the mention. |
| */ |
| private String firstTokenTag; |
| |
| /** |
| * The gender assigned to this mention. |
| */ |
| private GenderEnum gender; |
| |
| /** |
| * The probability associated with the gender assignment. |
| */ |
| private double genderProb; |
| |
| /** |
| * The number assigned to this mention. |
| */ |
| private NumberEnum number; |
| |
| /** |
| * The probability associated with the number assignment. |
| */ |
| private double numberProb; |
| |
| public MentionContext(Span span, Span headSpan, int entityId, Parse parse, String extentType, |
| String nameType, int mentionIndex, int mentionsInSentence, |
| int mentionIndexInDocument, int sentenceIndex, HeadFinder headFinder) { |
| super(span,headSpan,entityId,parse,extentType,nameType,headFinder); |
| nounLocation = mentionIndex; |
| maxNounLocation = mentionsInSentence; |
| nounNumber = mentionIndexInDocument; |
| sentenceNumber = sentenceIndex; |
| indexSpan = parse.getSpan(); |
| prevToken = parse.getPreviousToken(); |
| nextToken = parse.getNextToken(); |
| head = headFinder.getLastHead(parse); |
| List<Parse> headTokens = head.getTokens(); |
| tokens = headTokens.toArray(new Parse[headTokens.size()]); |
| basalNextToken = head.getNextToken(); |
| //System.err.println("MentionContext.init: "+ent+" "+ent.getEntityId()+" head="+head); |
| nonDescriptorStart = 0; |
| initHeads(headFinder.getHeadIndex(head)); |
| gender = GenderEnum.UNKNOWN; |
| this.genderProb = 0d; |
| number = NumberEnum.UNKNOWN; |
| this.numberProb = 0d; |
| } |
| /** |
| * Constructs context information for the specified mention. |
| * |
| * @param mention The mention object on which this object is based. |
| * @param mentionIndexInSentence The mention's position in the sentence. |
| * @param mentionsInSentence The number of mentions in the sentence. |
| * @param mentionIndexInDocument The index of this mention with respect to the document. |
| * @param sentenceIndex The index of the sentence which contains this mention. |
| * @param headFinder An object which provides head information. |
| */ |
| public MentionContext(Mention mention, int mentionIndexInSentence, int mentionsInSentence, |
| int mentionIndexInDocument, int sentenceIndex, HeadFinder headFinder) { |
| this(mention.getSpan(), mention.getHeadSpan(), mention.getId(), mention.getParse(), |
| mention.type, mention.nameType, mentionIndexInSentence, mentionsInSentence, |
| mentionIndexInDocument, sentenceIndex, headFinder); |
| } |
| |
| |
| /** |
| * Constructs context information for the specified mention. |
| * |
| * @param mentionParse Mention parse structure for which context is to be constructed. |
| * @param mentionIndex mention position in sentence. |
| * @param mentionsInSentence Number of mentions in the sentence. |
| * @param mentionsInDocument Number of mentions in the document. |
| * @param sentenceIndex Sentence number for this mention. |
| * @param nameType The named-entity type for this mention. |
| * @param headFinder Object which provides head information. |
| */ |
| /* |
| public MentionContext(Parse mentionParse, int mentionIndex, int mentionsInSentence, |
| int mentionsInDocument, int sentenceIndex, String nameType, HeadFinder headFinder) { |
| nounLocation = mentionIndex; |
| maxNounLocation = mentionsInDocument; |
| sentenceNumber = sentenceIndex; |
| parse = mentionParse; |
| indexSpan = mentionParse.getSpan(); |
| prevToken = mentionParse.getPreviousToken(); |
| nextToken = mentionParse.getNextToken(); |
| head = headFinder.getLastHead(mentionParse); |
| List headTokens = head.getTokens(); |
| tokens = (Parse[]) headTokens.toArray(new Parse[headTokens.size()]); |
| basalNextToken = head.getNextToken(); |
| //System.err.println("MentionContext.init: "+ent+" "+ent.getEntityId()+" head="+head); |
| indexHeadSpan = head.getSpan(); |
| nonDescriptorStart = 0; |
| initHeads(headFinder.getHeadIndex(head)); |
| this.neType= nameType; |
| if (getHeadTokenTag().startsWith("NN") && !getHeadTokenTag().startsWith("NNP")) { |
| //if (headTokenTag.startsWith("NNP") && neType != null) { |
| this.synsets = getSynsetSet(this); |
| } |
| else { |
| this.synsets=Collections.EMPTY_SET; |
| } |
| gender = GenderEnum.UNKNOWN; |
| this.genderProb = 0d; |
| number = NumberEnum.UNKNOWN; |
| this.numberProb = 0d; |
| } |
| */ |
| |
| private void initHeads(int headIndex) { |
| this.headTokenIndex = headIndex; |
| this.headToken = (Parse) tokens[getHeadTokenIndex()]; |
| this.headTokenText = headToken.toString(); |
| this.headTokenTag = headToken.getSyntacticType(); |
| this.firstToken = (Parse) tokens[0]; |
| this.firstTokenTag = firstToken.getSyntacticType(); |
| this.firstTokenText = firstToken.toString(); |
| } |
| |
| /** |
| * Returns the parse of the head token for this mention. |
| * |
| * @return the parse of the head token for this mention. |
| */ |
| public Parse getHeadTokenParse() { |
| return headToken; |
| } |
| |
| public String getHeadText() { |
| StringBuilder headText = new StringBuilder(); |
| for (int hsi = 0; hsi < tokens.length; hsi++) { |
| headText.append(" ").append(tokens[hsi].toString()); |
| } |
| return headText.toString().substring(1); |
| } |
| |
| public Parse getHead() { |
| return head; |
| } |
| |
| public int getNonDescriptorStart() { |
| return this.nonDescriptorStart; |
| } |
| |
| /** |
| * Returns a sentence-based token span for this mention. If this mention consist |
| * of the third, fourth, and fifth token, then this span will be 2..4. |
| * |
| * @return a sentence-based token span for this mention. |
| */ |
| public Span getIndexSpan() { |
| return indexSpan; |
| } |
| |
| /** |
| * Returns the index of the noun phrase for this mention in a sentence. |
| * |
| * @return the index of the noun phrase for this mention in a sentence. |
| */ |
| public int getNounPhraseSentenceIndex() { |
| return nounLocation; |
| } |
| |
| /** |
| * Returns the index of the noun phrase for this mention in a document. |
| * |
| * @return the index of the noun phrase for this mention in a document. |
| */ |
| public int getNounPhraseDocumentIndex() { |
| return nounNumber; |
| } |
| |
| /** |
| * Returns the index of the last noun phrase in the sentence containing this mention. |
| * This is one less than the number of noun phrases in the sentence which contains this mention. |
| * |
| * @return the index of the last noun phrase in the sentence containing this mention. |
| */ |
| public int getMaxNounPhraseSentenceIndex() { |
| return maxNounLocation; |
| } |
| |
| public Parse getNextTokenBasal() { |
| return basalNextToken; |
| } |
| |
| public Parse getPreviousToken() { |
| return prevToken; |
| } |
| |
| public Parse getNextToken() { |
| return nextToken; |
| } |
| |
| /** |
| * Returns the index of the sentence which contains this mention. |
| * |
| * @return the index of the sentence which contains this mention. |
| */ |
| public int getSentenceNumber() { |
| return sentenceNumber; |
| } |
| |
| /** |
| * Returns the parse for the first token in this mention. |
| * |
| * @return The parse for the first token in this mention. |
| */ |
| public Parse getFirstToken() { |
| return firstToken; |
| } |
| |
| /** |
| * Returns the text for the first token of the mention. |
| * |
| * @return The text for the first token of the mention. |
| */ |
| public String getFirstTokenText() { |
| return firstTokenText; |
| } |
| |
| /** |
| * Returns the pos-tag of the first token of this mention. |
| * |
| * @return the pos-tag of the first token of this mention. |
| */ |
| public String getFirstTokenTag() { |
| return firstTokenTag; |
| } |
| |
| /** |
| * Returns the parses for the tokens which are contained in this mention. |
| * |
| * @return An array of parses, in order, for each token contained in this mention. |
| */ |
| public Parse[] getTokenParses() { |
| return (Parse[]) tokens; |
| } |
| |
| /** |
| * Returns the text of this mention. |
| * |
| * @return A space-delimited string of the tokens of this mention. |
| */ |
| public String toText() { |
| return parse.toString(); |
| } |
| |
| /* |
| private static String[] getLemmas(MentionContext xec) { |
| //TODO: Try multi-word lemmas first. |
| String word = xec.getHeadTokenText(); |
| return DictionaryFactory.getDictionary().getLemmas(word,"NN"); |
| } |
| |
| private static Set getSynsetSet(MentionContext xec) { |
| //System.err.println("getting synsets for mention:"+xec.toText()); |
| Set synsetSet = new HashSet(); |
| String[] lemmas = getLemmas(xec); |
| for (int li = 0; li < lemmas.length; li++) { |
| String[] synsets = DictionaryFactory.getDictionary().getParentSenseKeys(lemmas[li],"NN",0); |
| for (int si=0,sn=synsets.length;si<sn;si++) { |
| synsetSet.add(synsets[si]); |
| } |
| } |
| return (synsetSet); |
| } |
| */ |
| |
| /** |
| * Assigns the specified gender with the specified probability to this mention. |
| * |
| * @param gender The gender to be given to this mention. |
| * @param probability The probability associated with the gender assignment. |
| */ |
| public void setGender(GenderEnum gender, double probability) { |
| this.gender = gender; |
| this.genderProb = probability; |
| } |
| |
| /** |
| * Returns the gender of this mention. |
| * |
| * @return The gender of this mention. |
| */ |
| public GenderEnum getGender() { |
| return gender; |
| } |
| |
| /** |
| * Returns the probability associated with the gender assignment. |
| * |
| * @return The probability associated with the gender assignment. |
| */ |
| public double getGenderProb() { |
| return genderProb; |
| } |
| |
| /** |
| * Assigns the specified number with the specified probability to this mention. |
| * |
| * @param number The number to be given to this mention. |
| * @param probability The probability associated with the number assignment. |
| */ |
| public void setNumber(NumberEnum number, double probability) { |
| this.number = number; |
| this.numberProb = probability; |
| } |
| |
| /** |
| * Returns the number of this mention. |
| * |
| * @return The number of this mention. |
| */ |
| public NumberEnum getNumber() { |
| return number; |
| } |
| |
| /** |
| * Returns the probability associated with the number assignment. |
| * |
| * @return The probability associated with the number assignment. |
| */ |
| public double getNumberProb() { |
| return numberProb; |
| } |
| } |