/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| package opennlp.tools.coref.mention; |
| |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.HashSet; |
| import java.util.LinkedList; |
| import java.util.List; |
| import java.util.Set; |
| import java.util.Stack; |
| |
| import opennlp.tools.parser.Parse; |
| import opennlp.tools.parser.chunking.Parser; |
| import opennlp.tools.util.Span; |
| |
/**
 * Adapts an {@link opennlp.tools.parser.Parse} node to the API specified by
 * {@link opennlp.tools.coref.mention.Parse}, so that coreference resolution
 * can be run directly on the output of the parser.
 */
| public class DefaultParse extends AbstractParse { |
| |
| public static String[] NAME_TYPES = {"person", "organization", "location", "date", |
| "time", "percentage", "money"}; |
| |
| private Parse parse; |
| private int sentenceNumber; |
| private static Set<String> entitySet = new HashSet<String>(Arrays.asList(NAME_TYPES)); |
| |
| /** |
| * Initializes the current instance. |
| * |
| * @param parse |
| * @param sentenceNumber |
| */ |
| public DefaultParse(Parse parse, int sentenceNumber) { |
| this.parse = parse; |
| this.sentenceNumber = sentenceNumber; |
| |
| // Should we just maintain a parse id map !? |
| } |
| |
| public int getSentenceNumber() { |
| return sentenceNumber; |
| } |
| |
| public List<opennlp.tools.coref.mention.Parse> getNamedEntities() { |
| List<Parse> names = new ArrayList<Parse>(); |
| List<Parse> kids = new LinkedList<Parse>(Arrays.asList(parse.getChildren())); |
| while (kids.size() > 0) { |
| Parse p = kids.remove(0); |
| if (entitySet.contains(p.getType())) { |
| names.add(p); |
| } |
| else { |
| kids.addAll(Arrays.asList(p.getChildren())); |
| } |
| } |
| return createParses(names.toArray(new Parse[names.size()])); |
| } |
| |
| public List<opennlp.tools.coref.mention.Parse> getChildren() { |
| return createParses(parse.getChildren()); |
| } |
| |
| public List<opennlp.tools.coref.mention.Parse> getSyntacticChildren() { |
| List<Parse> kids = new ArrayList<Parse>(Arrays.asList(parse.getChildren())); |
| for (int ci = 0; ci < kids.size(); ci++) { |
| Parse kid = kids.get(ci); |
| if (entitySet.contains(kid.getType())) { |
| kids.remove(ci); |
| kids.addAll(ci, Arrays.asList(kid.getChildren())); |
| ci--; |
| } |
| } |
| return createParses(kids.toArray(new Parse[kids.size()])); |
| } |
| |
| public List<opennlp.tools.coref.mention.Parse> getTokens() { |
| List<Parse> tokens = new ArrayList<Parse>(); |
| List<Parse> kids = new LinkedList<Parse>(Arrays.asList(parse.getChildren())); |
| while (kids.size() > 0) { |
| Parse p = kids.remove(0); |
| if (p.isPosTag()) { |
| tokens.add(p); |
| } |
| else { |
| kids.addAll(0,Arrays.asList(p.getChildren())); |
| } |
| } |
| return createParses(tokens.toArray(new Parse[tokens.size()])); |
| } |
| |
| public String getSyntacticType() { |
| if (entitySet.contains(parse.getType())) { |
| return null; |
| } |
| else if (parse.getType().contains("#")) { |
| return parse.getType().substring(0, parse.getType().indexOf('#')); |
| } |
| else { |
| return parse.getType(); |
| } |
| } |
| |
| private List<opennlp.tools.coref.mention.Parse> createParses(Parse[] parses) { |
| List<opennlp.tools.coref.mention.Parse> newParses = new ArrayList<>(parses.length); |
| |
| for (int pi = 0, pn = parses.length; pi < pn;pi++) { |
| newParses.add(new DefaultParse(parses[pi],sentenceNumber)); |
| } |
| |
| return newParses; |
| } |
| |
| public String getEntityType() { |
| if (entitySet.contains(parse.getType())) { |
| return parse.getType(); |
| } |
| else { |
| return null; |
| } |
| } |
| |
| public boolean isParentNAC() { |
| Parse parent = parse.getParent(); |
| while (parent != null) { |
| if (parent.getType().equals("NAC")) { |
| return true; |
| } |
| parent = parent.getParent(); |
| } |
| return false; |
| } |
| |
| public opennlp.tools.coref.mention.Parse getParent() { |
| Parse parent = parse.getParent(); |
| if (parent == null) { |
| return null; |
| } |
| else { |
| return new DefaultParse(parent,sentenceNumber); |
| } |
| } |
| |
| public boolean isNamedEntity() { |
| |
| // TODO: We should use here a special tag to, where |
| // the type can be extracted from. Then it just depends |
| // on the training data and not the values inside NAME_TYPES. |
| |
| if (entitySet.contains(parse.getType())) { |
| return true; |
| } |
| else { |
| return false; |
| } |
| } |
| |
| public boolean isNounPhrase() { |
| return parse.getType().equals("NP") || parse.getType().startsWith("NP#"); |
| } |
| |
| public boolean isSentence() { |
| return parse.getType().equals(Parser.TOP_NODE); |
| } |
| |
| public boolean isToken() { |
| return parse.isPosTag(); |
| } |
| |
| public int getEntityId() { |
| |
| String type = parse.getType(); |
| |
| if (type.contains("#")) { |
| String numberString = type.substring(type.indexOf('#') + 1); |
| return Integer.parseInt(numberString); |
| } |
| else { |
| return -1; |
| } |
| } |
| |
| public Span getSpan() { |
| return parse.getSpan(); |
| } |
| |
| public int compareTo(opennlp.tools.coref.mention.Parse p) { |
| |
| if (p == this) { |
| return 0; |
| } |
| |
| if (getSentenceNumber() < p.getSentenceNumber()) { |
| return -1; |
| } |
| else if (getSentenceNumber() > p.getSentenceNumber()) { |
| return 1; |
| } |
| else { |
| |
| if (parse.getSpan().getStart() == p.getSpan().getStart() && |
| parse.getSpan().getEnd() == p.getSpan().getEnd()) { |
| |
| System.out.println("Maybe incorrect measurement!"); |
| |
| Stack<Parse> parents = new Stack<Parse>(); |
| |
| |
| |
| |
| // get parent and update distance |
| // if match return distance |
| // if not match do it again |
| } |
| |
| return parse.getSpan().compareTo(p.getSpan()); |
| } |
| } |
| |
| @Override |
| public String toString() { |
| return parse.getCoveredText(); |
| } |
| |
| |
| public opennlp.tools.coref.mention.Parse getPreviousToken() { |
| Parse parent = parse.getParent(); |
| Parse node = parse; |
| int index = -1; |
| //find parent with previous children |
| while (parent != null && index < 0) { |
| index = parent.indexOf(node) - 1; |
| if (index < 0) { |
| node = parent; |
| parent = parent.getParent(); |
| } |
| } |
| //find right-most child which is a token |
| if (index < 0) { |
| return null; |
| } |
| else { |
| Parse p = parent.getChildren()[index]; |
| while (!p.isPosTag()) { |
| Parse[] kids = p.getChildren(); |
| p = kids[kids.length - 1]; |
| } |
| return new DefaultParse(p,sentenceNumber); |
| } |
| } |
| |
| public opennlp.tools.coref.mention.Parse getNextToken() { |
| Parse parent = parse.getParent(); |
| Parse node = parse; |
| int index = -1; |
| //find parent with subsequent children |
| while (parent != null) { |
| index = parent.indexOf(node) + 1; |
| if (index == parent.getChildCount()) { |
| node = parent; |
| parent = parent.getParent(); |
| } |
| else { |
| break; |
| } |
| } |
| //find left-most child which is a token |
| if (parent == null) { |
| return null; |
| } |
| else { |
| Parse p = parent.getChildren()[index]; |
| while (!p.isPosTag()) { |
| p = p.getChildren()[0]; |
| } |
| return new DefaultParse(p,sentenceNumber); |
| } |
| } |
| |
| @Override |
| public boolean equals(Object o) { |
| |
| boolean result; |
| |
| if (o == this) { |
| result = true; |
| } |
| else if (o instanceof DefaultParse) { |
| result = parse == ((DefaultParse) o).parse; |
| } |
| else { |
| result = false; |
| } |
| |
| return result; |
| } |
| |
| @Override |
| public int hashCode() { |
| return parse.hashCode(); |
| } |
| |
| /** |
| * Retrieves the {@link Parse}. |
| * |
| * @return the {@link Parse} |
| */ |
| public Parse getParse() { |
| return parse; |
| } |
| } |