opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java - opennlp - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */


 package opennlp.tools.parser;

 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Objects;
 import java.util.Set;
 import java.util.Stack;
 import java.util.TreeSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import opennlp.tools.util.Span;

 /**
  * Data structure for holding parse constituents.
  */
 public class Parse implements Cloneable, Comparable<Parse> {

   public static final String BRACKET_LRB = "(";
   public static final String BRACKET_RRB = ")";
   public static final String BRACKET_LCB = "{";
   public static final String BRACKET_RCB = "}";
   public static final String BRACKET_LSB = "[";
   public static final String BRACKET_RSB = "]";

   /**
    * The text string on which this parse is based.
    * This object is shared among all parses for the same sentence.
    */
   private String text;

   /**
    * The character offsets into the text for this constituent.
    */
   private Span span;

   /**
    * The syntactic type of this parse.
    */
   private String type;

   /**
    * The sub-constituents of this parse.
    */
   private List<Parse> parts;

   /**
    * The head parse of this parse. A parse can be its own head.
    */
   private Parse head;

   /**
    * A string used during parse construction to specify which
    * stage of parsing has been performed on this node.
    */
   private String label;

   /**
    * Index in the sentence of the head of this constituent.
    */
   private int headIndex;

   /**
    * The parent parse of this parse.
    */
   private Parse parent;

   /**
    * The probability associated with the syntactic type
    * assigned to this parse.
    */
   private double prob;

   /**
    * The string buffer used to track the derivation of this parse.
    */
   private StringBuffer derivation;

   /**
    * Specifies whether this constituent was built during the chunking phase.
    */
   private boolean isChunk;

   /**
    * The pattern used to find the base constituent label of a
    * Penn Treebank labeled constituent.
    */
   private static Pattern typePattern = Pattern.compile("^([^ =-]+)");

   /**
    * The pattern used to find the function tags.
    */
   private static Pattern funTypePattern = Pattern.compile("^[^ =-]+-([^ =-]+)");

   /**
    * The patter used to identify tokens in Penn Treebank labeled constituents.
    */
   private static Pattern tokenPattern = Pattern.compile("^[^ ()]+ ([^ ()]+)\\s*\\)");

   /**
    * The set of punctuation parses which are between this parse and the previous parse.
    */
   private Collection<Parse> prevPunctSet;

   /**
    * The set of punctuation parses which are between this parse and
    * the subsequent parse.
    */
   private Collection<Parse> nextPunctSet;

   /**
    * Specifies whether constituent labels should include parts specified
    * after minus character.
    */
   private static boolean useFunctionTags;

   /**
    * Creates a new parse node for this specified text and span of the specified type
    * with the specified probability and the specified head index.
    *
    * @param text The text of the sentence for which this node is a part of.
    * @param span The character offsets for this node within the specified text.
    * @param type The constituent label of this node.
    * @param p The probability of this parse.
    * @param index The token index of the head of this parse.
    */
   public Parse(String text, Span span, String type, double p, int index) {
     this.text = text;
     this.span = span;
     this.type = type;
     this.prob = p;
     this.head = this;
     this.headIndex = index;
     this.parts = new LinkedList<>();
     this.label = null;
     this.parent = null;
   }

   /**
    * Creates a new parse node for this specified text and span of the specified type with
    * the specified probability and the specified head and head index.
    *
    * @param text The text of the sentence for which this node is a part of.
    * @param span The character offsets for this node within the specified text.
    * @param type The constituent label of this node.
    * @param p The probability of this parse.
    * @param h The head token of this parse.
    */
   public Parse(String text, Span span, String type, double p, Parse h) {
     this(text, span, type, p, 0);
     if (h != null) {
       this.head = h;
       this.headIndex = h.headIndex;
     }
   }

   @Override
   public Object clone() {
     Parse p = new Parse(this.text, this.span, this.type, this.prob, this.head);
     p.parts = new LinkedList<>();
     p.parts.addAll(this.parts);

     if (derivation != null) {
       p.derivation = new StringBuffer(100);
       p.derivation.append(this.derivation);
     }
     p.label = this.label;
     return (p);
   }

   /**
    * Clones the right frontier of parse up to the specified node.
    *
    * @param node The last node in the right frontier of the parse tree which should be cloned.
    * @return A clone of this parse and its right frontier up to and including the specified node.
    */
   public Parse clone(Parse node) {
     if (this == node) {
       return (Parse) this.clone();
     }
     else {
       Parse c = (Parse) this.clone();
       Parse lc = c.parts.get(parts.size() - 1);
       c.parts.set(parts.size() - 1,lc.clone(node));
       return c;
     }
   }

   /**
    * Clones the right frontier of this root parse up to and including the specified node.
    *
    * @param node The last node in the right frontier of the parse tree which should be cloned.
    * @param parseIndex The child index of the parse for this root node.
    * @return A clone of this root parse and its right frontier up to and including the specified node.
    */
   public Parse cloneRoot(Parse node, int parseIndex) {
     Parse c = (Parse) this.clone();
     Parse fc = c.parts.get(parseIndex);
     c.parts.set(parseIndex,fc.clone(node));
     return c;
   }

   /**
    * Specifies whether function tags should be included as part of the constituent type.
    *
    * @param uft true is they should be included; false otherwise.
    */
   public static void useFunctionTags(boolean uft) {
     useFunctionTags = uft;
   }


   /**
    * Set the type of this constituent to the specified type.
    *
    * @param type The type of this constituent.
    */
   public void setType(String type) {
     this.type = type;
   }

   /**
    * Returns the constituent label for this node of the parse.
    *
    * @return The constituent label for this node of the parse.
    */
   public String getType() {
     return type;
   }

   /**
    * Returns the set of punctuation parses that occur immediately before this parse.
    *
    * @return the set of punctuation parses that occur immediately before this parse.
    */
   public Collection<Parse> getPreviousPunctuationSet() {
     return prevPunctSet;
   }

   /**
    * Designates that the specified punctuation should is prior to this parse.
    *
    * @param punct The punctuation.
    */
   public void addPreviousPunctuation(Parse punct) {
     if (this.prevPunctSet == null) {
       this.prevPunctSet = new TreeSet<>();
     }
     prevPunctSet.add(punct);
   }

   /**
    * Returns the set of punctuation parses that occur immediately after this parse.
    *
    * @return the set of punctuation parses that occur immediately after this parse.
    */
   public Collection<Parse> getNextPunctuationSet() {
     return nextPunctSet;
   }

   /**
    * Designates that the specified punctuation follows this parse.
    *
    * @param punct The punctuation set.
    */
   public void addNextPunctuation(Parse punct) {
     if (this.nextPunctSet == null) {
       this.nextPunctSet = new TreeSet<>();
     }
     nextPunctSet.add(punct);
   }

   /**
    * Sets the set of punctuation tags which follow this parse.
    *
    * @param punctSet The set of punctuation tags which follow this parse.
    */
   public void setNextPunctuation(Collection<Parse> punctSet) {
     this.nextPunctSet = punctSet;
   }

   /**
    * Sets the set of punctuation tags which preceed this parse.
    *
    * @param punctSet The set of punctuation tags which preceed this parse.
    */
   public void setPrevPunctuation(Collection<Parse> punctSet) {
     this.prevPunctSet = punctSet;
   }

   /**
    * Inserts the specified constituent into this parse based on its text span.This
    * method assumes that the specified constituent can be inserted into this parse.
    *
    * @param constituent The constituent to be inserted.
    */
   public void insert(final Parse constituent) {
     Span ic = constituent.span;
     if (span.contains(ic)) {
       //double oprob=c.prob;
       int pi = 0;
       int pn = parts.size();
       for (; pi < pn; pi++) {
         Parse subPart = parts.get(pi);
         //System.err.println("Parse.insert:con="+constituent+" sp["+pi+"] "+subPart+" "+subPart.getType());
         Span sp = subPart.span;
         if (sp.getStart() >= ic.getEnd()) {
           break;
         }
         // constituent contains subPart
         else if (ic.contains(sp)) {
           //System.err.println("Parse.insert:con contains subPart");
           parts.remove(pi);
           pi--;
           constituent.parts.add(subPart);
           subPart.setParent(constituent);
           //System.err.println("Parse.insert: "+subPart.hashCode()+" -> "+subPart.getParent().hashCode());
           pn = parts.size();
         }
         else if (sp.contains(ic)) {
           //System.err.println("Parse.insert:subPart contains con");
           subPart.insert(constituent);
           return;
         }
       }
       //System.err.println("Parse.insert:adding con="+constituent+" to "+this);
       parts.add(pi, constituent);
       constituent.setParent(this);
       // System.err.println("Parse.insert: "+constituent.hashCode()+" -> "
       // +constituent.getParent().hashCode());
     }
     else {
       throw new IllegalArgumentException("Inserting constituent not contained in the sentence!");
     }
   }

   /**
    * Appends the specified string buffer with a string representation of this parse.
    *
    * @param sb A string buffer into which the parse string can be appended.
    */
   public void show(StringBuffer sb) {
     int start;
     start = span.getStart();
     if (!type.equals(AbstractBottomUpParser.TOK_NODE)) {
       sb.append("(");
       sb.append(type).append(" ");
       //System.out.print(label+" ");
       //System.out.print(head+" ");
       //System.out.print(df.format(prob)+" ");
     }
     for (Parse c : parts) {
       Span s = c.span;
       if (start < s.getStart()) {
         //System.out.println("pre "+start+" "+s.getStart());
         sb.append(encodeToken(text.substring(start, s.getStart())));
       }
       c.show(sb);
       start = s.getEnd();
     }
     if (start < span.getEnd()) {
       sb.append(encodeToken(text.substring(start, span.getEnd())));
     }
     if (!type.equals(AbstractBottomUpParser.TOK_NODE)) {
       sb.append(")");
     }
   }

   /**
    * Displays this parse using Penn Treebank-style formatting.
    */
   public void show() {
     StringBuffer sb = new StringBuffer(text.length() * 4);
     show(sb);
     System.out.println(sb);
   }


   /**
    * Returns the probability associated with the pos-tag sequence assigned to this parse.
    *
    * @return The probability associated with the pos-tag sequence assigned to this parse.
    */
   public double getTagSequenceProb() {
     //System.err.println("Parse.getTagSequenceProb: "+type+" "+this);
     if (parts.size() == 1 && (parts.get(0)).type.equals(AbstractBottomUpParser.TOK_NODE)) {
       //System.err.println(this+" "+prob);
       return (StrictMath.log(prob));
     }
     else if (parts.size() == 0) {
       System.err.println("Parse.getTagSequenceProb: Wrong base case!");
       return (0.0);
     }
     else {
       double sum = 0.0;
       for (Parse part : parts) {
         sum += part.getTagSequenceProb();
       }
       return sum;
     }
   }

   /**
    * Returns whether this parse is complete.
    *
    * @return Returns true if the parse contains a single top-most node.
    */
   public boolean complete() {
     return (parts.size() == 1);
   }

   public String getCoveredText() {
     return text.substring(span.getStart(), span.getEnd());
   }

   /**
    * Represents this parse in a human readable way.
    */
   @Override
   public String toString() {
     // TODO: Use the commented code in next bigger release,
     // change probably breaks backward compatibility in some
     // applications
     //StringBuffer buffer = new StringBuffer();
     //show(buffer);
     //return buffer.toString();
     return getCoveredText();
   }

   /**
    * Returns the text of the sentence over which this parse was formed.
    *
    * @return The text of the sentence over which this parse was formed.
    */
   public String getText() {
     return text;
   }

   /**
    * Returns the character offsets for this constituent.
    *
    * @return The character offsets for this constituent.
    */
   public Span getSpan() {
     return span;
   }

   /**
    * Returns the log of the product of the probability associated with all the
    * decisions which formed this constituent.
    *
    * @return The log of the product of the probability associated with all the
    * decisions which formed this constituent.
    */
   public double getProb() {
     return prob;
   }

   /**
    * Adds the specified probability log to this current log for this parse.
    *
    * @param logProb The probability of an action performed on this parse.
    */
   public void addProb(double logProb) {
     this.prob += logProb;
   }

   /**
    * Returns the child constituents of this constituent
    * .
    * @return The child constituents of this constituent.
    */
   public Parse[] getChildren() {
     return parts.toArray(new Parse[parts.size()]);
   }

   /**
    * Replaces the child at the specified index with a new child with the specified label.
    *
    * @param index The index of the child to be replaced.
    * @param label The label to be assigned to the new child.
    */
   public void setChild(int index, String label) {
     Parse newChild = (Parse) (parts.get(index)).clone();
     newChild.setLabel(label);
     parts.set(index,newChild);
   }

   public void add(Parse daughter, HeadRules rules) {
     if (daughter.prevPunctSet != null) {
       parts.addAll(daughter.prevPunctSet);
     }
     parts.add(daughter);
     this.span = new Span(span.getStart(),daughter.getSpan().getEnd());
     this.head = rules.getHead(getChildren(),type);
     this.headIndex = head.headIndex;
   }

   public void remove(int index) {
     parts.remove(index);
     if (! parts.isEmpty()) {
       if (index == 0 || index == parts.size()) { //size is orig last element
         span = new Span((parts.get(0)).span.getStart(),(parts.get(parts.size() - 1)).span.getEnd());
       }
     }
   }

   public Parse adjoinRoot(Parse node, HeadRules rules, int parseIndex) {
     Parse lastChild = parts.get(parseIndex);
     Parse adjNode = new Parse(this.text,new Span(lastChild.getSpan().getStart(),
         node.getSpan().getEnd()),lastChild.getType(),1,
         rules.getHead(new Parse[]{lastChild,node},lastChild.getType()));
     adjNode.parts.add(lastChild);
     if (node.prevPunctSet != null) {
       adjNode.parts.addAll(node.prevPunctSet);
     }
     adjNode.parts.add(node);
     parts.set(parseIndex,adjNode);
     return adjNode;
   }

   /**
    * Sister adjoins this node's last child and the specified sister node and returns their
    * new parent node.  The new parent node replace this nodes last child.
    *
    * @param sister The node to be adjoined.
    * @param rules The head rules for the parser.
    * @return The new parent node of this node and the specified sister node.
    */
   public Parse adjoin(Parse sister, HeadRules rules) {
     Parse lastChild = parts.get(parts.size() - 1);
     Parse adjNode = new Parse(this.text,new Span(lastChild.getSpan().getStart(),sister.getSpan().getEnd()),
         lastChild.getType(),1,rules.getHead(new Parse[]{lastChild,sister},lastChild.getType()));
     adjNode.parts.add(lastChild);
     if (sister.prevPunctSet != null) {
       adjNode.parts.addAll(sister.prevPunctSet);
     }
     adjNode.parts.add(sister);
     parts.set(parts.size() - 1, adjNode);
     this.span = new Span(span.getStart(),sister.getSpan().getEnd());
     this.head = rules.getHead(getChildren(),type);
     this.headIndex = head.headIndex;
     return adjNode;
   }

   public void expandTopNode(Parse root) {
     boolean beforeRoot = true;
     //System.err.println("expandTopNode: parts="+parts);
     for (int pi = 0, ai = 0; pi < parts.size(); pi++,ai++) {
       Parse node = parts.get(pi);
       if (node == root) {
         beforeRoot = false;
       }
       else if (beforeRoot) {
         root.parts.add(ai,node);
         parts.remove(pi);
         pi--;
       }
       else {
         root.parts.add(node);
         parts.remove(pi);
         pi--;
       }
     }
     root.updateSpan();
   }

   /**
    * Returns the number of children for this parse node.
    *
    * @return the number of children for this parse node.
    */
   public int getChildCount() {
     return parts.size();
   }

   /**
    * Returns the index of this specified child.
    *
    * @param child A child of this parse.
    *
    * @return the index of this specified child or -1 if the specified child is not a child of this parse.
    */
   public int indexOf(Parse child) {
     return parts.indexOf(child);
   }

   /**
    * Returns the head constituent associated with this constituent.
    *
    * @return The head constituent associated with this constituent.
    */
   public Parse getHead() {
     return head;
   }

   /**
    * Returns the index within a sentence of the head token for this parse.
    *
    * @return The index within a sentence of the head token for this parse.
    */
   public int getHeadIndex() {
     return headIndex;
   }

   /**
    * Returns the label assigned to this parse node during parsing
    * which specifies how this node will be formed into a constituent.
    *
    * @return The outcome label assigned to this node during parsing.
    */
   public String getLabel() {
     return label;
   }

   /**
    * Assigns this parse the specified label.  This is used by parsing schemes to
    * tag parsing nodes while building.
    *
    * @param label A label indicating something about the stage of building for this parse node.
    */
   public void setLabel(String label) {
     this.label = label;
   }

   private static String getType(String rest) {
     if (rest.startsWith("-LCB-")) {
       return "-LCB-";
     }
     else if (rest.startsWith("-RCB-")) {
       return "-RCB-";
     }
     else if (rest.startsWith("-LRB-")) {
       return "-LRB-";
     }
     else if (rest.startsWith("-RRB-")) {
       return "-RRB-";
     }
     else if (rest.startsWith("-RSB-")) {
       return "-RSB-";
     }
     else if (rest.startsWith("-LSB-")) {
       return "-LSB-";
     }

     else if (rest.startsWith("-NONE-")) {
       return "-NONE-";
     }
     else {
       Matcher typeMatcher = typePattern.matcher(rest);
       if (typeMatcher.find()) {
         String type = typeMatcher.group(1);
         if (useFunctionTags) {
           Matcher funMatcher = funTypePattern.matcher(rest);
           if (funMatcher.find()) {
             String ftag = funMatcher.group(1);
             type = type + "-" + ftag;
           }
         }
         return type;
       }
     }
     return null;
   }

   private static String encodeToken(String token) {
     if (BRACKET_LRB.equals(token)) {
       return "-LRB-";
     }
     else if (BRACKET_RRB.equals(token)) {
       return "-RRB-";
     }
     else if (BRACKET_LCB.equals(token)) {
       return "-LCB-";
     }
     else if (BRACKET_RCB.equals(token)) {
       return "-RCB-";
     }
     else if (BRACKET_LSB.equals(token)) {
       return "-LSB-";
     }
     else if (BRACKET_RSB.equals(token)) {
       return "-RSB-";
     }

     return token;
   }

   private static String decodeToken(String token) {
     if ("-LRB-".equals(token)) {
       return BRACKET_LRB;
     }
     else if ("-RRB-".equals(token)) {
       return BRACKET_RRB;
     }
     else if ("-LCB-".equals(token)) {
       return BRACKET_LCB;
     }
     else if ("-RCB-".equals(token)) {
       return BRACKET_RCB;
     }
     else if ("-LSB-".equals(token)) {
       return BRACKET_LSB;
     }
     else if ("-RSB-".equals(token)) {
       return BRACKET_RSB;
     }

     return token;
   }

   /**
    * Returns the string containing the token for the specified portion of the parse string or
    * null if the portion of the parse string does not represent a token.
    *
    * @param rest The portion of the parse string remaining to be processed.
    *
    * @return The string containing the token for the specified portion of the parse string or
    *     null if the portion of the parse string does not represent a token.
    */
   private static String getToken(String rest) {
     Matcher tokenMatcher = tokenPattern.matcher(rest);
     if (tokenMatcher.find()) {
       return decodeToken(tokenMatcher.group(1));
     }
     return null;
   }

   /**
    * Computes the head parses for this parse and its sub-parses and stores this information
    * in the parse data structure.
    *
    * @param rules The head rules which determine how the head of the parse is computed.
    */
   public void updateHeads(HeadRules rules) {
     if (parts != null && parts.size() != 0) {
       for (Parse c : parts) {
         c.updateHeads(rules);
       }
       this.head = rules.getHead(parts.toArray(new Parse[parts.size()]), type);
       if (head == null) {
         head = this;
       }
       else {
         this.headIndex = head.headIndex;
       }
     }
     else {
       this.head = this;
     }
   }

   public void updateSpan() {
     span = new Span((parts.get(0)).span.getStart(),(parts.get(parts.size() - 1)).span.getEnd());
   }

   /**
    * Prune the specified sentence parse of vacuous productions.
    *
    * @param parse
    */
   public static void pruneParse(Parse parse) {
     List<Parse> nodes = new LinkedList<>();
     nodes.add(parse);
     while (nodes.size() != 0) {
       Parse node = nodes.remove(0);
       Parse[] children = node.getChildren();
       if (children.length == 1 && node.getType().equals(children[0].getType())) {
         int index = node.getParent().parts.indexOf(node);
         children[0].setParent(node.getParent());
         node.getParent().parts.set(index,children[0]);
         node.parent = null;
         node.parts = null;
       }
       nodes.addAll(Arrays.asList(children));
     }
   }

   public static void fixPossesives(Parse parse) {
     Parse[] tags = parse.getTagNodes();
     for (int ti = 0; ti < tags.length; ti++) {
       if (tags[ti].getType().equals("POS")) {
         if (ti + 1 < tags.length && tags[ti + 1].getParent() == tags[ti].getParent().getParent()) {
           int start = tags[ti + 1].getSpan().getStart();
           int end = tags[ti + 1].getSpan().getEnd();
           for (int npi = ti + 2; npi < tags.length; npi++) {
             if (tags[npi].getParent() == tags[npi - 1].getParent()) {
               end = tags[npi].getSpan().getEnd();
             }
             else {
               break;
             }
           }
           Parse npPos = new Parse(parse.getText(), new Span(start,end), "NP", 1 , tags[ti + 1]);
           parse.insert(npPos);
         }
       }
     }
   }


   /**
    * Parses the specified tree-bank style parse string and return a Parse structure for that string.
    *
    * @param parse A tree-bank style parse string.
    *
    * @return a Parse structure for the specified tree-bank style parse string.
    */
   public static Parse parseParse(String parse) {
     return parseParse(parse,null);
   }

   /**
    * Parses the specified tree-bank style parse string and return a Parse structure
    * for that string.
    *
    * @param parse A tree-bank style parse string.
    * @param gl The gap labeler.
    *
    * @return a Parse structure for the specified tree-bank style parse string.
    */
   public static Parse parseParse(String parse, GapLabeler gl) {
     StringBuilder text = new StringBuilder();
     int offset = 0;
     Stack<Constituent> stack = new Stack<>();
     List<Constituent> cons = new LinkedList<>();
     for (int ci = 0, cl = parse.length(); ci < cl; ci++) {
       char c = parse.charAt(ci);
       if (c == '(') {
         String rest = parse.substring(ci + 1);
         String type = getType(rest);
         if (type == null) {
           System.err.println("null type for: " + rest);
         }
         String token = getToken(rest);
         stack.push(new Constituent(type, new Span(offset,offset)));
         if (token != null) {
           if (Objects.equals(type, "-NONE-") && gl != null) {
             //System.err.println("stack.size="+stack.size());
             gl.labelGaps(stack);
           }
           else {
             cons.add(new Constituent(AbstractBottomUpParser.TOK_NODE,
                 new Span(offset, offset + token.length())));
             text.append(token).append(" ");
             offset += token.length() + 1;
           }
         }
       }
       else if (c == ')') {
         Constituent con = stack.pop();
         int start = con.getSpan().getStart();
         if (start < offset) {
           cons.add(new Constituent(con.getLabel(), new Span(start, offset - 1)));
         }
       }
     }
     String txt = text.toString();
     int tokenIndex = -1;
     Parse p = new Parse(txt, new Span(0, txt.length()), AbstractBottomUpParser.TOP_NODE, 1,0);
     for (Constituent con : cons) {
       String type = con.getLabel();
       if (!type.equals(AbstractBottomUpParser.TOP_NODE)) {
         if (AbstractBottomUpParser.TOK_NODE.equals(type)) {
           tokenIndex++;
         }
         Parse c = new Parse(txt, con.getSpan(), type, 1, tokenIndex);
         //System.err.println("insert["+ci+"] "+type+" "+c.toString()+" "+c.hashCode());
         p.insert(c);
         //codeTree(p);
       }
     }
     return p;
   }

   /**
    * Returns the parent parse node of this constituent.
    *
    * @return The parent parse node of this constituent.
    */
   public Parse getParent() {
     return parent;
   }

   /**
    * Specifies the parent parse node for this constituent.
    *
    * @param parent The parent parse node for this constituent.
    */
   public void setParent(Parse parent) {
     this.parent = parent;
   }

   /**
    * Indicates whether this parse node is a pos-tag.
    *
    * @return true if this node is a pos-tag, false otherwise.
    */
   public boolean isPosTag() {
     return (parts.size() == 1 &&
         (parts.get(0)).getType().equals(AbstractBottomUpParser.TOK_NODE));
   }

   /**
    * Returns true if this constituent contains no sub-constituents.
    *
    * @return true if this constituent contains no sub-constituents; false otherwise.
    */
   public boolean isFlat() {
     boolean flat = true;
     for (Parse part : parts) {
       flat &= part.isPosTag();
     }
     return flat;
   }

   public void isChunk(boolean ic) {
     this.isChunk = ic;
   }

   public boolean isChunk() {
     return isChunk;
   }

   /**
    * Returns the parse nodes which are children of this node and which are pos tags.
    *
    * @return the parse nodes which are children of this node and which are pos tags.
    */
   public Parse[] getTagNodes() {
     List<Parse> tags = new LinkedList<>();
     List<Parse> nodes = new LinkedList<>();
     nodes.addAll(this.parts);
     while (nodes.size() != 0) {
       Parse p = nodes.remove(0);
       if (p.isPosTag()) {
         tags.add(p);
       }
       else {
         nodes.addAll(0,p.parts);
       }
     }
     return tags.toArray(new Parse[tags.size()]);
   }

   public Parse[] getTokenNodes() {
     List<Parse> tokens = new LinkedList<>();
     List<Parse> nodes = new LinkedList<>();
     nodes.addAll(this.parts);
     while (nodes.size() != 0) {
       Parse p = nodes.remove(0);
       if (p.getType().equals(AbstractBottomUpParser.TOK_NODE)) {
         tokens.add(p);
       }
       else {
         nodes.addAll(0, p.parts);
       }
     }
     return tokens.toArray(new Parse[tokens.size()]);
   }

   /**
    * Returns the deepest shared parent of this node and the specified node.
    * If the nodes are identical then their parent is returned.
    * If one node is the parent of the other then the parent node is returned.
    *
    * @param node The node from which parents are compared to this node's parents.
    *
    * @return the deepest shared parent of this node and the specified node.
    */
   public Parse getCommonParent(Parse node) {
     if (this == node) {
       return parent;
     }
     Set<Parse> parents = new HashSet<>();
     Parse cparent = this;
     while (cparent != null) {
       parents.add(cparent);
       cparent = cparent.getParent();
     }
     while (node != null) {
       if (parents.contains(node)) {
         return node;
       }
       node = node.getParent();
     }
     return null;
   }

   @Override
   public boolean equals(Object obj) {
     if (obj == this) {
       return true;
     }

     if (obj instanceof Parse) {
       Parse p = (Parse) obj;

       return Objects.equals(label, p.label) && span.equals(p.span)
           && text.equals(p.text) && parts.equals(p.parts);
     }

     return false;
   }

   @Override
   public int hashCode() {
     // Note: label is missing here!
     return Objects.hash(span, text);
   }

   public int compareTo(Parse p) {
     return Double.compare(p.getProb(), this.getProb());
   }

   /**
    * Returns the derivation string for this parse if one has been created.
    *
    * @return the derivation string for this parse or null if no derivation string has been created.
    */
   public StringBuffer getDerivation() {
     return derivation;
   }

   /**
    * Specifies the derivation string to be associated with this parse.
    *
    * @param derivation The derivation string to be associated with this parse.
    */
   public void setDerivation(StringBuffer derivation) {
     this.derivation = derivation;
   }

   private void codeTree(Parse p,int[] levels) {
     Parse[] kids = p.getChildren();
     StringBuilder levelsBuff = new StringBuilder();
     levelsBuff.append("[");
     int[] nlevels = new int[levels.length + 1];
     for (int li = 0; li < levels.length; li++) {
       nlevels[li] = levels[li];
       levelsBuff.append(levels[li]).append(".");
     }
     for (int ki = 0; ki < kids.length; ki++) {
       nlevels[levels.length] = ki;
       System.out.println(levelsBuff.toString() + ki + "] " + kids[ki].getType() +
           " " + kids[ki].hashCode() + " -> " + kids[ki].getParent().hashCode() +
           " " + kids[ki].getParent().getType() + " " + kids[ki].getCoveredText());
       codeTree(kids[ki],nlevels);
     }
   }

   /**
    * Prints to standard out a representation of the specified parse which
    * contains hash codes so that parent/child relationships can be explicitly seen.
    */
   public void showCodeTree() {
     codeTree(this,new int[0]);
   }

   /**
    * Utility method to inserts named entities.
    *
    * @param tag
    * @param names
    * @param tokens
    */
   public static void addNames(String tag, Span[] names, Parse[] tokens) {
     for (Span nameTokenSpan : names) {
       Parse startToken = tokens[nameTokenSpan.getStart()];
       Parse endToken = tokens[nameTokenSpan.getEnd() - 1];
       Parse commonParent = startToken.getCommonParent(endToken);
       //System.err.println("addNames: "+startToken+" .. "+endToken+" commonParent = "+commonParent);
       if (commonParent != null) {
         Span nameSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd());
         if (nameSpan.equals(commonParent.getSpan())) {
           commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex()));
         } else {
           Parse[] kids = commonParent.getChildren();
           boolean crossingKids = false;
           for (Parse kid : kids) {
             if (nameSpan.crosses(kid.getSpan())) {
               crossingKids = true;
             }
           }
           if (!crossingKids) {
             commonParent.insert(new Parse(commonParent.getText(), nameSpan,
                 tag, 1.0, endToken.getHeadIndex()));
           } else {
             if (commonParent.getType().equals("NP")) {
               Parse[] grandKids = kids[0].getChildren();
               if (grandKids.length > 1 && nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) {
                 commonParent.insert(new Parse(commonParent.getText(), commonParent.getSpan(),
                     tag, 1.0, commonParent.getHeadIndex()));
               }
             }
           }
         }
       }
     }
   }
 }