| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package opennlp.tools.util; |
| |
| import java.io.Serializable; |
| import java.util.Objects; |
| |
| /** |
| * Class for storing start and end integer offsets. |
| * |
| */ |
| public class Span implements Comparable<Span>, Serializable { |
| |
| private final int start; |
| private final int end; |
| private final double prob;//default is 0 |
| private final String type; |
| |
| /** |
| * Initializes a new Span Object. Sets the prob to 0 as default. |
| * |
| * @param s start of span. |
| * @param e end of span, which is +1 more than the last element in the span. |
| * @param type the type of the span |
| */ |
| public Span(int s, int e, String type) { |
| this(s, e, type, 0d); |
| } |
| |
| /** |
| * Initializes a new Span Object. |
| * |
| * @param s start of span. |
| * @param e end of span, which is +1 more than the last element in the span. |
| * @param type the type of the span |
| * @param prob probability of span. |
| */ |
| public Span(int s, int e, String type, double prob) { |
| |
| if (s < 0) { |
| throw new IllegalArgumentException("start index must be zero or greater: " + s); |
| } |
| if (e < 0) { |
| throw new IllegalArgumentException("end index must be zero or greater: " + e); |
| } |
| if (s > e) { |
| throw new IllegalArgumentException("start index must not be larger than end index: " |
| + "start=" + s + ", end=" + e); |
| } |
| |
| start = s; |
| end = e; |
| this.prob = prob; |
| this.type = type; |
| } |
| |
| /** |
| * Initializes a new Span Object. Sets the prob to 0 as default |
| * |
| * @param s start of span. |
| * @param e end of span. |
| */ |
| public Span(int s, int e) { |
| this(s, e, null, 0d); |
| } |
| |
| /** |
| * |
| * @param s the start of the span (the token index, not the char index) |
| * @param e the end of the span (the token index, not the char index) |
| * @param prob |
| */ |
| public Span(int s, int e, double prob) { |
| this(s, e, null, prob); |
| } |
| |
| /** |
| * Initializes a new Span object with an existing Span which is shifted by an |
| * offset. |
| * |
| * @param span |
| * @param offset |
| */ |
| public Span(Span span, int offset) { |
| this(span.start + offset, span.end + offset, span.getType(), span.getProb()); |
| } |
| |
| /** |
| * Creates a new immutable span based on an existing span, where the existing span did not include the prob |
| * @param span the span that has no prob or the prob is incorrect and a new Span must be generated |
| * @param prob the probability of the span |
| */ |
| public Span(Span span, double prob) { |
| this(span.start, span.end, span.getType(), prob); |
| } |
| |
| /** |
| * Return the start of a span. |
| * |
| * @return the start of a span. |
| * |
| */ |
| public int getStart() { |
| return start; |
| } |
| |
| /** |
| * Return the end of a span. |
| * |
| * Note: that the returned index is one past the actual end of the span in the |
| * text, or the first element past the end of the span. |
| * |
| * @return the end of a span. |
| * |
| */ |
| public int getEnd() { |
| return end; |
| } |
| |
| /** |
| * Retrieves the type of the span. |
| * |
| * @return the type or null if not set |
| */ |
| public String getType() { |
| return type; |
| } |
| |
| /** |
| * Returns the length of this span. |
| * |
| * @return the length of the span. |
| */ |
| public int length() { |
| return end - start; |
| } |
| |
| /** |
| * Returns true if the specified span is contained by this span. Identical |
| * spans are considered to contain each other. |
| * |
| * @param s The span to compare with this span. |
| * |
| * @return true is the specified span is contained by this span; false otherwise. |
| */ |
| public boolean contains(Span s) { |
| return start <= s.getStart() && s.getEnd() <= end; |
| } |
| |
| /** |
| * Returns true if the specified index is contained inside this span. An index |
| * with the value of end is considered outside the span. |
| * |
| * @param index the index to test with this span. |
| * |
| * @return true if the span contains this specified index; false otherwise. |
| */ |
| public boolean contains(int index) { |
| return start <= index && index < end; |
| } |
| |
| /** |
| * Returns true if the specified span is the begin of this span and the |
| * specified span is contained in this span. |
| * |
| * @param s The span to compare with this span. |
| * |
| * @return true if the specified span starts with this span and is contained |
| * in this span; false otherwise |
| */ |
| public boolean startsWith(Span s) { |
| return getStart() == s.getStart() && contains(s); |
| } |
| |
| /** |
| * Returns true if the specified span intersects with this span. |
| * |
| * @param s The span to compare with this span. |
| * |
| * @return true is the spans overlap; false otherwise. |
| */ |
| public boolean intersects(Span s) { |
| int sstart = s.getStart(); |
| //either s's start is in this or this' start is in s |
| return this.contains(s) || s.contains(this) |
| || getStart() <= sstart && sstart < getEnd() |
| || sstart <= getStart() && getStart() < s.getEnd(); |
| } |
| |
| /** |
| * Returns true is the specified span crosses this span. |
| * |
| * @param s The span to compare with this span. |
| * |
| * @return true is the specified span overlaps this span and contains a |
| * non-overlapping section; false otherwise. |
| */ |
| public boolean crosses(Span s) { |
| int sstart = s.getStart(); |
| //either s's start is in this or this' start is in s |
| return !this.contains(s) && !s.contains(this) |
| && (getStart() <= sstart && sstart < getEnd() |
| || sstart <= getStart() && getStart() < s.getEnd()); |
| } |
| |
| /** |
| * Retrieves the string covered by the current span of the specified text. |
| * |
| * @param text |
| * |
| * @return the substring covered by the current span |
| */ |
| public CharSequence getCoveredText(CharSequence text) { |
| if (getEnd() > text.length()) { |
| throw new IllegalArgumentException("The span " + this |
| + " is outside the given text which has length " + text.length() + "!"); |
| } |
| |
| return text.subSequence(getStart(), getEnd()); |
| } |
| |
| /** |
| * Return a copy of this span with leading and trailing white spaces removed. |
| * |
| * @param text |
| * |
| * @return the trimmed span or the same object if already trimmed |
| */ |
| public Span trim(CharSequence text) { |
| |
| int newStartOffset = getStart(); |
| |
| for (int i = getStart(); i < getEnd() && StringUtil.isWhitespace(text.charAt(i)); i++) { |
| newStartOffset++; |
| } |
| |
| int newEndOffset = getEnd(); |
| for (int i = getEnd(); i > getStart() && StringUtil.isWhitespace(text.charAt(i - 1)); i--) { |
| newEndOffset--; |
| } |
| |
| if (newStartOffset == getStart() && newEndOffset == getEnd()) { |
| return this; |
| } else if (newStartOffset > newEndOffset) { |
| return new Span(getStart(), getStart(), getType()); |
| } else { |
| return new Span(newStartOffset, newEndOffset, getType()); |
| } |
| } |
| |
| /** |
| * Compares the specified span to the current span. |
| */ |
| public int compareTo(Span s) { |
| if (getStart() < s.getStart()) { |
| return -1; |
| } else if (getStart() == s.getStart()) { |
| if (getEnd() > s.getEnd()) { |
| return -1; |
| } else if (getEnd() < s.getEnd()) { |
| return 1; |
| } else { |
| // compare the type |
| if (getType() == null && s.getType() == null) { |
| return 0; |
| } else if (getType() != null && s.getType() != null) { |
| // use type lexicography order |
| return getType().compareTo(s.getType()); |
| } else if (getType() != null) { |
| return -1; |
| } |
| return 1; |
| } |
| } else { |
| return 1; |
| } |
| } |
| |
| /** |
| * Generates a hash code of the current span. |
| */ |
| @Override |
| public int hashCode() { |
| return Objects.hash(getStart(), getEnd(), getType()); |
| } |
| |
| /** |
| * Checks if the specified span is equal to the current span. |
| */ |
| @Override |
| public boolean equals(Object o) { |
| if (o == this) { |
| return true; |
| } |
| |
| if (o instanceof Span) { |
| Span s = (Span) o; |
| |
| return getStart() == s.getStart() && getEnd() == s.getEnd() |
| && Objects.equals(getType(), s.getType()); |
| } |
| |
| return false; |
| } |
| |
| /** |
| * Generates a human readable string. |
| */ |
| @Override |
| public String toString() { |
| StringBuilder toStringBuffer = new StringBuilder(15); |
| toStringBuffer.append("["); |
| toStringBuffer.append(getStart()); |
| toStringBuffer.append(".."); |
| toStringBuffer.append(getEnd()); |
| toStringBuffer.append(")"); |
| if (getType() != null) { |
| toStringBuffer.append(" "); |
| toStringBuffer.append(getType()); |
| } |
| |
| return toStringBuffer.toString(); |
| } |
| |
| /** |
| * Converts an array of {@link Span}s to an array of {@link String}s. |
| * |
| * @param spans |
| * @param s |
| * @return the strings |
| */ |
| public static String[] spansToStrings(Span[] spans, CharSequence s) { |
| String[] tokens = new String[spans.length]; |
| |
| for (int si = 0, sl = spans.length; si < sl; si++) { |
| tokens[si] = spans[si].getCoveredText(s).toString(); |
| } |
| |
| return tokens; |
| } |
| |
| public static String[] spansToStrings(Span[] spans, String[] tokens) { |
| String[] chunks = new String[spans.length]; |
| StringBuilder cb = new StringBuilder(); |
| for (int si = 0, sl = spans.length; si < sl; si++) { |
| cb.setLength(0); |
| for (int ti = spans[si].getStart(); ti < spans[si].getEnd(); ti++) { |
| cb.append(tokens[ti]).append(" "); |
| } |
| chunks[si] = cb.substring(0, cb.length() - 1); |
| } |
| return chunks; |
| } |
| |
| public double getProb() { |
| return prob; |
| } |
| |
| } |