blob: e5268d5473a9380b9893b9e00adafc744cb689eb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.nlp.model.impl;
import java.lang.ref.SoftReference;
import java.util.Arrays;
import org.apache.stanbol.enhancer.nlp.model.Span;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A span selected in the given Text. This uses {@link SoftReference}s for
* holding the {@link #getSpan()} text to allow the Garbage Collector to
* free up memory for large texts. In addition the span text is lazily initialised
* on the first call to {@link #getSpan()}.
*
* @author Rupert Westenthaler
*
*/
public abstract class SpanImpl extends AnnotatedImpl implements Span{

    private static final Logger log = LoggerFactory.getLogger(SpanImpl.class);

    /**
     * The offsets of this span: <code>span[0]</code> is the start and
     * <code>span[1]</code> the end, as returned by {@link #getStart()} and
     * {@link #getEnd()}. The constructor guarantees
     * <code>0 &lt;= start &lt;= end</code>.
     */
    protected final int[] span;

    /**
     * Lazily initialised {@link SoftReference} to the text of this span.
     * <code>null</code> until the first call to {@link #getSpan()}.
     */
    private SoftReference<String> textReference = null;

    /**
     * The analysed text this span belongs to. Stays <code>null</code> when the
     * span was created without a context until
     * {@link #setContext(AnalysedTextImpl)} is called.
     */
    protected AnalysedTextImpl context;

    /**
     * The type of this span
     */
    protected final SpanTypeEnum type;

    /**
     * Allows to create a SpanImpl without the {@link #getContext()}. The
     * context MUST BE set by using {@link #setContext(AnalysedTextImpl)} before
     * using this span.
     * @param type the type of the span. MUST NOT be <code>null</code>
     * (only checked by an <code>assert</code>)
     * @param start the start offset. MUST NOT be negative
     * @param end the end offset. MUST NOT be lower than the start offset
     * @throws IllegalArgumentException if <code>start &lt; 0</code> or
     * <code>end &lt; start</code>
     */
    protected SpanImpl(SpanTypeEnum type, int start,int end) {
        assert type != null : "The parsed SpanType MUST NOT be NULL!";
        if(start < 0 || end < start){
            throw new IllegalArgumentException("Illegal span ["+start+','+end+']');
        }
        this.type = type;
        this.span = new int[]{start,end};
    }

    /**
     * Creates a span for the parsed context. If <code>relativeTo</code> is
     * not <code>null</code> the parsed start and end are interpreted as
     * offsets relative to the start of that span and the created span MUST
     * NOT extend over the end of it.
     * @param analysedText the context of the span. MUST NOT be
     * <code>null</code> (only checked by an <code>assert</code>)
     * @param type the type of the span
     * @param relativeTo the span the parsed offsets are relative to or
     * <code>null</code> if the offsets are used as parsed
     * @param start the start offset
     * @param end the end offset
     * @throws IllegalArgumentException if the offsets are illegal or if the
     * created span ends after the end of <code>relativeTo</code>
     */
    protected SpanImpl(AnalysedTextImpl analysedText, SpanTypeEnum type, Span relativeTo,int start,int end) {
        this(type,
            relativeTo == null ? start : relativeTo.getStart()+start,
            relativeTo == null ? end : relativeTo.getStart()+end);
        setContext(analysedText);
        //check that Spans that are created relative to an other do not cross
        //the borders of that span. The end is read directly from the array
        //so that the check does not depend on an overridable method while
        //the instance is still under construction.
        if(relativeTo != null && relativeTo.getEnd() < span[1]){
            throw new IllegalArgumentException("Illegal span ["+start+','+end
                + "] for "+type+" relative to "+relativeTo+" : Span of the "
                + " contained Token MUST NOT extend the others!");
        }
    }

    /**
     * Setter for the context of this span.
     * @param analysedText the context. MUST NOT be <code>null</code>
     * (only checked by an <code>assert</code>)
     */
    protected void setContext(AnalysedTextImpl analysedText){
        assert analysedText != null : "The parsed AnalysedText MUST NOT be NULL!";
        this.context = analysedText;
    }

    /* (non-Javadoc)
     * @see org.apache.stanbol.enhancer.nlp.model.impl.Span#getType()
     */
    @Override
    public SpanTypeEnum getType(){
        return type;
    }

    /* (non-Javadoc)
     * @see org.apache.stanbol.enhancer.nlp.model.impl.Span#getStart()
     */
    @Override
    public int getStart(){
        return span[0];
    }

    /* (non-Javadoc)
     * @see org.apache.stanbol.enhancer.nlp.model.impl.Span#getEnd()
     */
    @Override
    public int getEnd(){
        return span[1];
    }

    /* (non-Javadoc)
     * @see org.apache.stanbol.enhancer.nlp.model.impl.Span#getText()
     */
    @Override
    public final AnalysedTextImpl getContext() {
        return context;
    }

    /**
     * Getter for the text of this span. The text is cut out of the text of
     * the {@link #getContext() context} on the first call and whenever the
     * {@link SoftReference} holding the previous result was cleared.
     * Requires the context to be set.
     */
    @Override
    public String getSpan(){
        String spanText = textReference == null ? null : textReference.get();
        if(spanText == null){
            spanText = getContext().getText().subSequence(span[0], span[1]).toString();
            textReference = new SoftReference<String>(spanText);
        }
        return spanText;
    }

    /**
     * The hash is calculated over start, end and the type - the same values
     * that are considered by {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        //include the SpanTypeEnum in the hash, so that spans of different
        //types selecting the same range do not share the same hash
        return 31 * Arrays.hashCode(span) + (type == null ? 0 : type.hashCode());
    }

    /**
     * Two spans are equal if the other one is also a SpanImpl and both the
     * type and the start/end offsets are the same. The context is NOT
     * considered.
     */
    @Override
    public boolean equals(Object obj) {
        return obj instanceof SpanImpl && getType() == ((Span)obj).getType() &&
            Arrays.equals(this.span, ((SpanImpl)obj).span);
    }

    @Override
    public String toString() {
        return String.format("%s: %s",type ,Arrays.toString(span));
    }

    /**
     * Sorts spans by start (ascending), than by end (descending) and finally
     * by the {@link Enum#ordinal()} of the type (ascending). Comparing spans
     * of different contexts is logged, but the spans are still compared
     * based on those three values.
     */
    @Override
    public int compareTo(Span o) {
        if(context != null && o.getContext() != null &&
                !context.equals(o.getContext())){
            log.warn("Comparing Spans with different Context. This is not an " +
                "intended usage of this class as start|end|type parameters " +
                "do not have a natural oder over different texts.");
            log.info("This will sort Spans based on start|end|type parameters "+
                "regardless that the might be over different texts!");
            //TODO consider throwing an IllegalStateExcetion!
        }
        int start = compareInts(span[0], o.getStart());
        if(start != 0){
            return start;
        }
        //sort end in DESC order (swapped arguments)
        int end = compareInts(o.getEnd(), span[1]);
        if(end != 0){
            return end;
        }
        //if start AND end is the same compare based on the span type
        //Natural order of span types is defined by the Enum.ordinal()
        return compareInts(getType().ordinal(), o.getType().ordinal());
    }

    /**
     * Compares two ints in ASC order without the overflow risk of a
     * subtraction.
     * @return <code>-1</code> if <code>x &lt; y</code>, <code>0</code> if
     * both are equal, otherwise <code>1</code>
     */
    private static int compareInts(int x, int y){
        return (x < y) ? -1 : ((x == y) ? 0 : 1);
    }
}