enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SectionImpl.java - stanbol - Git at Google

 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.stanbol.enhancer.nlp.model.impl;

 import java.util.ConcurrentModificationException;
 import java.util.Iterator;
 import java.util.NavigableMap;
 import java.util.NoSuchElementException;
 import java.util.Set;
 import java.util.SortedSet;

 import org.apache.commons.collections.IteratorUtils;
 import org.apache.commons.collections.Predicate;
 import org.apache.commons.collections.functors.InstanceofPredicate;
 import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
 import org.apache.stanbol.enhancer.nlp.model.Chunk;
 import org.apache.stanbol.enhancer.nlp.model.Section;
 import org.apache.stanbol.enhancer.nlp.model.Sentence;
 import org.apache.stanbol.enhancer.nlp.model.Span;
 import org.apache.stanbol.enhancer.nlp.model.Token;


 /**
  * A Span that contains other spans
  * @author Rupert Westenthaler
  *
  */
 public abstract class SectionImpl extends SpanImpl implements Section {

     /**
      * Allows to create a SectionImpl without setting the AnalysedText context.
      * {@link #setContext(AnalysedTextImpl)} needs to be called before using
      * this instance.<p>
      * NOTE: this constructor is needed to instantiate {@link AnalysedTextImpl}.
      * @param type the type. MUST NOT be <code>null</code> nor {@link SpanTypeEnum#Token}
      * @param start
      * @param end
      */
     public SectionImpl(SpanTypeEnum type,int start,int end) {
         super(type,start,end);
         assert type != SpanTypeEnum.Token : "The SpanType 'Token' is NOT a Section - can not cover other spans!";
     }
 //    public SectionImpl(AnalysedTextImpl at, SpanTypeEnum type,int start,int end) {
 //        this(at,type,null,start,end);
 //    }
     public SectionImpl(AnalysedTextImpl at, SpanTypeEnum type,Span relativeTo, int start,int end) {
         super(at,type,relativeTo,start,end);
         assert type != SpanTypeEnum.Token : "The SpanType 'Token' is NOT a Section - can not cover other spans!";
     }


     @Override
     @SuppressWarnings("unchecked")
     public Iterator<Span> getEnclosed(final Set<SpanTypeEnum> types) {
         return IteratorUtils.filteredIterator(getIterator(),
             new Predicate() {
                 @Override
                 public boolean evaluate(Object span) {
                     return types.contains(((Span)span).getType());
                 }
             });
     }
     /**
      * Iterator that does not throw {@link ConcurrentModificationException} but
      * considers modifications to the underlying set by using the
      * {@link NavigableMap#higherKey(Object)} method for iterating over the
      * Elements!<p>
      * This allows to add new {@link Span}s to the {@link Section} while
      * iterating (e.g. add {@link Token}s and/or {@link Chunk}s while iterating
      * over the {@link Sentence}s of an {@link AnalysedText})
      * @return the iterator
      */
     protected Iterator<Span> getIterator(){
         //the end of this section
         final Span end = new SubSetHelperSpan(getEnd());
         return new Iterator<Span>() {

             boolean init = false;
             boolean removed = true;
             private Span span = SectionImpl.this;

             @Override
             public boolean hasNext() {
                 return getNext() != null;
             }

             private Span getNext(){
                 Span next = context.spans.higherKey(span);
                 return next == null || next.compareTo(end) >= 0 ? null : next;
             }

             @Override
             public Span next() {
                 init = true;
                 span = getNext();
                 removed = false;
                 if(span == null){
                     throw new NoSuchElementException();
                 }
                 return span;
             }

             @Override
             public void remove() {
                 if(!init){
                     throw new IllegalStateException("remove can not be called before the first call to next");
                 }
                 if(removed){
                     throw new IllegalStateException("the current Span was already removed!");
                 }
                 context.spans.remove(span);
                 removed = true;
             }

         };
     }

     /**
      * Adds a Token <b>relative</b> to the current Span. Negative values for start and
      * end are allowed (e.g. to add a Token that starts some characters before
      * this one.<p>
      * Users that want to use <b>absolute</b> indexes need to use
      * <code><pre>
      *     Span span; //any type of Span (Token, Chunk, Sentence ...)
      *     span.getContext().addToken(absoluteStart, absoluteEnd)
      * </pre></code>
      * @param start the start relative to this Span
      * @param end the end relative to this span
      * @return the created and added token
      */
     public Token addToken(int start,int end){
         return register(new TokenImpl(context, this,start, end));
     }
     /**
      * Registers the parsed - newly created token - with the {@link #getContext()}.
      * If the parsed {@link Span} already exists (an other Span instance with the
      * same values for {@link Span#getType()}, {@link Span#getStart()} and
      * {@link Span#getEnd()}) than the already present instance is returned
      * instead of the parsed one. In case the parsed Token does not already
      * exist the parsed instance is registered with the context and
      * returned.<p>
      * Typical usage:<pre><code>
      *     public add{something}(int start, int end){
      *         return register(new {somthing}Impl(context, this,start,end));
      *     }
      * </code></pre>
      * {something} ... the Span type (Token, Chunk, Sentence ...)<p>
      * @param span the Span instance to register
      * @return the parsed or an already existing instance
      */
     protected <T extends Span> T register(T span){
         //check if this token already exists
         @SuppressWarnings("unchecked")
         T current = (T)context.spans.get(span);
         //NOTE: type safety is ensured by the SpanTypeEnum in combination with the
         //      Compareable implementation of SpanImpl.
         if(current == null){ //add the new one
             context.spans.put(span, span);
             return span;
         } else { //else return the already contained token
             return current;
         }
     }

     public Iterator<Token> getTokens(){
         return filter(Token.class);
     }
     /**
      * Internal helper to generate correctly generic typed {@link Iterator}s for
      * filtered {@link Span} types
      * @param interfaze the Span interface e.g. {@link Token}
      * @param clazz the actual Span implementation e.g. {@link TokenImpl}
      * @return the {@link Iterator} of type {interface} iterating over
      * {implementation} instances (e.g.
      * <code>{@link Iterator}&lt;{@link Token}&gt;</code> returning
      * <code>{@link TokenImpl}</code> instances on calls to {@link Iterator#next()}
      */
     @SuppressWarnings("unchecked")
     protected <T extends Span> Iterator<T> filter(
         final Class<T> clazz){
         return IteratorUtils.filteredIterator(
             getIterator(),
             new InstanceofPredicate(clazz));
     }

     /**
      * Internal helper class used for building {@link SortedSet#subSet(Object, Object)}.
      *
      * @author Rupert Westenthaler
      *
      */
     class SubSetHelperSpan extends SpanImpl implements Span{
         /**
          * Create the start constraint for {@link SortedSet#subSet(Object, Object)}
          * @param start
          * @param end
          */
         protected SubSetHelperSpan(int start,int end){
             super(SpanTypeEnum.Text, //lowest pos type
                 start,end);
             setContext(SectionImpl.this.context);
         }
         /**
          * Creates the end constraint for {@link SortedSet#subSet(Object, Object)}
          * @param pos
          */
         protected SubSetHelperSpan(int pos){
             super(SpanTypeEnum.Token, //highest pos type,
                 pos,Integer.MAX_VALUE);
             setContext(SectionImpl.this.context);
         }
     }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.stanbol.enhancer.nlp.model.impl;

	import java.util.ConcurrentModificationException;
	import java.util.Iterator;
	import java.util.NavigableMap;
	import java.util.NoSuchElementException;
	import java.util.Set;
	import java.util.SortedSet;

	import org.apache.commons.collections.IteratorUtils;
	import org.apache.commons.collections.Predicate;
	import org.apache.commons.collections.functors.InstanceofPredicate;
	import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
	import org.apache.stanbol.enhancer.nlp.model.Chunk;
	import org.apache.stanbol.enhancer.nlp.model.Section;
	import org.apache.stanbol.enhancer.nlp.model.Sentence;
	import org.apache.stanbol.enhancer.nlp.model.Span;
	import org.apache.stanbol.enhancer.nlp.model.Token;


	/**
	* A Span that contains other spans
	* @author Rupert Westenthaler
	*
	*/
	public abstract class SectionImpl extends SpanImpl implements Section {

	/**
	* Allows to create a SectionImpl without setting the AnalysedText context.
	* {@link #setContext(AnalysedTextImpl)} needs to be called before using
	* this instance.<p>
	* NOTE: this constructor is needed to instantiate {@link AnalysedTextImpl}.
	* @param type the type. MUST NOT be <code>null</code> nor {@link SpanTypeEnum#Token}
	* @param start
	* @param end
	*/
	public SectionImpl(SpanTypeEnum type,int start,int end) {
	super(type,start,end);
	assert type != SpanTypeEnum.Token : "The SpanType 'Token' is NOT a Section - can not cover other spans!";
	}
	// public SectionImpl(AnalysedTextImpl at, SpanTypeEnum type,int start,int end) {
	// this(at,type,null,start,end);
	// }
	public SectionImpl(AnalysedTextImpl at, SpanTypeEnum type,Span relativeTo, int start,int end) {
	super(at,type,relativeTo,start,end);
	assert type != SpanTypeEnum.Token : "The SpanType 'Token' is NOT a Section - can not cover other spans!";
	}


	@Override
	@SuppressWarnings("unchecked")
	public Iterator<Span> getEnclosed(final Set<SpanTypeEnum> types) {
	return IteratorUtils.filteredIterator(getIterator(),
	new Predicate() {
	@Override
	public boolean evaluate(Object span) {
	return types.contains(((Span)span).getType());
	}
	});
	}
	/**
	* Iterator that does not throw {@link ConcurrentModificationException} but
	* considers modifications to the underlying set by using the
	* {@link NavigableMap#higherKey(Object)} method for iterating over the
	* Elements!<p>
	* This allows to add new {@link Span}s to the {@link Section} while
	* iterating (e.g. add {@link Token}s and/or {@link Chunk}s while iterating
	* over the {@link Sentence}s of an {@link AnalysedText})
	* @return the iterator
	*/
	protected Iterator<Span> getIterator(){
	//the end of this section
	final Span end = new SubSetHelperSpan(getEnd());
	return new Iterator<Span>() {

	boolean init = false;
	boolean removed = true;
	private Span span = SectionImpl.this;

	@Override
	public boolean hasNext() {
	return getNext() != null;
	}

	private Span getNext(){
	Span next = context.spans.higherKey(span);
	return next == null \|\| next.compareTo(end) >= 0 ? null : next;
	}

	@Override
	public Span next() {
	init = true;
	span = getNext();
	removed = false;
	if(span == null){
	throw new NoSuchElementException();
	}
	return span;
	}

	@Override
	public void remove() {
	if(!init){
	throw new IllegalStateException("remove can not be called before the first call to next");
	}
	if(removed){
	throw new IllegalStateException("the current Span was already removed!");
	}
	context.spans.remove(span);
	removed = true;
	}

	};
	}

	/**
	* Adds a Token <b>relative</b> to the current Span. Negative values for start and
	* end are allowed (e.g. to add a Token that starts some characters before
	* this one.<p>
	* Users that want to use <b>absolute</b> indexes need to use
	* <code><pre>
	* Span span; //any type of Span (Token, Chunk, Sentence ...)
	* span.getContext().addToken(absoluteStart, absoluteEnd)
	* </pre></code>
	* @param start the start relative to this Span
	* @param end the end relative to this span
	* @return the created and added token
	*/
	public Token addToken(int start,int end){
	return register(new TokenImpl(context, this,start, end));
	}
	/**
	* Registers the parsed - newly created token - with the {@link #getContext()}.
	* If the parsed {@link Span} already exists (an other Span instance with the
	* same values for {@link Span#getType()}, {@link Span#getStart()} and
	* {@link Span#getEnd()}) than the already present instance is returned
	* instead of the parsed one. In case the parsed Token does not already
	* exist the parsed instance is registered with the context and
	* returned.<p>
	* Typical usage:<pre><code>
	* public add{something}(int start, int end){
	* return register(new {somthing}Impl(context, this,start,end));
	* }
	* </code></pre>
	* {something} ... the Span type (Token, Chunk, Sentence ...)<p>
	* @param span the Span instance to register
	* @return the parsed or an already existing instance
	*/
	protected <T extends Span> T register(T span){
	//check if this token already exists
	@SuppressWarnings("unchecked")
	T current = (T)context.spans.get(span);
	//NOTE: type safety is ensured by the SpanTypeEnum in combination with the
	// Compareable implementation of SpanImpl.
	if(current == null){ //add the new one
	context.spans.put(span, span);
	return span;
	} else { //else return the already contained token
	return current;
	}
	}

	public Iterator<Token> getTokens(){
	return filter(Token.class);
	}
	/**
	* Internal helper to generate correctly generic typed {@link Iterator}s for
	* filtered {@link Span} types
	* @param interfaze the Span interface e.g. {@link Token}
	* @param clazz the actual Span implementation e.g. {@link TokenImpl}
	* @return the {@link Iterator} of type {interface} iterating over
	* {implementation} instances (e.g.
	* <code>{@link Iterator}<{@link Token}></code> returning
	* <code>{@link TokenImpl}</code> instances on calls to {@link Iterator#next()}
	*/
	@SuppressWarnings("unchecked")
	protected <T extends Span> Iterator<T> filter(
	final Class<T> clazz){
	return IteratorUtils.filteredIterator(
	getIterator(),
	new InstanceofPredicate(clazz));
	}

	/**
	* Internal helper class used for building {@link SortedSet#subSet(Object, Object)}.
	*
	* @author Rupert Westenthaler
	*
	*/
	class SubSetHelperSpan extends SpanImpl implements Span{
	/**
	* Create the start constraint for {@link SortedSet#subSet(Object, Object)}
	* @param start
	* @param end
	*/
	protected SubSetHelperSpan(int start,int end){
	super(SpanTypeEnum.Text, //lowest pos type
	start,end);
	setContext(SectionImpl.this.context);
	}
	/**
	* Creates the end constraint for {@link SortedSet#subSet(Object, Object)}
	* @param pos
	*/
	protected SubSetHelperSpan(int pos){
	super(SpanTypeEnum.Token, //highest pos type,
	pos,Integer.MAX_VALUE);
	setContext(SectionImpl.this.context);
	}
	}

	}