blob: 170ba0f730af1df4950bfb55329b26462eb41f83 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.clinicalpipeline.ae;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.annotation.OntologyConceptUtil;
import org.apache.ctakes.core.util.annotation.WordTokenUtil;
import org.apache.ctakes.typesystem.type.refsem.OntologyConcept;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.util.Pair;
import org.apache.ctakes.typesystem.type.util.Pairs;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
/**
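* UIMA annotator that prepares the CAS for output by performing final updates:
* assigns sequential IDs to identified annotations, sets normalized forms on word tokens,
* assigns segment IDs and concept OIDs, and stores the annotation version.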
*
* @author Mayo Clinic
*/
@PipeBitInfo(
      name = "Extraction Prepper",
      description = "Assigns IDs and Canonical text to Identified Annotations.",
      dependencies = { PipeBitInfo.TypeProduct.SECTION, PipeBitInfo.TypeProduct.IDENTIFIED_ANNOTATION }
)
public class ExtractionPrepAnnotator extends JCasAnnotator_ImplBase {

   // Attribute name of the Pair that holds the annotation version;
   // read from the "AnnotationVersionPropKey" configuration parameter.
   private String iv_annotVerPropKey;

   // Annotation version written into the CAS;
   // read from the "AnnotationVersion" configuration parameter.
   private int iv_annotVer;

   /**
    * Method invoked by UIMA framework to initialize this annotator.
    * Reads the "AnnotationVersion" (Integer) and "AnnotationVersionPropKey" (String)
    * configuration parameters.
    *
    * @param aCtx context from which the configuration parameters are read
    * @throws ResourceInitializationException if "AnnotationVersion" is absent or not an Integer,
    *                                         or if reading either parameter fails
    */
   public void initialize( UimaContext aCtx )
         throws ResourceInitializationException {
      super.initialize( aCtx );
      try {
         final Object version = aCtx.getConfigParameterValue( "AnnotationVersion" );
         // Fail with a descriptive cause instead of a bare NullPointerException / ClassCastException.
         if ( !(version instanceof Integer) ) {
            throw new IllegalArgumentException(
                  "Configuration parameter AnnotationVersion must be an Integer but was: " + version );
         }
         iv_annotVer = (Integer)version;
         iv_annotVerPropKey = (String)aCtx.getConfigParameterValue( "AnnotationVersionPropKey" );
      } catch ( RuntimeException e ) {
         throw new ResourceInitializationException( e );
      }
   }

   /**
    * Method invoked by UIMA framework to process a document.
    * Assigns annotation IDs, token normalized forms, segment IDs and concept OIDs,
    * then stores the annotation version.
    *
    * @param jcas CAS of the document being processed
    * @throws AnalysisEngineProcessException declared by the framework contract
    */
   public void process( JCas jcas )
         throws AnalysisEngineProcessException {
      generateUidValues( jcas );
      generateTokenNormForms( jcas );
      assignNamedEntityFeats( jcas );
      storeAnnotationVersion( jcas );
   }

   /**
    * Appends the annotation version as a new Pair to the first indexed Pairs object.
    * Does nothing when no Pairs object is indexed in the CAS.
    *
    * @param jcas CAS in which the annotation version is stored
    */
   private void storeAnnotationVersion( JCas jcas ) {
      final FSIterator<TOP> pairsIterator = jcas.getJFSIndexRepository().getAllIndexedFS( Pairs.type );
      if ( pairsIterator == null || !pairsIterator.hasNext() ) {
         return;
      }
      final Pairs props = (Pairs)pairsIterator.next();
      // A Pairs object whose array was never set is treated as holding zero pairs.
      final FSArray oldPairs = props.getPairs();
      final int oldSize = oldPairs == null ? 0 : oldPairs.size();
      // FSArray is fixed-size, so build a replacement that is one item bigger.
      final FSArray newPairs = new FSArray( jcas, oldSize + 1 );
      for ( int i = 0; i < oldSize; i++ ) {
         newPairs.set( i, oldPairs.get( i ) );
      }
      final Pair annotVerProp = new Pair( jcas );
      annotVerProp.setAttribute( iv_annotVerPropKey );
      annotVerProp.setValue( String.valueOf( iv_annotVer ) );
      // The annotation version goes in the last slot, after the existing pairs.
      newPairs.set( oldSize, annotVerProp );
      props.setPairs( newPairs );
   }

   /**
    * Generates UID values for all IdentifiedAnnotation objects.
    * This is just a numeric identifier, assigned sequentially from 0
    * in the iteration order of the annotation index.
    *
    * @param jcas CAS whose IdentifiedAnnotations receive IDs
    */
   private void generateUidValues( JCas jcas ) {
      int uid = 0;
      for ( Annotation annotation
            : jcas.getJFSIndexRepository().getAnnotationIndex( IdentifiedAnnotation.type ) ) {
         ((IdentifiedAnnotation)annotation).setId( uid );
         uid++;
      }
   }

   /**
    * Sets the normalized form of each <code>WordToken</code> to the canonical form
    * returned by <code>WordTokenUtil</code>. Other token types are left untouched.
    *
    * @param jcas CAS whose tokens are updated
    */
   private void generateTokenNormForms( final JCas jcas ) {
      final JFSIndexRepository indexes = jcas.getJFSIndexRepository();
      // Walk every BaseToken; only WordTokens are given a normalized form.
      for ( Annotation annotation : indexes.getAnnotationIndex( BaseToken.type ) ) {
         if ( annotation instanceof WordToken ) {
            final WordToken wordToken = (WordToken)annotation;
            wordToken.setNormalizedForm( WordTokenUtil.getCanonicalForm( wordToken ) );
         }
      }
   }

   /**
    * Assigns the ID of the covering Segment to each IdentifiedAnnotation, and an OID
    * of the form <code>code#codingScheme</code> to each of its OntologyConcepts.
    *
    * @param jcas CAS whose IdentifiedAnnotations and OntologyConcepts are updated
    */
   private void assignNamedEntityFeats( JCas jcas ) {
      final Map<Segment,Collection<IdentifiedAnnotation>> sectionAnnotationsMap
            = JCasUtil.indexCovered( jcas, Segment.class, IdentifiedAnnotation.class );
      for ( Map.Entry<Segment,Collection<IdentifiedAnnotation>> sectionAnnotations
            : sectionAnnotationsMap.entrySet() ) {
         final String segmentId = sectionAnnotations.getKey().getId();
         for ( IdentifiedAnnotation annotation : sectionAnnotations.getValue() ) {
            annotation.setSegmentID( segmentId );
            for ( OntologyConcept concept : OntologyConceptUtil.getOntologyConcepts( annotation ) ) {
               concept.setOid( concept.getCode() + '#' + concept.getCodingScheme() );
            }
         }
      }
   }
}