// blob: 18744932e85db47f407b62cb3d1f229e4a901b88 [file] [log] [blame]
package org.apache.ctakes.dictionary.cased.annotation;
import org.apache.ctakes.core.util.Pair;
import org.apache.ctakes.core.util.annotation.SemanticGroup;
import org.apache.ctakes.core.util.annotation.SemanticTui;
import org.apache.ctakes.dictionary.cased.encoder.CodeSchema;
import org.apache.ctakes.dictionary.cased.encoder.TermEncoding;
import org.apache.ctakes.dictionary.cased.lookup.DiscoveredTerm;
import org.apache.ctakes.dictionary.cased.util.textspan.ContiguousTextSpan;
import org.apache.ctakes.dictionary.cased.util.textspan.MagicTextSpan;
import org.apache.ctakes.dictionary.lookup2.util.CuiCodeUtil;
import org.apache.ctakes.dictionary.lookup2.util.TuiCodeUtil;
import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.log4j.Logger;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import javax.annotation.concurrent.Immutable;
import java.util.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;
/**
 * Static helpers that turn dictionary lookup results ({@link DiscoveredTerm}s and their
 * {@link TermEncoding}s) into {@link IdentifiedAnnotation}s carrying {@link UmlsConcept}s,
 * plus helpers that regroup terms by text span or by semantic group.
 * <p>
 * The class holds no mutable state and cannot be instantiated.
 *
 * @author SPF , chip-nlp
 * @version %I%
 * @since 8/26/2020
 */
@Immutable
final public class AnnotationCreatorUtil {

   static private final Logger LOGGER = Logger.getLogger( "AnnotationCreatorUtil" );

   // Placeholder encoding (empty schema, empty code) used when a term has no schema-specific encodings.
   static private final TermEncoding NULL_CODE = new TermEncoding( "", "" );

   // True for encodings that carry the preferred text rather than an ontology code.
   static private final Predicate<TermEncoding> isPrefTextEncoding
         = CodeSchema.PREFERRED_TEXT::isSchema;

   // True for encodings that carry a TUI rather than an ontology code.
   static private final Predicate<TermEncoding> isTuiEncoding = CodeSchema.TUI::isSchema;

   private AnnotationCreatorUtil() {
   }

   /**
    * Inverts a span-to-terms map into a term-to-spans map.
    *
    * @param allDiscoveredTermsMap discovered terms keyed by the span in which they were found.
    * @return for each discovered term, the set of spans (as {@link ContiguousTextSpan}s) in which it was found.
    */
   static public Map<DiscoveredTerm, Collection<MagicTextSpan>> mapTermSpans(
         final Map<Pair<Integer>, Collection<DiscoveredTerm>> allDiscoveredTermsMap ) {
      final Map<DiscoveredTerm, Collection<MagicTextSpan>> termSpanMap = new HashMap<>();
      for ( Map.Entry<Pair<Integer>, Collection<DiscoveredTerm>> spanTerms : allDiscoveredTermsMap.entrySet() ) {
         // One span object is shared by every term found in that span.
         final MagicTextSpan textSpan = new ContiguousTextSpan( spanTerms.getKey() );
         for ( DiscoveredTerm term : spanTerms.getValue() ) {
            termSpanMap.computeIfAbsent( term, t -> new HashSet<>() ).add( textSpan );
         }
      }
      return termSpanMap;
   }

   /**
    * Creates one annotation per discovered term over the given span and adds each to the cas indexes.
    * A term that has no entry in {@code termEncodingMap} is treated as having no encodings,
    * so it is annotated with unknown semantics instead of failing with a NullPointerException.
    *
    * @param jcas              cas in which the annotations are created.
    * @param textSpan          span of the annotations; value1 is used as begin, value2 as end.
    * @param discoveredTerms   terms discovered in the span.
    * @param termEncodingMap   encodings for each discovered term.
    * @param reassignSemantics overrides of the default semantic group for specific tuis; may be empty.
    */
   static public void createAnnotations( final JCas jcas,
                                         final Pair<Integer> textSpan,
                                         final Collection<DiscoveredTerm> discoveredTerms,
                                         final Map<DiscoveredTerm, Collection<TermEncoding>> termEncodingMap,
                                         final Map<SemanticTui, SemanticGroup> reassignSemantics ) {
      for ( DiscoveredTerm term : discoveredTerms ) {
         Collection<TermEncoding> encodings = termEncodingMap.get( term );
         if ( encodings == null ) {
            LOGGER.warn( "No encodings for discovered term with cui code " + term.getCuiCode()
                         + ", annotating with unknown semantics." );
            encodings = Collections.emptyList();
         }
         createAnnotation( jcas, textSpan, term, encodings, reassignSemantics );
      }
   }

   /**
    * Creates a single annotation for a discovered term, attaches its umls concepts and indexes it.
    * The annotation is built by the creator of the best semantic group among the term's encodings.
    *
    * @param jcas              cas in which the annotation is created.
    * @param textSpan          span of the annotation; value1 is used as begin, value2 as end.
    * @param discoveredTerm    term for which the annotation is created.
    * @param termEncodings     encodings of the term; must not be null.
    * @param reassignSemantics overrides of the default semantic group for specific tuis; may be empty.
    */
   static private void createAnnotation( final JCas jcas,
                                         final Pair<Integer> textSpan,
                                         final DiscoveredTerm discoveredTerm,
                                         final Collection<TermEncoding> termEncodings,
                                         final Map<SemanticTui, SemanticGroup> reassignSemantics ) {
      final SemanticGroup bestGroup
            = SemanticGroup.getBestGroup( getSemanticGroups( termEncodings, reassignSemantics ) );
      final IdentifiedAnnotation annotation = bestGroup.getCreator().apply( jcas );
      annotation.setTypeID( bestGroup.getCode() );
      annotation.setBegin( textSpan.getValue1() );
      annotation.setEnd( textSpan.getValue2() );
      annotation.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_DICT_LOOKUP );

      final Collection<UmlsConcept> umlsConcepts = createUmlsConcepts( jcas, discoveredTerm, termEncodings );
      final FSArray conceptArr = new FSArray( jcas, umlsConcepts.size() );
      int arrIdx = 0;
      for ( UmlsConcept umlsConcept : umlsConcepts ) {
         conceptArr.set( arrIdx, umlsConcept );
         arrIdx++;
      }
      annotation.setOntologyConceptArr( conceptArr );
      annotation.addToIndexes();
   }

   /**
    * Builds the umls concepts for a discovered term: one concept per (tui, code encoding) pair,
    * or one concept per tui with the placeholder encoding when the term has no code encodings.
    *
    * @param jcas           cas in which the concepts are created.
    * @param discoveredTerm term whose cui is set on every concept.
    * @param termEncodings  encodings of the term; must not be null.
    * @return the created concepts; never empty.
    */
   static private Collection<UmlsConcept> createUmlsConcepts( final JCas jcas,
                                                              final DiscoveredTerm discoveredTerm,
                                                              final Collection<TermEncoding> termEncodings ) {
      final String cui = CuiCodeUtil.getInstance().getAsCui( discoveredTerm.getCuiCode() );
      Collection<String> tuis = getTuis( termEncodings );
      if ( tuis.isEmpty() ) {
         // No tui encoding at all: fall back to the name of the UNKNOWN semantic tui.
         tuis = Collections.singletonList( SemanticTui.UNKNOWN.name() );
      }
      final String prefText = getPreferredText( termEncodings );
      // Encodings that are neither preferred text nor tui are the actual ontology codes (e.g. snomedCT).
      // The selection does not depend on the tui, so it is computed once.
      final List<TermEncoding> codeEncodings = termEncodings.stream()
                                                            .filter( isPrefTextEncoding.negate() )
                                                            .filter( isTuiEncoding.negate() )
                                                            .collect( Collectors.toList() );
      final Collection<UmlsConcept> umlsConcepts = new HashSet<>();
      for ( String tui : tuis ) {
         for ( TermEncoding encoding : codeEncodings ) {
            umlsConcepts.add( createUmlsConcept( jcas, cui, tui, prefText, encoding ) );
         }
      }
      if ( umlsConcepts.isEmpty() ) {
         // There were no special (e.g. snomedCT) encodings.
         for ( String tui : tuis ) {
            umlsConcepts.add( createUmlsConcept( jcas, cui, tui, prefText ) );
         }
      }
      return umlsConcepts;
   }

   /**
    * @param termEncodings encodings of a term.
    * @return the distinct preferred text values among the encodings, joined by ";". Empty if there are none.
    */
   static private String getPreferredText( final Collection<TermEncoding> termEncodings ) {
      return termEncodings.stream()
                          .filter( isPrefTextEncoding )
                          .map( TermEncoding::getSchemaCode )
                          .map( Object::toString )
                          .distinct()
                          .collect( Collectors.joining( ";" ) );
   }

   /**
    * @param termEncodings encodings of a term.
    * @return the distinct tuis among the encodings, formatted by {@link TuiCodeUtil#getAsTui}. Empty if there are none.
    */
   static private Collection<String> getTuis( final Collection<TermEncoding> termEncodings ) {
      return termEncodings.stream()
                          .filter( isTuiEncoding )
                          .map( TermEncoding::getSchemaCode )
                          .map( AnnotationCreatorUtil::parseTuiValue )
                          .map( TuiCodeUtil::getAsTui )
                          .collect( Collectors.toSet() );
   }

   /**
    * Creates a umls concept that has no schema-specific code, using the placeholder encoding.
    *
    * @param jcas          cas in which the concept is created.
    * @param cui           cui of the concept.
    * @param tui           tui of the concept; not set when null.
    * @param preferredText preferred text of the concept; not set when null or empty.
    * @return the new concept.
    */
   static private UmlsConcept createUmlsConcept( final JCas jcas,
                                                 final String cui,
                                                 final String tui,
                                                 final String preferredText ) {
      return createUmlsConcept( jcas, cui, tui, preferredText, NULL_CODE );
   }

   /**
    * Creates a umls concept with the coding scheme and code of the given encoding.
    *
    * @param jcas          cas in which the concept is created.
    * @param cui           cui of the concept.
    * @param tui           tui of the concept; not set when null.
    * @param preferredText preferred text of the concept; not set when null or empty.
    * @param termEncoding  supplies the coding scheme and the code of the concept.
    * @return the new concept.
    */
   static private UmlsConcept createUmlsConcept( final JCas jcas,
                                                 final String cui,
                                                 final String tui,
                                                 final String preferredText,
                                                 final TermEncoding termEncoding ) {
      final UmlsConcept umlsConcept = new UmlsConcept( jcas );
      umlsConcept.setCui( cui );
      if ( tui != null ) {
         umlsConcept.setTui( tui );
      }
      if ( preferredText != null && !preferredText.isEmpty() ) {
         umlsConcept.setPreferredText( preferredText );
      }
      umlsConcept.setCodingScheme( termEncoding.getSchema() );
      umlsConcept.setCode( termEncoding.getSchemaCode().toString() );
      return umlsConcept;
   }

   /**
    * Groups discovered terms by the semantic groups of their encodings.
    * A term appears under every group that one of its tui encodings maps to,
    * or under {@link SemanticGroup#UNKNOWN} when it has no tui encoding.
    *
    * @param termEncodingMap   encodings for each discovered term.
    * @param reassignSemantics overrides of the default semantic group for specific tuis; may be empty.
    * @return discovered terms keyed by semantic group.
    */
   static public Map<SemanticGroup, Collection<DiscoveredTerm>> mapSemanticTerms(
         final Map<DiscoveredTerm, Collection<TermEncoding>> termEncodingMap,
         final Map<SemanticTui, SemanticGroup> reassignSemantics ) {
      final Map<SemanticGroup, Collection<DiscoveredTerm>> semanticTermMap = new EnumMap<>( SemanticGroup.class );
      for ( Map.Entry<DiscoveredTerm, Collection<TermEncoding>> discoveredEncodings : termEncodingMap.entrySet() ) {
         final DiscoveredTerm term = discoveredEncodings.getKey();
         for ( SemanticGroup group : getSemanticGroups( discoveredEncodings.getValue(), reassignSemantics ) ) {
            semanticTermMap.computeIfAbsent( group, g -> new HashSet<>() ).add( term );
         }
      }
      return semanticTermMap;
   }

   /**
    * @param termEncodings     encodings of a term.
    * @param reassignSemantics overrides of the default semantic group for specific tuis; may be empty.
    * @return the semantic groups of the tui encodings, or only {@link SemanticGroup#UNKNOWN} if there is no tui encoding.
    */
   static private Collection<SemanticGroup> getSemanticGroups(
         final Collection<TermEncoding> termEncodings,
         final Map<SemanticTui, SemanticGroup> reassignSemantics ) {
      final Collection<SemanticGroup> groups = termEncodings.stream()
                                                            .filter( isTuiEncoding )
                                                            .map( e -> getSemanticGroup( e, reassignSemantics ) )
                                                            .collect( Collectors.toSet() );
      if ( groups.isEmpty() ) {
         return Collections.singletonList( SemanticGroup.UNKNOWN );
      }
      return groups;
   }

   /**
    * @param tuiEncoding       encoding whose schema code is a tui, as an Integer or as parsable text.
    * @param reassignSemantics overrides of the default semantic group for specific tuis; may be empty.
    * @return the semantic group for the tui of the encoding.
    */
   static private SemanticGroup getSemanticGroup( final TermEncoding tuiEncoding,
                                                  final Map<SemanticTui, SemanticGroup> reassignSemantics ) {
      return getSemanticGroup( parseTuiValue( tuiEncoding.getSchemaCode() ), reassignSemantics );
   }

   /**
    * @param tuiCode           numeric tui code.
    * @param reassignSemantics overrides of the default semantic group for specific tuis; may be empty.
    * @return the reassigned group for the tui if there is one, otherwise the tui's own group.
    */
   static private SemanticGroup getSemanticGroup( final int tuiCode,
                                                  final Map<SemanticTui, SemanticGroup> reassignSemantics ) {
      final SemanticTui tui = SemanticTui.getTui( tuiCode );
      final SemanticGroup reassignGroup = reassignSemantics.isEmpty() ? null : reassignSemantics.get( tui );
      return reassignGroup != null ? reassignGroup : tui.getGroup();
   }

   /**
    * Converts the schema code of a tui encoding to its numeric value.
    *
    * @param object schema code: an Integer, or any object whose text is an integer.
    * @return the numeric tui code, or the code of {@link SemanticTui#UNKNOWN} if the object is null
    * or its text is not an integer.
    */
   static private int parseTuiValue( final Object object ) {
      if ( object instanceof Integer ) {
         // Already numeric, no need to go through text.
         return (Integer)object;
      }
      if ( object == null ) {
         return SemanticTui.UNKNOWN.getCode();
      }
      try {
         return Integer.parseInt( object.toString() );
      } catch ( NumberFormatException nfE ) {
         return SemanticTui.UNKNOWN.getCode();
      }
   }

}