| package org.apache.ctakes.temporal.cc; |
| |
| import org.apache.ctakes.core.cc.AbstractJCasFileWriter; |
| import org.apache.ctakes.core.util.annotation.IdentifiedAnnotationUtil; |
| import org.apache.ctakes.typesystem.type.refsem.Event; |
| import org.apache.ctakes.typesystem.type.refsem.EventProperties; |
| import org.apache.ctakes.typesystem.type.textsem.EventMention; |
| import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation; |
| import org.apache.ctakes.typesystem.type.textsem.TimeMention; |
| import org.apache.uima.fit.descriptor.ConfigurationParameter; |
| import org.apache.uima.fit.util.JCasUtil; |
| import org.apache.uima.jcas.JCas; |
| import org.w3c.dom.Document; |
| import org.w3c.dom.Element; |
| |
| import javax.xml.parsers.DocumentBuilder; |
| import javax.xml.parsers.DocumentBuilderFactory; |
| import javax.xml.parsers.ParserConfigurationException; |
| import javax.xml.transform.*; |
| import javax.xml.transform.dom.DOMSource; |
| import javax.xml.transform.stream.StreamResult; |
| import java.io.*; |
| import java.text.SimpleDateFormat; |
| import java.util.*; |
| |
| /** |
| * @author SPF , chip-nlp |
| * @since {3/2/2023} |
| */ |
| final public class EventTimeAnaforaWriter extends AbstractJCasFileWriter { |
| |
| /** |
| * Sometimes you want a file extension that specifies specifics about the corpus, creator and phase. |
| * e.g. ".UmlsDeepPhe.dave.completed.xml" |
| */ |
| static public final String PARAM_FILE_EXTENSION = "FileExtension"; |
| @ConfigurationParameter( |
| name = PARAM_FILE_EXTENSION, |
| description = "The extension for the written files. Default is .EventTime.ctakes.completed.xml", |
| defaultValue = ".EventTime.ctakes.completed.xml", |
| mandatory = false |
| ) |
| private String _fileExtension; |
| |
| /** |
| * Sometimes you want a file extension that specifies specifics about the corpus, creator and phase. |
| * e.g. ".UmlsDeepPhe.dave.completed.xml" |
| */ |
| static public final String PARAM_ONLY_TIME_EVENTS = "OnlyTemporalEvents"; |
| @ConfigurationParameter( |
| name = PARAM_ONLY_TIME_EVENTS, |
| description = "Only use temporal events, not those created by dictionary lookup. Default is yes.", |
| defaultValue = "yes", |
| mandatory = false |
| ) |
| private String _onlyTemporalEvents; |
| |
| |
| static private final String SAVE_TIME_PATTERN = "yyyy-MMdd-HH:mm"; |
| static private final SimpleDateFormat SAVE_TIME_FORMAT = new SimpleDateFormat( SAVE_TIME_PATTERN); |
| |
| |
| private boolean onlyTemporalEvents() { |
| return _onlyTemporalEvents.equalsIgnoreCase( "yes" ) |
| || _onlyTemporalEvents.equalsIgnoreCase( "true" ); |
| } |
| |
| /** |
| * Writes some document metadata and discovered event information. |
| */ |
| @Override |
| public void writeFile( final JCas jCas, |
| final String outputDir, |
| final String documentId, |
| final String fileName ) throws IOException { |
| final File file = new File( outputDir, fileName + _fileExtension ); |
| try { |
| final DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); |
| final DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); |
| final Document doc = docBuilder.newDocument(); |
| |
| final Element rootElement = doc.createElement( "data" ); |
| rootElement.appendChild( createInfoElement( doc ) ); |
| rootElement.appendChild( createSchemaElement( doc ) ); |
| rootElement.appendChild( createAnnotationsElement( jCas, documentId, doc ) ); |
| doc.appendChild( rootElement ); |
| |
| // boilerplate xml-writing code: |
| final TransformerFactory transformerFactory = TransformerFactory.newInstance(); |
| final Transformer transformer = transformerFactory.newTransformer(); |
| transformer.setOutputProperty( OutputKeys.INDENT, "yes" ); |
| transformer.setOutputProperty( OutputKeys.METHOD, "xml" ); |
| final DOMSource source = new DOMSource( doc ); |
| final StreamResult result = new StreamResult( file ); |
| transformer.transform( source, result ); |
| } catch ( ParserConfigurationException | TransformerException multE ) { |
| throw new IOException( multE ); |
| } |
| } |
| |
| |
| static private Element createInfoElement( final Document doc ) { |
| final Element info = doc.createElement( "info" ); |
| final Element saveTime = doc.createElement( "savetime" ); |
| final String saveTimeText = SAVE_TIME_FORMAT.format( new Date() ); |
| saveTime.setTextContent( saveTimeText ); |
| final Element progress = doc.createElement( "progress" ); |
| progress.setTextContent( "completed" ); |
| info.appendChild( saveTime ); |
| info.appendChild( progress ); |
| return info; |
| } |
| |
| static private Element createSchemaElement( final Document doc ) { |
| final Element schema = doc.createElement( "schema" ); |
| schema.setAttribute( "path", "./" ); |
| schema.setAttribute( "protocol", "file" ); |
| schema.setTextContent( "temporal-schema.xml" ); |
| return schema; |
| } |
| |
| private Element createAnnotationsElement( final JCas jCas, |
| final String documentId, |
| final Document doc ) { |
| final Element annotations = doc.createElement( "annotations" ); |
| int nextIdNumber = addEventElements( jCas, documentId, 1, annotations, doc ); |
| nextIdNumber = addTimeElements( jCas, documentId, nextIdNumber, annotations, doc ); |
| return annotations; |
| } |
| |
| private int addEventElements( final JCas jCas, |
| final String documentId, |
| final int startId, |
| final Element annotations, |
| final Document doc ) { |
| final List<EventMention> eventMentions = new ArrayList<>( JCasUtil.select( jCas, EventMention.class ) ); |
| eventMentions.sort( Comparator.comparingInt( EventMention::getBegin ) |
| .thenComparingInt( EventMention::getEnd ) ); |
| final boolean onlyTemporalEvents = onlyTemporalEvents(); |
| int idNumber = startId; |
| for ( EventMention eventMention : eventMentions ) { |
| // this ensures we are only looking at THYME events and not ctakes-dictionary-lookup events |
| if ( onlyTemporalEvents && !eventMention.getClass().equals( EventMention.class ) ) { |
| continue; |
| } |
| annotations.appendChild( createEventElement( eventMention, documentId, idNumber, doc ) ); |
| idNumber++; |
| } |
| return idNumber + 1; |
| } |
| |
| static private Element createEventElement( final EventMention eventMention, |
| final String documentId, |
| final int idNumber, |
| final Document doc ) { |
| |
| final Element event = createBaseElement( eventMention, "EVENT", documentId, idNumber, doc ); |
| event.appendChild( createEventPropertiesElement( eventMention, doc ) ); |
| return event; |
| } |
| |
| static private Element createEventPropertiesElement( final EventMention eventMention, |
| final Document doc ) { |
| final Event event = eventMention.getEvent(); |
| if ( event == null ) { |
| return createNullEventProperties( IdentifiedAnnotationUtil.isNegated( eventMention ) , doc ); |
| } |
| final Element properties = doc.createElement( "properties" ); |
| final EventProperties eventProperties = event.getProperties(); |
| Element docTimeRel = doc.createElement( "DocTimeRel" ); |
| final String dtrContent = eventProperties.getDocTimeRel(); |
| docTimeRel.setTextContent( dtrContent ); |
| final Element eventType = doc.createElement( "Type" ); |
| eventType.setTextContent( "N/A" ); |
| final Element degree = doc.createElement( "Degree" ); |
| degree.setTextContent( "N/A" ); |
| final Element polarity = doc.createElement( "Polarity" ); |
| final String polarityValue = IdentifiedAnnotationUtil.isNegated( eventMention ) ? "NEG" : "POS"; |
| polarity.setTextContent( polarityValue ); |
| final Element contextMode = doc.createElement( "ContextualModality" ); |
| contextMode.setTextContent( eventProperties.getContextualModality() ); |
| final Element contextAspect = doc.createElement( "ContextualAspect" ); |
| contextAspect.setTextContent( eventProperties.getContextualAspect() ); |
| final Element Permanence = doc.createElement( "Permanence" ); |
| Permanence.setTextContent( "UNDETERMINED" ); |
| properties.appendChild( docTimeRel ); |
| properties.appendChild( polarity ); |
| properties.appendChild( degree ); |
| properties.appendChild( eventType ); |
| properties.appendChild( contextMode ); |
| properties.appendChild( contextAspect ); |
| properties.appendChild( Permanence ); |
| return properties; |
| } |
| |
| static private Element createNullEventProperties( final boolean isNegated, final Document doc ) { |
| final Element properties = doc.createElement( "properties" ); |
| Element docTimeRel = doc.createElement( "DocTimeRel" ); |
| docTimeRel.setTextContent( "Overlap" ); |
| final Element eventType = doc.createElement( "Type" ); |
| eventType.setTextContent( "N/A" ); |
| final Element degree = doc.createElement( "Degree" ); |
| degree.setTextContent( "N/A" ); |
| final Element polarity = doc.createElement( "Polarity" ); |
| final String polarityValue = isNegated ? "NEG" : "POS"; |
| polarity.setTextContent( polarityValue ); |
| final Element contextMode = doc.createElement( "ContextualModality" ); |
| contextMode.setTextContent( "UNDETERMINED" ); |
| final Element contextAspect = doc.createElement( "ContextualAspect" ); |
| contextAspect.setTextContent( "UNDETERMINED" ); |
| final Element Permanence = doc.createElement( "Permanence" ); |
| Permanence.setTextContent( "UNDETERMINED" ); |
| properties.appendChild( docTimeRel ); |
| properties.appendChild( polarity ); |
| properties.appendChild( degree ); |
| properties.appendChild( eventType ); |
| properties.appendChild( contextMode ); |
| properties.appendChild( contextAspect ); |
| properties.appendChild( Permanence ); |
| return properties; |
| } |
| |
| |
| private int addTimeElements( final JCas jCas, |
| final String documentId, |
| final int startId, |
| final Element annotations, |
| final Document doc ) { |
| final List<TimeMention> timeMentions = new ArrayList<>( JCasUtil.select( jCas, TimeMention.class ) ); |
| timeMentions.sort( Comparator.comparingInt( TimeMention::getBegin ) |
| .thenComparingInt( TimeMention::getEnd ) ); |
| int idNumber = startId; |
| for ( TimeMention timeMention : timeMentions ) { |
| annotations.appendChild( createTimeElement( timeMention, documentId, idNumber, doc ) ); |
| idNumber++; |
| } |
| return idNumber + 1; |
| } |
| |
| static private Element createTimeElement( final TimeMention timeMention, |
| final String documentId, |
| final int idNumber, |
| final Document doc ) { |
| final Element properties = doc.createElement( "properties" ); |
| String typeName = ""; |
| final String timeClass = timeMention.getTimeClass(); |
| if ( timeClass != null && (timeClass.equals( "DOCTIME" ) || timeClass.equals( "SECTIONTIME" ) ) ) { |
| typeName = timeClass; |
| properties.setTextContent( "" ); |
| } else { |
| typeName = "TIMEX3"; |
| final Element classE = doc.createElement( "Class" ); |
| classE.setTextContent( timeClass ); |
| properties.appendChild( classE ); |
| } |
| final Element time = createBaseElement( timeMention, typeName, documentId, idNumber, doc ); |
| time.appendChild( properties ); |
| return time; |
| } |
| |
| |
| static private Element createBaseElement( final IdentifiedAnnotation annotation, |
| final String typeName, |
| final String documentId, |
| final int idNumber, |
| final Document doc ) { |
| |
| final Element base = doc.createElement( "entity" ); |
| final String eventID = idNumber + "@e@" + documentId + "@system"; |
| final Element id = doc.createElement( "id" ); |
| id.setTextContent( eventID ); |
| final Element span = doc.createElement( "span" ); |
| span.setTextContent( annotation.getBegin() + "," + annotation.getEnd() ); |
| final Element type = doc.createElement( "type" ); |
| type.setTextContent( typeName ); |
| final Element parentsType = doc.createElement( "parentsType" ); |
| parentsType.setTextContent( "TemporalEntities" ); |
| base.appendChild( id ); |
| base.appendChild( span ); |
| base.appendChild( type ); |
| base.appendChild( parentsType ); |
| return base; |
| } |
| |
| |
| } |