Add PipeBitInfo to EventTimeAnaforaWriter
Add simple token text and span writers:
- example extending AbstractJCasFileWriter
- core extending AbstractTableFileWriter
Add writers to BigPipeline
diff --git a/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenTableFileWriter.java b/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenTableFileWriter.java
new file mode 100644
index 0000000..4f2c497
--- /dev/null
+++ b/ctakes-core/src/main/java/org/apache/ctakes/core/cc/TokenTableFileWriter.java
@@ -0,0 +1,51 @@
+package org.apache.ctakes.core.cc;
+
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.*;
+
+/**
+ * Table writer that lists every base token's text next to its "begin,end" character span.
+ * @author SPF , chip-nlp
+ * @since {3/3/2023}
+ */
+@PipeBitInfo(
+      name = "Token Table Writer",
+      description = "Writes a table of base tokens and their spans in a directory tree.",
+      role = PipeBitInfo.Role.WRITER,
+      usables = { DOCUMENT_ID_PREFIX, BASE_TOKEN }
+)
+final public class TokenTableFileWriter extends AbstractTableFileWriter {
+
+   /** {@inheritDoc} Two columns: the token text and the token span. */
+   @Override
+   protected List<String> createHeaderRow( final JCas jCas ) {
+      return Arrays.asList( " Token Text ", " Text Span " );
+   }
+
+   /** {@inheritDoc} One row per base token, in the order JCasUtil.select returns them. */
+   @Override
+   protected List<List<String>> createDataRows( final JCas jCas ) {
+      final String documentText = jCas.getDocumentText();
+      final List<List<String>> rows = new ArrayList<>();
+      for ( BaseToken baseToken : JCasUtil.select( jCas, BaseToken.class ) ) {
+         rows.add( createRow( documentText, baseToken ) );
+      }
+      return rows;
+   }
+
+   /** Pairs the covered text (or "<EOL>" for a newline token) with the span written as "begin,end". */
+   static private List<String> createRow( final String documentText, final BaseToken token ) {
+      final String span = token.getBegin() + "," + token.getEnd();
+      final boolean isNewline = token instanceof NewlineToken;
+      return Arrays.asList( isNewline ? "<EOL>" : documentText.substring( token.getBegin(), token.getEnd() ), span );
+   }
+}
diff --git a/ctakes-examples/src/main/java/org/apache/ctakes/examples/cc/TokenSpanWriter.java b/ctakes-examples/src/main/java/org/apache/ctakes/examples/cc/TokenSpanWriter.java
new file mode 100644
index 0000000..de15585
--- /dev/null
+++ b/ctakes-examples/src/main/java/org/apache/ctakes/examples/cc/TokenSpanWriter.java
@@ -0,0 +1,54 @@
+package org.apache.ctakes.examples.cc;
+
+import org.apache.ctakes.core.cc.AbstractJCasFileWriter;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.util.log.DotLogger;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
+import org.apache.log4j.Logger;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.UimaContext;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.*;
+
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.*;
+
+/**
+ * Example writer: one text file per document, listing each base token as "text|begin,end".
+ * @author SPF , chip-nlp
+ * @since {3/3/2023}
+ */
+@PipeBitInfo(
+      name = "TokenSpanWriter",
+      description = "Writes files listing base tokens and their spans in a directory tree.",
+      role = PipeBitInfo.Role.WRITER,
+      usables = { DOCUMENT_ID_PREFIX, BASE_TOKEN }
+)
+public class TokenSpanWriter extends AbstractJCasFileWriter {
+
+   // If you do not need to utilize the entire cas, or need more than the doc cas, consider AbstractFileWriter<T>.
+   static private final Logger LOGGER = Logger.getLogger( "TokenSpanWriter" );
+   // to add a configuration parameter, type "param" and hit tab.
+
+   /**
+    * {@inheritDoc} Writes {documentId}_tokenSpans.txt in outputDir as UTF-8; fileName is not used.
+    */
+   @Override
+   public void writeFile( JCas jCas, String outputDir, String documentId, String fileName ) throws IOException {
+      final File file = new File( outputDir, documentId + "_tokenSpans.txt" );
+      final String docText = jCas.getDocumentText();
+      // FileWriter encodes with the platform default charset; name UTF-8 so output is the same everywhere.
+      try ( Writer writer = new BufferedWriter(
+            new OutputStreamWriter( new FileOutputStream( file ), java.nio.charset.StandardCharsets.UTF_8 ) ) ) {
+         for ( BaseToken token : JCasUtil.select( jCas, BaseToken.class ) ) {
+            final int begin = token.getBegin();
+            final int end = token.getEnd();
+            final String text = token instanceof NewlineToken ? "<EOL>" : docText.substring( begin, end );
+            writer.write( text + "|" + begin + "," + end + "\n" );
+         }
+      }
+   }
+
+}
\ No newline at end of file
diff --git a/ctakes-examples/src/user/resources/org/apache/ctakes/examples/pipeline/BigPipeline.piper b/ctakes-examples/src/user/resources/org/apache/ctakes/examples/pipeline/BigPipeline.piper
index 4abd961..8651c8d 100644
--- a/ctakes-examples/src/user/resources/org/apache/ctakes/examples/pipeline/BigPipeline.piper
+++ b/ctakes-examples/src/user/resources/org/apache/ctakes/examples/pipeline/BigPipeline.piper
@@ -36,19 +36,26 @@
// Coreferences (e.g. patient = he).
load CorefSubPipe
+// Token covered text and token span offsets. Write bsv (default) and html styles.
+add TokenTableFileWriter SubDirectory=bsv_tokens
+add TokenTableFileWriter SubDirectory=html_tokens TableType=HTML
+
// Html output, write to subdirectory.
add pretty.html.HtmlTextWriter SubDirectory=html
// Text output, write to subdirectory.
add pretty.plaintext.PrettyTextWriterFit SubDirectory=text
-// Table output, write to subdirectory. Write bsv (default) and html styles.
+// Table output, write to subdirectory. Write bsv (default), csv and html styles.
add SemanticTableFileWriter SubDirectory=bsv_table
+add SemanticTableFileWriter SubDirectory=csv_table TableType=CSV
add SemanticTableFileWriter SubDirectory=html_table TableType=HTML
-// XMI output. Warning: these can be very large.
-//writeXmis
+// XMI output, write to subdirectory. Warning: these can be very large.
add FileTreeXmiWriter SubDirectory=xmi
+// Temporal Events and Times in Anafora format, write to subdirectory.
+add EventTimeAnaforaWriter SubDirectory=anafora
+
// Write some information about the run.
addLast org.apache.ctakes.core.util.log.FinishedLogger
\ No newline at end of file
diff --git a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/cc/EventTimeAnaforaWriter.java b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/cc/EventTimeAnaforaWriter.java
index df0c361..80fa1a5 100644
--- a/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/cc/EventTimeAnaforaWriter.java
+++ b/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/cc/EventTimeAnaforaWriter.java
@@ -1,6 +1,7 @@
package org.apache.ctakes.temporal.cc;
import org.apache.ctakes.core.cc.AbstractJCasFileWriter;
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.annotation.IdentifiedAnnotationUtil;
import org.apache.ctakes.typesystem.type.refsem.Event;
import org.apache.ctakes.typesystem.type.refsem.EventProperties;
@@ -23,10 +24,19 @@
import java.text.SimpleDateFormat;
import java.util.*;
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.BASE_TOKEN;
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.DOCUMENT_ID_PREFIX;
+
/**
* @author SPF , chip-nlp
* @since {3/2/2023}
*/
+@PipeBitInfo(
+ name = "Event Time Anafora Writer",
+ description = "Writes Temporal Events and Times in Anafora format.",
+ role = PipeBitInfo.Role.WRITER,
+ usables = { DOCUMENT_ID_PREFIX, BASE_TOKEN }
+)
final public class EventTimeAnaforaWriter extends AbstractJCasFileWriter {
/**