uimaj-2.2.1-incubating/uimaj-examples/src/main/java/org/apache/uima/examples/cas/RegExAnnotator.java - uima-uimaj - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.uima.examples.cas;

 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;

 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
 import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
 import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
 import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
 import org.apache.uima.analysis_engine.annotator.TextAnnotator;
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.cas.FSTypeConstraint;
 import org.apache.uima.cas.FeatureStructure;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.TypeSystem;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.resource.ResourceAccessException;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.util.Level;

 /**
  * Annotator that finds substrings of the input document that match regular expressions.
  * <p>
  * There are two ways to specify the regular expressions - via configuration parameters or via an
  * external resource file.
  * <p>
  * This annotator takes the following optional configuration parameters:
  * <ul>
  * <li><code>Patterns</code> - array of Strings indicating regular expressions to match. The
  * pattern language is described at <a
  * href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">
  * http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html</a>) </li>
  * <li><code>TypeNames</code> - array of Strings indicating names of Types to be created from the
  * patterns. </li>
  * <li><code>ContainingAnnotationTypes</code> - an array of input annotation types. This
  * annotator will only produce new annotations that are contained within existing annotations of
  * these types. (This is optional.) </li>
  * <li><code>AnnotateEntireContainingAnnotation</code> - When the ContainingAnnotationTypes
  * parameter is specified, a value of true for this parameter will cause the entire containing
  * annotation to be used as the span of the new annotation, rather than just the span of the regular
  * expression match. This can be used to "classify" previously created annotations according to
  * whether or not they contain text matching a regular expression. When this parameter has no
  * value it is treated as false. </li>
  * </ul>
  * <p>
  * The indices of the <code>Patterns</code> and <code>TypeNames</code> arrays correspond, so
  * that a substring that matches <code>Patterns[i]</code> will result in an annotation of type
  * <code>TypeNames[i]</code>.
  * <p>
  * It is also possible to provide an external resource file that declares the annotation type names
  * and the regular expressions to match. The annotator will look for this file under the resource
  * key "PatternFile". The file format is as follows:
  * <ul>
  * <li>Empty lines and lines starting with # or whitespace are ignored</li>
  * <li>Lines starting with % indicate an annotation type</li>
  * <li>All other lines are regular expressions, using the same syntax described for the
  * <code>Patterns</code> configuration parameter.</li>
  * </ul>
  * If a regular expression is matched, it will be annotated with the last annotation type declared
  * (the nearest preceding line starting with %).
  */
 public class RegExAnnotator extends CasAnnotator_ImplBase {
   /** Name of the resource bundle that holds this annotator's error messages. */
   public static final String MESSAGE_DIGEST = "org.apache.uima.examples.cas.RegExAnnotator_Messages";

   /**
    * Performs any startup tasks required by this annotator. This implementation reads the
    * configuration parameters, parses the optional "PatternFile" resource and compiles the regular
    * expressions.
    *
    * @param aContext
    *          the context giving access to configuration parameters and resources
    *
    * @throws ResourceInitializationException
    *           if the Patterns and TypeNames arrays do not line up, if no pattern is configured, if
    *           a pattern is syntactically invalid or matches the empty string, or if the pattern
    *           file cannot be accessed or read
    *
    * @see TextAnnotator#initialize(AnnotatorContext)
    */
   public void initialize(UimaContext aContext) throws ResourceInitializationException {
     super.initialize(aContext);
     try {
       // Retrieve configuration parameters
       String[] patternStrings = (String[]) getContext().getConfigParameterValue("Patterns");
       String[] typeNames = (String[]) getContext().getConfigParameterValue("TypeNames");
       mContainingAnnotationTypeNames = (String[]) getContext().getConfigParameterValue(
               "ContainingAnnotationTypes");

       // The flag is only consulted when containing types are configured. A parameter without a
       // value comes back as null, which must not reach process() - fall back to FALSE.
       Boolean annotateEntire = null;
       if (mContainingAnnotationTypeNames != null && mContainingAnnotationTypeNames.length > 0) {
         annotateEntire = (Boolean) getContext().getConfigParameterValue(
                 "AnnotateEntireContainingAnnotation");
       }
       mAnnotateEntireContainingAnnotation = (annotateEntire != null) ? annotateEntire
               : Boolean.FALSE;

       // Build a list of type names and a parallel list of pattern groups (String[]), so that the
       // patterns at patternGroups[i] belong to the annotation type at mTypeNames[i].
       mTypeNames = new ArrayList();
       List patternGroups = new ArrayList();
       if (patternStrings != null) {
         if (typeNames == null || typeNames.length != patternStrings.length) {
           // error message in external message digest
           throw new ResourceInitializationException(MESSAGE_DIGEST,
                   "type_pattern_array_length_mismatch", new Object[0]);
         }
         mTypeNames.addAll(Arrays.asList(typeNames));
         for (int i = 0; i < patternStrings.length; i++) {
           patternGroups.add(new String[] { patternStrings[i] });
         }
       }

       // if PatternFile resource exists, parse it and append its types and pattern groups
       InputStream in = getContext().getResourceAsStream("PatternFile");
       if (in != null) {
         try {
           readPatternFile(in, patternGroups);
         } finally {
           in.close();
         }
       }

       // make sure there is at least one pattern
       if (patternGroups.isEmpty()) {
         throw new ResourceInitializationException(
                 AnnotatorConfigurationException.ONE_PARAM_REQUIRED,
                 new Object[] { "Patterns, Pattern File" });
       }

       mPatterns = compilePatterns(patternGroups);
     } catch (ResourceAccessException e) {
       throw new ResourceInitializationException(e);
     } catch (IOException e) {
       throw new ResourceInitializationException(e);
     }
   }

   /**
    * Parses a pattern file, appending every declared type name to {@link #mTypeNames} and the
    * matching group of pattern strings (a <code>String[]</code>) to <code>patternGroups</code>.
    * The stream is not closed here; that is left to the caller.
    *
    * @param in
    *          stream positioned at the start of the pattern file
    * @param patternGroups
    *          list receiving one <code>String[]</code> per type declared in the file
    *
    * @throws IOException
    *           if reading from the stream fails
    */
   private void readPatternFile(InputStream in, List patternGroups) throws IOException {
     List patternsForCurrentType = new ArrayList();
     boolean foundFirstType = false;
     // NOTE(review): the file is decoded with the platform default charset - confirm whether a
     // fixed encoding is expected for pattern files.
     BufferedReader reader = new BufferedReader(new InputStreamReader(in));

     for (String line = reader.readLine(); line != null; line = reader.readLine()) {
       // skip blank lines, comments and lines that start with whitespace
       if (line.length() == 0 || line.startsWith("#")
               || Character.isWhitespace(line.charAt(0))) {
         continue;
       }
       if (line.startsWith("%")) {
         // a new type begins: flush the patterns collected for the previous type (if any)
         if (foundFirstType) {
           patternGroups.add(toStringArray(patternsForCurrentType));
           patternsForCurrentType.clear();
         }
         // trailing blanks after the type name would otherwise make the type lookup fail
         mTypeNames.add(line.substring(1).trim());
         foundFirstType = true;
       } else {
         // Regular expression line. Patterns that precede the first % line stay in the buffer
         // and therefore end up grouped with the first declared type.
         patternsForCurrentType.add(line);
       }
     }

     // Flush the last group, but only if a type was declared. Otherwise a group without a type
     // would be added, breaking the index correspondence with mTypeNames and hiding the
     // "no patterns configured" error for an empty file.
     if (foundFirstType) {
       patternGroups.add(toStringArray(patternsForCurrentType));
     }
   }

   /**
    * Copies a list of Strings into a new String array.
    */
   private static String[] toStringArray(List strings) {
     return (String[]) strings.toArray(new String[strings.size()]);
   }

   /**
    * Compiles every pattern string of every group.
    *
    * @param patternGroups
    *          list of <code>String[]</code>, one entry per annotation type
    *
    * @return the compiled patterns, indexed as [group][pattern within group]
    *
    * @throws ResourceInitializationException
    *           if a pattern has a syntax error or matches the empty string
    */
   private static Pattern[][] compilePatterns(List patternGroups)
           throws ResourceInitializationException {
     Pattern[][] compiled = new Pattern[patternGroups.size()][];
     for (int i = 0; i < compiled.length; i++) {
       String[] pats = (String[]) patternGroups.get(i);
       compiled[i] = new Pattern[pats.length];
       for (int j = 0; j < pats.length; j++) {
         try {
           compiled[i][j] = Pattern.compile(pats[j]);
         } catch (PatternSyntaxException e) {
           throw new ResourceInitializationException(MESSAGE_DIGEST, "regex_syntax_error",
                   new Object[] { pats[j] }, e);
         }
         // reject patterns that match the empty string; they can never yield a useful annotation
         if (compiled[i][j].matcher("").matches()) {
           throw new ResourceInitializationException(MESSAGE_DIGEST,
                   "regex_matches_empty_string", new Object[] { pats[j] });
         }
       }
     }
     return compiled;
   }

   /**
    * Acquires references to CAS Type objects that are later used during the
    * {@link #process(CAS)} method.
    *
    * @param aTypeSystem
    *          the type system in which the configured type names are looked up
    *
    * @throws AnalysisEngineProcessException
    *           if one of the configured type names is not defined in the type system
    *
    * @see TextAnnotator#typeSystemInit(TypeSystem)
    */
   public void typeSystemInit(TypeSystem aTypeSystem) throws AnalysisEngineProcessException {
     // get references to annotation types we will create
     mCASTypes = new Type[mTypeNames.size()];
     for (int i = 0; i < mCASTypes.length; i++) {
       mCASTypes[i] = resolveType(aTypeSystem, (String) mTypeNames.get(i));
     }

     // get references to Containing Annotation Types
     if (mContainingAnnotationTypeNames == null) {
       mContainingAnnotationTypes = null;
     } else {
       mContainingAnnotationTypes = new Type[mContainingAnnotationTypeNames.length];
       for (int i = 0; i < mContainingAnnotationTypes.length; i++) {
         mContainingAnnotationTypes[i] = resolveType(aTypeSystem,
                 mContainingAnnotationTypeNames[i]);
       }
     }
   }

   /**
    * Looks up a type by name, failing with a TYPE_NOT_FOUND error if it is not defined.
    */
   private Type resolveType(TypeSystem aTypeSystem, String aTypeName)
           throws AnalysisEngineProcessException {
     Type type = aTypeSystem.getType(aTypeName);
     if (type == null) {
       throw new AnalysisEngineProcessException(AnnotatorInitializationException.TYPE_NOT_FOUND,
               new Object[] { getClass().getName(), aTypeName });
     }
     return type;
   }

   /**
    * Invokes this annotator's analysis logic. This annotator uses the java regular expression
    * package to find annotations using the regular expressions defined by its configuration
    * parameters. Only types contained in the current result specification are produced.
    *
    * @param aCAS
    *          the CAS to process
    *
    * @throws AnalysisEngineProcessException
    *           if a failure occurs during processing.
    *
    * @see CasAnnotator_ImplBase#process(CAS)
    */
   public void process(CAS aCAS) throws AnalysisEngineProcessException {
     try {
       String docText = aCAS.getDocumentText();
       // Determine which regions of the document we are going to annotate
       int[] rangesToAnnotate = getRangesToAnnotate(aCAS);
       boolean annotateEntire = mAnnotateEntireContainingAnnotation.booleanValue();

       // We treat the rangesToAnnotate array as a list of (start,end) offset
       // pairs. Iterate through all of these pairs.
       for (int i = 0; i < rangesToAnnotate.length; i += 2) {
         int startPos = rangesToAnnotate[i];
         int endPos = rangesToAnnotate[i + 1];
         // get the substring of text to be annotated
         String subText = docText.substring(startPos, endPos);

         // iterate over all annotation types for which we have patterns
         for (int j = 0; j < mCASTypes.length; j++) {
           // skip types that the ResultSpec does not ask for
           if (!getResultSpecification().containsType(mCASTypes[j].getName())) {
             continue;
           }
           // try to match each pattern that we have for this annotation type
           for (int k = 0; k < mPatterns[j].length; k++) {
             Matcher matcher = mPatterns[j][k].matcher(subText);
             int pos = 0;
             while (pos < subText.length() && matcher.find(pos)) {
               // only build the message when it will actually be logged
               if (getContext().getLogger().isLoggable(Level.FINER)) {
                 getContext().getLogger().log(Level.FINER,
                         "RegEx match found: [" + matcher.group() + "]");
               }

               if (annotateEntire) {
                 // one match is enough to classify the whole containing annotation
                 addAnnotation(aCAS, mCASTypes[j], startPos, endPos);
                 break;
               }

               if (matcher.end() > matcher.start()) {
                 addAnnotation(aCAS, mCASTypes[j], startPos + matcher.start(), startPos
                         + matcher.end());
                 pos = matcher.end();
               } else {
                 // Zero-length match (e.g. a boundary or lookahead pattern): there is nothing
                 // to annotate, and searching again from the same offset would find the same
                 // match forever - step one character forward instead.
                 pos = matcher.end() + 1;
               }
             }
           }
         }
       }
     } catch (Exception e) {
       throw new AnalysisEngineProcessException(e);
     }
   }

   /**
    * Creates an annotation of the given type over [aBegin, aEnd) and adds it to the indexes.
    */
   private void addAnnotation(CAS aCAS, Type aType, int aBegin, int aEnd) {
     FeatureStructure fs = aCAS.createAnnotation(aType, aBegin, aEnd);
     aCAS.getIndexRepository().addFS(fs);
   }

   /**
    * Utility method that determines which subranges of the document text should be annotated by this
    * annotator. This is done as follows:
    * <ul>
    * <li>If <code>mContainingAnnotationTypes</code> is <code>null</code> or empty, the entire
    * document is eligible for annotation.</li>
    * <li>Otherwise the CAS is queried for existing annotations of any of the Types in
    * <code>mContainingAnnotationTypes</code>, and the only subranges of the document eligible for
    * annotation are those subranges contained within such annotations.</li>
    * </ul>
    *
    * @param aCAS
    *          CAS currently being processed
    *
    * @return an array of integers indicating the document subranges eligible for annotation. Begin
    *         and end positions of the subranges are stored in successive elements of the array. For
    *         example, elements 0 and 1 are the start and end of the first subrange; elements 2 and 3
    *         are the start and end of the second subrange, and so on.
    */
   protected int[] getRangesToAnnotate(CAS aCAS) {
     if (mContainingAnnotationTypes == null || mContainingAnnotationTypes.length == 0) {
       // ContainingAnnotationTypes is not set - the whole document is eligible
       return new int[] { 0, aCAS.getDocumentText().length() };
     }

     // restrict an iterator over all annotations to instances of the containing types
     FSTypeConstraint constraint = aCAS.getConstraintFactory().createTypeConstraint();
     for (int i = 0; i < mContainingAnnotationTypes.length; i++) {
       constraint.add(mContainingAnnotationTypes[i]);
     }
     FSIterator iterator = aCAS.createFilteredIterator(aCAS.getAnnotationIndex().iterator(),
             constraint);

     // collect the matching annotations first, since their number is not known up front
     List annotationList = new ArrayList();
     while (iterator.isValid()) {
       annotationList.add(iterator.get());
       iterator.moveToNext();
     }

     // store begin and end of each annotation in successive elements of the result array
     int numRanges = annotationList.size();
     int[] result = new int[numRanges * 2];
     for (int j = 0; j < numRanges; j++) {
       AnnotationFS curFS = (AnnotationFS) annotationList.get(j);
       result[j * 2] = curFS.getBegin();
       result[j * 2 + 1] = curFS.getEnd();
     }
     return result;
   }

   /**
    * The regular expression Patterns to be matched. <code>mPatterns[i]</code> holds the patterns
    * for the type named by <code>mTypeNames.get(i)</code>.
    */
   private Pattern[][] mPatterns;

   /**
    * The names of the CAS types that this annotator produces from the patterns in {@link #mPatterns}.
    */
   private List mTypeNames;

   /**
    * The names of the CAS types within which this annotator will search for new annotations. This
    * may be null, indicating that the entire document will be searched.
    */
   private String[] mContainingAnnotationTypeNames;

   /**
    * The CAS types corresponding to {@link #mTypeNames}.
    */
   private Type[] mCASTypes;

   /**
    * The CAS types corresponding to {@link #mContainingAnnotationTypeNames}.
    */
   private Type[] mContainingAnnotationTypes;

   /**
    * Whether to annotate the entire span of the containing annotation when a match is found. Never
    * null once {@link #initialize(UimaContext)} has completed.
    */
   private Boolean mAnnotateEntireContainingAnnotation;

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.uima.examples.cas;

	import java.io.BufferedReader;
	import java.io.IOException;
	import java.io.InputStream;
	import java.io.InputStreamReader;
	import java.util.ArrayList;
	import java.util.Arrays;
	import java.util.List;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;
	import java.util.regex.PatternSyntaxException;

	import org.apache.uima.UimaContext;
	import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
	import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
	import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
	import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
	import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
	import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
	import org.apache.uima.analysis_engine.annotator.TextAnnotator;
	import org.apache.uima.cas.CAS;
	import org.apache.uima.cas.FSIterator;
	import org.apache.uima.cas.FSTypeConstraint;
	import org.apache.uima.cas.FeatureStructure;
	import org.apache.uima.cas.Type;
	import org.apache.uima.cas.TypeSystem;
	import org.apache.uima.cas.text.AnnotationFS;
	import org.apache.uima.resource.ResourceAccessException;
	import org.apache.uima.resource.ResourceInitializationException;
	import org.apache.uima.util.Level;

	/**
	* Annotator that find substrings of the input document that match regular expressions.
	* <p>
	* There are two ways to specify the regular expressions - via configuration parameters or via an
	* external resource file.
	* <p>
	* This annotator takes the following optional configuration parameters:
	* <ul>
	* <li><code>Patterns</code> - array of Strings indicating regular expressions to match. The
	* pattern language is described at <a
	* href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">
	* http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html</a>) </li>
	* <li><code>TypeNames</code> - array of Strings indicating names of Types to be created from the
	* patterns. </li>
	* <li><code>ContainingAnnotationTypes</code> - an array of input annotation types. This
	* annotator will only produce new annotations that are contained within existing annotaions of
	* these types. (This is optional.) </li>
	* <li><code>AnnotateEntireContainedAnnotation</code> - When the ContainingAnnoationTypes
	* parameter is specified, a value of true for this parameter will cause the entire containing
	* annotation to be used as the span of the new annotation, rather than just the span of the regular
	* expression match. This can be used to "classify" previously created annotations according to
	* whether or not they contain text matching a regular expression. </li>
	* </ul>
	* <p>
	* The indices of the <code>Patterns</code> and <code>TypeNames</code> arrays correspond, so
	* that a substring that matches <code>Patterns[i]</code> will result in an annotation of type
	* <code>TypeNames[i]</code>.
	* <p>
	* It is also possible to provide an external resource file that declares the annotation type names
	* and the regular expressions to match. The annotator will look for this file under the resource
	* key "PatternFile". The file format is as follows:
	* <ul>
	* <li>Lines starting with # or whitepsace are ignored</li>
	* <li>Lines starting with % indicate an annotation type</li>
	* <li>All other lines are regular expressions, using the same syntax described for the
	* <code>Patterns</code> configuration parameter.</li>
	* </ul>
	* If a regular expression is matched, it will be annotated with the last annotation type declared
	* (the nearest preceding line starting with %).
	*
	*
	*/
	public class RegExAnnotator extends CasAnnotator_ImplBase {
	public static final String MESSAGE_DIGEST = "org.apache.uima.examples.cas.RegExAnnotator_Messages";

	/**
	* Performs any startup tasks required by this annotator. This implementation reads the
	* configuration parmaeters and compiles the regular expressions.
	*
	* @see TextAnnotator#initialize(AnnotatorContext)
	*/
	public void initialize(UimaContext aContext) throws ResourceInitializationException {
	super.initialize(aContext);
	try {
	// Retrieve configuration parameters
	String[] patternStrings = (String[]) getContext().getConfigParameterValue("Patterns");
	String[] typeNames = (String[]) getContext().getConfigParameterValue("TypeNames");
	mContainingAnnotationTypeNames = (String[]) getContext().getConfigParameterValue(
	"ContainingAnnotationTypes");
	if (mContainingAnnotationTypeNames != null && mContainingAnnotationTypeNames.length > 0) {
	mAnnotateEntireContainingAnnotation = (Boolean) getContext().getConfigParameterValue(
	"AnnotateEntireContainingAnnotation");
	} else {
	mAnnotateEntireContainingAnnotation = Boolean.FALSE;
	}

	// create an ArrayList of type names and an ArrayList of pattern arrays,
	// where the indexes of the two lists corespond so that the patterns
	// at patternArray[i] correspond to the annotation type at
	// mTypeNames[i].
	mTypeNames = new ArrayList();
	ArrayList patternArray = new ArrayList();
	if (patternStrings != null) {
	if (typeNames == null \|\| typeNames.length != patternStrings.length) {
	// throw exception - error message in external message digest
	throw new ResourceInitializationException(MESSAGE_DIGEST,
	"type_pattern_array_length_mismatch", new Object[0]);
	}
	mTypeNames.addAll(Arrays.asList(typeNames));

	for (int i = 0; i < patternStrings.length; i++) {
	patternArray.add(new String[] { patternStrings[i] });
	}
	}

	// if PatternFile resource exists, parse it and add to patternArray
	InputStream in = getContext().getResourceAsStream("PatternFile");
	if (in != null) {
	try {
	ArrayList patternsForCurrentType = new ArrayList();
	boolean foundFirstType = false;
	// get buffered reader
	BufferedReader reader = new BufferedReader(new InputStreamReader(in));

	// read lines from file
	String line = reader.readLine();
	while (line != null) {
	if (!line.startsWith("#") && line.length() > 0
	&& !Character.isWhitespace(line.charAt(0))) {
	// line is not a comment
	if (line.startsWith("%")) // annotation type name
	{
	// add pattern array for previous type (if any) to list
	if (foundFirstType) {
	String[] pats = new String[patternsForCurrentType.size()];
	patternsForCurrentType.toArray(pats);
	patternArray.add(pats);
	patternsForCurrentType.clear();
	}
	// add new type name to mTypeNames list
	mTypeNames.add(line.substring(1));
	foundFirstType = true;
	} else // treat as regular expression
	{
	patternsForCurrentType.add(line);
	}
	}
	line = reader.readLine();
	}
	// add last group of pattersn to patternArray
	String[] pats = new String[patternsForCurrentType.size()];
	patternsForCurrentType.toArray(pats);
	patternArray.add(pats);
	} finally {
	if (in != null) {
	in.close();
	}
	}
	}

	// make sure there is at least one pattern
	if (patternArray.isEmpty()) {
	throw new ResourceInitializationException(
	AnnotatorConfigurationException.ONE_PARAM_REQUIRED,
	new Object[] { "Patterns, Pattern File" });
	}

	// compile regular expression patterns
	mPatterns = new Pattern[patternArray.size()][];
	for (int i = 0; i < patternArray.size(); i++) {
	String[] pats = (String[]) patternArray.get(i);
	mPatterns[i] = new Pattern[pats.length];
	for (int j = 0; j < mPatterns[i].length; j++) {
	try {
	mPatterns[i][j] = Pattern.compile(pats[j]);
	// make sure no pattern matches the empty string - as this
	// would lead to infinite loops during processing
	if (mPatterns[i][j].matcher("").matches()) {
	throw new ResourceInitializationException(MESSAGE_DIGEST,
	"regex_matches_empty_string", new Object[] { pats[j] });
	}
	} catch (PatternSyntaxException e) {
	throw new ResourceInitializationException(MESSAGE_DIGEST, "regex_syntax_error",
	new Object[] { pats[j] }, e);
	}
	}
	}
	} catch (ResourceAccessException e) {
	throw new ResourceInitializationException(e);
	} catch (IOException e) {
	throw new ResourceInitializationException(e);
	}
	}

	/**
	* Acquires references to CAS Type and Feature objects that are later used during the
	* {@link #process(CAS)} method.
	*
	* @see TextAnnotator#typeSystemInit(TypeSystem)
	*/
	public void typeSystemInit(TypeSystem aTypeSystem) throws AnalysisEngineProcessException {
	// get references to annotation types we will create
	mCASTypes = new Type[mTypeNames.size()];
	for (int i = 0; i < mTypeNames.size(); i++) {
	String curTypeName = (String) mTypeNames.get(i);
	mCASTypes[i] = aTypeSystem.getType(curTypeName);
	if (mCASTypes[i] == null) {
	throw new AnalysisEngineProcessException(AnnotatorInitializationException.TYPE_NOT_FOUND,
	new Object[] { this.getClass().getName(), curTypeName });
	}
	}

	// get references to Containing Annotation Types
	if (mContainingAnnotationTypeNames == null) {
	mContainingAnnotationTypes = null;
	} else {
	mContainingAnnotationTypes = new Type[mContainingAnnotationTypeNames.length];
	for (int i = 0; i < mContainingAnnotationTypes.length; i++) {
	mContainingAnnotationTypes[i] = aTypeSystem.getType(mContainingAnnotationTypeNames[i]);
	if (mContainingAnnotationTypes[i] == null) {
	throw new AnalysisEngineProcessException(AnnotatorInitializationException.TYPE_NOT_FOUND,
	new Object[] { getClass().getName(), mContainingAnnotationTypeNames[i] });
	}
	}
	}
	}

	/**
	* Invokes this annotator's analysis logic. This annotator uses the java regular expression
	* package to find annotations using the regular expressions defined by its configuration
	* parameters.
	*
	* @param aCAS
	* the CAS to process
	* @param aResultSpec
	* A list of outputs that this annotator should produce.
	*
	* @throws AnnotatorProcessException
	* if a failure occurs during processing.
	*
	* @see CasAnnotator_ImplBase#process(CAS)
	*/
	public void process(CAS aCAS) throws AnalysisEngineProcessException {
	try {
	String docText = aCAS.getDocumentText();
	// Determine which regions of the document we are going to annotate
	int[] rangesToAnnotate = getRangesToAnnotate(aCAS);

	// We treat the rangesToAnnotate array as a list of (start,end) offset
	// pairs. Iterate through all of these pairs.
	for (int i = 0; i < rangesToAnnotate.length; i += 2) {
	int startPos = rangesToAnnotate[i];
	int endPos = rangesToAnnotate[i + 1];
	// get the substring of text to be annotated
	String subText = docText.substring(startPos, endPos);

	// iterate over all annotation types for which we have patterns
	for (int j = 0; j < mCASTypes.length; j++) {
	// see if the ResultSpec contains this type
	if (getResultSpecification().containsType(mCASTypes[j].getName())) {
	// try to match each pattern that we have for this annotation type
	for (int k = 0; k < mPatterns[j].length; k++) {
	int pos = 0;
	Matcher matcher = mPatterns[j][k].matcher(subText);
	while (pos < subText.length() && matcher.find(pos)) {
	getContext().getLogger().log(Level.FINER,
	"RegEx match found: [" + matcher.group() + "]");
	// match found; extract locations of start and end of match
	// (or of entire containing annotation, if that option is on)
	int annotStart, annotEnd;
	if (mAnnotateEntireContainingAnnotation.booleanValue()) {
	annotStart = startPos;
	annotEnd = endPos;
	} else {
	annotStart = startPos + matcher.start();
	annotEnd = startPos + matcher.end();
	}
	// create Annotation in CAS
	FeatureStructure fs = aCAS.createAnnotation(mCASTypes[j], annotStart, annotEnd);
	aCAS.getIndexRepository().addFS(fs);
	pos = annotEnd - startPos;
	}
	}
	}
	}
	}
	} catch (Exception e) {
	throw new AnalysisEngineProcessException(e);
	}
	}

	/**
	* Utility method that determines which subranges of the document text should be annotated by this
	* annotator. This is done as follows:
	* <ul>
	* <li>If <code>mContainingAnnotationTypes</code> is <code>null</code>, the entire document
	* is eligible for annotation.</li>
	* <li>If <code>mContainingAnnotationTypes</code> is not <code>null</code>, then each of its
	* elements is expected to be an Annotation Type name. The CAS is queried for existing annotations
	* of any of these Types, and the only subranges of the document eligible for annotation are those
	* subranges contained within such annotations.</li>
	* </ul>
	*
	* @param aCAS
	* CAS currently being processed
	*
	* @return an array of integers indicating the document subranges eligible for annotation. Begin
	* and end positions of the subranges are stored in successive elements of the array. For
	* example, elements 0 and 1 are the start and end of the first subrange; elements 2 and 3
	* are the start and end of the second subrange, and so on.
	*/
	protected int[] getRangesToAnnotate(CAS aCAS) {
	if (mContainingAnnotationTypes == null \|\| mContainingAnnotationTypes.length == 0) {
	// ContainingAnnotationTypes is not set - the whole document is eligible
	return new int[] { 0, aCAS.getDocumentText().length() };
	} else {
	// get iterator over all annotations in the CAS
	FSIterator iterator = aCAS.getAnnotationIndex().iterator();

	// filter the iterator so that only instances of Types in the
	// mContainingAnnotationTypes array are returned
	FSTypeConstraint constraint = aCAS.getConstraintFactory().createTypeConstraint();
	for (int i = 0; i < mContainingAnnotationTypes.length; i++) {
	constraint.add(mContainingAnnotationTypes[i]);
	}
	iterator = aCAS.createFilteredIterator(iterator, constraint);

	// iterate over annotations and add them to an ArrayList
	List annotationList = new ArrayList();
	while (iterator.isValid()) {
	annotationList.add(iterator.get());
	iterator.moveToNext();
	}

	// For each Annotation in the list, add its start and end
	// positions to the result array.
	int numRanges = annotationList.size();
	int[] result = new int[numRanges * 2];
	for (int j = 0; j < numRanges; j++) {
	AnnotationFS curFS = (AnnotationFS) annotationList.get(j);
	result[j * 2] = curFS.getBegin();
	result[j * 2 + 1] = curFS.getEnd();
	}
	return result;
	}
	}

	/**
	* The regular expression Patterns to be matched.
	*/
	private Pattern[][] mPatterns;

	/**
	* The names of the CAS types that this annotator produces from the patterns in {@link #mPatterns}.
	*/
	private ArrayList mTypeNames;

	/**
	* The names of the CAS types within which this annotator will search for new annotations. This
	* may be null, indicating that the entire document will be searched.
	*/
	private String[] mContainingAnnotationTypeNames;

	/**
	* The CAS types corresponding to {@link #mTypeNames}.
	*/
	private Type[] mCASTypes;

	/**
	* The CAS types corresponding to {@link #mContainingAnnotationTypeNames}.
	*/
	private Type[] mContainingAnnotationTypes;

	/**
	* Whether to annotate the entire span of the containing annotation when a match is found.
	*/
	private Boolean mAnnotateEntireContainingAnnotation;

	}