// blob: d92222c600852b04c2e69f639d498b2034d55143 [file] [log] [blame]
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/
/**
* Example of beanshell script to be used as input for the
* BSFAnnotator
* @author Olivier Terrier
* @version 1.0
*/
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.cas.*;
import org.apache.uima.jcas.cas.*;
import org.apache.uima.jcas.tcas.*;
import org.apache.uima.annotator.bsf.types.Token;
import java.util.regex.*;
/**
 * Script-level variables shared by the methods of this script.
 * They are only declared here; no value is assigned at this point.
 */
// Not assigned or read anywhere in this script.
// NOTE(review): presumably populated by the hosting BSFAnnotator - confirm before removing.
String scriptName;
// Compiled regular expression: assigned in initialize() and searched for in process().
Pattern matchPattern;
/**
 * Performs the startup tasks required by this annotator: compiles the regular
 * expression that process() later searches for in the document text.
 * <p>
 * The expression is read from the "Regexp" configuration parameter. When that
 * parameter is not set, a default alternation of first names is used instead.
 *
 * @param aContext gives access to the configuration parameters of this annotator.
 * @throws ResourceInitializationException if the configured regular expression
 *         cannot be compiled; the original PatternSyntaxException is kept as the cause.
 */
public void initialize(UimaContext aContext)
    throws ResourceInitializationException {
  String regexp = (String) aContext.getConfigParameterValue("Regexp");
  if (regexp == null) {
    regexp = "Dave|David|Bob|Tim|Joe";
  }
  try {
    matchPattern = Pattern.compile(regexp);
  } catch (PatternSyntaxException e) {
    // Report a bad user-supplied pattern through the declared initialization
    // exception rather than letting an unchecked exception escape.
    throw new ResourceInitializationException(e);
  }
}
/**
 * Invokes this annotator's analysis logic: searches the document text for the
 * pattern compiled in initialize() and adds one Token annotation to the JCas
 * indexes for every match, spanning the start and end offsets of that match.
 *
 * @param jcas contains the document to be analyzed and receives the new Token annotations.
 * @throws AnalysisEngineProcessException declared by the annotator contract.
 */
public void process(JCas jcas)
    throws AnalysisEngineProcessException {
  String text = jcas.getDocumentText();
  if (text == null) {
    // No document text is set: there is nothing to match, and
    // Pattern.matcher(null) would fail with a NullPointerException.
    return;
  }
  Matcher matcher = matchPattern.matcher(text);
  while (matcher.find()) {
    // One Token per match, covering exactly the matched character range.
    Token token = new Token(jcas, matcher.start(), matcher.end());
    token.addToIndexes();
  }
}