blob: 104a39c6b516c839ce6bc09a0cd6800d7342525a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.annotator.regex.impl;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.regex.Matcher;
import org.apache.incubator.uima.regex.AnnotationDocument;
import org.apache.incubator.uima.regex.ConceptDocument;
import org.apache.incubator.uima.regex.ConceptSetDocument;
import org.apache.incubator.uima.regex.CreateAnnotationsDocument;
import org.apache.incubator.uima.regex.ExceptionDocument;
import org.apache.incubator.uima.regex.FeatureDocument;
import org.apache.incubator.uima.regex.RuleDocument;
import org.apache.incubator.uima.regex.RulesDocument;
import org.apache.incubator.uima.regex.SetFeatureDocument;
import org.apache.incubator.uima.regex.VariableDocument;
import org.apache.incubator.uima.regex.VariablesDocument;
import org.apache.uima.annotator.regex.Annotation;
import org.apache.uima.annotator.regex.Concept;
import org.apache.uima.annotator.regex.ConceptFileParser;
import org.apache.uima.annotator.regex.Feature;
import org.apache.uima.annotator.regex.FilterFeature;
import org.apache.uima.annotator.regex.Position;
import org.apache.uima.annotator.regex.RegexVariables;
import org.apache.uima.annotator.regex.Rule;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.xmlbeans.XmlError;
import org.apache.xmlbeans.XmlOptions;
/**
*
*/
public class ConceptFileParser_impl implements ConceptFileParser {
/*
* (non-Javadoc)
*
* @see org.apache.uima.annotator.regex.ConceptFileParser#parseConceptFile(java.lang.String,
* java.io.InputStream)
*/
public Concept[] parseConceptFile(String conceptFilePathName,
InputStream conceptFileStream) throws ResourceInitializationException {
ArrayList<Concept> conceptList = new ArrayList<Concept>();
// parse regex concept file and extract content to local objects
ConceptSetDocument conceptSetDoc;
try {
conceptSetDoc = ConceptSetDocument.Factory.parse(conceptFileStream);
} catch (Exception ex) {
throw new RegexAnnotatorConfigException(
"regex_annotator_error_parsing_rule_set_file",
new Object[] { conceptFilePathName }, ex);
}
// validate input file
ArrayList<XmlError> validationErrors = new ArrayList<XmlError>();
XmlOptions validationOptions = new XmlOptions();
validationOptions.setErrorListener(validationErrors);
boolean isValid = conceptSetDoc.validate(validationOptions);
// output the errors if the XML is invalid.
if (!isValid) {
Iterator<XmlError> iter = validationErrors.iterator();
StringBuffer errorMessages = new StringBuffer();
while (iter.hasNext()) {
errorMessages.append("\n>> ");
errorMessages.append(iter.next());
}
throw new RegexAnnotatorConfigException(
"regex_annotator_error_xml_validation", new Object[] {
conceptFilePathName, errorMessages.toString() });
}
// get concept file regex variables and store them all to the variables
// object
VariablesDocument.Variables variablesDoc = conceptSetDoc.getConceptSet()
.getVariables();
RegexVariables variables = null;
if (variablesDoc != null) {
VariableDocument.Variable[] varArray = variablesDoc.getVariableArray();
if (varArray.length > 0) {
variables = new RegexVariables_impl();
for (int i = 0; i < varArray.length; i++) {
String value = varArray[i].getValue();
// replace (existing) variables in variables
Matcher matcher = RegexVariables.VARIABLE_REGEX_PATTERN.matcher(value);
int pos = 0;
while (matcher.find(pos)) {
// replace variable value
String varName = matcher.group().substring(3,
matcher.group().length() - 1);
String substitutionValue = variables.getVariableValue(varName);
if (substitutionValue == null) {
throw new ResourceInitializationException(
ResourceInitializationException.ERROR_INITIALIZING_FROM_DESCRIPTOR,
new String[] {
varName,
conceptFilePathName
+ ", referencing a variable that has not been defined " });
}
value = value.replaceAll(RegexVariables.VARIABLE_REGEX_BEGIN + varName
+ "}", substitutionValue);
pos = matcher.end();// current end match position
}
variables.addVariable(varArray[i].getName(), value);
}
}
}
// ***************************************************
// get the concepts from the concept file document
// ***************************************************
ConceptSetDocument.ConceptSet conceptSet = conceptSetDoc.getConceptSet();
ConceptDocument.Concept[] concepts = conceptSet.getConceptArray();
for (int i = 0; i < concepts.length; i++) {
// get concept meta data
String conceptName = concepts[i].getName();
boolean processAllRules = concepts[i].getProcessAllRules();
// create new concept object
org.apache.uima.annotator.regex.Concept concept = new Concept_impl(
conceptName, processAllRules);
// ********************************
// get all rules for this concept
// ********************************
RulesDocument.Rules rules = concepts[i].getRules();
RuleDocument.Rule[] ruleList = rules.getRuleArray();
for (int r = 0; r < ruleList.length; r++) {
// get rule meta data
String regex = ruleList[r].getRegEx();
String matchType = ruleList[r].getMatchType();
int matchStrategy = ruleList[r].getMatchStrategy().intValue();
String id = ruleList[r].getRuleId();
String featurePath = ruleList[r].getFeaturePath();
float confidence = (float) 0.0;
if (ruleList[r].getConfidence() != null) {
confidence = ruleList[r].getConfidence().floatValue();
}
// create new rule and add all rule settings
// additionally add a reference to the regex variables object
// to resolve regex variables
Rule rule = new Rule_impl(regex, matchStrategy, matchType, id,
confidence, featurePath, variables);
// ********************************
// get match type filter features
// ********************************
if (ruleList[r].getMatchTypeFilter() != null) {
// iterate over all filter features and add them to the rule
FeatureDocument.Feature[] filterFeatures = ruleList[r]
.getMatchTypeFilter().getFeatureArray();
for (int x = 0; x < filterFeatures.length; x++) {
String featureName = filterFeatures[x].getFeaturePath();
String featureValue = filterFeatures[x].getStringValue();
// create new filter feature and add them to the rule
FilterFeature filterFeature = new FilterFeature_impl(
featureName, featureValue);
rule.addFilterFeature(filterFeature);
}
}
// ***********************************************
// get all update match type annotation features
// ***********************************************
if (ruleList[r].getUpdateMatchTypeAnnotation() != null) {
// iterate over all match type annotation update features and add
// them to the rule
SetFeatureDocument.SetFeature[] updateFeatures = ruleList[r]
.getUpdateMatchTypeAnnotation().getSetFeatureArray();
for (int x = 0; x < updateFeatures.length; x++) {
String featureName = updateFeatures[x].getName();
String featureValue = updateFeatures[x].getStringValue();
int featureType = updateFeatures[x].getType().intValue();
int normalization = 0;
if (updateFeatures[x].getNormalization() != null) {
normalization = updateFeatures[x].getNormalization()
.intValue();
}
String implClass = updateFeatures[x].getClass1();
// create new feature and add them to the rule
Feature updateFeature = new Feature_impl(featureType,
featureName, featureValue, normalization, implClass);
rule.addUpdateFeature(updateFeature);
}
}
// **********************************
// get all exceptions for this rule
// **********************************
if (ruleList[r].getRuleExceptions() != null) {
// iterate over all match type annotation update features and add
// them to the rule
ExceptionDocument.Exception[] exceptions = ruleList[r]
.getRuleExceptions().getExceptionArray();
for (int x = 0; x < exceptions.length; x++) {
String exceptionMatchType = exceptions[x].getMatchType();
String regexPattern = exceptions[x].getStringValue();
// create new Exception object and add them to the rule
org.apache.uima.annotator.regex.RuleException exception = new RuleException_impl(
exceptionMatchType, regexPattern);
rule.addException(exception);
}
}
// add rule to the concept
concept.addRule(rule);
}
// **************************************
// get all annotations for this concept
// **************************************
CreateAnnotationsDocument.CreateAnnotations annotations = concepts[i]
.getCreateAnnotations();
AnnotationDocument.Annotation[] annotationList = annotations
.getAnnotationArray();
for (int a = 0; a < annotationList.length; a++) {
// create annotation position objects
int beginMatchGroup = annotationList[a].getBegin().getGroup().intValue();
int beginLocation = annotationList[a].getBegin().getLocation()
.intValue();
int endMatchGroup = annotationList[a].getEnd().getGroup().intValue();
int endLocation = annotationList[a].getEnd().getLocation()
.intValue();
Position begin = new Position_impl(beginMatchGroup, beginLocation);
Position end = new Position_impl(endMatchGroup, endLocation);
// create annotation object
String id = annotationList[a].getId();
String type = annotationList[a].getType();
String validationClass = annotationList[a].getValidate();
Annotation annotation = new Annotation_impl(id, type, begin, end,
validationClass);
// read out feature values and add it to the annotation
SetFeatureDocument.SetFeature[] features = annotationList[a]
.getSetFeatureArray();
for (int f = 0; f < features.length; f++) {
String name = features[f].getName();
int featureType = features[f].getType().intValue();
String value = features[f].getStringValue();
int normalization = 0;
if (features[f].getNormalization() != null) {
normalization = features[f].getNormalization().intValue();
}
String implClass = features[f].getClass1();
Feature feature = new Feature_impl(featureType, name, value,
normalization, implClass);
annotation.addFeature(feature);
}
// add annotation to rule
concept.addAnnotation(annotation);
}
conceptList.add(concept);
}
return conceptList.toArray(new Concept[0]);
}
}