// blob: 0242e063299897818939e7e86d1f231ee91eea1e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.annotator.regex.impl;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.annotator.regex.Feature;
import org.apache.uima.annotator.regex.FeaturePath;
import org.apache.uima.annotator.regex.FilterFeature;
import org.apache.uima.annotator.regex.RegexVariables;
import org.apache.uima.annotator.regex.Rule;
import org.apache.uima.annotator.regex.RuleException;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.resource.ResourceInitializationException;
/**
 * Implementation of the {@link Rule} interface.
 *
 * <p>
 * A rule bundles a regular expression with its match strategy, match type,
 * optional feature path, filter features, update features and rule
 * exceptions. An instance is set up in two separate steps:
 * {@link #typeInit(TypeSystem)} resolves the match type name against a type
 * system, and {@link #initialize()} substitutes regex variables, evaluates
 * named match groups and compiles the regular expression.
 * {@link #getRegexPattern()} returns <code>null</code> until
 * {@link #initialize()} has been called.
 * </p>
 */
public class Rule_impl implements Rule {

  // rule regex string; rewritten in place by initialize() when it contains
  // variable references or match group name markers
  private String regex;

  // rule ID (may be null)
  private final String id;

  // rule confidence value
  private final float confidence;

  // rule match type as string value
  private final String matchTypeStr;

  // rule featurePath object (always created, even without a feature path)
  private final FeaturePath_impl featurePath;

  // true if a featurePath was specified for this rule
  private final boolean isFeaturePathMatch;

  // rule match strategy
  private final int matchStrategy;

  // compiled rule regex pattern, null until initialize() was called
  private Pattern pattern;

  // resolved rule match type, set by typeInit()
  private Type matchType;

  // rule filter features
  private final List<FilterFeature> filterFeatures;

  // rule update features
  private final List<Feature> updateFeatures;

  // rule exceptions
  private final List<RuleException> exceptions;

  // concept variables (may be null)
  private final RegexVariables variables;

  // lower-cased match group name -> match group number
  private final Map<String, Integer> matchGroupNames;

  /**
   * Constructor to create a new Rule object.
   *
   * @param regex
   *          regex pattern as String
   * @param matchStrategy
   *          matching strategy
   * @param matchType
   *          match type as String
   * @param id
   *          rule id (can also be null)
   * @param confidence
   *          confidence value
   * @param featurePath
   *          featurePath (can also be null)
   * @param variables
   *          variables used to resolve variable references in the regex; may
   *          be null, in which case {@link #initialize()} fails if the regex
   *          references a variable
   */
  public Rule_impl(String regex, int matchStrategy, String matchType,
      String id, float confidence, String featurePath,
      RegexVariables variables) {
    this.regex = regex;
    this.matchStrategy = matchStrategy;
    this.matchTypeStr = matchType;
    this.filterFeatures = new ArrayList<FilterFeature>();
    this.updateFeatures = new ArrayList<Feature>();
    this.exceptions = new ArrayList<RuleException>();
    this.pattern = null;
    this.id = id;
    this.confidence = confidence;
    this.featurePath = new FeaturePath_impl(featurePath);
    // FeaturePath matching mode is active only if a feature path is specified
    this.isFeaturePathMatch = (featurePath != null);
    this.variables = variables;
    this.matchGroupNames = new HashMap<String, Integer>();
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.uima.annotator.regex.Rule#addFilterFeature(org.apache.uima.annotator.regex.FilterFeature)
   */
  public void addFilterFeature(FilterFeature aFeature) {
    this.filterFeatures.add(aFeature);
  }

  /*
   * (non-Javadoc)
   *
   * Returns a fresh array on every call; changes to it do not affect the rule.
   *
   * @see org.apache.uima.annotator.regex.Rule#getMatchTypeFilterFeatures()
   */
  public FilterFeature[] getMatchTypeFilterFeatures() {
    return this.filterFeatures.toArray(new FilterFeature[0]);
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.uima.annotator.regex.Rule#getMatchStrategy()
   */
  public int getMatchStrategy() {
    return this.matchStrategy;
  }

  /*
   * (non-Javadoc)
   *
   * The type is only available after typeInit() resolved it.
   *
   * @see org.apache.uima.annotator.regex.Rule#getMatchType()
   */
  public Type getMatchType() {
    return this.matchType;
  }

  /*
   * (non-Javadoc)
   *
   * The pattern is null until initialize() compiled it.
   *
   * @see org.apache.uima.annotator.regex.Rule#getRegexPattern()
   */
  public Pattern getRegexPattern() {
    return this.pattern;
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.uima.annotator.regex.Rule#getConfidence()
   */
  public float getConfidence() {
    return this.confidence;
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.uima.annotator.regex.Rule#getId()
   */
  public String getId() {
    return this.id;
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.uima.annotator.regex.Rule#addUpdateFeature(org.apache.uima.annotator.regex.Feature)
   */
  public void addUpdateFeature(Feature aFeature) {
    this.updateFeatures.add(aFeature);
  }

  /*
   * (non-Javadoc)
   *
   * Returns a fresh array on every call; changes to it do not affect the rule.
   *
   * @see org.apache.uima.annotator.regex.Rule#getMatchTypeUpdateFeatures()
   */
  public Feature[] getMatchTypeUpdateFeatures() {
    return this.updateFeatures.toArray(new Feature[0]);
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.uima.annotator.regex.Rule#addException(org.apache.uima.annotator.regex.RuleException)
   */
  public void addException(RuleException aException) {
    this.exceptions.add(aException);
  }

  /*
   * (non-Javadoc)
   *
   * Returns a fresh array on every call; changes to it do not affect the rule.
   *
   * @see org.apache.uima.annotator.regex.Rule#getExceptions()
   */
  public RuleException[] getExceptions() {
    return this.exceptions.toArray(new RuleException[0]);
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.uima.annotator.regex.Rule#getMatchTypeFeaturePath()
   */
  public FeaturePath getMatchTypeFeaturePath() {
    return this.featurePath;
  }

  /*
   * (non-Javadoc)
   *
   * @see org.apache.uima.annotator.regex.Rule#isFeaturePathMatch()
   */
  public boolean isFeaturePathMatch() {
    return this.isFeaturePathMatch;
  }

  /**
   * Resolves the match type of this rule against the given type system and
   * passes the type information on to the filter features, update features,
   * rule exceptions and the feature path of this rule.
   *
   * @param ts
   *          type system used to look up the match type name
   * @throws ResourceInitializationException
   *           a {@link RegexAnnotatorConfigException} is thrown if a match
   *           type name is set but not found in <code>ts</code>; exceptions
   *           raised by the delegated initializations are passed through
   */
  public void typeInit(TypeSystem ts) throws ResourceInitializationException {
    // initialize the match type; without a match type name the type stays
    // unset and is handed to the delegates below as null
    if (this.matchTypeStr != null) {
      this.matchType = ts.getType(this.matchTypeStr);
      if (this.matchType == null) {
        throw new RegexAnnotatorConfigException(
            "regex_annotator_error_resolving_types",
            new Object[] { this.matchTypeStr });
      }
    }

    // initialize match type filters
    for (FilterFeature filterFeature : getMatchTypeFilterFeatures()) {
      ((FilterFeature_impl) filterFeature).typeInit(this.matchType);
    }

    // initialize match type update features
    for (Feature updateFeature : getMatchTypeUpdateFeatures()) {
      ((Feature_impl) updateFeature).typeInit(this.matchType);
    }

    // initialize rule exceptions
    for (RuleException ruleException : getExceptions()) {
      ((RuleException_impl) ruleException).typeInit(ts);
    }

    // initialize featurePath element
    this.featurePath.initialize(this.matchType);
  }

  /**
   * Prepares the rule for matching: replaces regex variables, evaluates match
   * group names, compiles the resulting regular expression and initializes
   * the filter features, update features and rule exceptions.
   *
   * @throws RegexAnnotatorConfigException
   *           if the regex references a variable that cannot be resolved
   */
  public void initialize() throws RegexAnnotatorConfigException {
    // regex variables must be replaced first, their values become part of
    // the expression that is inspected and compiled below
    if (this.regex.indexOf(RegexVariables.VARIABLE_START) > -1) {
      replaceRegexVariables();
    }

    // evaluate match group names
    if (this.regex.indexOf(Rule.MATCH_GROUP_START) > -1) {
      evaluateMatchGroupNames();
    }

    // compile regex
    this.pattern = Pattern.compile(this.regex);

    // initialize match type filters
    for (FilterFeature filterFeature : getMatchTypeFilterFeatures()) {
      ((FilterFeature_impl) filterFeature).initialize();
    }

    // initialize match type update features
    for (Feature updateFeature : getMatchTypeUpdateFeatures()) {
      ((Feature_impl) updateFeature).initialize();
    }

    // initialize rule exceptions
    for (RuleException ruleException : getExceptions()) {
      ((RuleException_impl) ruleException).initialize();
    }
  }

  /*
   * (non-Javadoc)
   *
   * The lookup ignores the case of the name. -1 is returned for unknown names
   * and for a null name.
   *
   * @see org.apache.uima.annotator.regex.Rule#getMatchGroupNumber(java.lang.String)
   */
  public int getMatchGroupNumber(String matchGroupName) {
    if (matchGroupName == null) {
      return -1;
    }
    // use a fixed locale so that the lookup key does not depend on the
    // default locale of the JVM (e.g. Turkish dotless i)
    Integer value = this.matchGroupNames.get(matchGroupName
        .toLowerCase(Locale.ENGLISH));
    return (value != null) ? value.intValue() : -1;
  }

  /**
   * Replaces the variables used in the regular expression pattern with their
   * values.
   *
   * @throws RegexAnnotatorConfigException
   *           if a variable is referenced but no variables are available or
   *           the variable has no value
   */
  private void replaceRegexVariables() throws RegexAnnotatorConfigException {
    // create a regex matcher for the variable pattern
    Matcher matcher = RegexVariables.VARIABLE_REGEX_PATTERN
        .matcher(this.regex);

    // collect the distinct variable names (match group 1) first, the regex is
    // only modified after the scan is complete
    Set<String> variableNames = new HashSet<String>();
    while (matcher.find()) {
      variableNames.add(matcher.group(1));
    }

    // replace all found variables in the regular expression
    for (String variableName : variableNames) {
      String varValue = (this.variables == null) ? null : this.variables
          .getVariableValue(variableName);
      if (varValue == null) {
        throw new RegexAnnotatorConfigException(
            "regex_annotator_error_variable_not_found", new Object[] {
                variableName, this.id });
      }
      // create variable expression that must be replaced; the name is
      // quoted so that it is always matched literally
      String variablePattern = RegexVariables.VARIABLE_REGEX_BEGIN
          + Pattern.quote(variableName) + RegexVariables.VARIABLE_REGEX_END;
      // the value is quoted as well, otherwise '\' and '$' in it would be
      // interpreted by replaceAll()
      this.regex = this.regex.replaceAll(variablePattern, Matcher
          .quoteReplacement(varValue));
    }
  }

  /**
   * Maps the match group names used in the regular expression to match group
   * numbers and removes the name markers from the expression.
   *
   * <p>
   * The number assigned to a name is one plus the number of capturing group
   * openings found in front of the end of the name. Names are stored
   * lower-cased, see {@link #getMatchGroupNumber(String)}.
   * </p>
   */
  private void evaluateMatchGroupNames() {
    // create a regex matcher for the match group pattern
    Matcher matcher = Rule.MATCH_GROUP_REGEX_PATTERN.matcher(this.regex);
    List<String> names = new ArrayList<String>();

    // find all match group names (match group 1) in the regular expression;
    // the regex itself is not modified during this scan
    while (matcher.find()) {
      String matchGroupName = matcher.group(1);
      int groupNumber = 1 + countCapturingGroupOpenings(this.regex, matcher
          .end(1));
      this.matchGroupNames.put(matchGroupName.toLowerCase(Locale.ENGLISH),
          Integer.valueOf(groupNumber));
      // remember the name with its original case for the removal below
      names.add(matchGroupName);
    }

    // remove the name markers from the regular expression before it is
    // compiled (presumably the marker syntax is not valid for
    // java.util.regex - confirm against Rule.MATCH_GROUP_START)
    for (String matchGroupName : names) {
      String matchGroupNamePattern = Rule.MATCH_GROUP_REGEX_BEGIN
          + Pattern.quote(matchGroupName) + Rule.MATCH_GROUP_REGEX_END;
      this.regex = this.regex.replaceAll(matchGroupNamePattern, "");
    }
  }

  /**
   * Counts the capturing group openings in the first <code>end</code>
   * characters of an expression.
   *
   * <p>
   * An opening parenthesis counts unless it is escaped with a backslash or
   * directly followed by '?'. Parentheses inside character classes or quoted
   * sections are not treated specially.
   * </p>
   *
   * @param expression
   *          regular expression to inspect
   * @param end
   *          index (exclusive) up to which the expression is inspected
   * @return number of capturing group openings found
   */
  private static int countCapturingGroupOpenings(String expression, int end) {
    int count = 0;
    for (int i = 0; i < end; i++) {
      char current = expression.charAt(i);
      if (current == '\\') {
        // skip the escaped character, an escaped parenthesis is a literal
        // and does not open a group
        i++;
      } else if (current == '('
          && (i + 1 >= expression.length() || expression.charAt(i + 1) != '?')) {
        count++;
      }
    }
    return count;
  }

  /*
   * (non-Javadoc)
   *
   * @see java.lang.Object#toString()
   */
  @Override
  public String toString() {
    StringBuilder buffer = new StringBuilder();
    buffer.append("Rule ");
    if (this.id != null) {
      buffer.append(this.id);
    }
    buffer.append("\n");
    buffer.append("Regex: ");
    buffer.append(this.regex);

    // unknown match strategies are silently omitted
    if (this.matchStrategy == Rule.MATCH_ALL) {
      buffer.append("\nMatch strategy: MATCH_ALL");
    } else if (this.matchStrategy == Rule.MATCH_COMPLETE) {
      buffer.append("\nMatch strategy: MATCH_COMPLETE");
    } else if (this.matchStrategy == Rule.MATCH_FIRST) {
      buffer.append("\nMatch strategy: MATCH_FIRST");
    }

    buffer.append("\nMatch type: ");
    buffer.append(this.matchTypeStr);
    buffer.append("\nFeaturePath: ");
    buffer.append(this.featurePath.getFeaturePath());

    // a confidence of 0.0 is not printed
    if (this.confidence != 0.0) {
      buffer.append("\nConfidence: ");
      buffer.append(this.confidence);
    }

    FilterFeature[] filterFeats = getMatchTypeFilterFeatures();
    if (filterFeats.length > 0) {
      buffer.append("\nMatch type filter features: \n");
    }
    for (FilterFeature filterFeature : filterFeats) {
      buffer.append(filterFeature.toString());
    }
    buffer.append("\n");

    RuleException[] ruleExceptions = getExceptions();
    if (ruleExceptions.length > 0) {
      buffer.append("\nRule exceptions: \n");
    }
    for (RuleException ruleException : ruleExceptions) {
      buffer.append(ruleException.toString());
    }
    buffer.append("\n");

    return buffer.toString();
  }
}