// blob: c26ac22880b3eef498206e783769b216d2f75c64 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.ruta.rule;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.TreeSet;
import org.apache.uima.cas.ConstraintFactory;
import org.apache.uima.cas.FSMatchConstraint;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.ruta.RutaBlock;
import org.apache.uima.ruta.RutaStream;
import org.apache.uima.ruta.constraint.BasicTypeConstraint;
import org.apache.uima.ruta.expression.MatchReference;
import org.apache.uima.ruta.expression.RutaExpression;
import org.apache.uima.ruta.expression.feature.FeatureExpression;
import org.apache.uima.ruta.expression.feature.FeatureMatchExpression;
import org.apache.uima.ruta.expression.type.TypeExpression;
import org.apache.uima.ruta.type.RutaBasic;
/**
 * Matcher that anchors a rule element on annotations whose type is given by a
 * {@link MatchReference}. If the match reference additionally resolves to a
 * {@link FeatureExpression}, candidate annotations are passed through that expression before they
 * are returned.
 */
public class RutaTypeMatcher implements RutaMatcher {

  /** Flag handed to {@code FeatureExpression#getFeatureAnnotations} by all lookups of this class. */
  private static final boolean CHECK_ON_FEATURE = false;

  /** The reference that is resolved to the type (and optional feature) expression to match. */
  protected final MatchReference mr;

  /** Ordering used for all result sets created by this matcher. */
  protected AnnotationComparator comparator;

  /**
   * Creates a matcher for the given match reference.
   *
   * @param mr
   *          the reference providing the type expression and the optional feature expression
   */
  public RutaTypeMatcher(MatchReference mr) {
    super();
    this.mr = mr;
    this.comparator = new AnnotationComparator();
  }

  /**
   * Collects all annotations in the stream that can serve as a match for this matcher.
   * <p>
   * For document-like types the document annotation of the stream is added. For every other type
   * the stream is traversed and the begin anchors of the type and of all its subtypes are
   * collected, as long as they do not end after the document annotation of the stream.
   *
   * @param stream
   *          the stream to search; its cursor is moved by this call
   * @param parent
   *          the block used to resolve the expressions
   * @return the candidates, filtered by the feature expression if there is one
   */
  public Collection<AnnotationFS> getMatchingAnnotations(RutaStream stream, RutaBlock parent) {
    // TODO what about the matching direction?
    Collection<AnnotationFS> annotations = new TreeSet<AnnotationFS>(comparator);
    List<Type> types = getTypes(parent, stream);
    for (Type type : types) {
      if (type == null) {
        continue;
      }
      String name = type.getName();
      RutaBasic firstBasicOfAll = stream.getFirstBasicOfAll();
      if ("uima.tcas.DocumentAnnotation".equals(name)
              || "org.apache.uima.ruta.type.Document".equals(name)
              || (stream.getDocumentAnnotationType().getName().equals(name)
                      && (firstBasicOfAll != null && firstBasicOfAll.beginsWith(type)))) {
        // TODO what about dynamic windowing?
        annotations.add(stream.getDocumentAnnotation());
      } else {
        // the set of types to look up does not depend on the current basic, so resolve it once
        List<Type> allTypes = withSubtypes(type, stream);
        stream.moveToFirst();
        while (stream.isValid()) {
          RutaBasic nextBasic = (RutaBasic) stream.get();
          collectBeginAnchors(nextBasic, allTypes, stream, annotations);
          stream.moveToNext();
        }
      }
    }
    return filterByFeature(annotations, stream, parent);
  }

  /**
   * Collects the annotations of the matched type(s) that begin at the first basic of the stream
   * that does not start before the end of the given annotation.
   *
   * @param ruleElement
   *          the rule element requesting the lookup (not used by this implementation)
   * @param annotation
   *          the annotation after which to look
   * @param stream
   *          the stream to search; its cursor is moved by this call
   * @param parent
   *          the block used to resolve the expressions
   * @return the candidates, filtered by the feature expression if there is one; an empty
   *         collection if no suitable basic exists
   */
  public Collection<AnnotationFS> getAnnotationsAfter(RutaRuleElement ruleElement,
          AnnotationFS annotation, RutaStream stream, RutaBlock parent) {
    RutaBasic lastBasic = stream.getEndAnchor(annotation.getEnd());
    int end = 0;
    if (lastBasic == null) {
      // without an end anchor only an annotation ending at offset 0 can be continued
      if (annotation.getEnd() != 0) {
        return Collections.emptyList();
      }
    } else {
      end = lastBasic.getEnd();
    }
    if (annotation.getEnd() > 0) {
      stream.moveTo(lastBasic);
      if (stream.isVisible(lastBasic)) {
        stream.moveToNext();
      }
    } else {
      stream.moveToFirst();
    }
    if (!stream.isValid()) {
      return Collections.emptyList();
    }
    RutaBasic nextBasic = (RutaBasic) stream.get();
    // TODO HOTFIX for annotation of length 0
    while (nextBasic.getBegin() < end) {
      stream.moveToNext();
      if (!stream.isValid()) {
        // Every remaining basic starts before the end of the annotation. Do not fall back to the
        // last rejected basic: nothing in the stream lies after the annotation.
        return Collections.emptyList();
      }
      nextBasic = (RutaBasic) stream.get();
    }
    Collection<AnnotationFS> anchors = new TreeSet<AnnotationFS>(comparator);
    for (Type eachMatchType : getTypes(parent, stream)) {
      if (eachMatchType == null) {
        // unresolved type, skipped just like in getMatchingAnnotations
        continue;
      }
      collectBeginAnchors(nextBasic, withSubtypes(eachMatchType, stream), stream, anchors);
    }
    return filterByFeature(anchors, stream, parent);
  }

  /**
   * Collects the annotations of the matched type(s) that end at the last basic of the stream that
   * does not end after the begin of the given annotation.
   *
   * @param ruleElement
   *          the rule element requesting the lookup (not used by this implementation)
   * @param annotation
   *          the annotation before which to look
   * @param stream
   *          the stream to search; its cursor is moved by this call
   * @param parent
   *          the block used to resolve the expressions
   * @return the candidates, filtered by the feature expression if there is one; an empty
   *         collection if no suitable basic exists
   */
  public Collection<AnnotationFS> getAnnotationsBefore(RutaRuleElement ruleElement,
          AnnotationFS annotation, RutaStream stream, RutaBlock parent) {
    RutaBasic firstBasic = stream.getBeginAnchor(annotation.getBegin());
    if (firstBasic == null) {
      return Collections.emptyList();
    }
    stream.moveTo(firstBasic);
    if (stream.isVisible(firstBasic)) {
      stream.moveToPrevious();
    }
    if (!stream.isValid()) {
      return Collections.emptyList();
    }
    RutaBasic nextBasic = (RutaBasic) stream.get();
    // TODO HOTFIX for annotation of length 0
    while (nextBasic.getEnd() > firstBasic.getBegin()) {
      stream.moveToPrevious();
      if (!stream.isValid()) {
        // Every remaining basic ends after the begin of the annotation. Do not fall back to the
        // last rejected basic: nothing in the stream lies before the annotation.
        return Collections.emptyList();
      }
      nextBasic = (RutaBasic) stream.get();
    }
    Collection<AnnotationFS> anchors = new TreeSet<AnnotationFS>(comparator);
    for (Type eachMatchType : getTypes(parent, stream)) {
      if (eachMatchType == null) {
        // unresolved type, skipped just like in getMatchingAnnotations
        continue;
      }
      collectEndAnchors(nextBasic, withSubtypes(eachMatchType, stream), stream, anchors);
    }
    return filterByFeature(anchors, stream, parent);
  }

  /**
   * Builds a constraint that accepts the types of this matcher. The constraints of several types
   * are combined with a logical or.
   *
   * @param block
   *          the block used to resolve the expressions
   * @param stream
   *          the stream providing the CAS and its constraint factory
   * @return the combined constraint, or {@code null} if there is no resolvable type
   */
  public FSMatchConstraint createAnchorConstraints(RutaBlock block, RutaStream stream) {
    ConstraintFactory cf = stream.getCas().getConstraintFactory();
    List<Type> types = getTypes(block, stream);
    FSMatchConstraint result = null;
    for (Type eachType : types) {
      if (eachType == null) {
        // an unresolved type cannot contribute a constraint
        continue;
      }
      BasicTypeConstraint anchorConstraint = new BasicTypeConstraint(cf.createTypeConstraint(),
              eachType);
      anchorConstraint.add(eachType);
      if (result != null) {
        result = cf.or(result, anchorConstraint);
      } else {
        result = anchorConstraint;
      }
    }
    return result;
  }

  /**
   * Checks whether the given annotation is accepted by this matcher. Without a feature
   * expression the decision is based on the type only, otherwise on the feature expression.
   *
   * @param annotation
   *          the annotation to check, may be {@code null}
   * @param stream
   *          the current stream
   * @param parent
   *          the block used to resolve the expressions
   * @return {@code true} if the annotation matches, {@code false} otherwise or if the annotation
   *         is {@code null}
   */
  public boolean match(AnnotationFS annotation, RutaStream stream, RutaBlock parent) {
    if (annotation == null) {
      return false;
    }
    FeatureExpression featureExpression = mr.getFeatureExpression(parent, stream);
    if (featureExpression == null) {
      return checkType(annotation, stream, parent);
    }
    return checkFeature(annotation, stream, parent);
  }

  /**
   * Type-only check: accepts the annotation if one of the matcher types is a document annotation
   * type or subsumes the type of the annotation.
   */
  private boolean checkType(AnnotationFS annotation, RutaStream stream, RutaBlock parent) {
    // use the type system of the CAS like every other lookup in this class
    TypeSystem typeSystem = stream.getCas().getTypeSystem();
    for (Type type : getTypes(parent, stream)) {
      if (type == null) {
        // unresolved type, skipped just like in getMatchingAnnotations
        continue;
      }
      String name = type.getName();
      if ("uima.tcas.DocumentAnnotation".equals(name)
              || stream.getDocumentAnnotationType().getName().equals(name)) {
        return true;
      }
      if (typeSystem.subsumes(type, annotation.getType())) {
        return true;
      }
    }
    return false;
  }

  /**
   * Feature-based check: a {@link FeatureMatchExpression} decides on the feature value, any other
   * feature expression accepts the annotation if the range type of the feature subsumes the type
   * of the annotation.
   */
  private boolean checkFeature(AnnotationFS annotation, RutaStream stream, RutaBlock parent) {
    FeatureExpression fe = mr.getFeatureExpression(parent, stream);
    if (fe == null) {
      return false;
    }
    // resolved before the branch, exactly as before, so the call happens on both paths
    Feature feature = fe.getFeature(parent);
    if (fe instanceof FeatureMatchExpression) {
      FeatureMatchExpression fme = (FeatureMatchExpression) fe;
      return fme.checkFeatureValue(annotation, stream, parent);
    }
    if (feature == null) {
      // no feature, hence no range type the annotation could be compared with
      return false;
    }
    TypeSystem typeSystem = stream.getCas().getTypeSystem();
    return typeSystem.subsumes(feature.getRange(), annotation.getType());
  }

  @Override
  public String toString() {
    return mr.toString();
  }

  /**
   * @return the match reference of this matcher
   */
  public RutaExpression getExpression() {
    return mr;
  }

  /**
   * Resolves the type of the given expression. The type {@code uima.tcas.DocumentAnnotation} is
   * replaced by the document annotation type of the stream.
   *
   * @param expression
   *          the type expression to resolve
   * @param parent
   *          the block used to resolve the expression
   * @param stream
   *          the current stream
   * @return the resolved type, possibly {@code null} if the expression yields no type
   */
  protected Type getType(TypeExpression expression, RutaBlock parent, RutaStream stream) {
    Type type = expression.getType(parent);
    if (type != null && "uima.tcas.DocumentAnnotation".equals(type.getName())) {
      return stream.getDocumentAnnotationType();
    }
    return type;
  }

  /**
   * Returns the histogram value the stream reports for the type of this matcher.
   *
   * @param parent
   *          the block used to resolve the expressions
   * @param stream
   *          the current stream
   * @return the value of {@code stream.getHistogram} for the resolved type
   */
  public long estimateAnchors(RutaBlock parent, RutaStream stream) {
    TypeExpression typeExpression = mr.getTypeExpression(parent, stream);
    return stream.getHistogram(getType(typeExpression, parent, stream));
  }

  /**
   * Returns the types this matcher matches on.
   *
   * @param parent
   *          the block used to resolve the expressions
   * @param stream
   *          the current stream
   * @return a new list with exactly one element, which is {@code null} if the type could not be
   *         resolved
   */
  public List<Type> getTypes(RutaBlock parent, RutaStream stream) {
    List<Type> result = new ArrayList<Type>(1);
    TypeExpression typeExpression = mr.getTypeExpression(parent, stream);
    Type type = getType(typeExpression, parent, stream);
    result.add(type);
    return result;
  }

  /**
   * Returns a new list with all proper subtypes of the given type followed by the type itself.
   * The list returned by the type system is copied and not modified.
   */
  private List<Type> withSubtypes(Type type, RutaStream stream) {
    List<Type> subsumed = stream.getCas().getTypeSystem().getProperlySubsumedTypes(type);
    List<Type> result = new ArrayList<Type>(subsumed.size() + 1);
    result.addAll(subsumed);
    result.add(type);
    return result;
  }

  /**
   * Adds the begin anchors of the given types at the given basic to the target collection,
   * skipping annotations that end after the document annotation of the stream.
   */
  private void collectBeginAnchors(RutaBasic basic, List<Type> types, RutaStream stream,
          Collection<AnnotationFS> target) {
    for (Type eachType : types) {
      Collection<AnnotationFS> beginAnchors = basic.getBeginAnchors(eachType);
      if (beginAnchors == null) {
        continue;
      }
      for (AnnotationFS afs : beginAnchors) {
        if (afs.getEnd() <= stream.getDocumentAnnotation().getEnd()) {
          target.add(afs);
        }
      }
    }
  }

  /**
   * Adds the end anchors of the given types at the given basic to the target collection, skipping
   * annotations that begin before the document annotation of the stream.
   */
  private void collectEndAnchors(RutaBasic basic, List<Type> types, RutaStream stream,
          Collection<AnnotationFS> target) {
    for (Type eachType : types) {
      Collection<AnnotationFS> endAnchors = basic.getEndAnchors(eachType);
      if (endAnchors == null) {
        continue;
      }
      for (AnnotationFS afs : endAnchors) {
        if (afs.getBegin() >= stream.getDocumentAnnotation().getBegin()) {
          target.add(afs);
        }
      }
    }
  }

  /**
   * Passes the candidates through the feature expression of the match reference, or returns them
   * unchanged if the reference has no feature expression.
   */
  private Collection<AnnotationFS> filterByFeature(Collection<AnnotationFS> candidates,
          RutaStream stream, RutaBlock parent) {
    FeatureExpression featureExpression = mr.getFeatureExpression(parent, stream);
    if (featureExpression != null) {
      return featureExpression.getFeatureAnnotations(candidates, stream, parent, CHECK_ON_FEATURE);
    }
    return candidates;
  }
}