// blob: f788dee4f24768ffd1fc5fa6fdba23e78eea42a1 [file] [log] [blame]
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.uima.ruta.textruler.learner.whisk.generic;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.cas.Type;
import org.apache.uima.ruta.textruler.core.TextRulerAnnotation;
import org.apache.uima.ruta.textruler.core.TextRulerRule;
import org.apache.uima.ruta.textruler.core.TextRulerRuleItem;
import org.apache.uima.ruta.textruler.core.TextRulerWordConstraint;
public class WhiskRuleItem implements TextRulerRuleItem {
private TextRulerWordConstraint wordConstraint;
private boolean isStarWildCard = false;
private boolean hideRegExp = true;
private boolean hideFeature = true;
protected List<MLWhiskOtherConstraint> otherConstraints = new ArrayList<MLWhiskOtherConstraint>();
private List<String> activeFeatures = new ArrayList<String>();
public static class MLWhiskOtherConstraint {
TextRulerAnnotation tokenAnnotation;
TextRulerAnnotation constraintAnnotation;
boolean canBeAnchor;
Type type;
public MLWhiskOtherConstraint(TextRulerAnnotation tokenAnnotation,
TextRulerAnnotation constraintAnnotation) {
this.tokenAnnotation = tokenAnnotation;
this.constraintAnnotation = constraintAnnotation;
this.type = constraintAnnotation.getType();
canBeAnchor = (tokenAnnotation.getBegin() == constraintAnnotation.getBegin())
&& (tokenAnnotation.getEnd() == constraintAnnotation.getEnd());
// TODO is the matching END also a requirement ?
public boolean isTMBasicTypeTokenConstraint() {
return tokenAnnotation == constraintAnnotation;
public boolean canBeAnchorConstraint() {
return canBeAnchor;
public boolean equals(Object o) {
MLWhiskOtherConstraint co = (MLWhiskOtherConstraint) o;
return toString().equals(co.toString()) && (canBeAnchor == co.canBeAnchor);
public int hashCode() {
return toString().hashCode() * (canBeAnchor ? 2 : 1);
public String toString() {
return type.getShortName();
public MLWhiskOtherConstraint copy() {
return new MLWhiskOtherConstraint(tokenAnnotation, constraintAnnotation);
public WhiskRuleItem() {
wordConstraint = null;
public static WhiskRuleItem newWildCardItem() {
WhiskRuleItem i = new WhiskRuleItem();
return i;
public WhiskRuleItem(WhiskRuleItem copyFrom) {
if (copyFrom.wordConstraint != null)
wordConstraint = copyFrom.wordConstraint.copy();
isStarWildCard = copyFrom.isStarWildCard;
// termNumberInExample = copyFrom.termNumberInExample;
hideRegExp = copyFrom.hideRegExp;
for (MLWhiskOtherConstraint c : copyFrom.otherConstraints) {
this.activeFeatures = new ArrayList<String>(copyFrom.getActivatedFeatures());
public WhiskRuleItem(TextRulerAnnotation tokenAnnotation) {
setWordConstraint(new TextRulerWordConstraint(tokenAnnotation));
public void setWordConstraint(TextRulerWordConstraint c) {
wordConstraint = c;
public TextRulerWordConstraint getWordConstraint() {
return wordConstraint;
public TextRulerRuleItem copy() {
return new WhiskRuleItem(this);
public String getStringForRuleString(TextRulerRule rule, MLRuleItemType type,
int numberInPattern, int patternSize, int numberInRule, int ruleSize, int slotIndex) {
String result = "";
WhiskRule whiskRule = (WhiskRule) rule;
boolean isMarkingItem = type == MLRuleItemType.FILLER && numberInPattern == 0;
ArrayList<String> constraints = new ArrayList<String>();
String anchor = null;
if (wordConstraint != null) {
if (wordConstraint.isRegExpConstraint()) {
anchor = wordConstraint.typeShortName();
if (!hideRegExp)
constraints.add("REGEXP(\"" + wordConstraint + "\")");
} else
anchor = wordConstraint.toString();
MLWhiskOtherConstraint anchorConstraint = null;
if (anchor == null) {
for (MLWhiskOtherConstraint c : otherConstraints)
if (c.canBeAnchorConstraint()) {
anchorConstraint = c;
for (MLWhiskOtherConstraint oc : otherConstraints) {
if (oc != anchorConstraint) {
if (oc.canBeAnchorConstraint())
constraints.add("IS(" + oc + ")");
constraints.add("PARTOF(" + oc + ")");
if (anchor == null) {
if (anchorConstraint != null)
anchor = anchorConstraint.toString();
anchor = "ALL";
for (String featureString : activeFeatures) {
String stringValue = wordConstraint.getTokenAnnotation().getFeatureMap().get(featureString);
if(stringValue != null) {
constraints.add("FEATURE(\"" + featureString + "\", "+stringValue+")");
if (constraints.size() > 0) {
String cStr = "";
for (String constraintStr : constraints) {
if (cStr.length() > 0)
cStr += ", ";
cStr += constraintStr;
result += "{" + cStr;
if (!isMarkingItem)
result += "}";
if (isMarkingItem) {
if (constraints.size() == 0)
result += "{";
result += "->MARKONCE(" + whiskRule.getMarkName(slotIndex);
if (patternSize > 1)
result += ", " + (numberInRule + 1) + ", " + (numberInRule + patternSize);
result += ")}";
if (isStarWildCard) {
anchor += "*?";
if(anchor.equals("ALL*?")) {
anchor = "#";
return anchor + result;
public void setIsStarWildCard(boolean flag) {
isStarWildCard = flag;
public boolean isStarWildCard() {
return isStarWildCard;
// public void setTermNumberInExample(int i) {
// termNumberInExample = i;
// }
// public int getTermNumberInExample() {
// return termNumberInExample;
// }
public boolean equals(TextRulerRuleItem o) {
WhiskRuleItem it = (WhiskRuleItem) o;
if (wordConstraint != null)
if (!wordConstraint.equals(it.wordConstraint))
return false;
return isStarWildCard == it.isStarWildCard; // && termNumberInExample == it.termNumberInExample;
public String toString() {
return getStringForRuleString(null, null, 0, 0, 0, 0, 0);
public void setHideRegExp(boolean flag) {
hideRegExp = flag;
public boolean isHideRegExp() {
return hideRegExp;
public void addOtherConstraint(MLWhiskOtherConstraint c) {
if (!otherConstraints.contains(c))
public List<MLWhiskOtherConstraint> getOtherConstraints() {
return otherConstraints;
public int constraintPoints() {
int result = 0;
if (wordConstraint != null)
result += hideRegExp ? 1 : 3; // a regexp constraint is less general
// so point it bad here!
if (wordConstraint != null)
result += hideFeature ? 1 : 3;
result += otherConstraints.size();
return result;
public boolean isHideFeature() {
return hideFeature;
public void setHideFeature(boolean hideFeature) {
this.hideFeature = hideFeature;
public void activateFeature(String stringValue) {
public void deactivateFeature(String stringValue) {
public List<String> getActivatedFeatures() {
return activeFeatures;