blob: 5f7175b5c2330756cf70495063a2e58e71321a24 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.ruta.textruler.learner.lp2;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.ruta.engine.RutaEngine;
import org.apache.uima.ruta.textruler.TextRulerPlugin;
import org.apache.uima.ruta.textruler.core.TextRulerAnnotation;
import org.apache.uima.ruta.textruler.core.TextRulerExample;
import org.apache.uima.ruta.textruler.core.TextRulerRule;
import org.apache.uima.ruta.textruler.core.TextRulerRuleList;
import org.apache.uima.ruta.textruler.core.TextRulerStatisticsCollector;
import org.apache.uima.ruta.textruler.core.TextRulerTarget;
import org.apache.uima.ruta.textruler.core.TextRulerTarget.MLTargetType;
import org.apache.uima.ruta.textruler.core.TextRulerToolkit;
import org.apache.uima.ruta.textruler.extension.TextRulerLearnerDelegate;
import org.apache.uima.ruta.textruler.learner.lp2.LP2RuleItem.MLLP2ContextConstraint;
import org.apache.uima.ruta.textruler.learner.lp2.LP2RuleItem.MLLP2OtherConstraint;
import org.apache.uima.util.FileUtils;
public class OptimizedLP2 extends BasicLP2 {
/**
 * Switch for writing debug files (generated rule candidates) into the temp directory. It is only
 * honored in combination with {@code TextRulerToolkit.DEBUG}.
 */
public static final boolean SAVE_DEBUG_INFO_IN_TEMPFOLDER = false;
/**
 * Cache of covering statistics of start rules that were already tested, keyed by the rule
 * string of the start rule.
 */
private Map<String, TextRulerStatisticsCollector> cachedTestedStartRuleStatistics = new HashMap<String, TextRulerStatisticsCollector>();
/** Counts how often a start rule test could be answered from the cache. */
private long cachedTestedStartRuleStatisticsHitCounter = 0;
/**
 * Creates the learner. All arguments are forwarded unchanged to the {@link BasicLP2}
 * constructor; this class adds no construction logic of its own.
 */
public OptimizedLP2(String inputDir, String prePropTMFile, String tmpDir, String[] slotNames,
Set<String> filterSet, boolean skip, TextRulerLearnerDelegate delegate) {
super(inputDir, prePropTMFile, tmpDir, slotNames, filterSet, skip, delegate);
}
/**
 * Runs the inherited tagging rule induction with a fresh start rule statistics cache.
 * <p>
 * The cache and its hit counter are reset before the induction, the cache usage is logged
 * afterwards and the cache is emptied again before returning.
 *
 * @param target
 *          the target to learn rules for (passed through to the super implementation)
 * @param contextualRules
 *          the contextual rule list (passed through to the super implementation)
 * @return the rule list produced by the super implementation
 */
@Override
protected TextRulerRuleList learnTaggingRules(TextRulerTarget target,
    TextRulerRuleList contextualRules) {
  // every induction run starts with an empty cache and a zeroed hit counter
  cachedTestedStartRuleStatistics.clear();
  cachedTestedStartRuleStatisticsHitCounter = 0;

  final TextRulerRuleList learnedRules = super.learnTaggingRules(target, contextualRules);

  final String hitMessage = "[OptimizedLP2.learnTaggingRules] saved rule testings due to start rule results cache: "
      + cachedTestedStartRuleStatisticsHitCounter;
  TextRulerToolkit.log(hitMessage);
  final String sizeMessage = "[OptimizedLP2.learnTaggingRules] cacheSize at end of induction: "
      + cachedTestedStartRuleStatistics.size();
  TextRulerToolkit.log(sizeMessage);

  // the cached statistics are not needed once the induction is finished
  cachedTestedStartRuleStatistics.clear();
  return learnedRules;
}
/**
 * Induces rules from a single example.
 * <p>
 * The start rules of the example and (unless the target is a left or right correction) the
 * contextual start rules are created and tested together up front, so that their covering
 * statistics end up in the start rule cache and are available later when contextual rules are
 * built. Afterwards both lists are sorted by ascending positive coverage, rules below
 * {@code minCoveredPositives} are dropped, and all combinations of the remaining start rules
 * are generated recursively.
 *
 * @param e
 *          the example to induce rules from
 * @param roundNumber
 *          the current round, only used for status messages and debug file names
 */
@Override
protected void induceRulesFromExample(final TextRulerExample e, final int roundNumber) {
  final List<LP2Rule> startRules = createStartRulesForExample(e);
  if (startRules.isEmpty()) {
    return;
  }

  List<LP2Rule> ctxStartRules = new ArrayList<LP2Rule>();
  // TODO set isCorrectionRuleMode = true ?!
  if (!(e.getTarget().isLeftCorrection() || e.getTarget().isRightCorrection())) {
    ctxStartRules = createContextStartRulesForStartRule(startRules.get(0));
  }

  // test start rules and contextual start rules in one go (cache optimized)
  final List<LP2Rule> rulesToTest = new ArrayList<LP2Rule>(ctxStartRules.size()
      + startRules.size());
  rulesToTest.addAll(startRules);
  rulesToTest.addAll(ctxStartRules);
  sendStatusUpdateToDelegate(
      "Round " + roundNumber + " - Testing " + rulesToTest.size() + " start rules... "
          + " - uncovered examples: "
          + (examples.size() - coveredExamples.size() + " / " + examples.size())
          + " cs: " + cachedTestedStartRuleStatistics.size(),
      TextRulerLearnerState.ML_RUNNING, false);
  testStartRulesIfNotCached(rulesToTest);
  if (shouldAbort()) {
    return;
  }

  // explicit three-way comparison instead of subtraction, so the result can never overflow
  final Comparator<LP2Rule> byPositiveCoverage = new Comparator<LP2Rule>() {
    public int compare(LP2Rule o1, LP2Rule o2) {
      final int count1 = o1.getCoveringStatistics().getCoveredPositivesCount();
      final int count2 = o2.getCoveringStatistics().getCoveredPositivesCount();
      return count1 < count2 ? -1 : (count1 == count2 ? 0 : 1);
    }
  };
  // sort from low to high positive coverage in order to raise the pruning probability
  // during the recursion:
  Collections.sort(startRules, byPositiveCoverage);
  Collections.sort(ctxStartRules, byPositiveCoverage);
  pruneRulesBelowMinCoverage(startRules);
  pruneRulesBelowMinCoverage(ctxStartRules);

  sendStatusUpdateToDelegate("Round " + roundNumber + " - Creating all generalizations..."
      + " - uncovered examples: "
      + (examples.size() - coveredExamples.size() + " / " + examples.size()),
      TextRulerLearnerState.ML_RUNNING, false);

  // only for debugging purposes: collects every generated combination
  List<LP2Rule> debugRuleCollector = null;
  if (TextRulerToolkit.DEBUG && SAVE_DEBUG_INFO_IN_TEMPFOLDER) {
    debugRuleCollector = new ArrayList<LP2Rule>();
  }
  if (!recursiveCreateAllRuleCombinations(startRules, ctxStartRules, 0,
      new ArrayList<LP2Rule>(), null, debugRuleCollector)) {
    return; // aborted!
  }
  if (TextRulerToolkit.DEBUG && debugRuleCollector != null && SAVE_DEBUG_INFO_IN_TEMPFOLDER) {
    writeDebugGeneralizations(e, roundNumber, debugRuleCollector);
  }
}

/**
 * Removes the leading rules whose positive coverage is below {@code minCoveredPositives} from
 * a list that is sorted by ascending positive coverage. The leading run is removed with a
 * single range deletion instead of repeated removals of the first element.
 */
private void pruneRulesBelowMinCoverage(final List<LP2Rule> ascendingRules) {
  int firstKept = 0;
  while ((firstKept < ascendingRules.size())
      && (ascendingRules.get(firstKept).getCoveringStatistics().getCoveredPositivesCount() < minCoveredPositives)) {
    firstKept++;
  }
  if (firstKept > 0) {
    ascendingRules.subList(0, firstKept).clear();
  }
}

/**
 * Writes all collected rule combinations, sorted by rule string, together with their covering
 * statistics into a debug file in the temp directory. Failures are reported through
 * {@code TextRulerPlugin.error} and do not interrupt the induction.
 */
private void writeDebugGeneralizations(final TextRulerExample e, final int roundNumber,
    final List<LP2Rule> debugRuleCollector) {
  Collections.sort(debugRuleCollector, new Comparator<LP2Rule>() {
    public int compare(LP2Rule o1, LP2Rule o2) {
      return o1.getRuleString().compareTo(o2.getRuleString());
    }
  });
  final String startend = e.getTarget().type == MLTargetType.SINGLE_LEFT_BOUNDARY ? "left_"
      : "right_";
  final File file = new File(tempDirectory() + startend + "generalizations" + roundNumber
      + RutaEngine.SCRIPT_FILE_EXTENSION);
  final StringBuilder str = new StringBuilder();
  for (TextRulerRule rule : debugRuleCollector) {
    str.append(rule.getCoveringStatistics()).append("\t\t").append(rule.getRuleString())
        .append("\n");
  }
  try {
    FileUtils.saveString2File(str.toString(), file);
  } catch (Exception ex) {
    TextRulerPlugin.error(ex);
  }
}
/**
 * Makes sure every given start rule carries covering statistics, re-using cached results where
 * possible.
 * <p>
 * Rules whose rule string is already in the cache receive a copy of the cached statistics (and
 * the hit counter is incremented). All other rules are tested on the example documents in one
 * batch and, unless the learner was aborted meanwhile, copies of their statistics are stored in
 * the cache.
 *
 * @param startRules
 *          the rules to equip with covering statistics
 */
protected void testStartRulesIfNotCached(List<LP2Rule> startRules) {
  final List<TextRulerRule> uncachedRules = new ArrayList<TextRulerRule>();
  for (final LP2Rule startRule : startRules) {
    final String cacheKey = startRule.getRuleString();
    if (!cachedTestedStartRuleStatistics.containsKey(cacheKey)) {
      uncachedRules.add(startRule);
      continue;
    }
    // cache hit: hand out a copy so the cached statistics are not shared with the rule
    startRule.setCoveringStatistics(cachedTestedStartRuleStatistics.get(cacheKey).copy());
    cachedTestedStartRuleStatisticsHitCounter++;
  }

  if (uncachedRules.isEmpty()) {
    return;
  }
  testRulesOnDocumentSet(uncachedRules, exampleDocuments);
  if (shouldAbort()) {
    // nothing is cached after an abort
    return;
  }
  for (final TextRulerRule testedRule : uncachedRules) {
    cachedTestedStartRuleStatistics.put(testedRule.getRuleString(), testedRule
        .getCoveringStatistics().copy());
  }
}
/**
 * Merges a tuple of start rules into one new rule.
 * <p>
 * The new rule gets as many pre- and post-filler items as the longest pre-/post-filler pattern
 * in the tuple. Pre-filler patterns are walked from their last item backwards, post-filler
 * patterns from their first item forwards; for every position the constraints of all rules
 * that have an item there are copied into one merged item (later rules overwrite the word and
 * context constraint of earlier ones, other constraints are accumulated).
 *
 * @param ruleList
 *          the start rules to merge; the target of the first rule becomes the target of the
 *          result
 * @param covering
 *          the covering statistics of the combination; a copy is stored in the result
 * @return the newly created combined rule
 */
protected LP2Rule combineRulesToOneRule(List<LP2Rule> ruleList,
    TextRulerStatisticsCollector covering) {
  final LP2Rule combined = new LP2Rule(this, ruleList.get(0).getTarget());

  int longestPre = 0;
  int longestPost = 0;
  for (final LP2Rule source : ruleList) {
    longestPre = Math.max(longestPre, source.getPreFillerPattern().size());
    longestPost = Math.max(longestPost, source.getPostFillerPattern().size());
  }

  for (int offset = 0; offset < longestPre; offset++) {
    final LP2RuleItem merged = new LP2RuleItem();
    for (final LP2Rule source : ruleList) {
      if (offset < source.getPreFillerPattern().size()) {
        // pre-filler items are addressed from the end of the pattern
        final LP2RuleItem sourceItem = (LP2RuleItem) source.getPreFillerPattern().get(
            source.getPreFillerPattern().size() - offset - 1);
        mergeConstraintsInto(sourceItem, merged);
      }
    }
    combined.addPreFillerItem(merged);
  }

  for (int offset = 0; offset < longestPost; offset++) {
    final LP2RuleItem merged = new LP2RuleItem();
    for (final LP2Rule source : ruleList) {
      if (offset < source.getPostFillerPattern().size()) {
        final LP2RuleItem sourceItem = (LP2RuleItem) source.getPostFillerPattern().get(offset);
        mergeConstraintsInto(sourceItem, merged);
      }
    }
    combined.addPostFillerItem(merged);
  }

  combined.setCoveringStatistics(covering.copy());
  return combined;
}

/**
 * Copies the word constraint, the context constraint (each only if present) and all other
 * constraints of {@code sourceItem} into {@code mergedItem}.
 */
private void mergeConstraintsInto(final LP2RuleItem sourceItem, final LP2RuleItem mergedItem) {
  if (sourceItem.getWordConstraint() != null) {
    mergedItem.setWordConstraint(sourceItem.getWordConstraint().copy());
  }
  if (sourceItem.getContextConstraint() != null) {
    mergedItem.setContextConstraint(sourceItem.getContextConstraint().copy());
  }
  for (final MLLP2OtherConstraint other : sourceItem.getOtherConstraints()) {
    mergedItem.addOtherConstraint(other.copy());
  }
}
/**
 * Recursively enumerates the combinations of the given start rules and creates and checks a
 * rule for every non-empty combination.
 * <p>
 * For each start rule the recursion first continues WITHOUT it and then, if it is admissible,
 * WITH it. A start rule is only added if the current tuple does not yet contain a rule of the
 * same side (pre/post filler) with the same pattern length, and if the intersection of the
 * coverings still reaches {@code minCoveredPositives}.
 *
 * @param startRules
 *          all start rules to combine
 * @param ctxStartRules
 *          contextual start rules, handed on to the rule creation
 * @param index
 *          index of the start rule that is decided on in this call
 * @param currentRuleTuple
 *          the combination built so far; modified during the recursion
 * @param currentCovering
 *          covering of the current combination, {@code null} for the empty combination
 * @param debugRuleCollector
 *          optional collector for every created rule, may be {@code null}
 * @return {@code false} if the learner was aborted, {@code true} otherwise
 */
protected boolean recursiveCreateAllRuleCombinations(final List<LP2Rule> startRules,
    final List<LP2Rule> ctxStartRules, final int index, final List<LP2Rule> currentRuleTuple,
    final TextRulerStatisticsCollector currentCovering, final List<LP2Rule> debugRuleCollector) {
  // all start rules have been decided on: turn the tuple into a rule
  if (index >= startRules.size()) {
    if (shouldAbort()) {
      return false;
    }
    if (!currentRuleTuple.isEmpty()) {
      final LP2Rule created = createAndCheckRuleFromStartRules(currentRuleTuple,
          currentCovering, ctxStartRules);
      if (debugRuleCollector != null) {
        debugRuleCollector.add(created);
      }
    }
    return true;
  }

  // branch 1: continue WITHOUT the start rule at this index
  final boolean withoutCompleted = recursiveCreateAllRuleCombinations(startRules,
      ctxStartRules, index + 1, currentRuleTuple, currentCovering, debugRuleCollector);
  if (!withoutCompleted) {
    return false;
  }

  // branch 2: continue WITH this start rule, but only if it really yields a new rule. If the
  // tuple already holds a rule for the same item, adding another constraint for that token
  // is skipped.
  final LP2Rule candidate = startRules.get(index);
  if (tupleCoversSameItemAs(currentRuleTuple, candidate)) {
    return true;
  }

  // intersect the coverings (the first rule of a tuple simply contributes its own covering)
  final TextRulerStatisticsCollector extendedCovering;
  if (currentCovering == null) {
    extendedCovering = candidate.getCoveringStatistics();
  } else {
    extendedCovering = getCoveringIntersection(currentCovering,
        candidate.getCoveringStatistics());
  }

  // prune everything that falls below the minCoveredPositives threshold
  if (extendedCovering.getCoveredPositivesCount() < minCoveredPositives) {
    return true;
  }

  currentRuleTuple.add(candidate);
  final boolean withCompleted = recursiveCreateAllRuleCombinations(startRules, ctxStartRules,
      index + 1, currentRuleTuple, extendedCovering, debugRuleCollector);
  if (!withCompleted) {
    return false;
  }
  currentRuleTuple.remove(currentRuleTuple.size() - 1);
  return true;
}

/**
 * Tells whether the tuple already contains a rule on the same side (pre or post filler) as the
 * candidate whose pattern on that side has the same length as the candidate's.
 */
private boolean tupleCoversSameItemAs(final List<LP2Rule> ruleTuple, final LP2Rule candidate) {
  final boolean candidateIsPre = candidate.isPreFillerStartRule();
  for (final LP2Rule member : ruleTuple) {
    if (member.isPreFillerStartRule() != candidateIsPre) {
      continue;
    }
    final boolean sameLength;
    if (candidateIsPre) {
      sameLength = member.getPreFillerPattern().size() == candidate.getPreFillerPattern()
          .size();
    } else {
      sameLength = member.getPostFillerPattern().size() == candidate.getPostFillerPattern()
          .size();
    }
    if (sameLength) {
      return true;
    }
  }
  return false;
}
/**
 * Builds the intersection of two coverings.
 * <p>
 * A new collector is created from {@code c1}; its covered positive and covered negative
 * examples are then reduced to those also contained in {@code c2} and its counts are refreshed
 * from the remaining examples.
 *
 * @param c1
 *          the first covering, used as the basis of the result
 * @param c2
 *          the second covering
 * @return a new collector holding the intersection
 */
protected TextRulerStatisticsCollector getCoveringIntersection(
    final TextRulerStatisticsCollector c1, final TextRulerStatisticsCollector c2) {
  final TextRulerStatisticsCollector intersection = new TextRulerStatisticsCollector(c1);

  // keep only the examples both coverings agree on
  intersection.getCoveredPositiveExamples().retainAll(c2.getCoveredPositiveExamples());
  intersection.getCoveredNegativeExamples().retainAll(c2.getCoveredNegativeExamples());

  // bring the counters in line with the reduced example sets
  intersection.reflectCountsFromCoveredExamples();
  return intersection;
}
/**
 * Combines the given start rules into one rule and files it according to its quality.
 * <p>
 * A rule that covers at least {@code minCoveredPositives} positives and whose error rate does
 * not exceed {@code maxErrorThreshold} is added to the best rules. A rule with enough
 * positives but too many errors is instead combined with each contextual start rule; the
 * resulting contextual rules that pass the same two checks are added to the contextual rules.
 * A rule with too few positives is not stored anywhere.
 *
 * @param startRules
 *          the start rules to combine
 * @param covering
 *          the covering statistics of the combination
 * @param ctxStartRules
 *          the pre-tested contextual start rules
 * @return the combined rule, regardless of whether it was stored
 */
protected LP2Rule createAndCheckRuleFromStartRules(final List<LP2Rule> startRules,
    final TextRulerStatisticsCollector covering, final List<LP2Rule> ctxStartRules) {
  final LP2Rule combinedRule = combineRulesToOneRule(startRules, covering);
  final boolean tooFewPositives = combinedRule.getCoveringStatistics()
      .getCoveredPositivesCount() < minCoveredPositives;
  final boolean tooManyErrors = combinedRule.getErrorRate() > maxErrorThreshold;

  if (TextRulerToolkit.DEBUG && SAVE_DEBUG_INFO_IN_TEMPFOLDER) {
    TextRulerToolkit.appendStringToFile(tempDirectory() + "bestcandidates"
        + RutaEngine.SCRIPT_FILE_EXTENSION, combinedRule.getRuleString() + "\n");
  }

  if (!tooFewPositives) {
    if (tooManyErrors) {
      // enough positives but too imprecise: try to rescue it with a context constraint
      addContextualVariants(combinedRule, ctxStartRules);
    } else {
      currentBestRules.add(combinedRule);
      currentBestRules.removeSubsumedRules();
      currentBestRules.cutToMaxSize();
    }
  }
  return combinedRule;
}

/**
 * Creates one contextual variant of {@code baseRule} per contextual start rule, using the
 * precalculated covering of the contextual start rule, and stores every variant that has
 * enough positives and an acceptable error rate in the contextual rules.
 */
private void addContextualVariants(final LP2Rule baseRule, final List<LP2Rule> ctxStartRules) {
  for (final LP2Rule ctxStartRule : ctxStartRules) {
    final MLLP2ContextConstraint constraint = ctxStartRule.getMarkingRuleItem()
        .getContextConstraint();
    final LP2Rule variant = baseRule.copy();
    variant.setIsContextualRule(true);
    variant.getMarkingRuleItem().setContextConstraint(constraint.copy());
    variant.setNeedsCompile(true);
    variant.compileRuleString();
    // the covering of the variant is the intersection of both already known coverings
    variant.setCoveringStatistics(getCoveringIntersection(baseRule.getCoveringStatistics(),
        ctxStartRule.getCoveringStatistics()));

    final boolean variantTooFewPositives = variant.getCoveringStatistics()
        .getCoveredPositivesCount() < minCoveredPositives;
    final boolean variantTooManyErrors = variant.getErrorRate() > maxErrorThreshold;

    if (TextRulerToolkit.DEBUG && SAVE_DEBUG_INFO_IN_TEMPFOLDER) {
      TextRulerToolkit.appendStringToFile(tempDirectory() + "ctxcandidates"
          + RutaEngine.SCRIPT_FILE_EXTENSION, variant.getRuleString() + "\n");
    }

    if (!variantTooFewPositives && !variantTooManyErrors) {
      currentContextualRules.add(variant);
      currentContextualRules.removeSubsumedRules();
      currentContextualRules.cutToMaxSize();
    }
  }
}
/**
 * Creates a start rule that carries exactly one constraint item on one side of the boundary.
 * <p>
 * On the chosen side, {@code contextSize - 1} unconstrained items are added first, followed by
 * the constraint item. If the rule is built from the left context for a left boundary or left
 * correction target, one unconstrained post-filler item is appended as the marking item; if it
 * is built from the right context for a right boundary or right correction target, one
 * unconstrained pre-filler item is added instead.
 *
 * @param target
 *          the target of the new rule
 * @param contextSize
 *          the number of items on the chosen side, including the constraint item
 * @param isLeftContext
 *          {@code true} to build a pre-filler rule, {@code false} for a post-filler rule
 * @param constraintItem
 *          the item holding the constraint
 * @return the new start rule
 */
protected LP2Rule createStartRuleForConstraint(final TextRulerTarget target,
    final int contextSize, final boolean isLeftContext, final LP2RuleItem constraintItem) {
  final LP2Rule startRule = new LP2Rule(this, target);

  if (isLeftContext) {
    // contextSize-1 ANY items followed by the constraint item
    for (int n = 0; n < contextSize - 1; n++) {
      startRule.addPreFillerItem(new LP2RuleItem());
    }
    startRule.addPreFillerItem(constraintItem);
    // left context rules for LEFT targets need at least ONE empty ANY item as the marking
    // item
    if (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY
        || target.type == MLTargetType.SINGLE_LEFT_CORRECTION) {
      startRule.addPostFillerItem(new LP2RuleItem());
    }
  } else {
    for (int n = 0; n < contextSize - 1; n++) {
      startRule.addPostFillerItem(new LP2RuleItem());
    }
    startRule.addPostFillerItem(constraintItem);
    // right context rules for RIGHT targets need at least ONE empty ANY item on the LEFT as
    // the marking item
    if (target.type == MLTargetType.SINGLE_RIGHT_BOUNDARY
        || target.type == MLTargetType.SINGLE_RIGHT_CORRECTION) {
      startRule.addPreFillerItem(new LP2RuleItem());
    }
  }

  startRule.setIsPreFillerStartRule(isLeftContext);
  return startRule;
}
/**
 * Creates the contextual start rules belonging to a start rule.
 * <p>
 * Currently exactly one contextual rule is produced. It consists of a single item with a
 * context constraint built from the slot's entry in {@code slotMaximumTokenCountMap} and the
 * given start rule. The item is added as post-filler item for left boundary targets and as
 * pre-filler item for every other target type.
 *
 * @param aStartRule
 *          the start rule that provides the target and is handed to the context constraint
 * @return a new, modifiable list containing the single contextual start rule
 */
protected List<LP2Rule> createContextStartRulesForStartRule(final LP2Rule aStartRule) {
  // TODO make all other tags contextual tags here. for now we take only
  // the counterpart
  // tag of the current learning process: (opening/closing tags)
  final LP2RuleItem contextItem = new LP2RuleItem();
  contextItem.setContextConstraint(new MLLP2ContextConstraint(slotMaximumTokenCountMap
      .get(aStartRule.getTarget().getSingleSlotRawTypeName()), aStartRule));

  final LP2Rule contextualRule = new LP2Rule(this, aStartRule.getTarget());
  contextualRule.setIsContextualRule(true);
  final boolean isLeftBoundary = aStartRule.getTarget().type == MLTargetType.SINGLE_LEFT_BOUNDARY;
  if (isLeftBoundary) {
    contextualRule.addPostFillerItem(contextItem);
  } else {
    contextualRule.addPreFillerItem(contextItem);
  }

  final List<LP2Rule> contextualRules = new ArrayList<LP2Rule>();
  contextualRules.add(contextualRule);
  return contextualRules;
}
/**
 * Creates all start rules for an example.
 * <p>
 * Up to {@code windowSize} tokens before and after the boundary position are collected (the
 * begin of the example annotation for left boundary/correction targets, its end otherwise).
 * For correction targets one additional token is fetched on the right and the first one is
 * dropped. For every context token the following start rules are created:
 * <ul>
 * <li>one rule with the word constraint of the token,</li>
 * <li>if that word constraint is a regular expression constraint, one rule with an "other"
 * constraint built from the token itself,</li>
 * <li>one rule for every other annotation found over the token.</li>
 * </ul>
 * Annotations lying WITHIN a token (with smaller bounds than the token) are ignored.
 *
 * @param example
 *          the example to create start rules for
 * @return the list of start rules, possibly empty
 */
protected List<LP2Rule> createStartRulesForExample(final TextRulerExample example) {
  final TextRulerTarget target = example.getTarget();
  final List<LP2Rule> result = new ArrayList<LP2Rule>();
  final CAS docCas = example.getDocumentCAS();
  final TextRulerAnnotation exampleAnnotation = example.getAnnotation();
  final TypeSystem ts = docCas.getTypeSystem();
  final Type tokensRootType = ts.getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);

  final boolean isLeftBoundary = (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY || target.type == MLTargetType.SINGLE_LEFT_CORRECTION);
  final boolean isCorrection = (target.type == MLTargetType.SINGLE_LEFT_CORRECTION || target.type == MLTargetType.SINGLE_RIGHT_CORRECTION);
  final int thePosition = isLeftBoundary ? exampleAnnotation.getBegin() : exampleAnnotation
      .getEnd();

  final List<AnnotationFS> leftContext = TextRulerToolkit.getAnnotationsBeforePosition(docCas,
      thePosition, windowSize,
      TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
  final List<AnnotationFS> rightContext;
  if (isCorrection) {
    rightContext = TextRulerToolkit.getAnnotationsAfterPosition(docCas, thePosition,
        windowSize + 1, TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet),
        tokensRootType);
    // The first token after the position is skipped for correction targets. Guard against
    // an empty right context (e.g. nothing follows the position), where an unconditional
    // remove(0) would throw an IndexOutOfBoundsException.
    if (!rightContext.isEmpty()) {
      rightContext.remove(0);
    }
  } else {
    rightContext = TextRulerToolkit.getAnnotationsAfterPosition(docCas, thePosition,
        windowSize, TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet),
        tokensRootType);
  }

  final int leftCount = leftContext.size();
  final int totalCount = leftCount + rightContext.size();
  // first the LEFT CONTEXT (pre filler pattern, walked from the boundary outwards), then the
  // right context (post filler pattern)
  for (int index = 0; index < totalCount; index++) {
    final boolean isPre = index < leftCount;
    final int prePostIndex = isPre ? index : index - leftCount;
    final int contextSize = prePostIndex + 1;
    final AnnotationFS tokenAFS = isPre ? leftContext.get(leftCount - 1 - prePostIndex)
        : rightContext.get(prePostIndex);
    final TextRulerAnnotation tokenAnnotation = new TextRulerAnnotation(tokenAFS,
        example.getDocument());

    // one rule with only the word constraint:
    final LP2RuleItem wordItem = new LP2RuleItem();
    wordItem.setWordConstraint(tokenAnnotation);
    result.add(createStartRuleForConstraint(example.getTarget(), contextSize, isPre, wordItem));

    if (wordItem.getWordConstraint().isRegExpConstraint()) {
      final LP2RuleItem basicItem = new LP2RuleItem();
      basicItem.addOtherConstraint(new MLLP2OtherConstraint(tokenAnnotation, tokenAnnotation));
      result.add(createStartRuleForConstraint(example.getTarget(), contextSize, isPre,
          basicItem));
    }

    // dynamic system: grab everything we get from the annotation index that lies over this
    // token. Annotations WITHIN the token are ignored for now; they could be used with a
    // CONTAINS constraint, but MLLP2OtherConstraint is not yet capable of this.
    final List<AnnotationFS> featureAnnotations = TextRulerToolkit
        .getOtherAnnotationsOverToken(docCas, tokenAFS, filterSetWithSlotNames);
    if (TextRulerToolkit.DEBUG && featureAnnotations.size() > 1) {
      TextRulerToolkit.log("FOUND MORE THAN ONE EXTRA TOKEN FEATURE ANNOTATION !");
      for (AnnotationFS featA : featureAnnotations) {
        TextRulerToolkit.log(featA.toString());
      }
      TextRulerToolkit.log("--------------------------------");
    }
    for (AnnotationFS featA : featureAnnotations) {
      final TextRulerAnnotation featureAnnot = new TextRulerAnnotation(featA,
          example.getDocument());
      final LP2RuleItem basicItem = new LP2RuleItem();
      basicItem.addOtherConstraint(new MLLP2OtherConstraint(tokenAnnotation, featureAnnot));
      result.add(createStartRuleForConstraint(example.getTarget(), contextSize, isPre,
          basicItem));
    }
  }
  return result;
}
/**
 * Always returns {@code true}. Within this class the covered negative examples of the rule
 * statistics are read when two coverings are intersected.
 */
@Override
public boolean collectNegativeCoveredInstancesWhenTesting() {
return true;
}
}