blob: ee7b4fbdb49380f62554b77fa7c2dfc0d98ceb40 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.ruta.textruler.learner.lp2;
import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.ruta.engine.RutaEngine;
import org.apache.uima.ruta.textruler.TextRulerPlugin;
import org.apache.uima.ruta.textruler.core.GlobalCASSource;
import org.apache.uima.ruta.textruler.core.TextRulerAnnotation;
import org.apache.uima.ruta.textruler.core.TextRulerBasicLearner;
import org.apache.uima.ruta.textruler.core.TextRulerExample;
import org.apache.uima.ruta.textruler.core.TextRulerExampleDocument;
import org.apache.uima.ruta.textruler.core.TextRulerRule;
import org.apache.uima.ruta.textruler.core.TextRulerRuleList;
import org.apache.uima.ruta.textruler.core.TextRulerShiftExample;
import org.apache.uima.ruta.textruler.core.TextRulerStatisticsCollector;
import org.apache.uima.ruta.textruler.core.TextRulerTarget;
import org.apache.uima.ruta.textruler.core.TextRulerTarget.MLTargetType;
import org.apache.uima.ruta.textruler.core.TextRulerToolkit;
import org.apache.uima.ruta.textruler.extension.TextRulerLearnerDelegate;
import org.apache.uima.util.FileUtils;
public abstract class BasicLP2 extends TextRulerBasicLearner {
public static final String WINDOW_SIZE_KEY = "windowSize";
public static final String CURRENT_BEST_RULES_SIZE_KEY = "currentBestRulesSize";
public static final String CURRENT_CONTEXTUAL_RULES_SIZE_KEY = "currentContextualRulesSize";
public static final String MIN_COVERED_POSITIVES_PER_RULE_KEY = "minCoveredPositivesPerRule";
public static final String MAX_ERROR_THRESHOLD_KEY = "maxErrorThreshold";
public static final int STANDARD_WINDOW_SIZE = 2;
public static final int STANDARD_MAX_CURRENT_BEST_RULES_COUNT = 4;
public static final int STANDARD_MAX_CONTEXTUAL_RULES_COUNT = 4;
public static final int STANDARD_MIN_COVERED_POSITIVES_PER_RULE = 1;
public static final float STANDARD_MAX_ERROR_THRESHOLD = 0.1f;
public static final String CORRECTION_ANNOTATION_NAME = "lp2shift";
private static final int STANDARD_SHIFT_SIZE = 2;
protected int maxCurrentBestRulesCount = STANDARD_MAX_CURRENT_BEST_RULES_COUNT;
protected int maxCurrentContextualRulesCount = STANDARD_MAX_CONTEXTUAL_RULES_COUNT;
protected int windowSize = STANDARD_WINDOW_SIZE;
protected int shiftSize = STANDARD_SHIFT_SIZE;
protected int minCoveredPositives = STANDARD_MIN_COVERED_POSITIVES_PER_RULE;
protected float maxErrorThreshold = STANDARD_MAX_ERROR_THRESHOLD;
protected List<TextRulerExample> examples;
protected Set<TextRulerExample> coveredExamples;
protected Map<String, Integer> slotMaximumTokenCountMap = new TreeMap<String, Integer>();
protected LP2CurrentBestRulesQueue currentBestRules;
protected LP2CurrentBestRulesQueue currentContextualRules;
protected Map<String, TextRulerRuleList> bestRulesPoolMap = new TreeMap<String, TextRulerRuleList>();
protected Map<String, TextRulerRuleList> contextRulesPoolMap = new TreeMap<String, TextRulerRuleList>();
protected Map<String, String> leftBoundaryBestRulesMap = new TreeMap<String, String>();
protected Map<String, String> rightBoundaryBestRulesMap = new TreeMap<String, String>();
protected Map<String, String> leftBoundaryContextualRulesMap = new TreeMap<String, String>();
protected Map<String, String> rightBoundaryContextualRulesMap = new TreeMap<String, String>();
public BasicLP2(String inputDir, String prePropTMFile, String tmpDir, String[] slotNames,
Set<String> filterSet, boolean skip, TextRulerLearnerDelegate delegate) {
super(inputDir, prePropTMFile, tmpDir, slotNames, filterSet, skip, delegate);
supportBoundaries = true;
}
protected TextRulerRuleList learnTaggingRules(TextRulerTarget target,
TextRulerRuleList contextualRules) {
if (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY)
sendStatusUpdateToDelegate("Creating Left-Boundary Examples...",
TextRulerLearnerState.ML_RUNNING, false);
else if (target.type == MLTargetType.SINGLE_RIGHT_BOUNDARY)
sendStatusUpdateToDelegate("Creating Right-Boundary Examples...",
TextRulerLearnerState.ML_RUNNING, false);
else if (target.type == MLTargetType.SINGLE_LEFT_CORRECTION)
sendStatusUpdateToDelegate("Creating Left Correction Examples...",
TextRulerLearnerState.ML_RUNNING, false);
else
// if (target.type == MLTargetType.SINGLE_RIGHT_CORRECTION)
sendStatusUpdateToDelegate("Creating Right Correction Examples...",
TextRulerLearnerState.ML_RUNNING, false);
exampleDocuments.clearCurrentExamples();
exampleDocuments.createExamplesForTarget(target);
examples = exampleDocuments.getAllPositiveExamples();
if (shouldAbort())
return null;
TextRulerRuleList bestRulesPool = new TextRulerRuleList();
TextRulerRuleList contextRulesPool = new TextRulerRuleList();
String slotName = target.getSingleSlotRawTypeName();
bestRulesPoolMap.put(slotName, bestRulesPool);
contextRulesPoolMap.put(slotName, contextRulesPool);
coveredExamples = new HashSet<TextRulerExample>();
int roundNumber = 0;
for (TextRulerExample e : examples)
if (!coveredExamples.contains(e)) {
if (shouldAbort())
break;
roundNumber++;
currentBestRules = new LP2CurrentBestRulesQueue(maxCurrentBestRulesCount);
currentContextualRules = new LP2CurrentBestRulesQueue(maxCurrentContextualRulesCount);
// TextRulerToolkit.log("Example: "+e.getAnnotation().getBegin()+" : "+e.getAnnotation().getEnd());
induceRulesFromExample(e, roundNumber);
// TextRulerToolkit.log("Best Rules from this Seed: "+currentBestRules.size());
// if (TextRulerToolkit.DEBUG && currentBestRules.size()>1)
// {
// for (TextRulerRule r : currentBestRules)
// {
// TextRulerToolkit.log("\tp="+r.getCoveringStatistics().getCoveredPositivesCount()+"; n="+r.getCoveringStatistics().getCoveredNegativesCount()+"; "+r.getRuleString());
// for (TextRulerExample ex :
// r.getCoveringStatistics().getCoveredPositiveExamples())
// {
// TextRulerToolkit.log("\t\te="+ex.getAnnotation().getBegin());
//
// }
// }
// }
for (LP2Rule bestRule : currentBestRules) {
addToFinalBestRulesPool(bestRule);
}
for (LP2Rule ctxRule : currentContextualRules) {
addToFinalContextRulesPool(ctxRule);
}
sendStatusUpdateToDelegate("New Rules added.", TextRulerLearnerState.ML_RUNNING, true);
}
if (TextRulerToolkit.DEBUG) {
bestRulesPool.saveToRulesFile(getIntermediateRulesFileName(), getFileHeaderString(true));
// for (TextRulerRule r : bestRulesPool)
// {
// TextRulerToolkit.log("p="+r.getCoveringStatistics().getCoveredPositivesCount()+"; n="+r.getCoveringStatistics().getCoveredNegativesCount()+"; "+r.getRuleString());
// }
}
TextRulerRuleList result = bestRulesPool;
if (contextualRules != null)
for (TextRulerRule r : contextRulesPool)
contextualRules.add(r);
return result;
}
@Override
public CAS loadCAS(String fileName, CAS reuseCAS) {
CAS cas = super.loadCAS(fileName, reuseCAS);
prepareCASWithBoundaries(cas);
return cas;
}
public void prepareCASWithBoundaries(CAS cas) {
for (String slotName : slotNames)
TextRulerExampleDocument.createBoundaryAnnotationsForCas(cas, slotName, filterSet);
}
public void prepareCachedCASesWithBoundaries() {
for (CAS cas : exampleDocuments.getCachedCASes())
prepareCASWithBoundaries(cas);
}
@Override
protected void cleanUp() {
super.cleanUp();
examples = null;
coveredExamples = null;
currentBestRules = null;
currentContextualRules = null;
bestRulesPoolMap.clear();
contextRulesPoolMap.clear();
}
@Override
protected void doRun() {
TextRulerToolkit.logIfDebug("--- LP2 START");
prepareCachedCASesWithBoundaries(); // if some cases are already loaded,
// prepare them! all others get prepared when loaded (see loadCAS)
for (int i = 0; i < slotNames.length; i++) {
runForSlotName(slotNames[i]);
}
sendStatusUpdateToDelegate("Done", TextRulerLearnerState.ML_DONE, true);
TextRulerToolkit.logIfDebug("--- LP2 END");
}
protected void runForSlotName(String slotName) {
// 1. get slot length histogram in order to find maximum slot length
// (counted in tokens)
sendStatusUpdateToDelegate("Creating slot length histogram...",
TextRulerLearnerState.ML_RUNNING, false);
List<Integer> histogram = exampleDocuments.getTokenCountHistogrammForSlotName(slotName,
TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet));
if (shouldAbort())
return;
slotMaximumTokenCountMap.put(slotName, histogram.size() - 1); // -1 since the
// zero-histogram point
// also needs a place!
TextRulerRuleList ctxRules = new TextRulerRuleList();
TextRulerRuleList bestRules = learnTaggingRules(new TextRulerTarget(slotName,
MLTargetType.SINGLE_LEFT_BOUNDARY, this), ctxRules); // learn
// left
// boundary
// best
// rules
if (bestRules != null) {
leftBoundaryBestRulesMap.put(slotName, bestRules.getRulesString(""));
leftBoundaryContextualRulesMap.put(slotName, ctxRules.getRulesString("\t"));
bestRules.clear(); // free som memory/references
}
if (shouldAbort())
return;
ctxRules.clear();
bestRules = learnTaggingRules(new TextRulerTarget(slotName, MLTargetType.SINGLE_RIGHT_BOUNDARY,
this), ctxRules); // learn
// right
// boundary best
// rules
if (bestRules != null) {
rightBoundaryBestRulesMap.put(slotName, bestRules.getRulesString(""));
rightBoundaryContextualRulesMap.put(slotName, ctxRules.getRulesString("\t"));
}
// TODO add correction rule learn stuff
// testTaggingRulesAndCreateCorrectionRulesExamples(null, STANDARD_MAX_CONTEXTUAL_RULES_COUNT)
// correct left start
TextRulerTarget lsTarget = new TextRulerTarget(slotName, MLTargetType.SINGLE_LEFT_CORRECTION,
this);
lsTarget.setMaxShiftDistance(shiftSize);
TextRulerRuleList correctLeftRules = learnTaggingRules(lsTarget, null);
// resultString = "CAP{REGEXP(\"PM\")} ALL{->MARKONCE(stimeEND)};";
// try {
// FileUtils.saveString2File(resultString, file);
// } catch (IOException e) {
// }
// correct right start
// TextRulerTarget rsTarget = new TextRulerTarget(slotName,
// MLTargetType.SINGLE_RIGHT_CORRECTION,
// this);
// rsTarget.setMaxShiftDistance(shiftSize);
// TextRulerRuleList correctRightRules = learnTaggingRules(rsTarget, null);
//
sendStatusUpdateToDelegate("SLOT Done", TextRulerLearnerState.ML_RUNNING, true);
TextRulerToolkit.logIfDebug("--- LP2 END FOR SLOT:" + slotName);
}
protected abstract void induceRulesFromExample(TextRulerExample e, int roundNumber);
protected void addToFinalContextRulesPool(LP2Rule rule) {
if (TextRulerToolkit.DEBUG)
TextRulerToolkit.appendStringToFile(tempDirectory() + "ctxpool"
+ RutaEngine.SCRIPT_FILE_EXTENSION, rule.getRuleString() + "\n");
String slotName = rule.getTarget().getSingleSlotRawTypeName();
if (!contextRulesPoolMap.get(slotName).contains(rule)) {
contextRulesPoolMap.get(slotName).add(rule);
// TextRulerToolkit.log("CONTEXT RULE: "+rule.getRuleString()+" ; "+rule.getCoveringStatistics());
} else {
if (TextRulerToolkit.DEBUG) {
TextRulerToolkit.appendStringToFile(tempDirectory() + "ctxpool"
+ RutaEngine.SCRIPT_FILE_EXTENSION, "\tDUPLICATE\n");
}
}
}
protected void addToFinalBestRulesPool(LP2Rule rule) {
if (TextRulerToolkit.DEBUG && false)
TextRulerToolkit.appendStringToFile(tempDirectory() + "bestpool"
+ RutaEngine.SCRIPT_FILE_EXTENSION, rule.getRuleString() + "\n");
String slotName = rule.getTarget().getSingleSlotRawTypeName();
if (!bestRulesPoolMap.get(slotName).contains(rule)) {
bestRulesPoolMap.get(slotName).add(rule);
// TextRulerToolkit.log("BEST RULE: "+rule.getRuleString());
// add all covered positives to covering set
coveredExamples.addAll(rule.getCoveringStatistics().getCoveredPositiveExamples());
if (TextRulerToolkit.DEBUG)
bestRulesPoolMap.get(slotName).saveToRulesFile(getIntermediateRulesFileName(),
getFileHeaderString(false));
} else {
if (TextRulerToolkit.DEBUG && false) {
TextRulerToolkit.log("KANN SOWAS PASSIEREN ??");
TextRulerToolkit.appendStringToFile(tempDirectory() + "bestpool"
+ RutaEngine.SCRIPT_FILE_EXTENSION, "\tDUPLICATE\n");
}
}
}
public String getResultString() {
StringBuilder sb = new StringBuilder();
String header = getFileHeaderString(true);
sb.append(header);
for (String eachSlot : slotNames) {
String leftBoundaryBestRulesString = leftBoundaryBestRulesMap.get(eachSlot);
String rightBoundaryBestRulesString = rightBoundaryBestRulesMap.get(eachSlot);
String leftBoundaryContextualRulesString = leftBoundaryContextualRulesMap.get(eachSlot);
String rightBoundaryContextualRulesString = rightBoundaryContextualRulesMap.get(eachSlot);
TextRulerRuleList bestRulesPool = bestRulesPoolMap.get(eachSlot);
TextRulerRuleList contextRulesPool = contextRulesPoolMap.get(eachSlot);
sb.append("\n// Slot: " + TextRulerToolkit.getTypeShortName(eachSlot) + "\n");
sb.append("// LEFT BOUNDARY RULES:\n");
if (leftBoundaryBestRulesString != null) {
sb.append(leftBoundaryBestRulesString);
sb.append("\n// RIGHT BOUNDARY RULES:\n");
if (rightBoundaryBestRulesString != null)
sb.append(rightBoundaryBestRulesString);
else if (bestRulesPool != null)
sb.append(bestRulesPool.getRulesString(""));
sb.append("\nBLOCK(contextualRules_" + TextRulerToolkit.getTypeShortName(eachSlot)
+ ") Document{} {\n"
+ "\tDocument{->ASSIGN(redoContextualRules, false)}; // reset flag\n");
sb.append("\n\t// LEFT BOUNDARY CONTEXTUAL RULES:\n");
sb.append(leftBoundaryContextualRulesString);
sb.append("\n\t// RIGHT BOUNDARY CONTEXTUAL RULES:\n");
if (rightBoundaryBestRulesString != null)
sb.append(rightBoundaryContextualRulesString);
else if (contextRulesPool != null)
sb.append(contextRulesPool.getRulesString("\t"));
sb.append("\n\t//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_"
+ TextRulerToolkit.getTypeShortName(eachSlot) + ")};\n}\n");
} else if (bestRulesPool != null) {
sb.append(bestRulesPool.getRulesString(""));
sb.append("\n\t// LEFT BOUNDARY CONTEXTUAL RULES:\n");
if (contextRulesPool != null)
sb.append(contextRulesPool.getRulesString(""));
}
}
for (String eachSlot : slotNames) {
String leftBoundary = TextRulerToolkit.getTypeShortName((new TextRulerTarget(eachSlot,
MLTargetType.SINGLE_LEFT_BOUNDARY, this)).getSingleSlotTypeName());
String rightBoundary = TextRulerToolkit.getTypeShortName((new TextRulerTarget(eachSlot,
MLTargetType.SINGLE_RIGHT_BOUNDARY, this)).getSingleSlotTypeName());
String slotMarkName = TextRulerToolkit.getTypeShortName(eachSlot);
int maxInnerLength = (getMaxTokens(eachSlot) * 3) - 2;
sb.append("\n//slot-building rules:\n");
sb.append(leftBoundary + "{IS(" + rightBoundary + ")->UNMARK(" + leftBoundary + "), UNMARK("
+ rightBoundary + "), MARKONCE(" + slotMarkName + ")};\n");
sb.append(leftBoundary + "{->UNMARK(" + leftBoundary + ")} ");
if (maxInnerLength > 0) {
sb.append("ANY[0, " + maxInnerLength + "]? ");
sb.append(rightBoundary + "{->UNMARK(" + rightBoundary + "), MARKONCE(" + slotMarkName
+ ", 1, 3)};\n");
} else
sb.append(rightBoundary + "{->UNMARK(" + rightBoundary + "), MARKONCE(" + slotMarkName
+ ", 1, 2)};\n");
sb.append("\n//cleaning up:\n" + leftBoundary + "{->UNMARK(" + leftBoundary + ")};\n"
+ rightBoundary + "{->UNMARK(" + rightBoundary + ")};\n");
}
return sb.toString();
}
private Integer getMaxTokens(String slot) {
if (slotMaximumTokenCountMap.get(slot) == null) {
return 0;
}
return slotMaximumTokenCountMap.get(slot);
}
public void setParameters(Map<String, Object> params) {
if (TextRulerToolkit.DEBUG)
saveParametersToTempFolder(params);
// TODO try catch
if (params.containsKey(WINDOW_SIZE_KEY))
windowSize = (Integer) params.get(WINDOW_SIZE_KEY);
if (params.containsKey(CURRENT_BEST_RULES_SIZE_KEY))
maxCurrentBestRulesCount = (Integer) params.get(CURRENT_BEST_RULES_SIZE_KEY);
if (params.containsKey(CURRENT_CONTEXTUAL_RULES_SIZE_KEY))
maxCurrentContextualRulesCount = (Integer) params.get(CURRENT_CONTEXTUAL_RULES_SIZE_KEY);
if (params.containsKey(MIN_COVERED_POSITIVES_PER_RULE_KEY))
minCoveredPositives = (Integer) params.get(MIN_COVERED_POSITIVES_PER_RULE_KEY);
if (params.containsKey(MAX_ERROR_THRESHOLD_KEY))
maxErrorThreshold = (Float) params.get(MAX_ERROR_THRESHOLD_KEY);
}
protected String correctionRulesInputDirectory(TextRulerTarget target) {
if (target.isLeftBoundary())
return tempDirectory() + "leftCorrectionDocs";
else
return tempDirectory() + "rightCorrectionDocs";
}
protected boolean testTaggingRulesAndCreateCorrectionRulesExamples(TextRulerTarget target,
int maxDistance) {
try {
File dir = new File(correctionRulesInputDirectory(target));
if (!dir.exists())
dir.mkdir();
exampleDocuments.clearCurrentExamples();
exampleDocuments.createExamplesForTarget(target);
examples = exampleDocuments.getAllPositiveExamples();
TextRulerExampleDocument[] sortedDocs = exampleDocuments
.getSortedDocumentsInCacheOptimizedOrder();
TypeSystem ts = sortedDocs[0].getCAS().getTypeSystem();
Type tokensRootType = ts.getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);
// String allRulesContent = getResultString();
String allRulesContent = FileUtils.file2String(new File("/testinput/testrules/rules"
+ RutaEngine.SCRIPT_FILE_EXTENSION));
FileUtils.saveString2File(allRulesContent, new File(getTempRulesFileName()));
CAS testCAS = getTestCAS();
for (TextRulerExampleDocument doc : sortedDocs) {
TextRulerStatisticsCollector c = new TextRulerStatisticsCollector();
doc.resetAndFillTestCAS(testCAS, target);
CAS docCAS = doc.getCAS();
ae.process(testCAS);
compareOriginalDocumentWithTestCAS(doc, testCAS, target, c, true); // test whole ruleset and
// collect negative
// examples
// now we have some covered positive examples that are good, and
// maybe some negative examples
// for that we might create Correction Rules... in order to do
// that we have to create
// ShiftExamples and map negative examples (incorrect inserted
// boundaries) with a specific
// distance to an original positive example...
// TODO should that be done in both directions ? left and right
// ?! what happes if we
// find two potential examples, one left, one right ? --> for
// now: use the nearer one. if
// exactly the same distance, use the one where the wrong tag
// would be IN the slot filler!
List<TextRulerExample> correctTags = doc.getPositiveExamples();
List<TextRulerExample> wrongTags = new ArrayList<TextRulerExample>(
c.getCoveredNegativeExamples());
List<TextRulerShiftExample> newExamples = new ArrayList<TextRulerShiftExample>();
for (TextRulerExample wrongTag : wrongTags) {
// test, if there's a corresponding positive example
// somewhere around (within maxDistance)
List<AnnotationFS> left = TextRulerToolkit.getAnnotationsBeforePosition(docCAS, wrongTag
.getAnnotation().getBegin(), maxDistance, TextRulerToolkit
.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
List<AnnotationFS> right = TextRulerToolkit.getAnnotationsAfterPosition(docCAS, wrongTag
.getAnnotation().getEnd(), maxDistance, TextRulerToolkit
.getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
// TODO stop after the first found match or create one bad
// example for each found occurence ??!!
// for now: stop after one ! so create only ONE bad
// example...
int leftDistance = 0;
TextRulerExample leftCorrectTag = null;
for (int i = left.size() - 1; i >= 0; i--) {
leftDistance++;
TextRulerAnnotation needle = TextRulerToolkit.convertToTargetAnnotation(left.get(i),
doc, target, docCAS.getTypeSystem());
leftCorrectTag = TextRulerToolkit.exampleListContainsAnnotation(correctTags, needle);
if (leftCorrectTag != null)
break;
}
int rightDistance = 0;
TextRulerExample rightCorrectTag = null;
for (AnnotationFS fs : right) {
rightDistance++;
TextRulerAnnotation needle = TextRulerToolkit.convertToTargetAnnotation(fs, doc,
target, docCAS.getTypeSystem());
rightCorrectTag = TextRulerToolkit.exampleListContainsAnnotation(correctTags, needle);
if (rightCorrectTag != null)
break;
}
TextRulerExample theCorrectTag = null;
if (rightDistance < leftDistance && rightCorrectTag != null)
theCorrectTag = rightCorrectTag;
else if (rightDistance > leftDistance && leftCorrectTag != null)
theCorrectTag = leftCorrectTag;
else // use the one that would lie in the slot filler:
{
if (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY && rightCorrectTag != null)
theCorrectTag = rightCorrectTag;
else
theCorrectTag = leftCorrectTag;
}
if (theCorrectTag != null) {
TextRulerToolkit.log("FOUND BAD EXAMPLE FOR SHIFTING !!");
TextRulerShiftExample shiftExample = new TextRulerShiftExample(doc,
wrongTag.getAnnotation(), theCorrectTag.getAnnotation(), true, target);
newExamples.add(shiftExample);
}
}
TextRulerToolkit
.writeCAStoXMIFile(testCAS, dir + File.pathSeparator + doc.getCasFileName());
}
testCAS.reset();
} catch (Exception e) {
TextRulerPlugin.error(e);
return false;
}
return true;
}
@Override
public String getFileHeaderString(boolean complete) {
return super.getFileHeaderString(complete) + "BOOLEAN redoContextualRules;\n\n";
}
@Override
protected boolean checkForMandatoryTypes() {
if (!super.checkForMandatoryTypes()) {
return false;
}
CAS someCas = getTestCAS();
TypeSystem ts = someCas.getTypeSystem();
// GlobalCASSource.releaseCAS(someCas);
// check if all helper types are present:
List<String> list = new ArrayList<String>();
for (String eachSlot : slotNames) {
list.add(new TextRulerTarget(eachSlot, MLTargetType.SINGLE_LEFT_BOUNDARY, this)
.getSingleSlotTypeName());
list.add(new TextRulerTarget(eachSlot, MLTargetType.SINGLE_RIGHT_BOUNDARY, this)
.getSingleSlotTypeName());
}
boolean result = true;
List<String> missingTypes = new ArrayList<String>();
for (String s : list) {
if (ts.getType(s) == null) {
missingTypes.add(s);
result = false;
}
}
String missingString = "";
for (String string : missingTypes) {
missingString += string + ", ";
}
if (!StringUtils.isEmpty(missingString)) {
missingString = missingString.substring(0, missingString.length() - 2);
}
if (!result) {
sendStatusUpdateToDelegate("Error: Some Slot- or Helper-Types were not found in TypeSystem: "
+ missingString, TextRulerLearnerState.ML_ERROR, false);
}
return result;
}
}