// trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/lp2/BasicLP2.java - uima-ruta - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.uima.ruta.textruler.learner.lp2;

 import java.io.File;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;

 import org.apache.commons.lang3.StringUtils;
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.TypeSystem;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.ruta.engine.RutaEngine;
 import org.apache.uima.ruta.textruler.TextRulerPlugin;
 import org.apache.uima.ruta.textruler.core.GlobalCASSource;
 import org.apache.uima.ruta.textruler.core.TextRulerAnnotation;
 import org.apache.uima.ruta.textruler.core.TextRulerBasicLearner;
 import org.apache.uima.ruta.textruler.core.TextRulerExample;
 import org.apache.uima.ruta.textruler.core.TextRulerExampleDocument;
 import org.apache.uima.ruta.textruler.core.TextRulerRule;
 import org.apache.uima.ruta.textruler.core.TextRulerRuleList;
 import org.apache.uima.ruta.textruler.core.TextRulerShiftExample;
 import org.apache.uima.ruta.textruler.core.TextRulerStatisticsCollector;
 import org.apache.uima.ruta.textruler.core.TextRulerTarget;
 import org.apache.uima.ruta.textruler.core.TextRulerTarget.MLTargetType;
 import org.apache.uima.ruta.textruler.core.TextRulerToolkit;
 import org.apache.uima.ruta.textruler.extension.TextRulerLearnerDelegate;
 import org.apache.uima.util.FileUtils;

 public abstract class BasicLP2 extends TextRulerBasicLearner {

   // ---- Keys looked up by setParameters(Map) in the parameter map ----

   /** Parameter-map key whose value is stored in {@link #windowSize}. */
   public static final String WINDOW_SIZE_KEY = "windowSize";

   /** Parameter-map key whose value is stored in {@link #maxCurrentBestRulesCount}. */
   public static final String CURRENT_BEST_RULES_SIZE_KEY = "currentBestRulesSize";

   /** Parameter-map key whose value is stored in {@link #maxCurrentContextualRulesCount}. */
   public static final String CURRENT_CONTEXTUAL_RULES_SIZE_KEY = "currentContextualRulesSize";

   /** Parameter-map key whose value is stored in {@link #minCoveredPositives}. */
   public static final String MIN_COVERED_POSITIVES_PER_RULE_KEY = "minCoveredPositivesPerRule";

   /** Parameter-map key whose value is stored in {@link #maxErrorThreshold}. */
   public static final String MAX_ERROR_THRESHOLD_KEY = "maxErrorThreshold";

   // ---- Default values used to initialise the configurable fields below ----

   /** Initial value of {@link #windowSize}. */
   public static final int STANDARD_WINDOW_SIZE = 2;

   /** Initial value of {@link #maxCurrentBestRulesCount}. */
   public static final int STANDARD_MAX_CURRENT_BEST_RULES_COUNT = 4;

   /** Initial value of {@link #maxCurrentContextualRulesCount}. */
   public static final int STANDARD_MAX_CONTEXTUAL_RULES_COUNT = 4;

   /** Initial value of {@link #minCoveredPositives}. */
   public static final int STANDARD_MIN_COVERED_POSITIVES_PER_RULE = 1;

   /** Initial value of {@link #maxErrorThreshold}. */
   public static final float STANDARD_MAX_ERROR_THRESHOLD = 0.1f;

   // NOTE(review): not referenced anywhere in this class; presumably used by subclasses or
   // other learner code for the shift/correction annotations -- confirm.
   public static final String CORRECTION_ANNOTATION_NAME = "lp2shift";

   /** Initial value of {@link #shiftSize}. */
   private static final int STANDARD_SHIFT_SIZE = 2;

   /** Passed to the LP2CurrentBestRulesQueue constructor for the per-seed best-rules queue. */
   protected int maxCurrentBestRulesCount = STANDARD_MAX_CURRENT_BEST_RULES_COUNT;

   /** Passed to the LP2CurrentBestRulesQueue constructor for the per-seed contextual-rules queue. */
   protected int maxCurrentContextualRulesCount = STANDARD_MAX_CONTEXTUAL_RULES_COUNT;

   // Set by setParameters but not read in this class; presumably consumed by subclasses.
   protected int windowSize = STANDARD_WINDOW_SIZE;

   /** Handed to TextRulerTarget.setMaxShiftDistance for the left-correction target. */
   protected int shiftSize = STANDARD_SHIFT_SIZE;

   // Set by setParameters but not read in this class; presumably consumed by subclasses.
   protected int minCoveredPositives = STANDARD_MIN_COVERED_POSITIVES_PER_RULE;

   // Set by setParameters but not read in this class; presumably consumed by subclasses.
   protected float maxErrorThreshold = STANDARD_MAX_ERROR_THRESHOLD;

   /** Positive examples of the target currently being learned (refreshed per target). */
   protected List<TextRulerExample> examples;

   /** Positive examples covered by the rules added to the best-rules pool so far. */
   protected Set<TextRulerExample> coveredExamples;

   /** Slot name -> (size of the slot's token count histogram - 1), filled by runForSlotName. */
   protected Map<String, Integer> slotMaximumTokenCountMap = new TreeMap<String, Integer>();

   /** Best-rules queue of the seed example currently processed by learnTaggingRules. */
   protected LP2CurrentBestRulesQueue currentBestRules;

   /** Contextual-rules queue of the seed example currently processed by learnTaggingRules. */
   protected LP2CurrentBestRulesQueue currentContextualRules;

   /** Slot name -> best-rules pool; the entry is replaced on every learnTaggingRules call. */
   protected Map<String, TextRulerRuleList> bestRulesPoolMap = new TreeMap<String, TextRulerRuleList>();

   /** Slot name -> contextual-rules pool; the entry is replaced on every learnTaggingRules call. */
   protected Map<String, TextRulerRuleList> contextRulesPoolMap = new TreeMap<String, TextRulerRuleList>();

   // The four maps below hold, per slot name, the rule strings stored by runForSlotName and
   // read back by getResultString.

   protected Map<String, String> leftBoundaryBestRulesMap = new TreeMap<String, String>();

   protected Map<String, String> rightBoundaryBestRulesMap = new TreeMap<String, String>();

   protected Map<String, String> leftBoundaryContextualRulesMap = new TreeMap<String, String>();

   protected Map<String, String> rightBoundaryContextualRulesMap = new TreeMap<String, String>();

   /**
    * Creates the learner by forwarding all arguments unchanged to the superclass constructor and
    * then switching on the {@code supportBoundaries} flag.
    *
    * @param inputDir forwarded to the superclass
    * @param prePropTMFile forwarded to the superclass
    * @param tmpDir forwarded to the superclass
    * @param slotNames forwarded to the superclass
    * @param filterSet forwarded to the superclass
    * @param skip forwarded to the superclass
    * @param delegate forwarded to the superclass
    */
   public BasicLP2(String inputDir, String prePropTMFile, String tmpDir, String[] slotNames,
           Set<String> filterSet, boolean skip, TextRulerLearnerDelegate delegate) {
     super(inputDir, prePropTMFile, tmpDir, slotNames, filterSet, skip, delegate);
     // this learner works on boundary annotations
     this.supportBoundaries = true;
   }

   /**
    * Learns the tagging rules for a single target.
    * <p>
    * The examples of {@code exampleDocuments} are rebuilt for {@code target}. Afterwards every
    * positive example that is not yet contained in {@link #coveredExamples} serves as a seed:
    * fresh {@link #currentBestRules} and {@link #currentContextualRules} queues are created,
    * {@link #induceRulesFromExample(TextRulerExample, int)} is called, and the content of both
    * queues is handed to {@link #addToFinalBestRulesPool(LP2Rule)} and
    * {@link #addToFinalContextRulesPool(LP2Rule)}.
    *
    * @param target
    *          the target to learn rules for
    * @param contextualRules
    *          optional output list; if not {@code null}, all rules of the contextual pool are
    *          appended to it
    * @return the best-rules pool of this run, or {@code null} if an abort was requested right
    *         after the examples were created
    */
   protected TextRulerRuleList learnTaggingRules(TextRulerTarget target,
           TextRulerRuleList contextualRules) {
     final String statusMessage;
     if (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY) {
       statusMessage = "Creating Left-Boundary Examples...";
     } else if (target.type == MLTargetType.SINGLE_RIGHT_BOUNDARY) {
       statusMessage = "Creating Right-Boundary Examples...";
     } else if (target.type == MLTargetType.SINGLE_LEFT_CORRECTION) {
       statusMessage = "Creating Left Correction Examples...";
     } else {
       // every remaining target type is reported as a right correction
       statusMessage = "Creating Right Correction Examples...";
     }
     sendStatusUpdateToDelegate(statusMessage, TextRulerLearnerState.ML_RUNNING, false);

     exampleDocuments.clearCurrentExamples();
     exampleDocuments.createExamplesForTarget(target);
     examples = exampleDocuments.getAllPositiveExamples();
     if (shouldAbort()) {
       return null;
     }

     // register fresh, empty pools for the slot of this target
     TextRulerRuleList bestRulesPool = new TextRulerRuleList();
     TextRulerRuleList contextRulesPool = new TextRulerRuleList();
     String slotName = target.getSingleSlotRawTypeName();
     bestRulesPoolMap.put(slotName, bestRulesPool);
     contextRulesPoolMap.put(slotName, contextRulesPool);
     coveredExamples = new HashSet<TextRulerExample>();

     int roundNumber = 0;
     for (TextRulerExample seed : examples) {
       if (coveredExamples.contains(seed)) {
         // already covered by a rule that was induced from an earlier seed
         continue;
       }
       if (shouldAbort()) {
         break;
       }
       roundNumber++;
       currentBestRules = new LP2CurrentBestRulesQueue(maxCurrentBestRulesCount);
       currentContextualRules = new LP2CurrentBestRulesQueue(maxCurrentContextualRulesCount);

       induceRulesFromExample(seed, roundNumber);

       for (LP2Rule bestRule : currentBestRules) {
         addToFinalBestRulesPool(bestRule);
       }
       for (LP2Rule ctxRule : currentContextualRules) {
         addToFinalContextRulesPool(ctxRule);
       }
       sendStatusUpdateToDelegate("New Rules added.", TextRulerLearnerState.ML_RUNNING, true);
     }

     if (TextRulerToolkit.DEBUG) {
       bestRulesPool.saveToRulesFile(getIntermediateRulesFileName(), getFileHeaderString(true));
     }

     if (contextualRules != null) {
       for (TextRulerRule contextRule : contextRulesPool) {
         contextualRules.add(contextRule);
       }
     }
     return bestRulesPool;
   }

   /**
    * Loads a CAS through the superclass and adds the boundary annotations of all slots to it
    * before handing it back.
    *
    * @param fileName
    *          forwarded to the superclass implementation
    * @param reuseCAS
    *          forwarded to the superclass implementation
    * @return the CAS returned by the superclass, after
    *         {@link #prepareCASWithBoundaries(CAS)} has been applied to it
    */
   @Override
   public CAS loadCAS(String fileName, CAS reuseCAS) {
     final CAS loadedCas = super.loadCAS(fileName, reuseCAS);
     prepareCASWithBoundaries(loadedCas);
     return loadedCas;
   }

   /**
    * Creates the boundary annotations for every configured slot in the given CAS.
    *
    * @param cas
    *          the CAS that is passed to
    *          {@code TextRulerExampleDocument.createBoundaryAnnotationsForCas}
    */
   public void prepareCASWithBoundaries(CAS cas) {
     for (int i = 0; i < slotNames.length; i++) {
       TextRulerExampleDocument.createBoundaryAnnotationsForCas(cas, slotNames[i], filterSet);
     }
   }

   /**
    * Applies {@link #prepareCASWithBoundaries(CAS)} to every CAS that {@code exampleDocuments}
    * currently reports as cached.
    */
   public void prepareCachedCASesWithBoundaries() {
     for (CAS cachedCas : exampleDocuments.getCachedCASes()) {
       prepareCASWithBoundaries(cachedCas);
     }
   }

   /**
    * Runs the superclass clean-up, then empties both rule pool maps and drops the references
    * to the example collections and the per-seed rule queues.
    */
   @Override
   protected void cleanUp() {
     super.cleanUp();

     // per-slot pools
     bestRulesPoolMap.clear();
     contextRulesPoolMap.clear();

     // per-target / per-seed working state
     currentBestRules = null;
     currentContextualRules = null;
     coveredExamples = null;
     examples = null;
   }

   /**
    * Runs the learner: prepares the already cached CASes, processes the slots one after the
    * other in the order of {@code slotNames} and finally reports the {@code ML_DONE} state.
    */
   @Override
   protected void doRun() {
     TextRulerToolkit.logIfDebug("--- LP2 START");

     // CASes that are already cached need their boundary annotations now; every other CAS is
     // prepared when it gets loaded (see loadCAS)
     prepareCachedCASesWithBoundaries();

     for (String slotName : slotNames) {
       runForSlotName(slotName);
     }

     sendStatusUpdateToDelegate("Done", TextRulerLearnerState.ML_DONE, true);
     TextRulerToolkit.logIfDebug("--- LP2 END");
   }

   /**
    * Learns all rules for one slot.
    * <p>
    * Steps: (1) build the token count histogram of the slot and remember its maximum length,
    * (2) learn the left boundary rules, (3) learn the right boundary rules, (4) run the
    * left-correction learning with a maximum shift distance of {@link #shiftSize}. The rule
    * strings of steps 2 and 3 are stored in the boundary rule maps that
    * {@link #getResultString()} reads.
    * <p>
    * An abort request is honoured after each step; in that case the method returns without
    * sending the "SLOT Done" status.
    *
    * @param slotName
    *          name of the slot to learn rules for
    */
   protected void runForSlotName(String slotName) {
     // 1. get slot length histogram in order to find maximum slot length (counted in tokens)
     sendStatusUpdateToDelegate("Creating slot length histogram...",
             TextRulerLearnerState.ML_RUNNING, false);
     List<Integer> histogram = exampleDocuments.getTokenCountHistogrammForSlotName(slotName,
             TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet));
     if (shouldAbort()) {
       return;
     }
     // -1 since the zero-histogram point also needs a place!
     slotMaximumTokenCountMap.put(slotName, histogram.size() - 1);

     // 2. learn left boundary best rules
     TextRulerRuleList ctxRules = new TextRulerRuleList();
     TextRulerRuleList bestRules = learnTaggingRules(new TextRulerTarget(slotName,
             MLTargetType.SINGLE_LEFT_BOUNDARY, this), ctxRules);
     if (bestRules != null) {
       leftBoundaryBestRulesMap.put(slotName, bestRules.getRulesString(""));
       leftBoundaryContextualRulesMap.put(slotName, ctxRules.getRulesString("\t"));
       bestRules.clear(); // free some memory/references
     }
     if (shouldAbort()) {
       return;
     }

     // 3. learn right boundary best rules
     ctxRules.clear();
     bestRules = learnTaggingRules(new TextRulerTarget(slotName,
             MLTargetType.SINGLE_RIGHT_BOUNDARY, this), ctxRules);
     if (bestRules != null) {
       rightBoundaryBestRulesMap.put(slotName, bestRules.getRulesString(""));
       rightBoundaryContextualRulesMap.put(slotName, ctxRules.getRulesString("\t"));
     }
     // same abort handling as after the left boundary step: do not start the correction
     // learning (and do not report the slot as done) once an abort was requested
     if (shouldAbort()) {
       return;
     }

     // TODO add correction rule learn stuff
     // (testTaggingRulesAndCreateCorrectionRulesExamples is not wired in yet)

     // 4. correct left start; the returned rule list is not used yet
     TextRulerTarget lsTarget = new TextRulerTarget(slotName, MLTargetType.SINGLE_LEFT_CORRECTION,
             this);
     lsTarget.setMaxShiftDistance(shiftSize);
     learnTaggingRules(lsTarget, null);

     // TODO correct right start: same as above with MLTargetType.SINGLE_RIGHT_CORRECTION

     sendStatusUpdateToDelegate("SLOT Done", TextRulerLearnerState.ML_RUNNING, true);
     TextRulerToolkit.logIfDebug("--- LP2 END FOR SLOT:" + slotName);
   }

   /**
    * Induces rules from a single seed example.
    * <p>
    * {@link #learnTaggingRules(TextRulerTarget, TextRulerRuleList)} calls this method once for
    * every positive example that is not yet covered, directly after it has created fresh
    * {@code currentBestRules} and {@code currentContextualRules} queues; afterwards it moves the
    * content of both queues into the per-slot rule pools. Implementations are therefore
    * presumably expected to leave their results in these two queues.
    *
    * @param e
    *          the seed example
    * @param roundNumber
    *          1-based number of the seed within the current learnTaggingRules call
    */
   protected abstract void induceRulesFromExample(TextRulerExample e, int roundNumber);

   /**
    * Adds a rule to the contextual-rules pool of the rule's slot unless the pool already
    * contains it. In debug mode the rule string (and a duplicate marker, if applicable) is
    * appended to the "ctxpool" script file in the temp directory.
    *
    * @param rule
    *          the contextual rule to add
    */
   protected void addToFinalContextRulesPool(LP2Rule rule) {
     if (TextRulerToolkit.DEBUG) {
       TextRulerToolkit.appendStringToFile(tempDirectory() + "ctxpool"
               + RutaEngine.SCRIPT_FILE_EXTENSION, rule.getRuleString() + "\n");
     }

     TextRulerRuleList slotPool = contextRulesPoolMap.get(rule.getTarget()
             .getSingleSlotRawTypeName());
     if (slotPool.contains(rule)) {
       if (TextRulerToolkit.DEBUG) {
         TextRulerToolkit.appendStringToFile(tempDirectory() + "ctxpool"
                 + RutaEngine.SCRIPT_FILE_EXTENSION, "\tDUPLICATE\n");
       }
       return;
     }
     slotPool.add(rule);
   }

   /**
    * Adds a rule to the best-rules pool of the rule's slot unless the pool already contains it.
    * <p>
    * For a newly added rule, all positive examples covered by the rule are recorded in
    * {@link #coveredExamples}, so that they are no longer used as seeds. In debug mode the
    * updated pool is additionally written to the intermediate rules file.
    *
    * @param rule
    *          the best rule to add
    */
   protected void addToFinalBestRulesPool(LP2Rule rule) {
     TextRulerRuleList slotPool = bestRulesPoolMap.get(rule.getTarget()
             .getSingleSlotRawTypeName());
     if (slotPool.contains(rule)) {
       // duplicate: neither the pool nor the covering set changes
       return;
     }

     slotPool.add(rule);
     // add all covered positives to covering set
     coveredExamples.addAll(rule.getCoveringStatistics().getCoveredPositiveExamples());
     if (TextRulerToolkit.DEBUG) {
       slotPool.saveToRulesFile(getIntermediateRulesFileName(), getFileHeaderString(false));
     }
   }

   /**
    * Assembles the complete rule script from the results learned so far.
    * <p>
    * The script consists of the file header, then for every slot its left and right boundary
    * rules plus a {@code BLOCK} with the contextual rules, and finally for every slot the
    * slot-building and clean-up rules that combine the boundary annotations into the slot
    * annotation.
    * <p>
    * While a slot is still being learned, the stored rule strings may be missing; the current
    * rule pools are used instead where available. A missing string is never written as the
    * literal text "null".
    *
    * @return the rule script
    */
   public String getResultString() {
     StringBuilder sb = new StringBuilder();
     sb.append(getFileHeaderString(true));

     for (String eachSlot : slotNames) {
       String slotShortName = TextRulerToolkit.getTypeShortName(eachSlot);
       String leftBoundaryBestRulesString = leftBoundaryBestRulesMap.get(eachSlot);
       String rightBoundaryBestRulesString = rightBoundaryBestRulesMap.get(eachSlot);
       String leftBoundaryContextualRulesString = leftBoundaryContextualRulesMap.get(eachSlot);
       String rightBoundaryContextualRulesString = rightBoundaryContextualRulesMap.get(eachSlot);
       TextRulerRuleList bestRulesPool = bestRulesPoolMap.get(eachSlot);
       TextRulerRuleList contextRulesPool = contextRulesPoolMap.get(eachSlot);

       sb.append("\n// Slot: ").append(slotShortName).append("\n");
       sb.append("// LEFT BOUNDARY RULES:\n");
       if (leftBoundaryBestRulesString != null) {
         // left boundary is finished; the right boundary is either finished as well or
         // currently in progress (then the pools hold its intermediate rules)
         sb.append(leftBoundaryBestRulesString);
         sb.append("\n// RIGHT BOUNDARY RULES:\n");
         if (rightBoundaryBestRulesString != null) {
           sb.append(rightBoundaryBestRulesString);
         } else if (bestRulesPool != null) {
           sb.append(bestRulesPool.getRulesString(""));
         }

         sb.append("\nBLOCK(contextualRules_").append(slotShortName).append(") Document{} {\n");
         sb.append("\tDocument{->ASSIGN(redoContextualRules, false)}; // reset flag\n");
         sb.append("\n\t// LEFT BOUNDARY CONTEXTUAL RULES:\n");
         if (leftBoundaryContextualRulesString != null) {
           sb.append(leftBoundaryContextualRulesString);
         }

         sb.append("\n\t// RIGHT BOUNDARY CONTEXTUAL RULES:\n");
         // decide on the contextual string itself, not on the best-rules string
         if (rightBoundaryContextualRulesString != null) {
           sb.append(rightBoundaryContextualRulesString);
         } else if (contextRulesPool != null) {
           sb.append(contextRulesPool.getRulesString("\t"));
         }

         sb.append("\n\t//Document{IF(redoContextualRules)->CALL(thisFile.contextualRules_")
                 .append(slotShortName).append(")};\n}\n");
       } else if (bestRulesPool != null) {
         // left boundary still in progress: show the intermediate pools
         sb.append(bestRulesPool.getRulesString(""));
         sb.append("\n\t// LEFT BOUNDARY CONTEXTUAL RULES:\n");
         if (contextRulesPool != null) {
           sb.append(contextRulesPool.getRulesString(""));
         }
       }
     }

     for (String eachSlot : slotNames) {
       String leftBoundary = TextRulerToolkit.getTypeShortName((new TextRulerTarget(eachSlot,
               MLTargetType.SINGLE_LEFT_BOUNDARY, this)).getSingleSlotTypeName());
       String rightBoundary = TextRulerToolkit.getTypeShortName((new TextRulerTarget(eachSlot,
               MLTargetType.SINGLE_RIGHT_BOUNDARY, this)).getSingleSlotTypeName());
       String slotMarkName = TextRulerToolkit.getTypeShortName(eachSlot);
       int maxInnerLength = (getMaxTokens(eachSlot) * 3) - 2;

       sb.append("\n//slot-building rules:\n");
       // left and right boundary on the same annotation
       sb.append(leftBoundary + "{IS(" + rightBoundary + ")->UNMARK(" + leftBoundary + "), UNMARK("
               + rightBoundary + "), MARKONCE(" + slotMarkName + ")};\n");
       // left boundary, optional inner part, right boundary
       sb.append(leftBoundary + "{->UNMARK(" + leftBoundary + ")} ");
       // the right boundary is the third rule element if the inner ANY element is present,
       // otherwise the second one
       int rightBoundaryElementIndex = 2;
       if (maxInnerLength > 0) {
         sb.append("ANY[0, " + maxInnerLength + "]? ");
         rightBoundaryElementIndex = 3;
       }
       sb.append(rightBoundary + "{->UNMARK(" + rightBoundary + "), MARKONCE(" + slotMarkName
               + ", 1, " + rightBoundaryElementIndex + ")};\n");

       sb.append("\n//cleaning up:\n" + leftBoundary + "{->UNMARK(" + leftBoundary + ")};\n"
               + rightBoundary + "{->UNMARK(" + rightBoundary + ")};\n");
     }

     return sb.toString();
   }

   /**
    * Looks up the maximum token count stored for a slot.
    *
    * @param slot
    *          the slot name used as key of {@link #slotMaximumTokenCountMap}
    * @return the stored value, or 0 if the map has no value for the slot
    */
   private Integer getMaxTokens(String slot) {
     Integer maxTokens = slotMaximumTokenCountMap.get(slot);
     return maxTokens != null ? maxTokens : Integer.valueOf(0);
   }

   /**
    * Applies the learner parameters contained in the given map.
    * <p>
    * The recognised keys are {@link #WINDOW_SIZE_KEY}, {@link #CURRENT_BEST_RULES_SIZE_KEY},
    * {@link #CURRENT_CONTEXTUAL_RULES_SIZE_KEY}, {@link #MIN_COVERED_POSITIVES_PER_RULE_KEY} and
    * {@link #MAX_ERROR_THRESHOLD_KEY}. A setting is only changed if the map holds a
    * {@link Number} for its key; a missing, {@code null} or non-numeric value leaves the current
    * setting untouched instead of raising a {@code ClassCastException} or
    * {@code NullPointerException}.
    *
    * @param params
    *          the parameter map; {@code null} is treated like an empty map
    */
   public void setParameters(Map<String, Object> params) {
     if (params == null) {
       return;
     }
     if (TextRulerToolkit.DEBUG) {
       saveParametersToTempFolder(params);
     }

     // instanceof is false for null, so absent keys and null values are skipped as well
     Object value = params.get(WINDOW_SIZE_KEY);
     if (value instanceof Number) {
       windowSize = ((Number) value).intValue();
     }

     value = params.get(CURRENT_BEST_RULES_SIZE_KEY);
     if (value instanceof Number) {
       maxCurrentBestRulesCount = ((Number) value).intValue();
     }

     value = params.get(CURRENT_CONTEXTUAL_RULES_SIZE_KEY);
     if (value instanceof Number) {
       maxCurrentContextualRulesCount = ((Number) value).intValue();
     }

     value = params.get(MIN_COVERED_POSITIVES_PER_RULE_KEY);
     if (value instanceof Number) {
       minCoveredPositives = ((Number) value).intValue();
     }

     value = params.get(MAX_ERROR_THRESHOLD_KEY);
     if (value instanceof Number) {
       maxErrorThreshold = ((Number) value).floatValue();
     }
   }

   /**
    * Returns the directory for the correction documents of the given target.
    *
    * @param target
    *          the target; its {@code isLeftBoundary()} result selects the directory
    * @return the temp directory string followed by "leftCorrectionDocs" or
    *         "rightCorrectionDocs"
    */
   protected String correctionRulesInputDirectory(TextRulerTarget target) {
     final String subDirectory = target.isLeftBoundary() ? "leftCorrectionDocs"
             : "rightCorrectionDocs";
     return tempDirectory() + subDirectory;
   }

   /**
    * Tests a rule set on all example documents and derives shift (correction) examples from the
    * wrongly inserted boundaries.
    * <p>
    * For every document the rules are applied to the test CAS and compared with the original
    * document. Each wrongly inserted tag is then matched against the correct tags found within
    * {@code maxDistance} tokens to its left and right; the nearer one is used, and on a tie the
    * one on the side selected by the target type. Each processed test CAS is written as an XMI
    * file into {@link #correctionRulesInputDirectory(TextRulerTarget)}.
    * <p>
    * NOTE(review): this method looks unfinished. It is only referenced from a commented-out TODO
    * in this class, the rules are read from a hard-coded file, and the created shift examples
    * are collected but not stored anywhere.
    *
    * @param target
    *          the target whose tagging rules are tested
    * @param maxDistance
    *          maximum distance that is searched around a wrong tag for a correct tag
    * @return {@code true} on success, {@code false} if any exception occurred (the exception is
    *         reported via {@code TextRulerPlugin.error})
    */
   protected boolean testTaggingRulesAndCreateCorrectionRulesExamples(TextRulerTarget target,
           int maxDistance) {
     try {
       File dir = new File(correctionRulesInputDirectory(target));
       if (!dir.exists()) {
         // mkdirs also creates missing parent directories
         dir.mkdirs();
       }
       exampleDocuments.clearCurrentExamples();
       exampleDocuments.createExamplesForTarget(target);
       examples = exampleDocuments.getAllPositiveExamples();

       TextRulerExampleDocument[] sortedDocs = exampleDocuments
               .getSortedDocumentsInCacheOptimizedOrder();
       TypeSystem ts = sortedDocs[0].getCAS().getTypeSystem();
       Type tokensRootType = ts.getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);

       // NOTE(review): hard-coded development path; the original author had
       // "String allRulesContent = getResultString();" commented out here -- confirm which
       // source of rules is intended.
       String allRulesContent = FileUtils.file2String(new File("/testinput/testrules/rules"
               + RutaEngine.SCRIPT_FILE_EXTENSION));
       FileUtils.saveString2File(allRulesContent, new File(getTempRulesFileName()));

       CAS testCAS = getTestCAS();
       for (TextRulerExampleDocument doc : sortedDocs) {
         TextRulerStatisticsCollector c = new TextRulerStatisticsCollector();
         doc.resetAndFillTestCAS(testCAS, target);
         CAS docCAS = doc.getCAS();
         ae.process(testCAS);
         // test whole ruleset and collect negative examples
         compareOriginalDocumentWithTestCAS(doc, testCAS, target, c, true);

         // Now we have some covered positive examples that are good, and maybe some negative
         // examples for which we might create correction rules. In order to do that, we create
         // shift examples that map a negative example (an incorrectly inserted boundary) to an
         // original positive example within a specific distance.

         // TODO should that be done in both directions (left and right)? What happens if we
         // find two potential examples, one left, one right? For now: use the nearer one; if
         // both have exactly the same distance, use the one where the wrong tag would be IN
         // the slot filler.
         List<TextRulerExample> correctTags = doc.getPositiveExamples();
         List<TextRulerExample> wrongTags = new ArrayList<TextRulerExample>(
                 c.getCoveredNegativeExamples());
         List<TextRulerShiftExample> newExamples = new ArrayList<TextRulerShiftExample>();
         for (TextRulerExample wrongTag : wrongTags) {
           // test, if there's a corresponding positive example somewhere around (within
           // maxDistance)
           List<AnnotationFS> left = TextRulerToolkit.getAnnotationsBeforePosition(docCAS, wrongTag
                   .getAnnotation().getBegin(), maxDistance, TextRulerToolkit
                   .getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);
           List<AnnotationFS> right = TextRulerToolkit.getAnnotationsAfterPosition(docCAS, wrongTag
                   .getAnnotation().getEnd(), maxDistance, TextRulerToolkit
                   .getFilterSetWithSlotNames(slotNames, filterSet), tokensRootType);

           // TODO stop after the first found match or create one bad example for each found
           // occurrence? For now: stop after one, so only ONE bad example is created.

           // walk leftwards, starting with the annotation next to the wrong tag
           int leftDistance = 0;
           TextRulerExample leftCorrectTag = null;
           for (int i = left.size() - 1; i >= 0; i--) {
             leftDistance++;
             TextRulerAnnotation needle = TextRulerToolkit.convertToTargetAnnotation(left.get(i),
                     doc, target, docCAS.getTypeSystem());
             leftCorrectTag = TextRulerToolkit.exampleListContainsAnnotation(correctTags, needle);
             if (leftCorrectTag != null) {
               break;
             }
           }

           // walk rightwards
           int rightDistance = 0;
           TextRulerExample rightCorrectTag = null;
           for (AnnotationFS fs : right) {
             rightDistance++;
             TextRulerAnnotation needle = TextRulerToolkit.convertToTargetAnnotation(fs, doc,
                     target, docCAS.getTypeSystem());
             rightCorrectTag = TextRulerToolkit.exampleListContainsAnnotation(correctTags, needle);
             if (rightCorrectTag != null) {
               break;
             }
           }

           // The distances are only meaningful for a side that actually found a match (for a
           // side without match the counter merely holds the number of inspected annotations),
           // so they are compared only if both sides matched.
           TextRulerExample theCorrectTag;
           if (leftCorrectTag == null) {
             theCorrectTag = rightCorrectTag; // may be null as well: nothing found at all
           } else if (rightCorrectTag == null) {
             theCorrectTag = leftCorrectTag;
           } else if (rightDistance < leftDistance) {
             theCorrectTag = rightCorrectTag;
           } else if (rightDistance > leftDistance) {
             theCorrectTag = leftCorrectTag;
           } else if (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY) {
             // tie: use the one that would lie in the slot filler
             theCorrectTag = rightCorrectTag;
           } else {
             theCorrectTag = leftCorrectTag;
           }

           if (theCorrectTag != null) {
             TextRulerToolkit.log("FOUND BAD EXAMPLE FOR SHIFTING !!");
             TextRulerShiftExample shiftExample = new TextRulerShiftExample(doc,
                     wrongTag.getAnnotation(), theCorrectTag.getAnnotation(), true, target);
             newExamples.add(shiftExample);
           }
         }

         // Build the output path with the directory separator (File.pathSeparator is the
         // separator of path LISTS such as the class path). Only the last name element of the
         // CAS file name is used, so the file lands inside dir even if the document reports a
         // full path.
         File xmiFile = new File(dir, new File(doc.getCasFileName()).getName());
         TextRulerToolkit.writeCAStoXMIFile(testCAS, xmiFile.getPath());
       }
       testCAS.reset();
     } catch (Exception e) {
       TextRulerPlugin.error(e);
       return false;
     }

     return true;
   }

   /**
    * Returns the header of the superclass followed by the declaration of the
    * {@code redoContextualRules} variable that the generated contextual rule blocks assign.
    *
    * @param complete
    *          forwarded to the superclass implementation
    * @return the extended file header
    */
   @Override
   public String getFileHeaderString(boolean complete) {
     final String inheritedHeader = super.getFileHeaderString(complete);
     return inheritedHeader + "BOOLEAN redoContextualRules;\n\n";
   }

   /**
    * Checks the superclass requirements and additionally verifies that the type system of the
    * test CAS knows the left and right boundary helper type of every slot.
    * <p>
    * If at least one helper type is missing, an {@code ML_ERROR} status listing the missing type
    * names (comma separated) is sent to the delegate.
    *
    * @return {@code true} if the superclass check passed and no helper type is missing
    */
   @Override
   protected boolean checkForMandatoryTypes() {
     if (!super.checkForMandatoryTypes()) {
       return false;
     }

     TypeSystem typeSystem = getTestCAS().getTypeSystem();

     // names of all helper types that have to be present
     List<String> requiredTypes = new ArrayList<String>();
     for (int i = 0; i < slotNames.length; i++) {
       TextRulerTarget leftTarget = new TextRulerTarget(slotNames[i],
               MLTargetType.SINGLE_LEFT_BOUNDARY, this);
       requiredTypes.add(leftTarget.getSingleSlotTypeName());
       TextRulerTarget rightTarget = new TextRulerTarget(slotNames[i],
               MLTargetType.SINGLE_RIGHT_BOUNDARY, this);
       requiredTypes.add(rightTarget.getSingleSlotTypeName());
     }

     List<String> missingTypes = new ArrayList<String>();
     for (String typeName : requiredTypes) {
       if (typeSystem.getType(typeName) == null) {
         missingTypes.add(typeName);
       }
     }
     if (missingTypes.isEmpty()) {
       return true;
     }

     // comma separated list of the missing type names
     StringBuilder missingNames = new StringBuilder();
     for (int i = 0; i < missingTypes.size(); i++) {
       if (i > 0) {
         missingNames.append(", ");
       }
       missingNames.append(missingTypes.get(i));
     }
     sendStatusUpdateToDelegate("Error: Some Slot- or Helper-Types were not found in TypeSystem: "
             + missingNames, TextRulerLearnerState.ML_ERROR, false);
     return false;
   }

 }