trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/lp2/NaiveLP2.java - uima-ruta - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.uima.ruta.textruler.learner.lp2;

 import java.io.File;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Set;

 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.TypeSystem;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.ruta.engine.RutaEngine;
 import org.apache.uima.ruta.textruler.TextRulerPlugin;
 import org.apache.uima.ruta.textruler.core.TextRulerAnnotation;
 import org.apache.uima.ruta.textruler.core.TextRulerExample;
 import org.apache.uima.ruta.textruler.core.TextRulerRule;
 import org.apache.uima.ruta.textruler.core.TextRulerRuleItem;
 import org.apache.uima.ruta.textruler.core.TextRulerRulePattern;
 import org.apache.uima.ruta.textruler.core.TextRulerTarget;
 import org.apache.uima.ruta.textruler.core.TextRulerTarget.MLTargetType;
 import org.apache.uima.ruta.textruler.core.TextRulerToolkit;
 import org.apache.uima.ruta.textruler.extension.TextRulerLearnerDelegate;
 import org.apache.uima.ruta.textruler.learner.lp2.LP2RuleItem.MLLP2ContextConstraint;
 import org.apache.uima.ruta.textruler.learner.lp2.LP2RuleItem.MLLP2OtherConstraint;
 import org.apache.uima.util.FileUtils;

 public class NaiveLP2 extends BasicLP2 {

   /**
    * Creates the learner by handing every argument unchanged to the {@link BasicLP2}
    * constructor; no additional state is set up here.
    * <p>
    * NOTE(review): the parameter descriptions below are inferred from the names only and should
    * be confirmed against {@code BasicLP2}.
    *
    * @param inputDir
    *          presumably the directory containing the training documents
    * @param prePropTMFile
    *          presumably the preprocessing script file
    * @param tmpDir
    *          presumably the directory used for temporary files
    * @param slotNames
    *          presumably the names of the slots rules are learned for
    * @param filterSet
    *          presumably the type names that are filtered out when context is collected
    * @param skip
    *          forwarded to the superclass; its meaning is not visible here
    * @param delegate
    *          presumably the receiver of status updates
    */
   public NaiveLP2(String inputDir, String prePropTMFile, String tmpDir, String[] slotNames,
           Set<String> filterSet, boolean skip, TextRulerLearnerDelegate delegate) {
     super(inputDir, prePropTMFile, tmpDir, slotNames, filterSet, skip, delegate);
   }

   public static final boolean SAVE_DEBUG_INFO_IN_TEMPFOLDER = false;

   /**
    * Induces rules from one positive example.
    * <p>
    * The most specific rule for the example is built and generalized, all generalizations are
    * tested against {@code exampleDocuments} in a single batch, and every tested rule is then
    * passed to {@link #checkAndHandleNewRule(LP2Rule)}. When both {@code TextRulerToolkit.DEBUG}
    * and {@link #SAVE_DEBUG_INFO_IN_TEMPFOLDER} are set, the tested rules are additionally
    * written, sorted by rule string, to a file in the temp directory.
    *
    * @param e
    *          the positive example to learn from
    * @param roundNumber
    *          number of the current round; appears in the status message and in the name of the
    *          debug file
    */
   @Override
   protected void induceRulesFromExample(TextRulerExample e, int roundNumber) {
     LP2Rule baseRule = createInitialRuleForPositiveExample(e);
     List<LP2Rule> genRules = generalizeRule(baseRule);

     if (shouldAbort()) {
       return;
     }

     // Note the blank before "generalizations": without it the count and the word run together.
     sendStatusUpdateToDelegate(
             "Round " + roundNumber + " - Testing " + genRules.size()
                     + " generalizations... - uncovered examples: "
                     + (examples.size() - coveredExamples.size()) + " / " + examples.size(),
             TextRulerLearnerState.ML_RUNNING, false);

     // All generalizations are tested in one pass instead of rule by rule.
     testRulesOnDocumentSet(new ArrayList<TextRulerRule>(genRules), exampleDocuments);

     for (LP2Rule newRule : genRules) {
       checkAndHandleNewRule(newRule);
     }

     if (TextRulerToolkit.DEBUG && SAVE_DEBUG_INFO_IN_TEMPFOLDER) {
       // Sort a copy so the order of genRules itself stays untouched.
       List<LP2Rule> sortedRules = new ArrayList<LP2Rule>(genRules);
       Collections.sort(sortedRules, new Comparator<LP2Rule>() {

         public int compare(LP2Rule o1, LP2Rule o2) {
           return o1.getRuleString().compareTo(o2.getRuleString());
         }

       });

       String startend = e.getTarget().type == MLTargetType.SINGLE_LEFT_BOUNDARY ? "left_"
               : "right_";
       File file = new File(tempDirectory() + startend + "generalizations" + roundNumber
               + RutaEngine.SCRIPT_FILE_EXTENSION);
       StringBuilder str = new StringBuilder();
       for (TextRulerRule rule : sortedRules) {
         str.append(rule.getCoveringStatistics()).append("\t\t").append(rule.getRuleString())
                 .append("\n");
       }
       try {
         FileUtils.saveString2File(str.toString(), file);
       } catch (Exception ex) {
         // The debug dump is best effort: report the problem and keep learning.
         TextRulerPlugin.error(ex);
       }
     }
   }

   /**
    * Decides what happens to a rule that has already been tested on the example documents.
    * <ul>
    * <li>A rule covering at least {@code minCoveredPositives} positives with an error rate of at
    * most {@code maxErrorThreshold} is added to {@code currentBestRules}.</li>
    * <li>A rule covering enough positives but with too high an error rate is retried as a
    * contextual rule: a copy gets a context constraint on its marking item, is re-tested and, if
    * it then meets both thresholds, is added to {@code currentContextualRules}.</li>
    * <li>A rule covering too few positives is dropped.</li>
    * </ul>
    * After each addition the receiving rule list removes subsumed rules and is cut to its
    * maximum size.
    *
    * @param rule
    *          the tested rule candidate; the contextual variant is built on {@code rule.copy()}
    */
   protected void checkAndHandleNewRule(LP2Rule rule) {
     boolean tooFewPositives = rule.getCoveringStatistics().getCoveredPositivesCount() < minCoveredPositives;
     boolean tooManyErrors = rule.getErrorRate() > maxErrorThreshold;

     if (TextRulerToolkit.DEBUG && SAVE_DEBUG_INFO_IN_TEMPFOLDER) {
       TextRulerToolkit.appendStringToFile(tempDirectory() + "bestcandidates"
               + RutaEngine.SCRIPT_FILE_EXTENSION, rule.getRuleString() + "\n");
     }

     if (tooFewPositives) {
       return;
     }

     if (!tooManyErrors) {
       currentBestRules.add(rule);
       currentBestRules.removeSubsumedRules();
       currentBestRules.cutToMaxSize();
       return;
     }

     // Enough coverage but too many errors: test the rule in context. A context constraint is
     // attached to the marking rule item and the rule is re-tested on the corpus. Only the
     // rule's own slot is used for the constraint so far.
     // TODO use all other slot tags! (see optimized version as well)
     LP2Rule contextualRule = rule.copy();
     LP2RuleItem markingItem = contextualRule.getMarkingRuleItem();
     markingItem.setContextConstraint(new MLLP2ContextConstraint(slotMaximumTokenCountMap
             .get(contextualRule.getTarget().getSingleSlotRawTypeName()), contextualRule));
     contextualRule.setIsContextualRule(true);
     contextualRule.setNeedsCompile(true);

     if (TextRulerToolkit.DEBUG && SAVE_DEBUG_INFO_IN_TEMPFOLDER) {
       // One rule per line, matching the "bestcandidates" file.
       TextRulerToolkit.appendStringToFile(tempDirectory() + "ctxcandidates"
               + RutaEngine.SCRIPT_FILE_EXTENSION, contextualRule.getRuleString() + "\n");
     }

     // Testing a single rule on the whole document set is not very fast, but it works.
     testRuleOnDocumentSet(contextualRule, exampleDocuments);

     boolean ctxTooFewPositives = contextualRule.getCoveringStatistics().getCoveredPositivesCount() < minCoveredPositives;
     boolean ctxTooManyErrors = contextualRule.getErrorRate() > maxErrorThreshold;
     if (!(ctxTooFewPositives || ctxTooManyErrors)) {
       currentContextualRules.add(contextualRule);
       currentContextualRules.removeSubsumedRules();
       currentContextualRules.cutToMaxSize();
     }
   }

   /**
    * Enumerates the generalizations of {@code baseRule}.
    * <p>
    * The pre-filler items, taken in reverse of their stored order, followed by the post-filler
    * items form one item sequence that {@code recursiveGeneralizeRule} expands. Every resulting
    * rule is afterwards trimmed by {@code removeOutermostWildCardItemsFromRule}.
    *
    * @param baseRule
    *          the rule to generalize
    * @return the generalized rules
    */
   protected List<LP2Rule> generalizeRule(LP2Rule baseRule) {
     TextRulerRulePattern allItems = new TextRulerRulePattern();
     TextRulerRulePattern preItems = baseRule.getPreFillerPattern();

     // Walk the pre-filler pattern back to front so its order is reversed again.
     int remaining = preItems.size();
     while (remaining > 0) {
       remaining--;
       allItems.add(preItems.get(remaining));
     }
     allItems.addAll(baseRule.getPostFillerPattern());

     List<LP2Rule> generalizations = new ArrayList<LP2Rule>();
     recursiveGeneralizeRule(baseRule, allItems, new TextRulerRulePattern(), generalizations);
     TextRulerToolkit.log("GENERALIZATIONS: " + generalizations.size());

     for (LP2Rule generalization : generalizations) {
       removeOutermostWildCardItemsFromRule(generalization);
     }
     return generalizations;
   }

   /**
    * Builds the initial rule for a positive example from the annotations around its boundary.
    * <p>
    * The boundary position is the begin of the example annotation for
    * {@code SINGLE_LEFT_BOUNDARY} targets and its end otherwise. The annotations returned for
    * the left side become pre-filler items (added back to front), those for the right side
    * become post-filler items. Each item takes its annotation as word constraint and, when that
    * word constraint reports {@code isRegExpConstraint()}, an extra
    * {@code MLLP2OtherConstraint} built from the same annotation. The finished rule string is
    * logged.
    *
    * @param example
    *          the positive example
    * @return the rule built from the example's context
    */
   protected LP2Rule createInitialRuleForPositiveExample(TextRulerExample example) {
     TextRulerTarget target = example.getTarget();
     LP2Rule initialRule = new LP2Rule(this, example.getTarget());
     CAS docCas = example.getDocumentCAS();
     TextRulerAnnotation exampleAnnotation = example.getAnnotation();
     Type tokensRootType = docCas.getTypeSystem().getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);

     int boundary;
     if (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY) {
       boundary = exampleAnnotation.getBegin();
     } else {
       boundary = exampleAnnotation.getEnd();
     }

     List<AnnotationFS> before = TextRulerToolkit.getAnnotationsBeforePosition(docCas, boundary,
             windowSize, TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet),
             tokensRootType);
     List<AnnotationFS> after = TextRulerToolkit.getAnnotationsAfterPosition(docCas, boundary,
             windowSize, TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet),
             tokensRootType);

     // The left context is consumed back to front because the list is delivered from the
     // slot's point of view.
     int cursor = before.size();
     while (cursor > 0) {
       cursor--;
       TextRulerAnnotation annotation = new TextRulerAnnotation(before.get(cursor),
               example.getDocument());
       LP2RuleItem preItem = new LP2RuleItem();
       preItem.setWordConstraint(annotation);
       if (preItem.getWordConstraint().isRegExpConstraint()) {
         preItem.addOtherConstraint(new MLLP2OtherConstraint(annotation, annotation));
       }
       initialRule.addPreFillerItem(preItem);
     }

     for (int k = 0; k < after.size(); k++) {
       TextRulerAnnotation annotation = new TextRulerAnnotation(after.get(k),
               example.getDocument());
       LP2RuleItem postItem = new LP2RuleItem();
       postItem.setWordConstraint(annotation);
       if (postItem.getWordConstraint().isRegExpConstraint()) {
         postItem.addOtherConstraint(new MLLP2OtherConstraint(annotation, annotation));
       }
       initialRule.addPostFillerItem(postItem);
     }

     TextRulerToolkit.log("INITIAL RULE: " + initialRule.getRuleString());
     return initialRule;
   }

   /**
    * Backtracking step that enumerates all combinations of item generalizations.
    * <p>
    * While {@code currentPattern} is shorter than {@code allItems}, every generalization of the
    * next base item is appended in turn, the method recurses, and the item is taken off again.
    * Once the pattern is complete a new rule is assembled from it: the first
    * {@code baseRule.getPreFillerPattern().size()} items become pre-filler items, the rest
    * post-filler items. The rule is only collected when it has at least one inner constraint.
    *
    * @param baseRule
    *          supplies the target and the pre-filler item count
    * @param allItems
    *          the base items, one per pattern position
    * @param currentPattern
    *          the partial pattern built so far; restored to its incoming state on return
    * @param resultList
    *          receives the generated rules
    */
   protected void recursiveGeneralizeRule(LP2Rule baseRule, TextRulerRulePattern allItems,
           TextRulerRulePattern currentPattern, List<LP2Rule> resultList) {
     int chosenCount = currentPattern.size();

     if (chosenCount != allItems.size()) {
       // Pattern still incomplete: branch over every generalization of the next item.
       LP2RuleItem nextBaseItem = (LP2RuleItem) allItems.get(chosenCount);
       for (TextRulerRuleItem candidate : generalizeRuleItem(nextBaseItem)) {
         currentPattern.add(candidate);
         recursiveGeneralizeRule(baseRule, allItems, currentPattern, resultList);
         currentPattern.remove(currentPattern.size() - 1);
       }
       return;
     }

     // Pattern complete: turn it into a rule.
     LP2Rule candidateRule = new LP2Rule(this, baseRule.getTarget());
     int preCount = baseRule.getPreFillerPattern().size();
     int total = currentPattern.size();
     int pos = 0;
     for (; pos < total && pos < preCount; pos++) {
       candidateRule.addPreFillerItem(currentPattern.get(pos));
     }
     for (; pos < total; pos++) {
       candidateRule.addPostFillerItem(currentPattern.get(pos));
     }

     // A rule consisting of wildcards only (ANY ANY ANY ...) is useless and is skipped.
     if (candidateRule.totalInnerConstraintCount() > 0) {
       resultList.add(candidateRule);
     }
   }

   /**
    * Enumerates every subset of {@code otherConstraints} and emits one rule item per subset.
    * <p>
    * For the constraint at {@code currentConstraintIndex} the method first recurses without it
    * and then with it. When the index has passed the last constraint, a new item carrying
    * copies of the constraints in {@code currentConstraintTuple} is appended to {@code result}.
    *
    * @param baseItem
    *          only passed along to the recursive calls
    * @param otherConstraints
    *          the constraints to choose from
    * @param currentConstraintIndex
    *          index of the constraint decided in this step
    * @param currentConstraintTuple
    *          the constraints chosen so far; restored to its incoming state on return
    * @param result
    *          receives the generated items
    */
   protected void recursiveGeneralizeRuleItem(LP2RuleItem baseItem,
           List<MLLP2OtherConstraint> otherConstraints, int currentConstraintIndex,
           List<MLLP2OtherConstraint> currentConstraintTuple, List<TextRulerRuleItem> result) {
     if (currentConstraintIndex >= otherConstraints.size()) {
       // Every constraint has been decided: materialize the current selection.
       LP2RuleItem generalizedItem = new LP2RuleItem();
       for (MLLP2OtherConstraint chosen : currentConstraintTuple) {
         generalizedItem.addOtherConstraint(chosen.copy());
       }
       result.add(generalizedItem);
       return;
     }

     MLLP2OtherConstraint candidate = otherConstraints.get(currentConstraintIndex);
     int nextIndex = currentConstraintIndex + 1;

     // First the branch WITHOUT this constraint ...
     recursiveGeneralizeRuleItem(baseItem, otherConstraints, nextIndex, currentConstraintTuple,
             result);

     // ... then the branch WITH it, undoing the selection afterwards.
     currentConstraintTuple.add(candidate);
     recursiveGeneralizeRuleItem(baseItem, otherConstraints, nextIndex, currentConstraintTuple,
             result);
     currentConstraintTuple.remove(currentConstraintTuple.size() - 1);
   }

   /**
    * Lists the generalizations of a single rule item.
    * <p>
    * If the base item has a word constraint, the first entry is an item carrying only a copy of
    * that word constraint. It is followed by the items produced by
    * {@code recursiveGeneralizeRuleItem} from the base item's other constraints; none of those
    * carries a word constraint.
    *
    * @param baseItem
    *          the item to generalize
    * @return the generalized items
    */
   protected List<TextRulerRuleItem> generalizeRuleItem(LP2RuleItem baseItem) {
     List<TextRulerRuleItem> generalizations = new ArrayList<TextRulerRuleItem>();

     // Variant that keeps only the word constraint.
     boolean hasWordConstraint = baseItem.getWordConstraint() != null;
     if (hasWordConstraint) {
       LP2RuleItem wordOnlyItem = new LP2RuleItem();
       wordOnlyItem.setWordConstraint(baseItem.getWordConstraint().copy());
       generalizations.add(wordOnlyItem);
     }

     // Variants without word constraint: every combination of the other constraints.
     List<MLLP2OtherConstraint> selection = new ArrayList<MLLP2OtherConstraint>();
     recursiveGeneralizeRuleItem(baseItem, baseItem.getOtherConstraints(), 0, selection,
             generalizations);
     return generalizations;
   }

   /**
    * Strips unconstrained items from both outer ends of a rule.
    * <p>
    * Outermost pre-filler items are removed as long as they have no constraints, and the same
    * is then done for the outermost post-filler items. One item is always kept: the last
    * remaining pre-filler item of a {@code SINGLE_RIGHT_BOUNDARY} rule and the last remaining
    * post-filler item of a {@code SINGLE_LEFT_BOUNDARY} rule, because that item marks the
    * boundary.
    *
    * @param rule
    *          the rule to trim in place
    */
   protected void removeOutermostWildCardItemsFromRule(LP2Rule rule) {
     for (LP2RuleItem outer = (LP2RuleItem) rule.getOutermostPreFillerItem(); outer != null;
             outer = (LP2RuleItem) rule.getOutermostPreFillerItem()) {
       // In a RIGHT BOUNDARY rule the last pre-filler item marks the slot end and must stay.
       boolean isEndMarker = rule.getTarget().type == MLTargetType.SINGLE_RIGHT_BOUNDARY
               && rule.getPreFillerPattern().size() == 1;
       if (isEndMarker || outer.totalConstraintCount() != 0) {
         break;
       }
       rule.removeOutermostPreFillerItem();
     }

     for (LP2RuleItem outer = (LP2RuleItem) rule.getOutermostPostFillerItem(); outer != null;
             outer = (LP2RuleItem) rule.getOutermostPostFillerItem()) {
       // In a LEFT BOUNDARY rule the last post-filler item marks the slot start and must stay.
       boolean isStartMarker = rule.getTarget().type == MLTargetType.SINGLE_LEFT_BOUNDARY
               && rule.getPostFillerPattern().size() == 1;
       if (isStartMarker || outer.totalConstraintCount() != 0) {
         break;
       }
       rule.removeOutermostPostFillerItem();
     }
   }

   /**
    * Always answers {@code false} for this learner.
    * <p>
    * NOTE(review): judging by the name, this tells the testing code not to collect the
    * negative instances a rule covers; confirm against the caller in the superclass.
    *
    * @return {@code false}
    */
   @Override
   public boolean collectNegativeCoveredInstancesWhenTesting() {
     return false;
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.uima.ruta.textruler.learner.lp2;

	import java.io.File;
	import java.util.ArrayList;
	import java.util.Collections;
	import java.util.Comparator;
	import java.util.List;
	import java.util.Set;

	import org.apache.uima.cas.CAS;
	import org.apache.uima.cas.Type;
	import org.apache.uima.cas.TypeSystem;
	import org.apache.uima.cas.text.AnnotationFS;
	import org.apache.uima.ruta.engine.RutaEngine;
	import org.apache.uima.ruta.textruler.TextRulerPlugin;
	import org.apache.uima.ruta.textruler.core.TextRulerAnnotation;
	import org.apache.uima.ruta.textruler.core.TextRulerExample;
	import org.apache.uima.ruta.textruler.core.TextRulerRule;
	import org.apache.uima.ruta.textruler.core.TextRulerRuleItem;
	import org.apache.uima.ruta.textruler.core.TextRulerRulePattern;
	import org.apache.uima.ruta.textruler.core.TextRulerTarget;
	import org.apache.uima.ruta.textruler.core.TextRulerTarget.MLTargetType;
	import org.apache.uima.ruta.textruler.core.TextRulerToolkit;
	import org.apache.uima.ruta.textruler.extension.TextRulerLearnerDelegate;
	import org.apache.uima.ruta.textruler.learner.lp2.LP2RuleItem.MLLP2ContextConstraint;
	import org.apache.uima.ruta.textruler.learner.lp2.LP2RuleItem.MLLP2OtherConstraint;
	import org.apache.uima.util.FileUtils;

	public class NaiveLP2 extends BasicLP2 {

	/**
	 * Creates the learner by handing every argument unchanged to the {@link BasicLP2}
	 * constructor; no additional state is set up here.
	 * <p>
	 * NOTE(review): what each argument means is decided by {@code BasicLP2} and is not visible
	 * here; confirm there before relying on the parameter names.
	 */
	public NaiveLP2(String inputDir, String prePropTMFile, String tmpDir, String[] slotNames,
	Set<String> filterSet, boolean skip, TextRulerLearnerDelegate delegate) {
	super(inputDir, prePropTMFile, tmpDir, slotNames, filterSet, skip, delegate);
	}

	public static final boolean SAVE_DEBUG_INFO_IN_TEMPFOLDER = false;

	/**
	 * Induces rules from one positive example.
	 * <p>
	 * The most specific rule for the example is built and generalized, all generalizations are
	 * tested against {@code exampleDocuments} in a single batch, and every tested rule is then
	 * passed to {@link #checkAndHandleNewRule(LP2Rule)}. When both {@code TextRulerToolkit.DEBUG}
	 * and {@link #SAVE_DEBUG_INFO_IN_TEMPFOLDER} are set, the tested rules are additionally
	 * written, sorted by rule string, to a file in the temp directory.
	 *
	 * @param e
	 *          the positive example to learn from
	 * @param roundNumber
	 *          number of the current round; appears in the status message and in the name of the
	 *          debug file
	 */
	@Override
	protected void induceRulesFromExample(TextRulerExample e, int roundNumber) {
	  LP2Rule baseRule = createInitialRuleForPositiveExample(e);
	  List<LP2Rule> genRules = generalizeRule(baseRule);

	  if (shouldAbort()) {
	    return;
	  }

	  // Note the blank before "generalizations": without it the count and the word run together.
	  sendStatusUpdateToDelegate(
	          "Round " + roundNumber + " - Testing " + genRules.size()
	                  + " generalizations... - uncovered examples: "
	                  + (examples.size() - coveredExamples.size()) + " / " + examples.size(),
	          TextRulerLearnerState.ML_RUNNING, false);

	  // All generalizations are tested in one pass instead of rule by rule.
	  testRulesOnDocumentSet(new ArrayList<TextRulerRule>(genRules), exampleDocuments);

	  for (LP2Rule newRule : genRules) {
	    checkAndHandleNewRule(newRule);
	  }

	  if (TextRulerToolkit.DEBUG && SAVE_DEBUG_INFO_IN_TEMPFOLDER) {
	    // Sort a copy so the order of genRules itself stays untouched.
	    List<LP2Rule> sortedRules = new ArrayList<LP2Rule>(genRules);
	    Collections.sort(sortedRules, new Comparator<LP2Rule>() {

	      public int compare(LP2Rule o1, LP2Rule o2) {
	        return o1.getRuleString().compareTo(o2.getRuleString());
	      }

	    });

	    String startend = e.getTarget().type == MLTargetType.SINGLE_LEFT_BOUNDARY ? "left_"
	            : "right_";
	    File file = new File(tempDirectory() + startend + "generalizations" + roundNumber
	            + RutaEngine.SCRIPT_FILE_EXTENSION);
	    StringBuilder str = new StringBuilder();
	    for (TextRulerRule rule : sortedRules) {
	      str.append(rule.getCoveringStatistics()).append("\t\t").append(rule.getRuleString())
	              .append("\n");
	    }
	    try {
	      FileUtils.saveString2File(str.toString(), file);
	    } catch (Exception ex) {
	      // The debug dump is best effort: report the problem and keep learning.
	      TextRulerPlugin.error(ex);
	    }
	  }
	}

	protected void checkAndHandleNewRule(LP2Rule rule) {
	boolean tooFewPositives = rule.getCoveringStatistics().getCoveredPositivesCount() < minCoveredPositives;
	boolean tooManyErrors = rule.getErrorRate() > maxErrorThreshold;

	boolean isBestRule = !(tooFewPositives \|\| tooManyErrors);

	if (TextRulerToolkit.DEBUG && SAVE_DEBUG_INFO_IN_TEMPFOLDER)
	TextRulerToolkit.appendStringToFile(tempDirectory() + "bestcandidates"+RutaEngine.SCRIPT_FILE_EXTENSION,
	rule.getRuleString() + "\n");

	if (isBestRule) {
	currentBestRules.add(rule);
	currentBestRules.removeSubsumedRules();
	currentBestRules.cutToMaxSize();
	} else if (!tooFewPositives) {

	// test in context
	// in our TM representation, we simply can add a NEAR condition in
	// the MARKing rule item and retest it on the
	// corpus. we should do that for all kinds of tags we have, but
	// currently we only do it for the corresponding opening/closing
	// tag, since we do not have any information about other slots yet!
	// // TODO use all other slot tags! (see optimized version as well)

	if (true) {
	rule = rule.copy();
	LP2RuleItem item = rule.getMarkingRuleItem();
	// TextRulerToolkit.log("CONTEXTUAL RULE CANDIDATE: "+rule.getRuleString()+" ; "+rule.getCoveringStatistics());
	item.setContextConstraint(new MLLP2ContextConstraint(slotMaximumTokenCountMap.get(rule
	.getTarget().getSingleSlotRawTypeName()), rule));
	rule.setIsContextualRule(true);

	rule.setNeedsCompile(true);

	if (TextRulerToolkit.DEBUG && SAVE_DEBUG_INFO_IN_TEMPFOLDER)
	TextRulerToolkit.appendStringToFile(tempDirectory() + "ctxcandidates"+RutaEngine.SCRIPT_FILE_EXTENSION,
	rule.getRuleString());

	testRuleOnDocumentSet(rule, exampleDocuments); // not very
	// fast... but
	// works!
	boolean ctxTooFewPositives = rule.getCoveringStatistics().getCoveredPositivesCount() < minCoveredPositives;
	boolean ctxTooManyErrors = rule.getErrorRate() > maxErrorThreshold;
	boolean isGoodContextRule = !(ctxTooFewPositives \|\| ctxTooManyErrors);
	if (isGoodContextRule) {
	currentContextualRules.add(rule);
	currentContextualRules.removeSubsumedRules();
	currentContextualRules.cutToMaxSize();
	}
	}

	}
	}

	/**
	 * Enumerates the generalizations of {@code baseRule}.
	 * <p>
	 * The pre-filler items, taken in reverse of their stored order, followed by the post-filler
	 * items form one item sequence that {@code recursiveGeneralizeRule} expands. Every resulting
	 * rule is afterwards trimmed by {@code removeOutermostWildCardItemsFromRule}.
	 *
	 * @param baseRule
	 *          the rule to generalize
	 * @return the generalized rules
	 */
	protected List<LP2Rule> generalizeRule(LP2Rule baseRule) {
	  TextRulerRulePattern allItems = new TextRulerRulePattern();
	  TextRulerRulePattern preItems = baseRule.getPreFillerPattern();

	  // Walk the pre-filler pattern back to front so its order is reversed again.
	  int remaining = preItems.size();
	  while (remaining > 0) {
	    remaining--;
	    allItems.add(preItems.get(remaining));
	  }
	  allItems.addAll(baseRule.getPostFillerPattern());

	  List<LP2Rule> generalizations = new ArrayList<LP2Rule>();
	  recursiveGeneralizeRule(baseRule, allItems, new TextRulerRulePattern(), generalizations);
	  TextRulerToolkit.log("GENERALIZATIONS: " + generalizations.size());

	  for (LP2Rule generalization : generalizations) {
	    removeOutermostWildCardItemsFromRule(generalization);
	  }
	  return generalizations;
	}

	/**
	 * Builds the initial rule for a positive example from the annotations around its boundary.
	 * <p>
	 * The boundary position is the begin of the example annotation for
	 * {@code SINGLE_LEFT_BOUNDARY} targets and its end otherwise. The annotations returned for
	 * the left side become pre-filler items (added back to front), those for the right side
	 * become post-filler items. Each item takes its annotation as word constraint and, when that
	 * word constraint reports {@code isRegExpConstraint()}, an extra
	 * {@code MLLP2OtherConstraint} built from the same annotation. The finished rule string is
	 * logged.
	 *
	 * @param example
	 *          the positive example
	 * @return the rule built from the example's context
	 */
	protected LP2Rule createInitialRuleForPositiveExample(TextRulerExample example) {
	  TextRulerTarget target = example.getTarget();
	  LP2Rule initialRule = new LP2Rule(this, example.getTarget());
	  CAS docCas = example.getDocumentCAS();
	  TextRulerAnnotation exampleAnnotation = example.getAnnotation();
	  Type tokensRootType = docCas.getTypeSystem().getType(TextRulerToolkit.RUTA_ANY_TYPE_NAME);

	  int boundary;
	  if (target.type == MLTargetType.SINGLE_LEFT_BOUNDARY) {
	    boundary = exampleAnnotation.getBegin();
	  } else {
	    boundary = exampleAnnotation.getEnd();
	  }

	  List<AnnotationFS> before = TextRulerToolkit.getAnnotationsBeforePosition(docCas, boundary,
	          windowSize, TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet),
	          tokensRootType);
	  List<AnnotationFS> after = TextRulerToolkit.getAnnotationsAfterPosition(docCas, boundary,
	          windowSize, TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet),
	          tokensRootType);

	  // The left context is consumed back to front because the list is delivered from the
	  // slot's point of view.
	  int cursor = before.size();
	  while (cursor > 0) {
	    cursor--;
	    TextRulerAnnotation annotation = new TextRulerAnnotation(before.get(cursor),
	            example.getDocument());
	    LP2RuleItem preItem = new LP2RuleItem();
	    preItem.setWordConstraint(annotation);
	    if (preItem.getWordConstraint().isRegExpConstraint()) {
	      preItem.addOtherConstraint(new MLLP2OtherConstraint(annotation, annotation));
	    }
	    initialRule.addPreFillerItem(preItem);
	  }

	  for (int k = 0; k < after.size(); k++) {
	    TextRulerAnnotation annotation = new TextRulerAnnotation(after.get(k),
	            example.getDocument());
	    LP2RuleItem postItem = new LP2RuleItem();
	    postItem.setWordConstraint(annotation);
	    if (postItem.getWordConstraint().isRegExpConstraint()) {
	      postItem.addOtherConstraint(new MLLP2OtherConstraint(annotation, annotation));
	    }
	    initialRule.addPostFillerItem(postItem);
	  }

	  TextRulerToolkit.log("INITIAL RULE: " + initialRule.getRuleString());
	  return initialRule;
	}

	/**
	 * Backtracking step that enumerates all combinations of item generalizations.
	 * <p>
	 * While {@code currentPattern} is shorter than {@code allItems}, every generalization of the
	 * next base item is appended in turn, the method recurses, and the item is taken off again.
	 * Once the pattern is complete a new rule is assembled from it: the first
	 * {@code baseRule.getPreFillerPattern().size()} items become pre-filler items, the rest
	 * post-filler items. The rule is only collected when it has at least one inner constraint.
	 *
	 * @param baseRule
	 *          supplies the target and the pre-filler item count
	 * @param allItems
	 *          the base items, one per pattern position
	 * @param currentPattern
	 *          the partial pattern built so far; restored to its incoming state on return
	 * @param resultList
	 *          receives the generated rules
	 */
	protected void recursiveGeneralizeRule(LP2Rule baseRule, TextRulerRulePattern allItems,
	        TextRulerRulePattern currentPattern, List<LP2Rule> resultList) {
	  int chosenCount = currentPattern.size();

	  if (chosenCount != allItems.size()) {
	    // Pattern still incomplete: branch over every generalization of the next item.
	    LP2RuleItem nextBaseItem = (LP2RuleItem) allItems.get(chosenCount);
	    for (TextRulerRuleItem candidate : generalizeRuleItem(nextBaseItem)) {
	      currentPattern.add(candidate);
	      recursiveGeneralizeRule(baseRule, allItems, currentPattern, resultList);
	      currentPattern.remove(currentPattern.size() - 1);
	    }
	    return;
	  }

	  // Pattern complete: turn it into a rule.
	  LP2Rule candidateRule = new LP2Rule(this, baseRule.getTarget());
	  int preCount = baseRule.getPreFillerPattern().size();
	  int total = currentPattern.size();
	  int pos = 0;
	  for (; pos < total && pos < preCount; pos++) {
	    candidateRule.addPreFillerItem(currentPattern.get(pos));
	  }
	  for (; pos < total; pos++) {
	    candidateRule.addPostFillerItem(currentPattern.get(pos));
	  }

	  // A rule consisting of wildcards only (ANY ANY ANY ...) is useless and is skipped.
	  if (candidateRule.totalInnerConstraintCount() > 0) {
	    resultList.add(candidateRule);
	  }
	}

	/**
	 * Enumerates every subset of {@code otherConstraints} and emits one rule item per subset.
	 * <p>
	 * For the constraint at {@code currentConstraintIndex} the method first recurses without it
	 * and then with it. When the index has passed the last constraint, a new item carrying
	 * copies of the constraints in {@code currentConstraintTuple} is appended to {@code result}.
	 *
	 * @param baseItem
	 *          only passed along to the recursive calls
	 * @param otherConstraints
	 *          the constraints to choose from
	 * @param currentConstraintIndex
	 *          index of the constraint decided in this step
	 * @param currentConstraintTuple
	 *          the constraints chosen so far; restored to its incoming state on return
	 * @param result
	 *          receives the generated items
	 */
	protected void recursiveGeneralizeRuleItem(LP2RuleItem baseItem,
	        List<MLLP2OtherConstraint> otherConstraints, int currentConstraintIndex,
	        List<MLLP2OtherConstraint> currentConstraintTuple, List<TextRulerRuleItem> result) {
	  if (currentConstraintIndex >= otherConstraints.size()) {
	    // Every constraint has been decided: materialize the current selection.
	    LP2RuleItem generalizedItem = new LP2RuleItem();
	    for (MLLP2OtherConstraint chosen : currentConstraintTuple) {
	      generalizedItem.addOtherConstraint(chosen.copy());
	    }
	    result.add(generalizedItem);
	    return;
	  }

	  MLLP2OtherConstraint candidate = otherConstraints.get(currentConstraintIndex);
	  int nextIndex = currentConstraintIndex + 1;

	  // First the branch WITHOUT this constraint ...
	  recursiveGeneralizeRuleItem(baseItem, otherConstraints, nextIndex, currentConstraintTuple,
	          result);

	  // ... then the branch WITH it, undoing the selection afterwards.
	  currentConstraintTuple.add(candidate);
	  recursiveGeneralizeRuleItem(baseItem, otherConstraints, nextIndex, currentConstraintTuple,
	          result);
	  currentConstraintTuple.remove(currentConstraintTuple.size() - 1);
	}

	/**
	 * Lists the generalizations of a single rule item.
	 * <p>
	 * If the base item has a word constraint, the first entry is an item carrying only a copy of
	 * that word constraint. It is followed by the items produced by
	 * {@code recursiveGeneralizeRuleItem} from the base item's other constraints; none of those
	 * carries a word constraint.
	 *
	 * @param baseItem
	 *          the item to generalize
	 * @return the generalized items
	 */
	protected List<TextRulerRuleItem> generalizeRuleItem(LP2RuleItem baseItem) {
	  List<TextRulerRuleItem> generalizations = new ArrayList<TextRulerRuleItem>();

	  // Variant that keeps only the word constraint.
	  boolean hasWordConstraint = baseItem.getWordConstraint() != null;
	  if (hasWordConstraint) {
	    LP2RuleItem wordOnlyItem = new LP2RuleItem();
	    wordOnlyItem.setWordConstraint(baseItem.getWordConstraint().copy());
	    generalizations.add(wordOnlyItem);
	  }

	  // Variants without word constraint: every combination of the other constraints.
	  List<MLLP2OtherConstraint> selection = new ArrayList<MLLP2OtherConstraint>();
	  recursiveGeneralizeRuleItem(baseItem, baseItem.getOtherConstraints(), 0, selection,
	          generalizations);
	  return generalizations;
	}

	/**
	 * Strips unconstrained items from both outer ends of a rule.
	 * <p>
	 * Outermost pre-filler items are removed as long as they have no constraints, and the same
	 * is then done for the outermost post-filler items. One item is always kept: the last
	 * remaining pre-filler item of a {@code SINGLE_RIGHT_BOUNDARY} rule and the last remaining
	 * post-filler item of a {@code SINGLE_LEFT_BOUNDARY} rule, because that item marks the
	 * boundary.
	 *
	 * @param rule
	 *          the rule to trim in place
	 */
	protected void removeOutermostWildCardItemsFromRule(LP2Rule rule) {
	  for (LP2RuleItem outer = (LP2RuleItem) rule.getOutermostPreFillerItem(); outer != null;
	          outer = (LP2RuleItem) rule.getOutermostPreFillerItem()) {
	    // In a RIGHT BOUNDARY rule the last pre-filler item marks the slot end and must stay.
	    boolean isEndMarker = rule.getTarget().type == MLTargetType.SINGLE_RIGHT_BOUNDARY
	            && rule.getPreFillerPattern().size() == 1;
	    if (isEndMarker || outer.totalConstraintCount() != 0) {
	      break;
	    }
	    rule.removeOutermostPreFillerItem();
	  }

	  for (LP2RuleItem outer = (LP2RuleItem) rule.getOutermostPostFillerItem(); outer != null;
	          outer = (LP2RuleItem) rule.getOutermostPostFillerItem()) {
	    // In a LEFT BOUNDARY rule the last post-filler item marks the slot start and must stay.
	    boolean isStartMarker = rule.getTarget().type == MLTargetType.SINGLE_LEFT_BOUNDARY
	            && rule.getPostFillerPattern().size() == 1;
	    if (isStartMarker || outer.totalConstraintCount() != 0) {
	      break;
	    }
	    rule.removeOutermostPostFillerItem();
	  }
	}

	/**
	 * Always answers {@code false} for this learner.
	 * <p>
	 * NOTE(review): judging by the name, this tells the testing code not to collect the
	 * negative instances a rule covers; confirm against the caller in the superclass.
	 *
	 * @return {@code false}
	 */
	@Override
	public boolean collectNegativeCoveredInstancesWhenTesting() {
	return false;
	}

	}