| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.uima.ruta.textruler.learner.wien; |
| |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| |
| import org.apache.uima.cas.CAS; |
| import org.apache.uima.cas.Type; |
| import org.apache.uima.cas.TypeSystem; |
| import org.apache.uima.cas.text.AnnotationFS; |
| import org.apache.uima.ruta.textruler.TextRulerPlugin; |
| import org.apache.uima.ruta.textruler.core.TextRulerAnnotation; |
| import org.apache.uima.ruta.textruler.core.TextRulerBasicLearner; |
| import org.apache.uima.ruta.textruler.core.TextRulerExample; |
| import org.apache.uima.ruta.textruler.core.TextRulerExampleDocument; |
| import org.apache.uima.ruta.textruler.core.TextRulerRuleItem; |
| import org.apache.uima.ruta.textruler.core.TextRulerRulePattern; |
| import org.apache.uima.ruta.textruler.core.TextRulerSlotPattern; |
| import org.apache.uima.ruta.textruler.core.TextRulerTarget; |
| import org.apache.uima.ruta.textruler.core.TextRulerToolkit; |
| import org.apache.uima.ruta.textruler.extension.TextRulerLearnerDelegate; |
| |
/**
 * WIEN-style multi-slot wrapper learner: learns head (h), tail (t), and per-slot
 * left/right context patterns from positive examples and assembles them into a
 * single multi-slot rule (see doRun()).
 */
public class Wien extends TextRulerBasicLearner {

  // Learned head pattern h: tokens that end the page header area (set on success
  // of findHeadTailAndL1Patterns).
  TextRulerRulePattern hPattern;

  // Learned tail pattern t: tokens that start the page tail area.
  TextRulerRulePattern tPattern;

  // Per-document cache of head/tail portions, keyed by CAS file name; cleared at
  // the start and end of each run.
  Map<String, PatternPair> headTailCache = new HashMap<String, PatternPair>();

  // Per-document cache of inter-tuple separator patterns, keyed by CAS file name.
  Map<String, List<TextRulerRulePattern>> interTupelSeparatorsCache = new HashMap<String, List<TextRulerRulePattern>>();

  /** Simple holder for the left (l) and right (r) context pattern of one slot. */
  public static class PatternPair {
    public TextRulerRulePattern l = new TextRulerRulePattern();

    public TextRulerRulePattern r = new TextRulerRulePattern();
  }

  // One left/right context pattern pair per slot (index = slot index).
  ArrayList<PatternPair> patternPairs = new ArrayList<PatternPair>();

  // The resulting multi-slot rule; stays null until learning succeeded.
  WienRule theRule;

  /**
   * Result of a constraint-C3 test; the non-success values drive the pruning of the
   * h/t/l1 candidate search in findHeadTailAndL1Patterns.
   */
  public enum constraint3ReturnType {
    C3_SUCCESS, C3_L1CandidateSuffixError, C3_TailCandidateH_L1Error, C3_TailCandidateRK_PrefixError, C3_TailCandidateNotFoundError, C3_TailCandidateSucceedsL1InTailError, C3_L1CandidateInterTupleSeparatorSuffixError, C3_TailCandidatePrecedesL1InterTupleSeparatorError
  };
| |
  /**
   * Creates a new WIEN learner; all arguments are forwarded unchanged to
   * {@link TextRulerBasicLearner}.
   */
  public Wien(String inputDir, String prePropTmFile, String tmpDir, String[] slotNames,
          Set<String> filterSet, boolean skip, TextRulerLearnerDelegate delegate) {
    super(inputDir, prePropTmFile, tmpDir, slotNames, filterSet, skip, delegate);
  }
| |
  /**
   * WIEN does not evaluate negatively covered instances during testing, so this
   * always returns false.
   */
  @Override
  public boolean collectNegativeCoveredInstancesWhenTesting() {
    return false;
  }
| |
  /**
   * Main WIEN learning procedure: learns the right context r_k of every slot, the left
   * context l_k of slots k >= 1, and the (head, tail, l1) triple; on success it assembles
   * the final multi-slot rule into {@code theRule}. Status is reported to the delegate
   * after each phase.
   */
  @Override
  protected void doRun() {
    TextRulerToolkit.log("-- WIEN START");

    // per-document caches are only valid for a single run:
    headTailCache.clear();
    interTupelSeparatorsCache.clear();

    // NOTE(review): patternPairs is not cleared here, so a second doRun() on the same
    // instance would append additional pairs — confirm whether doRun can run twice.
    for (int i = 0; i < slotNames.length; i++)
      patternPairs.add(new PatternPair());

    TextRulerTarget target = new TextRulerTarget(slotNames, this);

    exampleDocuments.createExamplesForTarget(target); // new multislot
    // target examples

    for (TextRulerExample e : exampleDocuments.getAllPositiveExamples()) {
      TextRulerToolkit.log("Example found: " + e);
    }

    try {
      boolean allOk = true;
      sendStatusUpdateToDelegate("Searching for right patterns...",
              TextRulerLearnerState.ML_RUNNING, false);
      if (!findRightPatterns())
        allOk = false;
      sendStatusUpdateToDelegate("Searching for left patterns...",
              TextRulerLearnerState.ML_RUNNING, false);
      if (!findLeftPatterns())
        allOk = false;
      sendStatusUpdateToDelegate("Searching for head, tail and left1 patterns...",
              TextRulerLearnerState.ML_RUNNING, false);
      if (!findHeadTailAndL1Patterns())
        allOk = false;
      // {
      // String s = "";
      // for (TextRulerRuleItem i : hPattern)
      // s += " "+i;
      // s += " ||||";
      // for (TextRulerRuleItem i : patternPairs.get(0).l)
      // s += " "+i;
      // s += " ||||";
      // for (TextRulerRuleItem i : tPattern)
      // s += " "+i;
      // TextRulerToolkit.log(s);
      // }

      if (allOk) {
        // Assemble the multi-slot rule from the learned pattern pairs. totalItemCount
        // tracks the running rule-item index so the final MARK action can reference the
        // correct item range.
        sendStatusUpdateToDelegate("Building multi-slot rule.", TextRulerLearnerState.ML_RUNNING,
                false);
        theRule = new WienRule(this, target);
        List<TextRulerSlotPattern> rPatterns = theRule.getPatterns();
        int totalItemCount = 0;
        for (int k = 0; k < slotNames.length; k++) {
          WienRuleItem slotItem = new WienRuleItem((TextRulerAnnotation) null);
          TextRulerSlotPattern rP = new TextRulerSlotPattern();
          rPatterns.add(rP);
          PatternPair p = patternPairs.get(k);
          for (int i = 0; i < p.l.size(); i++) {
            WienRuleItem item = (WienRuleItem) p.l.get(i);
            if (k == 0 && i == 0) // the very first rule item:
            {
              // copy before modifying so the cached pattern stays untouched; the extra
              // conditions keep the rule from re-matching inside the tail or an area that
              // was already marked by a previous rule application:
              item = item.copy();
              // old version:
              // item.addCondition("-NEAR,wien_tail,10000000,false");
              item.addCondition("-AFTER(wien_tail)");
              item.addCondition("-PARTOF(wien_rulemark)");
            }
            rP.preFillerPattern.add(item);
            totalItemCount++;
          }
          rP.fillerPattern.add(slotItem.copy());
          totalItemCount++;
          for (int i = 0; i < p.r.size(); i++) {
            WienRuleItem item = (WienRuleItem) p.r.get(i);
            totalItemCount++;
            if (k == slotNames.length - 1 && i == p.r.size() - 1) // the
            // very
            // last
            // item
            {
              // the last item marks the whole matched span so the driver block can
              // detect and re-run the rule (see getResultString):
              item = item.copy();
              item.addAction("MARK(wien_rulemark, 1, " + totalItemCount + ")");
            }
            rP.postFillerPattern.add(item);
          }
          totalItemCount++; // the inter-slot ALL*? item has to be
          // counted as well!
        }
        sendStatusUpdateToDelegate("Done", TextRulerLearnerState.ML_DONE, true);
      } else
        sendStatusUpdateToDelegate("Done - Not all patterns could be learned!",
                TextRulerLearnerState.ML_DONE, true);
    } catch (Exception e) {
      TextRulerPlugin.error(e);
      sendStatusUpdateToDelegate("Aborted due to Exception!", TextRulerLearnerState.ML_ERROR, true);
    }
    headTailCache.clear();
    interTupelSeparatorsCache.clear();
    TextRulerToolkit.log("-- WIEN END");
  }
| |
| protected boolean findRightPatterns() { |
| TextRulerExampleDocument doc = exampleDocuments.getDocuments().get(0); |
| boolean allFound = true; |
| for (int k = 0; k < slotNames.length; k++) { |
| List<TextRulerRulePattern> rightContexts = getRightContextForSlot(doc, k); |
| System.out.println(rightContexts.get(0)); |
| int shortest = Integer.MAX_VALUE; |
| for (TextRulerRulePattern p : rightContexts) |
| shortest = p.size() < shortest ? p.size() : shortest; |
| boolean found = false; |
| for (int len = 1; len <= shortest; len++) { |
| TextRulerRulePattern subPattern = rightContexts.get(0).subPattern(0, len); |
| if (testConstraint1(subPattern, k)) { |
| // for (TextRulerRuleItem i : subPattern) |
| // ((WienRuleItem)i).getWordConstraint().setGeneralizeLinkMarkUp(true); |
| patternPairs.get(k).r = subPattern; |
| TextRulerToolkit.log("right " + k + ": " + subPattern); |
| found = true; |
| break; |
| } |
| } |
| if (!found) |
| allFound = false; |
| } |
| return allFound; |
| } |
| |
| protected boolean findLeftPatterns() { |
| TextRulerExampleDocument doc = exampleDocuments.getDocuments().get(0); |
| // skip l 0 ! |
| if (slotNames.length < 2) |
| return true; |
| boolean allFound = true; |
| for (int k = 1; k < slotNames.length; k++) { |
| List<TextRulerRulePattern> leftContexts = getLeftContextForSlot(doc, k); |
| int shortest = Integer.MAX_VALUE; |
| for (TextRulerRulePattern p : leftContexts) |
| shortest = p.size() < shortest ? p.size() : shortest; |
| TextRulerRulePattern sourcePattern = leftContexts.get(0); |
| boolean found = false; |
| for (int len = 1; len <= shortest; len++) { |
| // get suffix: |
| TextRulerRulePattern subPattern = sourcePattern.subPattern(sourcePattern.size() - len, len); |
| if (testConstraint2(subPattern, k)) { |
| patternPairs.get(k).l = subPattern; |
| for (TextRulerRuleItem i : subPattern) |
| ((WienRuleItem) i).getWordConstraint().setGeneralizeLinkMarkUp(true); |
| TextRulerToolkit.log("left " + k + ": " + subPattern); |
| found = true; |
| break; |
| } |
| } |
| if (!found) |
| allFound = false; |
| } |
| return allFound; |
| } |
| |
| protected boolean findHeadTailAndL1Patterns() { |
| List<TextRulerExampleDocument> docs = exampleDocuments.getDocuments(); |
| TextRulerExampleDocument doc0 = docs.get(0); |
| TextRulerRulePattern head = new TextRulerRulePattern(); |
| TextRulerRulePattern tail = new TextRulerRulePattern(); |
| getPageHeadAndTailPortion(doc0, head, tail); |
| |
| final class HLCandidate { |
| public TextRulerRulePattern head = new TextRulerRulePattern(); |
| |
| public TextRulerRulePattern l1 = new TextRulerRulePattern(); |
| } |
| |
| // a small optimization: |
| // find out the maximum possible length for l1 in doc0 since l1 is much |
| // smaller than the possible head length! |
| List<TextRulerRulePattern> interTupleSeparators = getInterTupleSepatators(doc0); |
| int shortestL1 = head.size() - 1; |
| for (TextRulerRulePattern its : interTupleSeparators) |
| shortestL1 = its.size() < shortestL1 ? its.size() : shortestL1; |
| |
| List<HLCandidate> hlCandidates = new ArrayList<HLCandidate>(); |
| // create candidates for each separation of the head and tail patterns: |
| for (int separator = head.size() - 1; separator > 0; separator--) { |
| HLCandidate c = new HLCandidate(); |
| for (int i = 0; i < head.size(); i++) { |
| if (i < separator) |
| c.head.add(head.get(i)); |
| else { |
| WienRuleItem it = (WienRuleItem) head.get(i).copy(); |
| it.getWordConstraint().setGeneralizeLinkMarkUp(true); |
| c.l1.add(it); |
| } |
| } |
| hlCandidates.add(c); |
| TextRulerToolkit.log(c.head.size() + " vs. " + c.l1.size()); |
| if (c.l1.size() >= shortestL1) |
| break; |
| } |
| |
| long total = 0; |
| |
| // get total h l1 t combination count: |
| long tCand = (tail.size() * (tail.size() + 1)) / 2; |
| for (HLCandidate c : hlCandidates) { |
| total += ((c.head.size() - 1) * (c.head.size())) / 2; |
| } |
| total *= tCand; |
| |
| long current = 0; |
| int oldPercent = -1; |
| |
| for (HLCandidate c : hlCandidates) { |
| // for each "candidate" which represents a l1 suffix pattern of the |
| // head tokens and a rest pattern for the h pattern, |
| // we have to create every sub pattern of the remaining h pattern as |
| // a h candidate: |
| TextRulerRulePattern l1 = c.l1; |
| TextRulerRulePattern h = null; |
| |
| boolean l1Sucks = false; |
| |
| for (int endI = c.head.size() - 1; endI > 0; endI--) { |
| for (int startI = endI; startI > 0; startI--) { |
| h = new TextRulerRulePattern(); |
| for (int i = startI; i <= endI; i++) |
| h.add(c.head.get(i)); |
| |
| // now for each h candidate we have to create each t |
| // candidate: |
| TextRulerRulePattern t = null; |
| for (int tstartI = 0; tstartI < tail.size(); tstartI++) { |
| for (int tendI = tstartI; tendI < tail.size(); tendI++) { |
| int percent = Math.round(((float) current * 100 / total)); |
| if (percent != oldPercent) { |
| oldPercent = percent; |
| if (percent > 100) |
| percent = 100; |
| // TextRulerToolkit.log(current+" / "+total); |
| sendStatusUpdateToDelegate("Testing C3, " + percent + "%", |
| TextRulerLearnerState.ML_RUNNING, false); |
| } |
| if (shouldAbort()) |
| return false; |
| current++; |
| |
| t = new TextRulerRulePattern(); |
| for (int i = tstartI; i <= tendI; i++) |
| t.add(tail.get(i)); |
| |
| // no we have a possible candidate triple: h, t and |
| // l1: |
| |
| constraint3ReturnType c3Result = testConstraint3(h, t, l1); |
| |
| if (c3Result == constraint3ReturnType.C3_SUCCESS) { |
| hPattern = h; |
| tPattern = t; |
| patternPairs.get(0).l = l1; |
| return true; |
| } else if (c3Result == constraint3ReturnType.C3_L1CandidateSuffixError |
| || c3Result == constraint3ReturnType.C3_L1CandidateInterTupleSeparatorSuffixError) { |
| l1Sucks = true; |
| current += tail.size() - tendI - 1; |
| break; |
| } else if (c3Result == constraint3ReturnType.C3_TailCandidateH_L1Error |
| || c3Result == constraint3ReturnType.C3_TailCandidateSucceedsL1InTailError) { |
| // no special pruning options here... we simply |
| // have to test the next t-candidate |
| } else if (c3Result == constraint3ReturnType.C3_TailCandidateRK_PrefixError |
| || c3Result == constraint3ReturnType.C3_TailCandidateNotFoundError) { |
| // all candidates with the same start item are |
| // bad, so leave this inner loop: |
| current += tail.size() - tendI - 1; |
| break; |
| } else if (c3Result == constraint3ReturnType.C3_TailCandidatePrecedesL1InterTupleSeparatorError) { |
| // this is a problematic case... the cause could |
| // be L1 or the current Tail pattern, |
| // so we can't do nothing about it! just try the |
| // next t-candidate |
| } |
| } |
| if (l1Sucks) { |
| current += (tail.size() - tstartI - 1) * (tail.size() - tstartI) / 2; |
| break; |
| } |
| } |
| if (l1Sucks) { |
| if (startI > 0) |
| current += (startI - 1) * tCand; |
| break; |
| } |
| } |
| if (l1Sucks) { |
| current += (endI * (endI + 1) / 2) * tCand; |
| break; |
| } |
| } |
| } |
| return false; |
| } |
| |
| protected void getPageHeadAndTailPortion(TextRulerExampleDocument doc, TextRulerRulePattern head, |
| TextRulerRulePattern tail) { |
| String key = doc.getCasFileName(); |
| if (headTailCache.containsKey(key)) { |
| PatternPair p = headTailCache.get(key); |
| head.addAll(p.l); |
| tail.addAll(p.r); |
| } else { |
| CAS cas = doc.getCAS(); |
| TextRulerExample firstExample = doc.getPositiveExamples().get(0); |
| TextRulerExample lastExample = doc.getPositiveExamples().get( |
| doc.getPositiveExamples().size() - 1); |
| TypeSystem ts = cas.getTypeSystem(); |
| Type tokenType = ts.getType(TextRulerToolkit.RUTA_ALL_TYPE_NAME); |
| List<AnnotationFS> headTokens = TextRulerToolkit.getAnnotationsBeforePosition(cas, |
| firstExample.getAnnotations()[0].getBegin(), 0, TextRulerToolkit |
| .getFilterSetWithSlotNames(slotNames, filterSet), tokenType); |
| TextRulerAnnotation[] lastExampleAnnotations = lastExample.getAnnotations(); |
| List<AnnotationFS> tailTokens = TextRulerToolkit.getAnnotationsAfterPosition(cas, |
| lastExampleAnnotations[lastExampleAnnotations.length - 1].getEnd(), 0, |
| TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType); |
| for (AnnotationFS afs : headTokens) |
| head.add(new WienRuleItem(new TextRulerAnnotation(afs, doc))); |
| for (AnnotationFS afs : tailTokens) |
| tail.add(new WienRuleItem(new TextRulerAnnotation(afs, doc))); |
| PatternPair p = new PatternPair(); |
| p.l.addAll(head); |
| p.r.addAll(tail); |
| headTailCache.put(key, p); |
| } |
| } |
| |
| protected List<TextRulerRulePattern> getInterTupleSepatators(TextRulerExampleDocument doc) { |
| String key = doc.getCasFileName(); |
| if (interTupelSeparatorsCache.containsKey(key)) { |
| return interTupelSeparatorsCache.get(key); |
| } else { |
| List<TextRulerRulePattern> result = new ArrayList<TextRulerRulePattern>(); |
| CAS cas = doc.getCAS(); |
| TypeSystem ts = cas.getTypeSystem(); |
| Type tokenType = ts.getType(TextRulerToolkit.RUTA_ALL_TYPE_NAME); |
| List<TextRulerExample> examples = doc.getPositiveExamples(); |
| for (int i = 0; i < examples.size() - 1; i++) { |
| // get separator between i'th and (i+1)'th example: |
| TextRulerAnnotation[] exampleAnnotations1 = examples.get(i).getAnnotations(); |
| TextRulerAnnotation[] exampleAnnotations2 = examples.get(i + 1).getAnnotations(); |
| TextRulerAnnotation lastOf1 = exampleAnnotations1[exampleAnnotations1.length - 1]; |
| TextRulerAnnotation firstOf2 = exampleAnnotations2[0]; |
| List<AnnotationFS> theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, lastOf1 |
| .getEnd(), firstOf2.getBegin(), TextRulerToolkit.getFilterSetWithSlotNames( |
| slotNames, filterSet), tokenType); |
| TextRulerRulePattern thePattern = new TextRulerRulePattern(); |
| for (AnnotationFS afs : theTokens) |
| thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc))); |
| if (thePattern.size() > 0) |
| result.add(thePattern); |
| |
| } |
| interTupelSeparatorsCache.put(key, result); |
| return result; |
| } |
| } |
| |
| protected List<TextRulerRulePattern> getRightContextForSlot(TextRulerExampleDocument doc, |
| int slotIndex) { |
| List<TextRulerRulePattern> result = new ArrayList<TextRulerRulePattern>(); |
| CAS cas = doc.getCAS(); |
| TypeSystem ts = cas.getTypeSystem(); |
| Type tokenType = ts.getType(TextRulerToolkit.RUTA_ALL_TYPE_NAME); |
| List<TextRulerExample> examples = doc.getPositiveExamples(); |
| boolean isLastSlot = slotIndex >= slotNames.length - 1; |
| for (int ei = 0; ei < examples.size(); ei++) { |
| boolean isLastExample = ei == examples.size() - 1; |
| TextRulerExample e = examples.get(ei); |
| // get stuff between slot slotIndex and slotIndex+1 |
| TextRulerAnnotation slotAnnotation = e.getAnnotations()[slotIndex]; |
| TextRulerAnnotation nextSlotAnnotation; |
| |
| if (!isLastSlot) |
| nextSlotAnnotation = e.getAnnotations()[slotIndex + 1]; |
| else { |
| if (!isLastExample) // the next slot annotation is the first |
| // example annotation of the next template: |
| nextSlotAnnotation = examples.get(ei + 1).getAnnotations()[0]; |
| else |
| nextSlotAnnotation = null; |
| } |
| |
| List<AnnotationFS> theTokens; |
| if (nextSlotAnnotation == null) |
| theTokens = TextRulerToolkit.getAnnotationsAfterPosition(cas, slotAnnotation.getEnd(), 0, |
| TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType); |
| else |
| theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, slotAnnotation.getEnd(), |
| nextSlotAnnotation.getBegin(), TextRulerToolkit.getFilterSetWithSlotNames( |
| slotNames, filterSet), tokenType); |
| TextRulerRulePattern thePattern = new TextRulerRulePattern(); |
| for (AnnotationFS afs : theTokens) |
| thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc))); |
| if (thePattern.size() > 0) |
| result.add(thePattern); |
| } |
| return result; |
| } |
| |
| protected List<TextRulerRulePattern> getLeftContextForSlot(TextRulerExampleDocument doc, |
| int slotIndex) { |
| if (slotIndex == 0) |
| return null; |
| List<TextRulerRulePattern> result = new ArrayList<TextRulerRulePattern>(); |
| CAS cas = doc.getCAS(); |
| TypeSystem ts = cas.getTypeSystem(); |
| Type tokenType = ts.getType(TextRulerToolkit.RUTA_ALL_TYPE_NAME); |
| List<TextRulerExample> examples = doc.getPositiveExamples(); |
| |
| boolean isFirstSlot = slotIndex == 0; |
| for (int ei = 0; ei < examples.size(); ei++) { |
| boolean isFirstExample = ei == 0; |
| TextRulerExample e = examples.get(ei); |
| // get stuff between slot slotIndex and slotIndex+1 |
| TextRulerAnnotation slotAnnotation = e.getAnnotations()[slotIndex]; |
| TextRulerAnnotation prevSlotAnnotation; |
| |
| if (!isFirstSlot) |
| prevSlotAnnotation = e.getAnnotations()[slotIndex - 1]; |
| else { |
| if (!isFirstExample) |
| prevSlotAnnotation = examples.get(ei - 1).getAnnotations()[slotNames.length - 1]; |
| else |
| prevSlotAnnotation = null; |
| } |
| |
| List<AnnotationFS> theTokens; |
| if (prevSlotAnnotation == null) |
| theTokens = TextRulerToolkit.getAnnotationsBeforePosition(cas, slotAnnotation.getBegin(), |
| 0, TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType); |
| else |
| theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, prevSlotAnnotation.getEnd(), |
| slotAnnotation.getBegin(), TextRulerToolkit.getFilterSetWithSlotNames(slotNames, |
| filterSet), tokenType); |
| TextRulerRulePattern thePattern = new TextRulerRulePattern(); |
| for (AnnotationFS afs : theTokens) |
| thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc), true)); |
| if (thePattern.size() > 0) |
| result.add(thePattern); |
| } |
| return result; |
| } |
| |
| protected List<TextRulerRulePattern> getSlotFillerPatterns(TextRulerExampleDocument doc, |
| int slotIndex) { |
| List<TextRulerRulePattern> result = new ArrayList<TextRulerRulePattern>(); |
| CAS cas = doc.getCAS(); |
| TypeSystem ts = cas.getTypeSystem(); |
| Type tokenType = ts.getType(TextRulerToolkit.RUTA_ALL_TYPE_NAME); |
| List<TextRulerExample> examples = doc.getPositiveExamples(); |
| for (TextRulerExample e : examples) { |
| TextRulerAnnotation slotAnnotation = e.getAnnotations()[slotIndex]; |
| List<AnnotationFS> theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, |
| slotAnnotation.getBegin(), slotAnnotation.getEnd(), TextRulerToolkit |
| .getFilterSetWithSlotNames(slotNames, filterSet), tokenType); |
| TextRulerRulePattern thePattern = new TextRulerRulePattern(); |
| for (AnnotationFS afs : theTokens) |
| thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc))); |
| if (thePattern.size() > 0) |
| result.add(thePattern); |
| } |
| return result; |
| } |
| |
| protected constraint3ReturnType testConstraint3(TextRulerRulePattern h, TextRulerRulePattern t, |
| TextRulerRulePattern l1) { |
| for (TextRulerExampleDocument doc : exampleDocuments.getDocuments()) { |
| constraint3ReturnType r = testConstraint3(doc, h, t, l1); |
| if (r != constraint3ReturnType.C3_SUCCESS) |
| return r; |
| } |
| return constraint3ReturnType.C3_SUCCESS; |
| } |
| |
| protected boolean testConstraint1(TextRulerExampleDocument doc, TextRulerRulePattern rk, int k) { |
| List<TextRulerRulePattern> rightContexts = getRightContextForSlot(doc, k); |
| for (TextRulerRulePattern rx : rightContexts) { |
| if (rx.find(rk) != 0) |
| return false; |
| } |
| List<TextRulerRulePattern> contents = getSlotFillerPatterns(doc, k); |
| for (TextRulerRulePattern c : contents) { |
| if (c.find(rk) >= 0) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| protected boolean testConstraint1(TextRulerRulePattern rk, int k) { |
| for (TextRulerExampleDocument doc : exampleDocuments.getDocuments()) { |
| if (!testConstraint1(doc, rk, k)) |
| return false; |
| } |
| return true; |
| } |
| |
| protected boolean testConstraint2(TextRulerExampleDocument doc, TextRulerRulePattern lk, int k) { |
| List<TextRulerRulePattern> leftContexts = getLeftContextForSlot(doc, k); |
| for (TextRulerRulePattern lx : leftContexts) { |
| if (lx.size() < lk.size()) |
| return false; |
| int pos = lx.find(lk); |
| if (pos < 0 || pos != lx.size() - lk.size()) |
| return false; |
| } |
| return true; |
| } |
| |
| protected boolean testConstraint2(TextRulerRulePattern lk, int k) { |
| for (TextRulerExampleDocument doc : exampleDocuments.getDocuments()) { |
| if (!testConstraint2(doc, lk, k)) |
| return false; |
| } |
| return true; |
| } |
| |
  /**
   * Constraint C3 for one document, for the candidate triple (h, t, l1). Checks, in
   * order: (1) l1 is a proper suffix of the head portion after h; (2) t does not occur
   * between h and l1; (2a, WIEN-paper addon) the last slot's right pattern is not a
   * prefix of t; t occurs in the page tail at all; (3) l1 does not precede t in the
   * tail; (4) l1 is a proper suffix of every inter-tuple separator; (5) t never
   * precedes l1 in any inter-tuple separator. Returns the specific error value so the
   * caller can prune the candidate search accordingly.
   */
  protected constraint3ReturnType testConstraint3(TextRulerExampleDocument doc,
          TextRulerRulePattern h, TextRulerRulePattern t, TextRulerRulePattern l1) {
    // flip to true for verbose logging of the rejection reasons below:
    final boolean logReasons = false;

    TextRulerRulePattern head = new TextRulerRulePattern();
    TextRulerRulePattern tail = new TextRulerRulePattern();

    getPageHeadAndTailPortion(doc, head, tail);

    // 1: l1 must be a proper suffix of the portion between the end of h and
    // the first slot filler:
    // (head / h) / l1 = l1

    int hPos = head.find(h);

    // TODO precalculate this outside this method ?
    TextRulerRulePattern restForL1 = head.subPattern(hPos + h.size(), -1).copy();
    for (TextRulerRuleItem it : restForL1)
      ((WienRuleItem) it).getWordConstraint().setGeneralizeLinkMarkUp(true);
    int l1Pos = restForL1.find(l1);
    if (l1Pos < 0 || l1Pos != restForL1.size() - l1.size()) {
      TextRulerToolkit.logIf(logReasons, "REASON 1\n\tl1 \t" + l1 + "\n\trestforl1\t"
              + restForL1);
      return constraint3ReturnType.C3_L1CandidateSuffixError;
    }

    // 2: t must not occur in the subpattern after h and before l1
    if (l1Pos > 0) {
      TextRulerRulePattern patternBetweenHandL1 = restForL1.subPattern(0, l1Pos);
      if (patternBetweenHandL1.size() >= t.size()) {
        if (patternBetweenHandL1.find(t) >= 0) {
          TextRulerToolkit.logIf(logReasons, "REASON 2");
          return constraint3ReturnType.C3_TailCandidateH_L1Error;
        }
      }
    }

    // 2a: addons, not specified in WIEN paper !!
    TextRulerRulePattern lastSlotRightPattern = patternPairs.get(slotNames.length - 1).r;
    if (t.find(lastSlotRightPattern) == 0) // the right boundary of the last
    // slot may not be part of the
    // tail pattern!
    {
      TextRulerToolkit.logIf(logReasons, "REASON 3: " + lastSlotRightPattern + "\tTail: " + t);
      return constraint3ReturnType.C3_TailCandidateRK_PrefixError;
    }

    int tPos = tail.find(t);
    if (tPos < 0) {
      TextRulerToolkit.logIf(logReasons, "REASON 4");
      return constraint3ReturnType.C3_TailCandidateNotFoundError;
    } // this is an own constraint definition: if a document does not have
    // the tail in it,
    // what should we do then ? is this an error or is this okay since the
    // document may not have any tail after the data ?

    // 3: l1 must not precede t in the page's tail:
    int l1tPos = tail.find(l1);
    if (l1tPos >= 0) // l1 occurs in the page's tail:
    {
      if (l1tPos < tPos) {
        TextRulerToolkit.logIf(logReasons, "REASON 5");
        return constraint3ReturnType.C3_TailCandidateSucceedsL1InTailError;
      }
    }

    List<TextRulerRulePattern> interTupleSeparators = getInterTupleSepatators(doc);

    for (TextRulerRulePattern itSep : interTupleSeparators) {
      // 4: l1 must be a proper suffix of each of the inter-tuple
      // separators:
      TextRulerRulePattern itSepCopy = itSep.copy();
      for (TextRulerRuleItem it : itSepCopy)
        ((WienRuleItem) it).getWordConstraint().setGeneralizeLinkMarkUp(true);
      int l1itsPos = itSepCopy.find(l1);
      if (l1itsPos < 0 || l1itsPos != itSepCopy.size() - l1.size()) {
        TextRulerToolkit.logIf(logReasons, "REASON 6: \n\tl1\t" + l1 + "\n\titSep\t" + itSep);
        return constraint3ReturnType.C3_L1CandidateInterTupleSeparatorSuffixError;
      }

      // 5: t must never precede l1 in any inter-tuple separator:
      int itstPos = itSep.find(t);
      if (itstPos >= 0 && itstPos < l1itsPos) {
        TextRulerToolkit.logIf(logReasons, "REASON 7");
        return constraint3ReturnType.C3_TailCandidatePrecedesL1InterTupleSeparatorError;
      }

    }
    return constraint3ReturnType.C3_SUCCESS;
  }
| |
| public String getResultString() { |
| if (theRule == null) |
| return "<no results yet>"; |
| String result = getFileHeaderString(true) + "DECLARE wien_tail;\n" + "DECLARE wien_rulemark;\n" |
| + "DECLARE wien_content;\n" + "BOOLEAN wien_redo;\n\n" |
| + "// tail/head/content area stuff:\n"; |
| |
| TextRulerRulePattern hCopy = hPattern.copy(); |
| |
| ((WienRuleItem) hCopy.get(0)).addCondition("-PARTOF(wien_content)"); |
| result += hCopy + " ALL*?{->MARK(wien_content)};\n"; |
| |
| TextRulerRulePattern tCopy = tPattern.copy(); |
| ((WienRuleItem) tCopy.get(0)).addCondition("PARTOF(wien_content)"); |
| |
| result += tCopy + "{->MARK(wien_tail"; |
| if (tPattern.size() > 1) |
| result += ", 1, " + tPattern.size(); |
| result += ")};\n\n"; |
| |
| result += "BLOCK(findData) wien_content {\n" |
| + "\t// find out if tail is before the next occurence of l1\n" |
| + "\t" |
| + theRule.getRuleString() |
| + "\n" |
| + "\tDocument{->ASSIGN(wien_redo, false)};\n" |
| + "\twien_tail{PARTOF(wien_rulemark)->UNMARK(wien_tail), ASSIGN(wien_redo, true)}; // remove tail marks that are no longer relevant for us after the last rule !\n" |
| + "\tDocument{IF(wien_redo)->CALL(filename.findData)};\n" + "}\n"; |
| |
| result += "\n// cleaning up:\n" + "wien_tail{->UNMARK(wien_tail)};\n" |
| + "wien_rulemark{->UNMARK(wien_rulemark)};\n" |
| + "wien_content{->UNMARK(wien_content)};\n"; |
| return result; |
| } |
| |
  /** This learner exposes no configurable parameters; intentionally a no-op. */
  public void setParameters(Map<String, Object> params) {
  }
| |
| } |