trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java - uima-ruta - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.uima.ruta.resource;

 import java.io.BufferedReader;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;

 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.ruta.RutaStream;
 import org.apache.uima.ruta.type.RutaBasic;
 import org.springframework.core.io.FileSystemResource;
 import org.springframework.core.io.Resource;

 /**
  * Class MultiTreeWordList.
  *
  *
  */
 public class MultiTreeWordList implements RutaWordList {

   private static final String ENCODING = "UTF-8";

   private MultiTreeWordListPersistence persistence = new MultiTreeWordListPersistence();

   /** The root of the TreeWordList. */
   protected MultiTextNode root;

   /** The cost model we are using. */
   private EditDistanceCostMap costMap;

   /**
    * Default constructor.
    */
   public MultiTreeWordList() throws IOException {
     this(new String[] {});
   }

   /**
    * Default constructor uses just one file.
    *
    * @param pathname
    *          the pathname of the used file.
    */
   public MultiTreeWordList(String pathname) throws IOException {
     this(new FileSystemResource(pathname));
   }

   /**
    * @param lists Resources to load.
    * @throws IOException When there is a problem reading a resource.
    */
   public MultiTreeWordList(Resource... lists) throws IOException {
     this.root = new MultiTextNode();
     this.costMap = new EditDistanceCostMap();

     for (Resource list : lists) {
       // check if the resource is a directory
       File directory = null;
       try {
         directory = list.getFile();
       } catch (IOException e) {
         // resource is not on the file system
         directory = null;
       }

       if (directory != null && directory.isDirectory()) {
         // resource is a directory, load its content
         for (File data : directory.listFiles()) {
           load(new FileSystemResource(data));
         }
       } else {
         // resource is not a directory, load it normally
         load(list);
       }
     }
   }

   /**
    * Constructor from an open stream. This method will close the stream.
    *
    * @param stream
    *          the stream to read the file from.
    * @param name
    *          associated name
    */
   public MultiTreeWordList(InputStream stream, String name) throws IOException {
     this.root = new MultiTextNode();
     this.costMap = new EditDistanceCostMap();

     if (name.endsWith(".mtwl"))
       persistence.readMTWL(root, stream, ENCODING);
     if (name.endsWith(".txt"))
       buildNewTree(stream, name);
   }

   /**
    * Constructs a TreeWordList from a file with path = filename
    *
    * @param pathnames
    *          path of the file to create a TextWordList from
    */
   public MultiTreeWordList(String[] pathnames) throws IOException {
     for (String pathname : pathnames) {
       load(new FileSystemResource(pathname));
     }
   }

   /**
    * Load a resource in this word list.
    *
    * @param resource Resource to load. The resource's name must end with .txt or .mtwl.
    * @throws IOException When there is a problem reading the resource.
    */
   private void load(Resource resource) throws IOException {
     final String name = resource.getFilename();
     InputStream stream = null;
     try {
       stream = resource.getInputStream();
       if (name == null) {
         throw new IllegalArgumentException("List does not have a name.");
       } else if (name.endsWith(".txt")) {
         buildNewTree(stream, name);
       } else if (name.endsWith(".mtwl")) {
         persistence.readMTWL(root, stream, "UTF-8");
       } else {
         throw new IllegalArgumentException("File name should end with .mtwl or .txt, found " + name);
       }
     } finally {
       if (stream != null) {
         stream.close();
       }
     }
   }

   /**
    * Creates a new Tree in the existing treeWordList from a file with path pathname
    *
    * @param stream
    *          Input stream for the file containing the words for the treeWordList
    * @param name
    *          Associated name for the file
    */
   public void buildNewTree(InputStream stream, String name) throws IOException {
     BufferedReader br = new BufferedReader(new InputStreamReader(stream, ENCODING));
     String s = null;

     while ((s = br.readLine()) != null) {
       addWord(s.trim(), name);
     }
     stream.close();
     br.close();
   }

   /**
    * Add a new String into the MultiTreeWordList.
    *
    * @param s
    *          The String to add
    * @param type
    *          The type of the string.
    */
   public void addWord(String s, String type) {

     // Create Nodes from all chars of the strings besides the last one
     MultiTextNode pointer = root;

     for (Character each : s.toCharArray()) {

       MultiTextNode childNode = pointer.getChildNode(each);

       if (childNode == null) {
         childNode = new MultiTextNode(each, false);
         pointer.addChild(childNode);
       }

       pointer = childNode;
     }
     pointer.setWordEnd(s.length() > 0);
     pointer.addType(type);
   }

   /**
    * Returns all Types contained by the MultiTreeWordList.
    *
    * @return all Types contained by the MultiTreeWordList.
    */
   public Collection<String> getTypes() {
     return getTypeCone(root);
   }

   /**
    * Returns all types contained by the cone of the MultiTextNode node, including the types of node
    * itself.
    *
    * @param node
    *          The node where we start, the root of the cone.
    * @return all types contained by the cone of the MultiTextNode node, including the types of node
    *         itself.
    */
   public Collection<String> getTypeCone(MultiTextNode node) {

     List<String> returnList = new LinkedList<String>();

     if (node.getTypes() != null) {
       for (String s : node.getTypes()) {
         if (!returnList.contains(s)) {
           returnList.add(s);
         }
       }
     }

     for (Character c : node.getChildren().keySet()) {
       for (String s : getTypeCone(node.getChildNode(c))) {
         if (!returnList.contains(s)) {
           returnList.add(s);
         }
       }
     }

     return returnList;
   }

   /**
    * Returns all strings contained by the MultiTreeWordList.
    *
    * @return All strings contained by the MultiTreeWordList.
    */
   public Collection<String> keySet() {
     List<String> keySet = new LinkedList<String>(keySet(root, ""));
     Collections.sort(keySet);
     return keySet;
   }

   /**
    * Returns all strings contained by the cone of the MultiTextNode node and uses prefix as the
    * prefix of all the strings.
    *
    * @param node
    *          the node we are considering.
    * @param prefix
    *          the prefix until now.
    * @return All strings contained by the cone of the MultiTextNode node.
    */
   private Collection<String> keySet(MultiTextNode node, String prefix) {

     List<String> resultList = new LinkedList<String>();

     // Recursion stop.
     if (node.isWordEnd()) {
       resultList.add(prefix);
     }

     // Recursion step.
     for (Character c : node.getChildren().keySet()) {
       String temp = prefix + String.valueOf(c);
       resultList.addAll(keySet(node.getChildNode(c), temp));
     }

     return resultList;
   }

   /**
    * Returns all types of the very string s.
    *
    * @param s
    *          The string with the types.
    * @return All types from the very string s.
    */
   public Collection<String> getTypes(String s) {
     return getTypes(s, false);
   }

   /**
    * Returns the types of the string s.
    *
    * @param s
    *          The string with the types.
    * @param ignoreCase
    *          Indicates, whether we search case sensitive or not.
    * @return The types of the string s.
    */
   public Collection<String> getTypes(String s, boolean ignoreCase) {

     // Collection<Set<String>> types = editDistanceClever(root, s, "", 0.0,
     // 0,
     // ignoreCase, false, costMap, false, false).values();
     // Map<String, Set<String>> types = editDistanceClever(root, s, "", 0.0,
     // 0, ignoreCase, false, costMap, false, false);
     Map<String, Set<String>> types = editDistance(s, 0, ignoreCase, "");
     Set<String> returnSet = new HashSet<String>();

     for (Entry<String, Set<String>> each : types.entrySet()) {
       returnSet.addAll(each.getValue());
     }

     return returnSet;
   }

   /**
    * Returns a list of types which belong to a string.
    *
    * @param string
    *          The string which types we want to have.
    * @param ignoreCase
    *          Indicates whether we search case sensitive or not.
    * @param ignoreLength
    *          If the length of the string is less than of equal to this, we search case insensitive.
    * @param edit
    *          Indicates whether we use an edit distance or not.
    * @param distance
    *          The edit distance to a string contained by the MultiTreeWordList.
    * @param ignoreToken
    *          Characters which can be ignored.
    * @return Returns a list of types which belong to a string.
    */

   public List<String> contains(String string, boolean ignoreCase, int ignoreLength, boolean edit,
           double distance, String ignoreToken) {

     List<String> resultList = new LinkedList<String>();
     Map<String, Set<String>> editDistance;

     if (string.length() >= ignoreLength && ignoreCase) {
       editDistance = editDistance(string, (int) distance, true, ignoreToken, false);
     } else {
       editDistance = editDistance(string, (int) distance, false, ignoreToken, false);
     }
     for (Entry<String, Set<String>> each : editDistance.entrySet()) {
       resultList.addAll(each.getValue());
     }
     return resultList;
   }

   /**
    * Checks whether a string is contained by the MultiTreeWordList or not.
    *
    * @param string
    *          The string which is contained or not.
    * @param ignoreCase
    *          Indicates whether we search case sensitive or not.
    * @param ignoreLength
    *          If the length of the string is less than of equal to this, we search case insensitive.
    * @param edit
    *          Indicates whether we use an edit distance or not.
    * @param distance
    *          The edit distance to a string contained by the MultiTreeWordList.
    * @param ignoreToken
    *          Characters which can be ignored.
    * @return true, if the string is contained by the MultiTreeWordList, false otherwise.
    */
   public boolean containsBool(String string, boolean ignoreCase, int ignoreLength, boolean edit,
           double distance, String ignoreToken) {
     return editDistanceBool(root, string, "", distance, 0, ignoreCase, false, costMap);
   }

   /**
    * Checks whether the tree contains exaclty the string s.
    *
    * @param s
    *          The string which is contained or not.
    * @return True, if the TreeWordList contains exactly the string s, false otherwise.
    */
   public boolean contains(String s) {
     return contains(s, false);
   }

   /**
    * Checks whether the tree contains the string s.
    *
    * @param s
    *          The string which is contained or not.
    * @param ignoreCase
    *          Indicates whether we search case sensitive or not.
    * @return True, if the TreeWordList contains the string s, false otherwise.
    */
   public boolean contains(String s, boolean ignoreCase) {
     return contains(s, ignoreCase, 0, new char[] {}, 0, true);
   }

   /**
    * Checks if the MultiTreeWordList contains the string s.
    *
    * @param s
    *          The string which is contained or not.
    * @param ignoreCase
    *          Indicates whether we search case sensitive or not.
    * @param size
    *          The index of the string.
    * @param ignoreChars
    *          Characters which can be ignored.
    * @param maxIgnoreChars
    *          The maximum number of ignored characters.
    * @return true, if TreeWordList contains the string, false otherwise.
    */
   public boolean contains(String s, boolean ignoreCase, int size, char[] ignoreChars,
           int maxIgnoreChars, boolean ignoreWS) {

     EditDistanceCostMap edm = new EditDistanceCostMap();

     for (Character c : ignoreChars) {
       edm.setDeleteCosts(c, 0.0);
     }

     return editDistanceBool(root, s, "", maxIgnoreChars, 0, ignoreCase, false, edm);
   }

   /**
    * Checks if the MultiTreeWordList contains a prefix of the string s.
    *
    * @param s
    *          The string which is contained or not.
    * @param ignoreCase
    *          Indicates whether we search case sensitive or not.
    * @param size
    *          The index of the string.
    * @param ignoreChars
    *          Characters which can be ignored.
    * @param maxIgnoreChars
    *          The maximum number of ignored characters.
    * @return true, if TreeWordList contains a prefix of the string, false otherwise.
    */
   public boolean containsFragment(String s, boolean ignoreCase, int size, char[] ignoreChars,
           int maxIgnoreChars, boolean ignoreWS) {
     MultiTextNode pointer = root;
     return recursiveContains(pointer, s, 0, ignoreCase && s.length() > size, true, ignoreChars,
             maxIgnoreChars);
   }

   /**
    * Checks whether prefix of a string is contained by the MultiTreeWordList or not.
    *
    * @param string
    *          The string whose prefix is contained or not.
    * @param ignoreCase
    *          Indicates whether we search case sensitive or not.
    * @param ignoreLength
    *          If the length of the string is less than of equal to this, we search case insensitive.
    * @param edit
    *          Indicates whether we use an edit distance or not.
    * @param distance
    *          The edit distance to a string contained by the MultiTreeWordList.
    * @param ignoreToken
    *          Characters which can be ignored.
    * @return true, if a prefix of the string is contained by the MultiTreeWordList, false otherwise.
    */
   public boolean containsFragmentBool(String string, boolean ignoreCase, int ignoreLength,
           boolean edit, double distance, String ignoreToken) {

     if (string.length() >= ignoreLength && ignoreCase) {
       return editDistanceBool(root, string, "", distance, 0, true, true, costMap);
     } else {
       return editDistanceBool(root, string, "", distance, 0, false, true, costMap);
     }
   }

   /**
    * Returns a list of types which belong to a prefix of a string that is contained by the
    * MultiTreeWordList.
    *
    * @param string
    *          The string whose prefix's types we are interested in.
    * @param ignoreCase
    *          Indicates whether we search case sensitive or not.
    * @param ignoreLength
    *          If the length of the string is less than of equal to this, we search case insensitive.
    * @param edit
    *          Indicates whether we use an edit distance or not.
    * @param distance
    *          The edit distance to a string contained by the MultiTreeWordList.
    * @param ignoreToken
    *          Characters which can be ignored.
    * @return A list of types which belong to a prefix of a string that is contained by the
    *         MultiTreeWordList.
    */
   public List<String> containsFragment(String string, boolean ignoreCase, int ignoreLength,
           boolean edit, double distance, String ignoreToken) {

     List<String> resultList = new LinkedList<String>();
     Map<String, Set<String>> resultMap = null;

     if (string.length() >= ignoreLength && ignoreCase) {
       resultMap = editDistance(string, (int) distance, true, ignoreToken, true);
     } else {
       resultMap = editDistance(string, (int) distance, false, ignoreToken, true);
     }

     for (Set<String> set : resultMap.values()) {
       for (String s : set) {
         if (!resultList.contains(s)) {
           // resultList.addAll(resultMap.get(set));
           resultList.add(s);
         }
       }
     }

     return resultList;
   }

   /**
    * Returns true, if the MultiTreeWordList contains the string text, false otherwise.
    *
    * @param pointer
    *          The MultiTextNode we are looking at.
    * @param text
    *          The string which is contained or not.
    * @param index
    *          The index of the string text we checked until now.
    * @param ignoreCase
    *          Indicates whether we search case sensitive or not.
    * @param fragment
    *          Indicates whether we are looking for a prefix of the string text.
    * @param ignoreChars
    *          Characters which can be ignored.
    * @param maxIgnoreChars
    *          Maximum number of characters which are allowed to be ignored.
    * @return True, if the TreeWordList contains the string text, false otherwise.
    */
   private boolean recursiveContains(MultiTextNode pointer, String text, int index,
           boolean ignoreCase, boolean fragment, char[] ignoreChars, int maxIgnoreChars) {

     if (pointer == null) {
       return false;
     }

     if (index == text.length()) {
       return fragment || pointer.isWordEnd();
     }

     char charAt = text.charAt(index);
     boolean charAtIgnored = false;

     if (ignoreChars != null) {
       for (char each : ignoreChars) {
         if (each == charAt) {
           charAtIgnored = true;
           break;
         }
       }
       charAtIgnored &= index != 0;
     }

     int next = ++index;

     if (ignoreCase) {

       // Lower Case Node.
       MultiTextNode childNodeL = pointer.getChildNode(Character.toLowerCase(charAt));

       // Upper Case Node.
       MultiTextNode childNodeU = pointer.getChildNode(Character.toUpperCase(charAt));

       if (charAtIgnored && childNodeL == null && childNodeU == null) {
         // Character is ignored and does not appear.
         return recursiveContains(pointer, text, next, ignoreCase, fragment, ignoreChars,
                 maxIgnoreChars);
       } else {
         // Recursion.
         return recursiveContains(childNodeL, text, next, ignoreCase, fragment, ignoreChars,
                 maxIgnoreChars)
                 || recursiveContains(childNodeU, text, next, ignoreCase, fragment, ignoreChars,
                         maxIgnoreChars);
       }

     } else {
       // Case sensitive.
       MultiTextNode childNode = pointer.getChildNode(charAt);

       if (charAtIgnored && childNode == null) {
         // Recursion with incremented index.
         return recursiveContains(pointer, text, next, ignoreCase, fragment, ignoreChars,
                 maxIgnoreChars);
       } else {
         // Recursion with new node.
         return recursiveContains(childNode, text, next, ignoreCase, fragment, ignoreChars,
                 maxIgnoreChars);
       }
     }
   }

   public Collection<AnnotationFS> find(RutaStream stream, Map<String, Type> typeMap,
           boolean ignoreCase, int ignoreLength, boolean edit, double distance, String ignoreToken) {

     Collection<AnnotationFS> results = new HashSet<AnnotationFS>();
     stream.moveToFirst();
     FSIterator<AnnotationFS> streamPointer = stream.copy();

     while (stream.isValid()) {
       RutaBasic anchorBasic = (RutaBasic) stream.get();
       streamPointer.moveTo(anchorBasic);

       List<RutaBasic> basicsToAdd = new ArrayList<RutaBasic>();
       basicsToAdd.add(anchorBasic);
       String text = anchorBasic.getCoveredText();
       StringBuilder candidate = new StringBuilder(text);
       String lastCandidate = candidate.toString();
       List<AnnotationFS> interResults = new ArrayList<AnnotationFS>();

       while (streamPointer.isValid()) {

         List<String> types = containsFragment(candidate.toString(), ignoreCase, ignoreLength, edit,
                 distance, ignoreToken);

         if (!types.isEmpty()) {
           streamPointer.moveToNext();
           if (streamPointer.isValid()) {
             RutaBasic next = (RutaBasic) streamPointer.get();
             // List<String> contains = contains(candidate,
             // ignoreCase,
             // ignoreLength, edit, distance, ignoreToken);

             tryToCreateAnnotation(stream, results, basicsToAdd, candidate.toString(), interResults,
                     ignoreCase, ignoreLength, edit, distance, ignoreToken, typeMap);
             //
             lastCandidate = candidate.toString();
             candidate.append(next.getCoveredText());
             basicsToAdd.add(next);

           } else {
             // !streamPointer.isValid();
             tryToCreateAnnotation(stream, results, basicsToAdd, lastCandidate, interResults,
                     ignoreCase, ignoreLength, edit, distance, ignoreToken, typeMap);
           }
         } else {

           // containsFragment.isEmpty();
           // basicsToAdd.remove(basicsToAdd.size() - 1);
           // tryToCreateAnnotation(stream, results, basicsToAdd,
           // lastCandidate, interResults, ignoreCase,
           // ignoreLength, edit, distance, ignoreToken, typeMap);

           // breaks inner while()-loop.
           break;
         }

       }

       stream.moveToNext();
     }

     return results;
   }

   public List<AnnotationFS> find(RutaStream stream, boolean ignoreCase, int size,
           char[] ignoreChars, int maxIgnoredChars, boolean ignoreWS) {
     assert false;
     return new ArrayList<AnnotationFS>();
   }

   private void tryToCreateAnnotation(RutaStream stream, Collection<AnnotationFS> results,
           List<RutaBasic> basicsToAdd, String lastCandidate, List<AnnotationFS> interResult,
           boolean ignoreCase, int ignoreLength, boolean edit, double distance, String ignoreToken,
           Map<String, Type> map) {

     List<String> contains = contains(lastCandidate, ignoreCase, ignoreLength, edit, distance,
             ignoreToken);
     if (basicsToAdd.size() >= 1 || contains.isEmpty()) {
       for (String each : contains) {
         Type type = map.get(each);
         if (type != null) {
           int begin = basicsToAdd.get(0).getBegin();
           int end = basicsToAdd.get(basicsToAdd.size() - 1).getEnd();
           AnnotationFS newFS = stream.getCas().createAnnotation(type, begin, end);
           results.add(newFS);
         }
       }
     } else if (interResult != null && !interResult.isEmpty()) {
       results.addAll(interResult);
     }
   }

   /**
    * Returns a map with all strings with a specified edit distance to the string query as keys and
    * the files they belong to as values.
    *
    * @param query
    *          The query string.
    * @return A map with all strings with a specified edit distance to the string query as keys and
    *         the files they belong to as values.
    */
   public Map<String, Set<String>> editDistance(String query, int distance) {
     return editDistance(query, distance, false, "");
   }

   /**
    * Returns a map with all strings with a specified edit distance to the string query as keys and
    * the files they belong to as values.
    *
    * @param query
    *          The query string.
    * @param distance
    *          The specified edit distance.
    * @param ignoreCase
    *          Indicates whether we search case sensitive or not.
    * @return A map with all strings with a specified edit distance to the string query as keys and
    *         the files they belong to as values.
    */
   public Map<String, Set<String>> editDistance(String query, int distance, boolean ignoreCase,
           String ignoreToken) {
     return editDistance(query, distance, ignoreCase, ignoreToken, false);
   }

   /**
    * Returns a map with all strings with a specified edit distance to the string query as keys and
    * the files they belong to as values.
    *
    * @param query
    *          The query string.
    * @param distance
    *          The specified edit distance.
    * @param ignoreCase
    *          Indicates whether we search case sensitive or not.
    * @param fragment
    *          Indicates whether we search for fragments of the query string or not.
    * @return A map with all strings with a specified edit distance to the string query as keys and
    *         the files they belong to as values.
    */
   public Map<String, Set<String>> editDistance(String query, int distance, boolean ignoreCase,
           String ignoreToken, boolean fragment) {

     // The second alternative realizes the fragment functionality by
     // setting the insert costs of the ignored character to zero. This
     // is much more elegant and easier to maintain. I don't know if the
     // other way is faster, so I did not delete it yet.

     Map<Character, Double> oldInsertCosts = new HashMap<Character, Double>();
     EditDistanceCostMap edcm = new EditDistanceCostMap();

     // We need to store the old insert costs before we set them to zero.
     for (char c : ignoreToken.toCharArray()) {
       oldInsertCosts.put(c, edcm.getInsertCosts(c));
       edcm.setInsertCosts(c, 0.0);
     }

     Map<String, Set<String>> result = null;

     if (ignoreCase) {
       result = editDistanceClever(root, query.toLowerCase(), "", distance, 0, true, fragment, edcm,
               false, false);
     } else {
       result = editDistanceClever(root, query, "", distance, 0, false, fragment, edcm, false, false);
     }

     // Restoring of the old insert costs.
     for (Entry<Character, Double> c : oldInsertCosts.entrySet()) {
       edcm.setDeleteCosts(c.getKey(), c.getValue());
     }

     return result;
   }

   /**
    * Returns a map with all strings with a specified edit distance to the string query as keys and
    * the files they belong to as values.
    *
    * @param node
    *          The MultiTextNode which is under consideration at the moment.
    * @param query
    *          The query string.
    * @param result
    *          The result which matched until now.
    * @param distance
    *          The remaining edit distance.
    * @param index
    *          The index of the query string at the moment.
    * @param ignoreCase
    *          Indicates whether we search case sensitive or not.
    * @param fragment
    *          Indicates whether we search for fragments of the query string or not.
    * @param edm
    *          The edit distance cost map we are using.
    * @param lastActionInsert
    *          Indicates whether the last action was an insert action.
    * @param lastActionDelete
    *          Indicates whether the last action was a delete action.
    * @return A map with all strings with a specified edit distance to the string query as keys and
    *         the files they belong to as values.
    */
   private Map<String, Set<String>> editDistanceClever(MultiTextNode node, String query,
           String result, double distance, int index, boolean ignoreCase, boolean fragment,
           EditDistanceCostMap edm, boolean lastActionInsert, boolean lastActionDelete) {

     EditDistanceResultMap resultMap = new EditDistanceResultMap();

     if (!lastActionInsert) {
       // Delete.
       if (distance - edm.getDeleteCosts(node.getValue()) >= 0 && result.length() > 0) {
         resultMap.putAll(editDistanceClever(node, query, result,
                 distance - edm.getDeleteCosts(node.getValue()), index + 1, ignoreCase, fragment,
                 edm, false, true));
       }
     }

     // Recursion stop.
     if (node.isWordEnd() || fragment) {

       HashMap<String, Set<String>> temp = new HashMap<String, Set<String>>();

       double remainingInsertCosts = 0.0;

       // Accumulating remaining insert costs if the query is longer than
       // the word in the trie.
       for (int i = index; i < query.length(); i++) {
         remainingInsertCosts += edm.getInsertCosts(query.charAt(i));
       }

       if (remainingInsertCosts <= distance) {
         // if (remainingInsertCosts <= distance &&
         // !node.getTypes().isEmpty()) {
         // if (query.length() - index <= distance) {

         if (fragment) {
           temp.put(result, new HashSet<String>(getTypeCone(node)));
         } else {
           temp.put(result, new HashSet<String>(node.getTypes()));
         }

         resultMap.putAll(temp);
       }

       // Important: word end does not mean no children any more!
       if (node.getChildren() == null) {
         return resultMap;
       }
     }

     // Recursion.
     for (MultiTextNode tempNode : node.getChildren().values()) {

       if (index < query.length()) {
         if (ignoreCase) {
           if (Character.toLowerCase(tempNode.getValue()) == Character.toLowerCase(query
                   .charAt(index))) {
             resultMap.putAll(editDistanceClever(tempNode, query, result + tempNode.getValue(),
                     distance, index + 1, ignoreCase, fragment, edm, false, false));
           }
         } else {
           if (tempNode.getValue() == query.charAt(index)) {
             resultMap.putAll(editDistanceClever(tempNode, query, result + tempNode.getValue(),
                     distance, index + 1, ignoreCase, fragment, edm, false, false));
           }
         }
       }

       if (distance - edm.getReplaceCosts(node.getValue(), tempNode.getValue()) >= 0) {

         // Substitute.
         resultMap.putAll(editDistanceClever(tempNode, query, result + tempNode.getValue(), distance
                 - edm.getReplaceCosts(node.getValue(), tempNode.getValue()), index + 1, ignoreCase,
                 fragment, edm, false, false));
       }

       if (!lastActionDelete) {
         if (distance - edm.getInsertCosts(tempNode.getValue()) >= 0) {
           // Insert - use the same index twice.
           resultMap.putAll(editDistanceClever(tempNode, query, result + tempNode.getValue(),
                   distance - edm.getInsertCosts(tempNode.getValue()), index, ignoreCase, fragment,
                   edm, true, false));
         }
       }
     }

     return resultMap;
   }

   /**
    * Checks if a string is contained by the MultiTreeWordList.
    *
    * @param node
    *          The MultiTextNode which is under consideration at the moment.
    * @param query
    *          The query string.
    * @param result
    *          The result which matched until now.
    * @param distance
    *          The remaining edit distance.
    * @param index
    *          The index of the query string at the moment.
    * @param ignoreCase
    *          Indicates whether we search case sensitive or not.
    * @param fragment
    *          Indicates whether we search for fragments of the query string or not.
    * @param edm
    *          The edit distance cost map we are using.
    * @return A map with all strings with a specified edit distance to the string query as keys and
    *         the files they belong to as values.
    */
   private boolean editDistanceBool(MultiTextNode node, String query, String result,
           double distance, int index, boolean ignoreCase, boolean fragment, EditDistanceCostMap edm) {

     boolean deletion = false;
     boolean insertion = false;
     boolean substitution = false;
     boolean noop = false;

     // Recursion stop.
     if (fragment) {
       if (index == query.length()) {
         return true;
       }
     }

     if (node.isWordEnd()) {

       double remainingInsertCosts = 0.0;

       // Accumulating remaining insert costs if the query is longer than
       // the word in the trie.
       for (int i = index; i < query.length(); i++) {
         remainingInsertCosts += edm.getInsertCosts(query.charAt(i));
       }

       if (remainingInsertCosts <= distance) {
         // if (query.length() - index <= distance) {
         return true;
       }
     }

     // Delete.
     if (distance - edm.getDeleteCosts(node.getValue()) >= 0 && result.length() > 0) {
       deletion = editDistanceBool(node, query, result,
               distance - edm.getDeleteCosts(node.getValue()), index + 1, ignoreCase, fragment, edm);

       if (deletion) {
         return true;
       }
     }

     // Recursion.
     for (MultiTextNode tempNode : node.getChildren().values()) {

       if (index < query.length()) {
         if (ignoreCase) {
           if (Character.toLowerCase(tempNode.getValue()) == Character.toLowerCase(query
                   .charAt(index))) {
             noop = editDistanceBool(tempNode, query, result + tempNode.getValue(), distance,
                     index + 1, ignoreCase, fragment, edm);
           }
         } else {
           if (tempNode.getValue() == query.charAt(index)) {
             noop = editDistanceBool(tempNode, query, result + tempNode.getValue(), distance,
                     index + 1, ignoreCase, fragment, edm);
           }
         }

         if (noop) {
           return true;
         }
       }

       if (distance - edm.getReplaceCosts(node.getValue(), tempNode.getValue()) >= 0) {

         // Substitute.
         substitution = editDistanceBool(tempNode, query, result + tempNode.getValue(), distance
                 - edm.getReplaceCosts(node.getValue(), tempNode.getValue()), index + 1, ignoreCase,
                 fragment, edm);

         if (substitution) {
           return true;
         }
       }

       if (distance - edm.getInsertCosts(tempNode.getValue()) >= 0) {
         // Insert - use the same index twice.
         insertion = editDistanceBool(tempNode, query, result + tempNode.getValue(),
                 distance - edm.getInsertCosts(tempNode.getValue()), index, ignoreCase, fragment,
                 edm);

         if (insertion) {
           return true;
         }
       }

     }

     return false;
   }

   // private Map<String, Set<String>> editDistance(MultiTextNode node, String query, String result,
   // double distance, int index, boolean ignoreCase, String ignoreToken, boolean fragment,
   // EditDistanceCostMap edm) {
   //
   //
   // EditDistanceResultMap resultMap = new EditDistanceResultMap();
   //
   // // Delete.
   // if (distance - edm.getDeleteCosts(node.getValue()) >= 0 && result.length() > 0) {
   // resultMap.putAll(editDistance(node, query, result, distance
   // - edm.getDeleteCosts(node.getValue()), index + 1, ignoreCase, ignoreToken, fragment,
   // edm));
   // }
   //
   // // Recursion stop.
   // if (node.isWordEnd()) {
   //
   // HashMap<String, Set<String>> temp = new HashMap<String, Set<String>>();
   //
   // if (query.length() - index <= distance) {
   // temp.put(result, new HashSet<String>(node.getTypes()));
   // resultMap.putAll(temp);
   // }
   //
   // // Ignore token at the end of the word.
   // if (ignoreToken.contains(String.valueOf(node.getValue()))) {
   // temp.put(result, new HashSet<String>(node.getTypes()));
   // resultMap.putAll(temp);
   // }
   //
   // if (node.getChildren() == null) {
   // return resultMap;
   // }
   // }
   //
   // // Recursion.
   // for (MultiTextNode tempNode : node.getChildren().values()) {
   //
   // if (index < query.length()) {
   // if (ignoreCase) {
   // if (Character.toLowerCase(tempNode.getValue()) == Character.toLowerCase(query
   // .charAt(index))) {
   // resultMap.putAll(editDistance(tempNode, query, result + tempNode.getValue(), distance,
   // index + 1, ignoreCase, ignoreToken, fragment, edm));
   // }
   // } else {
   // if (tempNode.getValue() == query.charAt(index)) {
   // resultMap.putAll(editDistance(tempNode, query, result + tempNode.getValue(), distance,
   // index + 1, ignoreCase, ignoreToken, fragment, edm));
   // }
   // }
   // }
   //
   // if (distance - edm.getReplaceCosts(node.getValue(), tempNode.getValue()) >= 0) {
   //
   // // Substitute.
   // resultMap.putAll(editDistance(tempNode, query, result + tempNode.getValue(), distance
   // - edm.getReplaceCosts(node.getValue(), tempNode.getValue()), index + 1, ignoreCase,
   // ignoreToken, fragment, edm));
   // }
   //
   // // Ignore token.
   // if (ignoreToken.contains(String.valueOf(tempNode.getValue()))) {
   // resultMap.putAll(editDistance(tempNode, query, result + tempNode.getValue(), distance,
   // index, ignoreCase, ignoreToken, fragment, edm));
   // } else {
   // if (distance - edm.getInsertCosts(tempNode.getValue()) >= 0) {
   // // Insert - use the same index twice.
   // resultMap.putAll(editDistance(tempNode, query, result + tempNode.getValue(), distance
   // - edm.getInsertCosts(tempNode.getValue()), index, ignoreCase, ignoreToken,
   // fragment, edm));
   // }
   // }
   // }
   //
   // return resultMap;
   // }

   @Override
   public int hashCode() {
     final int prime = 31;
     int result = 1;
     result = prime * result + ((costMap == null) ? 0 : costMap.hashCode());
     result = prime * result + ((root == null) ? 0 : root.hashCode());
     return result;
   }

   @Override
   public boolean equals(Object obj) {
     if (this == obj)
       return true;
     if (obj == null)
       return false;
     if (getClass() != obj.getClass())
       return false;
     MultiTreeWordList other = (MultiTreeWordList) obj;
     if (costMap == null) {
       if (other.costMap != null)
         return false;
     } else if (!costMap.equals(other.costMap))
       return false;
     if (root == null) {
       if (other.root != null)
         return false;
     } else if (!root.equals(other.root))
       return false;
     return true;
   }

   public void createMTWLFile(String path) {
     persistence.createMTWLFile(root, path);
   }

 }