/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.ruta.resource;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.io.IOUtils;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.ruta.RutaStream;
import org.apache.uima.ruta.type.RutaBasic;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;

/**
 * Class MultiTreeWordList.
 * 
 * 
 */
public class MultiTreeWordList implements RutaWordList {

  private static final String ENCODING = "UTF-8";

  private MultiTreeWordListPersistence persistence = new MultiTreeWordListPersistence();

  /** The root of the TreeWordList. */
  protected MultiTextNode root;

  /** The cost model we are using. */
  private EditDistanceCostMap costMap;

  /**
   * Default constructor.
   * 
   * @throws IOException
   *           - should not happen but required by called constructor
   */
  public MultiTreeWordList() throws IOException {
    this(new String[] {}, null);
  }

  /**
   * Default constructor uses just one file.
   * 
   * @param pathname
   *          the pathname of the used file.
   * @param base
   *          the relative base
   * @throws IOException
   *           When there is a problem reading pathname.
   */
  public MultiTreeWordList(String pathname, File base) throws IOException {
    this(new FileSystemResource(pathname));
  }

  /**
   * @param lists
   *          Resources to load.
   * @throws IOException
   *           When there is a problem reading a resource.
   */
  public MultiTreeWordList(Resource... lists) throws IOException {
    this.root = new MultiTextNode();
    this.costMap = new EditDistanceCostMap();

    if (lists == null) {
      return;
    }

    for (Resource list : lists) {
      // check if the resource is a directory
      File directory = null;
      try {
        directory = list.getFile();
      } catch (IOException e) {
        // resource is not on the file system
        directory = null;
      }

      if (directory != null && directory.isDirectory()) {
        // resource is a directory, load its content
        for (File data : directory.listFiles()) {
          load(new FileSystemResource(data));
        }
      } else {
        // resource is not a directory, load it normally
        load(list);
      }
    }
  }

  /**
   * Constructor from an open stream. This method will close the stream.
   * 
   * @param stream
   *          the stream to read the file from.
   * @param name
   *          associated name
   * @throws IOException
   *           When there is a problem reading the input stream.
   */
  public MultiTreeWordList(InputStream stream, String name) throws IOException {
    this.root = new MultiTextNode();
    this.costMap = new EditDistanceCostMap();

    if (name.endsWith(".mtwl")) {
      persistence.readMTWL(root, stream, ENCODING);
    } else if (name.endsWith(".txt")) {
      buildNewTree(stream, name);
    }
  }

  /**
   * Constructs a TreeWordList from a file with path = filename
   * 
   * @param pathnames
   *          path of the file to create a TextWordList from
   * @param base
   *          - the relative base
   * @throws IOException
   *           When there is a problem reading a path.
   */
  public MultiTreeWordList(String[] pathnames, File base) throws IOException {
    this.root = new MultiTextNode();
    this.costMap = new EditDistanceCostMap();

    if (pathnames == null) {
      return;
    }

    for (String pathname : pathnames) {
      String name = getRelativePath(new File(pathname), base);
      load(new FileSystemResource(pathname), name);
    }
  }

  /**
   * @param files
   *          - the input files
   * @param base
   *          - the relative base
   * @throws IOException
   *           - When there is a problem reading the files.
   */
  public MultiTreeWordList(List<File> files, File base) throws IOException {
    this.root = new MultiTextNode();
    this.costMap = new EditDistanceCostMap();

    if (files == null) {
      return;
    }

    for (File file : files) {
      String name = getRelativePath(file, base);
      load(new FileSystemResource(file), name);
    }
  }

  private String getRelativePath(File file, File base) {
    if (base == null) {
      return file.getName();
    }
    Path filePath = file.toPath();
    Path basePath = base.toPath();
    Path relativize = basePath.relativize(filePath);
    String result = relativize.toString().replaceAll("\\\\", "/");
    return result;
  }

  /**
   * Load a resource in this word list.
   * 
   * @param resource
   *          Resource to load. The resource's name must end with .txt or .mtwl.
   * @throws IOException
   *           When there is a problem reading the resource.
   */

  private void load(Resource resource) throws IOException {
    load(resource, resource.getFilename());
  }

  /**
   * Load a resource in this word list.
   * 
   * @param resource
   *          Resource to load.
   * @param name
   *          - The resource's name must end with .txt or .mtwl.
   * @throws IOException
   *           When there is a problem reading the resource.
   */
  private void load(Resource resource, String name) throws IOException {
    InputStream stream = null;
    try {
      stream = resource.getInputStream();
      if (name == null) {
        throw new IllegalArgumentException("List does not have a name.");
      } else if (name.endsWith(".txt")) {
        buildNewTree(stream, name);
      } else if (name.endsWith(".mtwl")) {
        persistence.readMTWL(root, stream, "UTF-8");
      } else {
        throw new IllegalArgumentException(
                "File name should end with .mtwl or .txt, found " + name);
      }
    } finally {
      IOUtils.closeQuietly(stream);
    }
  }

  /**
   * Creates a new Tree in the existing treeWordList from a file with path pathname
   * 
   * @param stream
   *          - Input stream for the file containing the words for the treeWordList
   * @param name
   *          - Associated name for the file
   * @throws IOException
   *           When there is a problem reading the inputstream.
   */
  public void buildNewTree(InputStream stream, String name) throws IOException {
    BufferedReader br = new BufferedReader(new InputStreamReader(stream, ENCODING));
    String s = null;

    while ((s = br.readLine()) != null) {
      addWord(s.trim(), name);
    }
    stream.close();
    br.close();
  }

  /**
   * Add a new String into the MultiTreeWordList.
   * 
   * @param s
   *          The String to add
   * @param type
   *          The type of the string.
   */
  public void addWord(String s, String type) {

    // Create Nodes from all chars of the strings besides the last one
    MultiTextNode pointer = root;

    for (Character each : s.toCharArray()) {

      MultiTextNode childNode = pointer.getChildNode(each);

      if (childNode == null) {
        childNode = new MultiTextNode(each, false);
        pointer.addChild(childNode);
      }

      pointer = childNode;
    }
    pointer.setWordEnd(s.length() > 0);
    pointer.addType(type);
  }

  /**
   * Returns all Types contained by the MultiTreeWordList.
   * 
   * @return all Types contained by the MultiTreeWordList.
   */
  public Collection<String> getTypes() {
    return getTypeCone(root);
  }

  /**
   * Returns all types contained by the cone of the MultiTextNode node, including the types of node
   * itself.
   * 
   * @param node
   *          The node where we start, the root of the cone.
   * @return all types contained by the cone of the MultiTextNode node, including the types of node
   *         itself.
   */
  public Collection<String> getTypeCone(MultiTextNode node) {
    // TODO improve this method!
    List<String> returnList = new LinkedList<String>();

    if (node.getTypes() != null) {
      for (String s : node.getTypes()) {
        if (!returnList.contains(s)) {
          returnList.add(s);
        }
      }
    }

    for (Character c : node.getChildren().keySet()) {
      for (String s : getTypeCone(node.getChildNode(c))) {
        if (!returnList.contains(s)) {
          returnList.add(s);
        }
      }
    }

    return returnList;
  }

  /**
   * Returns all strings contained by the MultiTreeWordList.
   * 
   * @return All strings contained by the MultiTreeWordList.
   */
  public Collection<String> keySet() {
    List<String> keySet = new LinkedList<String>(keySet(root, ""));
    Collections.sort(keySet);
    return keySet;
  }

  /**
   * Returns all strings contained by the cone of the MultiTextNode node and uses prefix as the
   * prefix of all the strings.
   * 
   * @param node
   *          the node we are considering.
   * @param prefix
   *          the prefix until now.
   * @return All strings contained by the cone of the MultiTextNode node.
   */
  private Collection<String> keySet(MultiTextNode node, String prefix) {

    List<String> resultList = new LinkedList<String>();

    // Recursion stop.
    if (node.isWordEnd()) {
      resultList.add(prefix);
    }

    // Recursion step.
    for (Character c : node.getChildren().keySet()) {
      String temp = prefix + String.valueOf(c);
      resultList.addAll(keySet(node.getChildNode(c), temp));
    }

    return resultList;
  }

  /**
   * Returns all types of the very string s.
   * 
   * @param s
   *          The string with the types.
   * @return All types from the very string s.
   */
  public Collection<String> getTypes(String s) {
    return getTypes(s, false);
  }

  /**
   * Returns the types of the string s.
   * 
   * @param s
   *          The string with the types.
   * @param ignoreCase
   *          Indicates, whether we search case sensitive or not.
   * @return The types of the string s.
   */
  public Collection<String> getTypes(String s, boolean ignoreCase) {

    // Collection<Set<String>> types = editDistanceClever(root, s, "", 0.0,
    // 0,
    // ignoreCase, false, costMap, false, false).values();
    // Map<String, Set<String>> types = editDistanceClever(root, s, "", 0.0,
    // 0, ignoreCase, false, costMap, false, false);
    Map<String, Set<String>> types = editDistance(s, 0, ignoreCase, "");
    Set<String> returnSet = new HashSet<String>();

    for (Entry<String, Set<String>> each : types.entrySet()) {
      returnSet.addAll(each.getValue());
    }

    return returnSet;
  }

  /**
   * Returns a list of types which belong to a string.
   * 
   * @param string
   *          The string which types we want to have.
   * @param ignoreCase
   *          Indicates whether we search case sensitive or not.
   * @param ignoreLength
   *          If the length of the string is less than of equal to this, we search case insensitive.
   * @param edit
   *          Indicates whether we use an edit distance or not.
   * @param distance
   *          The edit distance to a string contained by the MultiTreeWordList.
   * @param ignoreToken
   *          Characters which can be ignored.
   * @return Returns a list of types which belong to a string.
   */

  @Override
  public List<String> contains(String string, boolean ignoreCase, int ignoreLength, boolean edit,
          double distance, String ignoreToken) {

    List<String> resultList = new LinkedList<String>();
    Map<String, Set<String>> editDistance;

    if (string.length() >= ignoreLength && ignoreCase) {
      editDistance = editDistance(string, (int) distance, true, ignoreToken, false);
    } else {
      editDistance = editDistance(string, (int) distance, false, ignoreToken, false);
    }
    for (Entry<String, Set<String>> each : editDistance.entrySet()) {
      resultList.addAll(each.getValue());
    }
    return resultList;
  }

  /**
   * Checks whether a string is contained by the MultiTreeWordList or not.
   * 
   * @param string
   *          The string which is contained or not.
   * @param ignoreCase
   *          Indicates whether we search case sensitive or not.
   * @param ignoreLength
   *          If the length of the string is less than of equal to this, we search case insensitive.
   * @param edit
   *          Indicates whether we use an edit distance or not.
   * @param distance
   *          The edit distance to a string contained by the MultiTreeWordList.
   * @param ignoreToken
   *          Characters which can be ignored.
   * @return true, if the string is contained by the MultiTreeWordList, false otherwise.
   */
  public boolean containsBool(String string, boolean ignoreCase, int ignoreLength, boolean edit,
          double distance, String ignoreToken) {
    return editDistanceBool(root, string, "", distance, 0, ignoreCase, false, costMap);
  }

  /**
   * Checks whether the tree contains exaclty the string s.
   * 
   * @param s
   *          The string which is contained or not.
   * @return True, if the TreeWordList contains exactly the string s, false otherwise.
   */
  public boolean contains(String s) {
    return contains(s, false);
  }

  /**
   * Checks whether the tree contains the string s.
   * 
   * @param s
   *          The string which is contained or not.
   * @param ignoreCase
   *          Indicates whether we search case sensitive or not.
   * @return True, if the TreeWordList contains the string s, false otherwise.
   */
  public boolean contains(String s, boolean ignoreCase) {
    return contains(s, ignoreCase, 0, new char[] {}, 0, true);
  }

  /**
   * Checks if the MultiTreeWordList contains the string s.
   * 
   * @param s
   *          The string which is contained or not.
   * @param ignoreCase
   *          Indicates whether we search case sensitive or not.
   * @param size
   *          The index of the string.
   * @param ignoreChars
   *          Characters which can be ignored.
   * @param maxIgnoreChars
   *          The maximum number of ignored characters.
   * @return true, if TreeWordList contains the string, false otherwise.
   */
  @Override
  public boolean contains(String s, boolean ignoreCase, int size, char[] ignoreChars,
          int maxIgnoreChars, boolean ignoreWS) {

    EditDistanceCostMap edm = new EditDistanceCostMap();

    for (Character c : ignoreChars) {
      edm.setDeleteCosts(c, 0.0);
    }

    return editDistanceBool(root, s, "", maxIgnoreChars, 0, ignoreCase, false, edm);
  }

  /**
   * Checks if the MultiTreeWordList contains a prefix of the string s.
   * 
   * @param s
   *          The string which is contained or not.
   * @param ignoreCase
   *          Indicates whether we search case sensitive or not.
   * @param size
   *          The index of the string.
   * @param ignoreChars
   *          Characters which can be ignored.
   * @param maxIgnoreChars
   *          The maximum number of ignored characters.
   * @return true, if TreeWordList contains a prefix of the string, false otherwise.
   */
  @Override
  public boolean containsFragment(String s, boolean ignoreCase, int size, char[] ignoreChars,
          int maxIgnoreChars, boolean ignoreWS) {
    MultiTextNode pointer = root;
    return recursiveContains(pointer, s, 0, ignoreCase && s.length() > size, true, ignoreChars,
            maxIgnoreChars);
  }

  /**
   * Checks whether prefix of a string is contained by the MultiTreeWordList or not.
   * 
   * @param string
   *          The string whose prefix is contained or not.
   * @param ignoreCase
   *          Indicates whether we search case sensitive or not.
   * @param ignoreLength
   *          If the length of the string is less than of equal to this, we search case insensitive.
   * @param edit
   *          Indicates whether we use an edit distance or not.
   * @param distance
   *          The edit distance to a string contained by the MultiTreeWordList.
   * @param ignoreToken
   *          Characters which can be ignored.
   * @return true, if a prefix of the string is contained by the MultiTreeWordList, false otherwise.
   */
  public boolean containsFragmentBool(String string, boolean ignoreCase, int ignoreLength,
          boolean edit, double distance, String ignoreToken) {

    if (string.length() >= ignoreLength && ignoreCase) {
      return editDistanceBool(root, string, "", distance, 0, true, true, costMap);
    } else {
      return editDistanceBool(root, string, "", distance, 0, false, true, costMap);
    }
  }

  /**
   * Returns a list of types which belong to a prefix of a string that is contained by the
   * MultiTreeWordList.
   * 
   * @param string
   *          The string whose prefix's types we are interested in.
   * @param ignoreCase
   *          Indicates whether we search case sensitive or not.
   * @param ignoreLength
   *          If the length of the string is less than of equal to this, we search case insensitive.
   * @param edit
   *          Indicates whether we use an edit distance or not.
   * @param distance
   *          The edit distance to a string contained by the MultiTreeWordList.
   * @param ignoreToken
   *          Characters which can be ignored.
   * @return A list of types which belong to a prefix of a string that is contained by the
   *         MultiTreeWordList.
   */
  @Override
  public List<String> containsFragment(String string, boolean ignoreCase, int ignoreLength,
          boolean edit, double distance, String ignoreToken) {

    List<String> resultList = new LinkedList<String>();
    Map<String, Set<String>> resultMap = null;

    if (!edit) {
      return recursiveContains2(root, string, 0, ignoreCase && string.length() > ignoreLength, true,
              ignoreToken.toCharArray(), ignoreLength);
    } else {
      if (string.length() >= ignoreLength && ignoreCase) {
        resultMap = editDistance(string, (int) distance, true, ignoreToken, true);
      } else {
        resultMap = editDistance(string, (int) distance, false, ignoreToken, true);
      }

      for (Set<String> set : resultMap.values()) {
        for (String s : set) {
          if (!resultList.contains(s)) {
            // resultList.addAll(resultMap.get(set));
            resultList.add(s);
          }
        }
      }
    }

    return resultList;
  }

  /**
   * Returns true, if the MultiTreeWordList contains the string text, false otherwise.
   * 
   * @param pointer
   *          The MultiTextNode we are looking at.
   * @param text
   *          The string which is contained or not.
   * @param index
   *          The index of the string text we checked until now.
   * @param ignoreCase
   *          Indicates whether we search case sensitive or not.
   * @param fragment
   *          Indicates whether we are looking for a prefix of the string text.
   * @param ignoreChars
   *          Characters which can be ignored.
   * @param maxIgnoreChars
   *          Maximum number of characters which are allowed to be ignored.
   * @return True, if the TreeWordList contains the string text, false otherwise.
   */
  private List<String> recursiveContains2(MultiTextNode pointer, String text, int index,
          boolean ignoreCase, boolean fragment, char[] ignoreChars, int maxIgnoreChars) {

    if (pointer == null) {
      return null;
    }

    if (index == text.length()) {
      if (pointer.isWordEnd()) {
        return new ArrayList<String>(pointer.getTypes());
      }
      if (fragment) {
        return Collections.emptyList();
      }
    }

    char charAt = text.charAt(index);
    boolean charAtIgnored = false;

    if (ignoreChars != null) {
      for (char each : ignoreChars) {
        if (each == charAt) {
          charAtIgnored = true;
          break;
        }
      }
      charAtIgnored &= index != 0;
    }

    int next = ++index;

    if (ignoreCase) {

      // Lower Case Node.
      MultiTextNode childNodeL = pointer.getChildNode(Character.toLowerCase(charAt));
      if (childNodeL == null) {
        childNodeL = skipWS(pointer, Character.toLowerCase(charAt));
      }

      // Upper Case Node.
      MultiTextNode childNodeU = pointer.getChildNode(Character.toUpperCase(charAt));
      if (childNodeU == null) {
        childNodeU = skipWS(pointer, Character.toUpperCase(charAt));
      }

      if (charAtIgnored && childNodeL == null && childNodeU == null) {
        // Character is ignored and does not appear.
        return recursiveContains2(pointer, text, next, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars);
      } else {
        // Recursion.
        Collection<String> recursiveContainsL = recursiveContains2(childNodeL, text, next,
                ignoreCase, fragment, ignoreChars, maxIgnoreChars);
        Collection<String> recursiveContainsU = recursiveContains2(childNodeU, text, next,
                ignoreCase, fragment, ignoreChars, maxIgnoreChars);
        if (recursiveContainsL == null && recursiveContainsU == null) {
          return null;
        }
        List<String> result = new LinkedList<String>();
        if (recursiveContainsL != null) {
          result.addAll(recursiveContainsL);
        }
        if (recursiveContainsU != null) {
          result.addAll(recursiveContainsU);
        }
        return result;
      }

    } else {
      // Case sensitive.
      MultiTextNode childNode = pointer.getChildNode(charAt);

      if (charAtIgnored && childNode == null) {
        // Recursion with incremented index.
        return recursiveContains2(pointer, text, next, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars);
      } else {
        // Recursion with new node.
        return recursiveContains2(childNode, text, next, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars);
      }
    }
  }

  private MultiTextNode skipWS(MultiTextNode pointer, char charAt) {
    MultiTextNode childNode = pointer.getChildNode(' ');
    if (childNode != null) {
      MultiTextNode node = childNode.getChildNode(charAt);
      if (node == null) {
        return skipWS(childNode, charAt);
      } else {
        return node;
      }
    }
    return null;
  }

  /**
   * Returns true, if the MultiTreeWordList contains the string text, false otherwise.
   * 
   * @param pointer
   *          The MultiTextNode we are looking at.
   * @param text
   *          The string which is contained or not.
   * @param index
   *          The index of the string text we checked until now.
   * @param ignoreCase
   *          Indicates whether we search case sensitive or not.
   * @param fragment
   *          Indicates whether we are looking for a prefix of the string text.
   * @param ignoreChars
   *          Characters which can be ignored.
   * @param maxIgnoreChars
   *          Maximum number of characters which are allowed to be ignored.
   * @return True, if the TreeWordList contains the string text, false otherwise.
   */
  private boolean recursiveContains(MultiTextNode pointer, String text, int index,
          boolean ignoreCase, boolean fragment, char[] ignoreChars, int maxIgnoreChars) {

    if (pointer == null) {
      return false;
    }

    if (index == text.length()) {
      return fragment || pointer.isWordEnd();
    }

    char charAt = text.charAt(index);
    boolean charAtIgnored = false;

    if (ignoreChars != null) {
      for (char each : ignoreChars) {
        if (each == charAt) {
          charAtIgnored = true;
          break;
        }
      }
      charAtIgnored &= index != 0;
    }

    int next = ++index;

    if (ignoreCase) {

      // Lower Case Node.
      MultiTextNode childNodeL = pointer.getChildNode(Character.toLowerCase(charAt));

      // Upper Case Node.
      MultiTextNode childNodeU = pointer.getChildNode(Character.toUpperCase(charAt));

      if (charAtIgnored && childNodeL == null && childNodeU == null) {
        // Character is ignored and does not appear.
        return recursiveContains(pointer, text, next, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars);
      } else {
        // Recursion.
        return recursiveContains(childNodeL, text, next, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars)
                || recursiveContains(childNodeU, text, next, ignoreCase, fragment, ignoreChars,
                        maxIgnoreChars);
      }

    } else {
      // Case sensitive.
      MultiTextNode childNode = pointer.getChildNode(charAt);

      if (charAtIgnored && childNode == null) {
        // Recursion with incremented index.
        return recursiveContains(pointer, text, next, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars);
      } else {
        // Recursion with new node.
        return recursiveContains(childNode, text, next, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars);
      }
    }
  }

  @Override
  public Collection<AnnotationFS> find(RutaStream stream, Map<String, Object> typeMap,
          boolean ignoreCase, int ignoreLength, boolean edit, double distance, String ignoreToken) {

    Collection<AnnotationFS> results = new HashSet<AnnotationFS>();
    stream.moveToFirst();
    RutaStream streamPointer = stream.copy();

    while (stream.isValid()) {
      RutaBasic anchorBasic = (RutaBasic) stream.get();
      streamPointer.moveTo(anchorBasic);

      List<RutaBasic> basicsToAdd = new ArrayList<RutaBasic>();
      basicsToAdd.add(anchorBasic);
      String text = anchorBasic.getCoveredText();
      StringBuilder candidate = new StringBuilder(text);
      String lastCandidate = candidate.toString();

      if (text.length() != 1 || !ignoreToken.contains(text)) {

        List<AnnotationFS> interResults = new ArrayList<AnnotationFS>();

        while (streamPointer.isValid()) {

          boolean skip = false;
          String currentBasicText = basicsToAdd.get(basicsToAdd.size() - 1).getCoveredText();
          if (currentBasicText.length() == 1 && ignoreToken.contains(currentBasicText)) {
            skip = true;
          }
          List<String> types = null;
          if (!skip) {
            types = containsFragment(candidate.toString(), ignoreCase, ignoreLength, edit, distance,
                    ignoreToken);
          }
          if (skip || types != null) {
            streamPointer.moveToNext();
            if (streamPointer.isValid()) {
              RutaBasic next = (RutaBasic) streamPointer.get();
              // List<String> contains = contains(candidate,
              // ignoreCase,
              // ignoreLength, edit, distance, ignoreToken);
              if (!skip) {
                tryToCreateAnnotation(types, stream, results, basicsToAdd, candidate.toString(),
                        interResults, ignoreCase, ignoreLength, edit, distance, ignoreToken,
                        typeMap);
              }
              lastCandidate = candidate.toString();
              candidate.append(next.getCoveredText());
              basicsToAdd.add(next);

            } else {
              // !streamPointer.isValid();
              tryToCreateAnnotation(types, stream, results, basicsToAdd, lastCandidate,
                      interResults, ignoreCase, ignoreLength, edit, distance, ignoreToken, typeMap);
            }
          } else {

            // containsFragment.isEmpty();
            // basicsToAdd.remove(basicsToAdd.size() - 1);
            // tryToCreateAnnotation(stream, results, basicsToAdd,
            // lastCandidate, interResults, ignoreCase,
            // ignoreLength, edit, distance, ignoreToken, typeMap);

            // breaks inner while()-loop.
            break;
          }

        }
      }
      stream.moveToNext();
    }

    return results;
  }

  @Override
  public List<AnnotationFS> find(RutaStream stream, boolean ignoreCase, int size,
          char[] ignoreChars, int maxIgnoredChars, boolean ignoreWS) {
    assert false;
    return new ArrayList<AnnotationFS>();
  }

  private void tryToCreateAnnotation(List<String> types, RutaStream stream,
          Collection<AnnotationFS> results, List<RutaBasic> basicsToAdd, String lastCandidate,
          List<AnnotationFS> interResult, boolean ignoreCase, int ignoreLength, boolean edit,
          double distance, String ignoreToken, Map<String, Object> map) {
    if (basicsToAdd.size() >= 1 && types != null) {
      Set<String> set = new HashSet<String>(types);
      for (String each : set) {
        Object o = map.get(each);
        if (o instanceof Type) {
          Type type = (Type) o;
          int begin = basicsToAdd.get(0).getBegin();
          int end = basicsToAdd.get(basicsToAdd.size() - 1).getEnd();
          AnnotationFS newFS = stream.getCas().createAnnotation(type, begin, end);
          results.add(newFS);
        } else if (o instanceof List) {
          List<?> list = (List<?>) o;
          Type type = null;
          String featureString = null;
          Object value = each;
          if (list.size() == 2 || list.size() == 3) {
            if (list.get(0) instanceof Type) {
              type = (Type) list.get(0);
            }
            if (list.get(1) instanceof String) {
              featureString = (String) list.get(1);
            }
            if (list.size() == 3) {
              value = list.get(2);
            }

            if (type != null && featureString != null) {
              int begin = basicsToAdd.get(0).getBegin();
              int end = basicsToAdd.get(basicsToAdd.size() - 1).getEnd();
              AnnotationFS newFS = stream.getCas().createAnnotation(type, begin, end);
              Feature feature = type.getFeatureByBaseName(featureString);
              setFeatureValue(newFS, feature, value, stream);
              results.add(newFS);
            }
          }
        }
      }
    } else if (interResult != null && !interResult.isEmpty()) {
      results.addAll(interResult);
    }
  }

  private void setFeatureValue(AnnotationFS annotationFS, Feature feature, Object o,
          RutaStream stream) {
    TypeSystem typeSystem = stream.getCas().getTypeSystem();
    if (feature != null && o != null) {
      Type range = feature.getRange();
      String rangeName = range.getName();
      if (typeSystem.subsumes(typeSystem.getType(CAS.TYPE_NAME_STRING), range)
              && o instanceof String) {
        annotationFS.setStringValue(feature, (String) o);
      } else if (rangeName.equals(CAS.TYPE_NAME_INTEGER) && o instanceof Number) {
        annotationFS.setIntValue(feature, ((Number) o).intValue());
      } else if (rangeName.equals(CAS.TYPE_NAME_DOUBLE) && o instanceof Number) {
        annotationFS.setDoubleValue(feature, ((Number) o).doubleValue());
      } else if (rangeName.equals(CAS.TYPE_NAME_FLOAT) && o instanceof Number) {
        annotationFS.setFloatValue(feature, ((Number) o).floatValue());
      } else if (rangeName.equals(CAS.TYPE_NAME_BYTE) && o instanceof Number) {
        annotationFS.setByteValue(feature, ((Number) o).byteValue());
      } else if (rangeName.equals(CAS.TYPE_NAME_SHORT) && o instanceof Number) {
        annotationFS.setShortValue(feature, ((Number) o).shortValue());
      } else if (rangeName.equals(CAS.TYPE_NAME_LONG) && o instanceof Number) {
        annotationFS.setLongValue(feature, ((Number) o).longValue());
      } else if (rangeName.equals(CAS.TYPE_NAME_BOOLEAN) && o instanceof Boolean) {
        annotationFS.setBooleanValue(feature, (Boolean) o);
      } else if (typeSystem.subsumes(typeSystem.getType(CAS.TYPE_NAME_STRING), range)
              & o instanceof Type) {
        annotationFS.setStringValue(feature, ((Type) o).getName());
      }
    } else {
      throw new IllegalArgumentException(
              "Not able to assign feature value: " + o + " -> " + feature);
    }
  }

  /**
   * Returns a map with all strings with a specified edit distance to the string query as keys and
   * the files they belong to as values.
   * 
   * @param query
   *          - The query string.
   * @param distance
   *          - The specified edit distance.
   * @return A map with all strings with a specified edit distance to the string query as keys and
   *         the files they belong to as values.
   */
  public Map<String, Set<String>> editDistance(String query, int distance) {
    return editDistance(query, distance, false, "");
  }

  /**
   * Returns a map with all strings with a specified edit distance to the string query as keys and
   * the files they belong to as values.
   * 
   * @param query
   *          - The query string.
   * @param distance
   *          - The specified edit distance.
   * @param ignoreCase
   *          - Indicates whether we search case sensitive or not.
   * @param ignoreToken
   *          - Indicates the characters to ignore
   * @return A map with all strings with a specified edit distance to the string query as keys and
   *         the files they belong to as values.
   */
  public Map<String, Set<String>> editDistance(String query, int distance, boolean ignoreCase,
          String ignoreToken) {
    return editDistance(query, distance, ignoreCase, ignoreToken, false);
  }

  /**
   * Returns a map with all strings with a specified edit distance to the string query as keys and
   * the files they belong to as values.
   * 
   * @param query
   *          - The query string.
   * @param distance
   *          - The specified edit distance.
   * @param ignoreCase
   *          - Indicates whether we search case sensitive or not.
   * @param ignoreToken
   *          - the characters to ignore
   * @param fragment
   *          - Indicates whether we search for fragments of the query string or not.
   * @return A map with all strings with a specified edit distance to the string query as keys and
   *         the files they belong to as values.
   */
  public Map<String, Set<String>> editDistance(String query, int distance, boolean ignoreCase,
          String ignoreToken, boolean fragment) {

    // The second alternative realizes the fragment functionality by
    // setting the insert costs of the ignored character to zero. This
    // is much more elegant and easier to maintain. I don't know if the
    // other way is faster, so I did not delete it yet.

    Map<Character, Double> oldInsertCosts = new HashMap<Character, Double>();
    EditDistanceCostMap edcm = new EditDistanceCostMap();

    // We need to store the old insert costs before we set them to zero.
    for (char c : ignoreToken.toCharArray()) {
      oldInsertCosts.put(c, edcm.getInsertCosts(c));
      edcm.setInsertCosts(c, 0.0);
    }

    Map<String, Set<String>> result = null;

    if (ignoreCase) {
      result = editDistanceClever(root, query.toLowerCase(), "", distance, 0, true, fragment, edcm,
              false, false);
    } else {
      result = editDistanceClever(root, query, "", distance, 0, false, fragment, edcm, false,
              false);
    }

    // Restoring of the old insert costs.
    for (Entry<Character, Double> c : oldInsertCosts.entrySet()) {
      edcm.setDeleteCosts(c.getKey(), c.getValue());
    }

    return result;
  }

  /**
   * Returns a map with all strings with a specified edit distance to the string query as keys and
   * the files they belong to as values.
   * 
   * @param node
   *          - The MultiTextNode which is under consideration at the moment.
   * @param query
   *          - The query string.
   * @param result
   *          - The result which matched until now.
   * @param distance
   *          - The remaining edit distance.
   * @param index
   *          - The index of the query string at the moment.
   * @param ignoreCase
   *          - Indicates whether we search case sensitive or not.
   * @param fragment
   *          - Indicates whether we search for fragments of the query string or not.
   * @param edm
   *          - The edit distance cost map we are using.
   * @param lastActionInsert
   *          - Indicates whether the last action was an insert action.
   * @param lastActionDelete
   *          - Indicates whether the last action was a delete action.
   * @return A map with all strings with a specified edit distance to the string query as keys and
   *         the files they belong to as values.
   */
  private Map<String, Set<String>> editDistanceClever(MultiTextNode node, String query,
          String result, double distance, int index, boolean ignoreCase, boolean fragment,
          EditDistanceCostMap edm, boolean lastActionInsert, boolean lastActionDelete) {

    EditDistanceResultMap resultMap = new EditDistanceResultMap();

    if (!lastActionInsert) {
      // Delete.
      if (distance - edm.getDeleteCosts(node.getValue()) >= 0 && result.length() > 0) {
        resultMap.putAll(editDistanceClever(node, query, result,
                distance - edm.getDeleteCosts(node.getValue()), index + 1, ignoreCase, fragment,
                edm, false, true));
      }
    }

    // Recursion stop.
    if (node.isWordEnd() || fragment) {

      HashMap<String, Set<String>> temp = new HashMap<String, Set<String>>();

      double remainingInsertCosts = 0.0;

      // Accumulating remaining insert costs if the query is longer than
      // the word in the trie.
      for (int i = index; i < query.length(); i++) {
        remainingInsertCosts += edm.getInsertCosts(query.charAt(i));
      }

      if (remainingInsertCosts <= distance) {
        // if (remainingInsertCosts <= distance &&
        // !node.getTypes().isEmpty()) {
        // if (query.length() - index <= distance) {

        if (fragment) {
          temp.put(result, new HashSet<String>(getTypeCone(node)));
        } else {
          temp.put(result, new HashSet<String>(node.getTypes()));
        }

        resultMap.putAll(temp);
      }

      // Important: word end does not mean no children any more!
      if (node.getChildren() == null) {
        return resultMap;
      }
    }

    // Recursion.
    for (MultiTextNode tempNode : node.getChildren().values()) {

      if (index < query.length()) {
        if (ignoreCase) {
          if (Character.toLowerCase(tempNode.getValue()) == Character
                  .toLowerCase(query.charAt(index))) {
            resultMap.putAll(editDistanceClever(tempNode, query, result + tempNode.getValue(),
                    distance, index + 1, ignoreCase, fragment, edm, false, false));
          }
        } else {
          if (tempNode.getValue() == query.charAt(index)) {
            resultMap.putAll(editDistanceClever(tempNode, query, result + tempNode.getValue(),
                    distance, index + 1, ignoreCase, fragment, edm, false, false));
          }
        }
      }

      if (distance - edm.getReplaceCosts(node.getValue(), tempNode.getValue()) >= 0) {

        // Substitute.
        resultMap.putAll(editDistanceClever(tempNode, query, result + tempNode.getValue(),
                distance - edm.getReplaceCosts(node.getValue(), tempNode.getValue()), index + 1,
                ignoreCase, fragment, edm, false, false));
      }

      if (!lastActionDelete) {
        if (distance - edm.getInsertCosts(tempNode.getValue()) >= 0) {
          // Insert - use the same index twice.
          resultMap.putAll(editDistanceClever(tempNode, query, result + tempNode.getValue(),
                  distance - edm.getInsertCosts(tempNode.getValue()), index, ignoreCase, fragment,
                  edm, true, false));
        }
      }
    }

    return resultMap;
  }

  /**
   * Checks if a string is contained by the MultiTreeWordList.
   * 
   * @param node
   *          - The MultiTextNode which is under consideration at the moment.
   * @param query
   *          - The query string.
   * @param result
   *          - The result which matched until now.
   * @param distance
   *          - The remaining edit distance.
   * @param index
   *          - The index of the query string at the moment.
   * @param ignoreCase
   *          - Indicates whether we search case sensitive or not.
   * @param fragment
   *          - Indicates whether we search for fragments of the query string or not.
   * @param edm
   *          - The edit distance cost map we are using.
   * @return A map with all strings with a specified edit distance to the string query as keys and
   *         the files they belong to as values.
   */
  private boolean editDistanceBool(MultiTextNode node, String query, String result, double distance,
          int index, boolean ignoreCase, boolean fragment, EditDistanceCostMap edm) {

    boolean deletion = false;
    boolean insertion = false;
    boolean substitution = false;
    boolean noop = false;

    // Recursion stop.
    if (fragment) {
      if (index == query.length()) {
        return true;
      }
    }

    if (node.isWordEnd()) {

      double remainingInsertCosts = 0.0;

      // Accumulating remaining insert costs if the query is longer than
      // the word in the trie.
      for (int i = index; i < query.length(); i++) {
        remainingInsertCosts += edm.getInsertCosts(query.charAt(i));
      }

      if (remainingInsertCosts <= distance) {
        // if (query.length() - index <= distance) {
        return true;
      }
    }

    // Delete.
    if (distance - edm.getDeleteCosts(node.getValue()) >= 0 && result.length() > 0) {
      deletion = editDistanceBool(node, query, result,
              distance - edm.getDeleteCosts(node.getValue()), index + 1, ignoreCase, fragment, edm);

      if (deletion) {
        return true;
      }
    }

    // Recursion.
    for (MultiTextNode tempNode : node.getChildren().values()) {

      if (index < query.length()) {
        if (ignoreCase) {
          if (Character.toLowerCase(tempNode.getValue()) == Character
                  .toLowerCase(query.charAt(index))) {
            noop = editDistanceBool(tempNode, query, result + tempNode.getValue(), distance,
                    index + 1, ignoreCase, fragment, edm);
          }
        } else {
          if (tempNode.getValue() == query.charAt(index)) {
            noop = editDistanceBool(tempNode, query, result + tempNode.getValue(), distance,
                    index + 1, ignoreCase, fragment, edm);
          }
        }

        if (noop) {
          return true;
        }
      }

      if (distance - edm.getReplaceCosts(node.getValue(), tempNode.getValue()) >= 0) {

        // Substitute.
        substitution = editDistanceBool(tempNode, query, result + tempNode.getValue(),
                distance - edm.getReplaceCosts(node.getValue(), tempNode.getValue()), index + 1,
                ignoreCase, fragment, edm);

        if (substitution) {
          return true;
        }
      }

      if (distance - edm.getInsertCosts(tempNode.getValue()) >= 0) {
        // Insert - use the same index twice.
        insertion = editDistanceBool(tempNode, query, result + tempNode.getValue(),
                distance - edm.getInsertCosts(tempNode.getValue()), index, ignoreCase, fragment,
                edm);

        if (insertion) {
          return true;
        }
      }

    }

    return false;
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 1;
    result = prime * result + ((costMap == null) ? 0 : costMap.hashCode());
    result = prime * result + ((root == null) ? 0 : root.hashCode());
    return result;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj)
      return true;
    if (obj == null)
      return false;
    if (getClass() != obj.getClass())
      return false;
    MultiTreeWordList other = (MultiTreeWordList) obj;
    if (costMap == null) {
      if (other.costMap != null)
        return false;
    } else if (!costMap.equals(other.costMap))
      return false;
    if (root == null) {
      if (other.root != null)
        return false;
    } else if (!root.equals(other.root))
      return false;
    return true;
  }

  public void createMTWLFile(String path, boolean compress, String encoding) throws IOException {
    persistence.createMTWLFile(root, path, compress, encoding);
  }

}
