/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.ruta.resource;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

import org.apache.commons.text.StringEscapeUtils;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.internal.util.XMLUtils;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.ruta.RutaStream;
import org.apache.uima.ruta.type.RutaBasic;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

public class TreeWordList implements RutaWordList {
  private TextNode root;

  private String name;

  private boolean dictRemoveWS = false;

  /**
   * Default constructor
   */
  public TreeWordList() {
    this.root = null;
  }

  /**
   * Constructs a TreeWordList from a resource.
   * 
   * @param resource
   *          - Resource to create a TextWordList from
   * @param dictRemoveWS
   *          - option to remove whitespaces fromt he resource
   * @throws IOException
   *           - when there is a problem reading the resource
   * @throws IllegalArgumentException
   *           - for an invalid name or file ending
   */
  public TreeWordList(Resource resource, boolean dictRemoveWS) throws IOException {
    this.dictRemoveWS = dictRemoveWS;
    final String name = resource.getFilename();
    InputStream stream = null;
    try {
      stream = resource.getInputStream();
      if (name == null) {
        throw new IllegalArgumentException("List does not have a name.");
      } else if (name.endsWith(".txt")) {
        buildNewTree(stream);
      } else if (name.endsWith(".twl")) {
        readXML(stream, "UTF-8");
      } else {
        throw new IllegalArgumentException("File name should end with .twl or .txt, found " + name);
      }
    } finally {
      if (stream != null) {
        stream.close();
      }
    }

    this.name = name;
  }

  /**
   * Constructs a TreeWordList from a file with path = filename
   * 
   * @param pathname
   *          - path of the file to create a TextWordList from
   * @param dictRemoveWS
   *          - remove whitespaces
   * @throws IOException
   *           - when there is a problem reading the pathname
   */
  public TreeWordList(String pathname, boolean dictRemoveWS) throws IOException {
    this(new FileSystemResource(pathname), dictRemoveWS);
  }

  /**
   * Constructs a TreeWordList from an open stream with a given name
   * 
   * @param stream
   *          path of the file to create a TextWordList from
   * @param name
   *          - the name of the resource in the stream
   * @param dictRemoveWS
   *          - remove whitespaces
   * @throws IOException
   *           - when there is a problem reading the stream
   */
  public TreeWordList(InputStream stream, String name, boolean dictRemoveWS) throws IOException {
    this.dictRemoveWS = dictRemoveWS;
    if (name.endsWith(".twl")) {
      readXML(stream, "UTF-8");
    }
    if (name.endsWith(".txt")) {
      buildNewTree(stream);
    }
    this.name = new File(name).getName();
  }

  public TreeWordList(List<String> data, boolean dictRemoveWS) {
    name = "local";
    this.dictRemoveWS = dictRemoveWS;
    buildNewTree(data);
  }

  public void buildNewTree(List<String> data) {
    this.root = new TextNode();

    if (data == null) {
      return;
    }

    for (String s : data) {
      addWord(s);
    }
  }

  /**
   * Creates a new Tree in the existing treeWordList from a file with path pathname
   * 
   * @param stream
   *          Open InputStream containing the word for the treeWordList, this method will close the
   *          stream.
   * @throws IOException
   *           - when there is a problem reading the stream
   */
  public void buildNewTree(InputStream stream) throws IOException {
    Scanner scan = new Scanner(stream, "UTF-8");
    // creating a new tree
    this.root = new TextNode();
    while (scan.hasNextLine()) {
      String s = scan.nextLine().trim();
      // HOTFIX for old formats
      if (s.endsWith("=")) {
        s = s.substring(0, s.length() - 1);
        s = s.trim();
      }
      addWord(s);
    }
    scan.close();
  }

  /**
   * Returns the root node of the tree
   * 
   * @return the root node
   */
  public TextNode getRoot() {
    return this.root;
  }

  /**
   * Add a new String into the TreeWordList
   * 
   * @param s
   *          The String to add
   */
  public void addWord(String s) {
    // Create Nodes from all chars of the strings besides the last one

    if (s == null) {
      return;
    }

    TextNode pointer = root;
    for (Character each : s.toCharArray()) {
      if (dictRemoveWS && Character.isWhitespace(each)) {
        continue;
      }
      TextNode childNode = pointer.getChildNode(each);
      if (childNode == null) {
        childNode = new TextNode(each, false);
        pointer.addChild(childNode);
      }
      pointer = childNode;
    }
    pointer.setWordEnd(s.length() > 0);
  }

  @Override
  public boolean contains(String s, boolean ignoreCase, int size, char[] ignoreChars,
          int maxIgnoreChars, boolean ignoreWS) {
    if (s == null) {
      return false;
    }
    TextNode pointer = root;
    return recursiveContains(pointer, s, 0, ignoreCase && s.length() > size, false, ignoreChars,
            maxIgnoreChars, ignoreWS);
  }

  @Override
  public boolean containsFragment(String s, boolean ignoreCase, int size, char[] ignoreChars,
          int maxIgnoreChars, boolean ignoreWS) {
    TextNode pointer = root;
    return recursiveContains(pointer, s, 0, ignoreCase && s.length() > size, true, ignoreChars,
            maxIgnoreChars, ignoreWS);
  }

  private boolean recursiveContains(TextNode pointer, String text, int index, boolean ignoreCase,
          boolean fragment, char[] ignoreChars, int maxIgnoreChars, boolean ignoreWS) {
    if (pointer == null) {
      return false;
    }
    if (index == text.length()) {
      return fragment || pointer.isWordEnd();
    }
    char charAt = text.charAt(index);
    boolean charAtIgnored = false;
    if (ignoreChars != null) {
      for (char each : ignoreChars) {
        if (each == charAt) {
          charAtIgnored = true;
          maxIgnoreChars--;
          break;
        }
      }
      charAtIgnored &= index != 0;
      if (maxIgnoreChars < 0) {
        return false;
      }
    }
    int next = ++index;

    boolean result = false;

    if (ignoreCase) {
      TextNode childNodeL = pointer.getChildNode(Character.toLowerCase(charAt));
      TextNode childNodeU = pointer.getChildNode(Character.toUpperCase(charAt));

      TextNode wsNode = pointer.getChildNode(' ');
      if (ignoreWS && wsNode != null) {
        result |= recursiveContains(wsNode, text, next - 1, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars, ignoreWS);
      }

      if (childNodeL == null && ignoreWS) {
        childNodeL = skipWS(pointer, charAt);
      }
      if (childNodeU == null && ignoreWS) {
        childNodeU = skipWS(pointer, charAt);
      }
      if (charAtIgnored && childNodeL == null && childNodeU == null) {
        result |= recursiveContains(pointer, text, next, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars, ignoreWS);
      } else {
        result |= recursiveContains(childNodeL, text, next, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars, ignoreWS);
        if (childNodeL != childNodeU) { // Do not go into the same tree.
          result |= recursiveContains(childNodeU, text, next, ignoreCase, fragment, ignoreChars,
                  maxIgnoreChars, ignoreWS);
        }
      }
    } else {
      TextNode wsNode = pointer.getChildNode(' ');
      if (ignoreWS && wsNode != null) {
        result |= recursiveContains(wsNode, text, next - 1, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars, ignoreWS);
      }

      TextNode childNode = pointer.getChildNode(charAt);
      if (childNode == null && ignoreWS) {
        childNode = skipWS(pointer, charAt);
      }
      if (charAtIgnored && childNode == null) {
        result |= recursiveContains(pointer, text, next, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars, ignoreWS);
      } else {
        result |= recursiveContains(childNode, text, next, ignoreCase, fragment, ignoreChars,
                maxIgnoreChars, ignoreWS);
      }
    }
    return result;
  }

  private TextNode skipWS(TextNode pointer, char charAt) {
    TextNode childNode = pointer.getChildNode(' ');
    if (childNode != null) {
      TextNode node = childNode.getChildNode(charAt);
      if (node == null) {
        return skipWS(childNode, charAt);
      } else {
        return node;
      }
    }
    return null;
  }

  @Override
  public List<AnnotationFS> find(RutaStream stream, boolean ignoreCase, int size,
          char[] ignoreChars, int maxIgnoredChars, boolean ignoreWS) {
    ArrayList<AnnotationFS> results = new ArrayList<AnnotationFS>();
    stream.moveToFirst();
    RutaStream streamPointer = stream.copy();
    while (stream.isValid()) {
      RutaBasic anchorBasic = (RutaBasic) stream.get();
      streamPointer.moveTo(anchorBasic);

      List<RutaBasic> basicsToAdd = new ArrayList<RutaBasic>();
      basicsToAdd.add(anchorBasic);
      String text = anchorBasic.getCoveredText();
      StringBuilder candidate = new StringBuilder(text);
      // String lastCandidate = candidate.toString();
      Annotation interResult = null;
      while (streamPointer.isValid()) {
        if (containsFragment(candidate.toString(), ignoreCase, size, ignoreChars, maxIgnoredChars,
                ignoreWS)) {
          streamPointer.moveToNext();
          if (streamPointer.isValid()) {
            RutaBasic next = (RutaBasic) streamPointer.get();
            if (contains(candidate.toString(), ignoreCase, size, ignoreChars, maxIgnoredChars,
                    ignoreWS)) {
              interResult = new Annotation(stream.getJCas(), basicsToAdd.get(0).getBegin(),
                      basicsToAdd.get(basicsToAdd.size() - 1).getEnd());
            }
            // lastCandidate = candidate.toString();
            candidate.append(next.getCoveredText());
            basicsToAdd.add(next);
          } else {
            tryToCreateAnnotation(stream, ignoreCase, size, results, basicsToAdd,
                    candidate.toString(), interResult, ignoreChars, maxIgnoredChars, ignoreWS);
          }
        } else {
          basicsToAdd.remove(basicsToAdd.size() - 1);
          tryToCreateAnnotation(stream, ignoreCase, size, results, basicsToAdd,
                  candidate.toString(), interResult, ignoreChars, maxIgnoredChars, ignoreWS);
          break;
        }

      }
      stream.moveToNext();
    }
    return results;
  }

  public List<AnnotationFS> find(RutaStream stream, boolean ignoreCase, int size,
          boolean ignoreWS) {
    return find(stream, ignoreCase, size, null, 0, ignoreWS);
  }

  private void tryToCreateAnnotation(RutaStream stream, boolean ignoreCase, int size,
          ArrayList<AnnotationFS> results, List<RutaBasic> basicsToAdd, String lastCandidate,
          Annotation interResult, char[] ignoreChars, int maxIgnoredChars, boolean ignoreWS) {
    if (basicsToAdd.size() >= 1
            && contains(lastCandidate, ignoreCase, size, ignoreChars, maxIgnoredChars, ignoreWS)) {

      results.add(new Annotation(stream.getJCas(), basicsToAdd.get(0).getBegin(),
              basicsToAdd.get(basicsToAdd.size() - 1).getEnd()));
    } else if (interResult != null) {
      results.add(interResult);
    }
  }

  public void readXML(InputStream stream, String encoding) throws IOException {
    try {
      InputStream is = new BufferedInputStream(stream); // adds mark/reset support
      boolean isXml = MultiTreeWordListPersistence.isSniffedXmlContentType(is);
      if (!isXml) { // MTWL is encoded
        is = new ZipInputStream(is);
        ((ZipInputStream) is).getNextEntry(); // zip must contain a single entry
      }
      InputStreamReader streamReader = new InputStreamReader(is, encoding);
      this.root = new TextNode();

      XMLEventHandler handler = new XMLEventHandler(root);
      XMLReader reader = XMLUtils.createXMLReader();
      reader.setContentHandler(handler);
      reader.setErrorHandler(handler);

      reader.parse(new InputSource(streamReader));
    } catch (SAXException e) {
      throw new IllegalStateException(e);
    }
  }

  public void createTWLFile(String path, String encoding) throws IOException {
    createTWLFile(root, path, true, encoding);
  }

  public void createTWLFile(String path, boolean compressed, String encoding) throws IOException {
    createTWLFile(root, path, compressed, encoding);
  }

  public void createTWLFile(TextNode root, String path, boolean compressed, String encoding)
          throws IOException {
    if (compressed) {
      writeCompressedTWLFile(root, path, encoding);
    } else {
      writeUncompressedMTWLFile(root, path, encoding);
    }
  }

  private void writeCompressedTWLFile(TextNode root, String path, String encoding)
          throws IOException {
    FileOutputStream fos = new FileOutputStream(path);
    BufferedOutputStream bos = new BufferedOutputStream(fos);
    ZipOutputStream zos = new ZipOutputStream(bos);
    OutputStreamWriter writer = new OutputStreamWriter(zos, encoding);
    zos.putNextEntry(new ZipEntry(path));
    writeTWLFile(root, writer);
    writer.flush();
    zos.closeEntry();
    writer.close();
  }

  private void writeUncompressedMTWLFile(TextNode root, String path, String encoding)
          throws IOException {
    FileOutputStream output = new FileOutputStream(path);
    OutputStreamWriter writer = new OutputStreamWriter(output, encoding);
    writeTWLFile(root, writer);
    writer.close();
  }

  private void writeTWLFile(TextNode root, Writer writer) throws IOException {
    writer.write("<?xml version=\"1.0\" ?>");
    writer.write("<root>");
    for (TextNode child : root.getChildren().values()) {
      writeNode(writer, child);
    }
    writer.write("</root>");
  }

  public void writeNode(Writer writer, TextNode node) throws IOException {
    String value = StringEscapeUtils.escapeXml11(String.valueOf(node.getValue()));
    String output = "<node char=\"" + value + "\" isWordEnd=\"" + Boolean.toString(node.isWordEnd())
            + "\">";
    writer.write(output);
    for (TextNode child : node.getChildren().values()) {
      writeNode(writer, child);

    }
    writer.write("</node>");
  }

  @Override
  public String toString() {
    return name;
  }

  @Override
  public List<AnnotationFS> find(RutaStream stream, Map<String, Object> typeMap, boolean ignoreCase,
          int ignoreLength, boolean edit, double distance, String ignoreToken) {
    return null;
  }

  @Override
  public List<String> contains(String string, boolean ignoreCase, int ignoreLength, boolean edit,
          double distance, String ignoreToken) {
    return null;
  }

  @Override
  public List<String> containsFragment(String string, boolean ignoreCase, int ignoreLength,
          boolean edit, double distance, String ignoreToken) {
    return null;
  }

  public void startDocument() {

  }

  public void endDocument() {

  }

}
