/*
 * Copyright 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Author: jmarantz@google.com (Joshua Marantz)

#include "pagespeed/kernel/html/html_writer_filter.h"

#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/message_handler.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/writer.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/html/html_node.h"
#include "pagespeed/kernel/html/html_parse.h"

namespace net_instaweb {

namespace {

// Initial value for max_column_.  StartElement only inserts line breaks
// between attributes when max_column_ is positive, so this non-positive
// default leaves attribute wrapping switched off.
const int kDefaultMaxColumn = -1;

}  // namespace

// Constructs a writer filter bound to |html_parse|.  The filter starts with
// no output writer (writer_ is NULL), with max_column_ set to
// kDefaultMaxColumn, and with case folding of names turned off.
//
// NOTE(review): the emit paths dereference writer_ unconditionally, so a
// writer presumably has to be attached (via an accessor declared in the
// header) before any parse events arrive -- confirm against the header.
HtmlWriterFilter::HtmlWriterFilter(HtmlParse* html_parse)
    : html_parse_(html_parse),
      writer_(NULL),
      max_column_(kDefaultMaxColumn),
      case_fold_(false) {
  // Zero the remaining state: pending lazy close, column and error count.
  Clear();
}

// The destructor body is empty: neither html_parse_ nor writer_ is deleted
// or flushed here.
HtmlWriterFilter::~HtmlWriterFilter() {
}

// Resets the state that is tracked while a document is being written.
// Called from the constructor and again from StartDocument().
void HtmlWriterFilter::Clear() {
  write_errors_ = 0;           // no failed Write/Flush calls recorded yet.
  column_ = 0;                 // nothing emitted on the current line.
  lazy_close_element_ = NULL;  // no start tag is waiting for its ">".
}

void HtmlWriterFilter::TerminateLazyCloseElement() {
  if (lazy_close_element_ != NULL) {
    lazy_close_element_ = NULL;
    if (!writer_->Write(">", html_parse_->message_handler())) {
      ++write_errors_;
    }
    ++column_;
  }
}

void HtmlWriterFilter::EmitBytes(const StringPiece& str) {
  TerminateLazyCloseElement();

  // Search backward from the end for the last occurrence of a newline.
  column_ += str.size();  // if there are no newlines, bump up column counter.
  for (int i = str.size() - 1; i >= 0; --i) {
    if (str[i] == '\n') {
      column_ = str.size() - i - 1;  // found a newline; so reset the column.
      break;
    }
  }
  if (!writer_->Write(str, html_parse_->message_handler())) {
    ++write_errors_;
  }
}

// Emits a tag or attribute name.  Without case folding the name is written
// exactly as stored in |name|.  With case folding it is first copied into
// the member scratch string case_fold_buffer_, passed through LowerString(),
// and the result is written instead.
void HtmlWriterFilter::EmitName(const HtmlName& name) {
  if (!case_fold_) {
    EmitBytes(name.value());
    return;
  }
  name.value().CopyToString(&case_fold_buffer_);
  LowerString(&case_fold_buffer_);
  EmitBytes(case_fold_buffer_);
}

// Emits the start tag for |element|: "<", the tag name, and then every
// attribute as " name" optionally followed by "=" and the escaped value
// wrapped in the attribute's own quote string.  Elements whose resolved
// style is INVISIBLE produce no output at all.
//
// The closing ">" is written immediately unless the resolved style is
// BRIEF_CLOSE.  In that case the element is remembered in
// lazy_close_element_ so that EndElement() can emit "/>" if nothing else is
// written in between; any intervening EmitBytes() call completes the tag
// with ">" instead.
//
// TODO(jmarantz): Add a rewrite pass that morphs EXPLICIT_CLOSE into 'brief'
// when legal.  Such a change will introduce textual diffs between
// input and output html that would cause htmlparse unit tests to require
// a regold.  But the changes could be validated with the normalizer.
void HtmlWriterFilter::StartElement(HtmlElement* element) {
  const HtmlElement::Style style = GetElementStyle(element);
  if (style == HtmlElement::INVISIBLE) {
    return;
  }
  EmitBytes("<");
  EmitName(element->name());

  const HtmlElement::AttributeList& attributes = element->attributes();
  for (HtmlElement::AttributeConstIterator iter(attributes.begin());
       iter != attributes.end(); ++iter) {
    const HtmlElement::Attribute& attr = *iter;
    const char* value = attr.escaped_value();
    const bool has_value = (value != NULL);

    // When wrapping is enabled and this attribute would push the line past
    // max_column_, start a new line first.  Whitespace between attributes is
    // always safe to insert.  The estimate covers the separating space, the
    // name, and (when there is a value) the "=" plus the value text.
    if (max_column_ > 0) {
      int needed = 1 + attr.name_str().size();
      if (has_value) {
        needed += 1 + strlen(value);
      }
      if ((column_ + needed) > max_column_) {
        EmitBytes("\n");
      }
    }

    EmitBytes(" ");
    EmitName(attr.name());
    if (has_value) {
      EmitBytes("=");
      StringPiece quote = attr.quote_str();
      EmitBytes(quote);
      EmitBytes(value);
      EmitBytes(quote);
    }
  }

  // A rewrite pass may inject events between the start and end of an element
  // that was briefly closed in the input, so the brief close can only be
  // decided once EndElement() is reached; defer the ">" until then.
  if (style != HtmlElement::BRIEF_CLOSE) {
    EmitBytes(">");
  } else {
    lazy_close_element_ = element;
  }
}

// Resolves the tag-closing style to use when writing |element|.  Any style
// other than AUTO_CLOSE is returned unchanged.  AUTO_CLOSE is mapped to a
// concrete style by asking html_parse_ about the element's keyword, so this
// function never returns AUTO_CLOSE.
HtmlElement::Style HtmlWriterFilter::GetElementStyle(HtmlElement* element) {
  const HtmlElement::Style declared = element->style();
  if (declared != HtmlElement::AUTO_CLOSE) {
    return declared;
  }

  const HtmlName::Keyword keyword = element->keyword();

  // Avoid writing a closing tag when the original HTML was <li>1<li>2.  The
  // API keeps the correct structure, but the output should not be more
  // verbose than the input when the browser interprets it correctly as is.
  //
  // NOTE(review): an earlier comment here stated that programmatically
  // inserted tags for which IsOptionallyClosedTag is true are explicitly
  // closed by default; nothing in this function establishes that -- confirm
  // where such elements get their style assigned.
  if (html_parse_->IsImplicitlyClosedTag(keyword) ||
      html_parse_->IsOptionallyClosedTag(keyword)) {
    return HtmlElement::IMPLICIT_CLOSE;
  }
  if (html_parse_->TagAllowsBriefTermination(keyword)) {
    return HtmlElement::BRIEF_CLOSE;
  }
  return HtmlElement::EXPLICIT_CLOSE;
}

// Emits whatever is needed to close |element|, based on its resolved style:
//   - IMPLICIT_CLOSE, UNCLOSED: nothing; StartElement already wrote ">".
//   - INVISIBLE: nothing; StartElement wrote nothing either.
//   - BRIEF_CLOSE: "/>" if the start tag is still pending in
//     lazy_close_element_, otherwise a full "</name>".
//   - EXPLICIT_CLOSE: a full "</name>".
// AUTO_CLOSE is reported as a fatal error because GetElementStyle() never
// returns it.
void HtmlWriterFilter::EndElement(HtmlElement* element) {
  const HtmlElement::Style style = GetElementStyle(element);
  switch (style) {
    case HtmlElement::AUTO_CLOSE:
      html_parse_->message_handler()->FatalError(
          __FILE__, __LINE__,
          "GetElementStyle should never return AUTO_CLOSE.");
      break;

    case HtmlElement::IMPLICIT_CLOSE:
    case HtmlElement::INVISIBLE:
    case HtmlElement::UNCLOSED:
      break;

    case HtmlElement::BRIEF_CLOSE:
    case HtmlElement::EXPLICIT_CLOSE: {
      // A briefly closable element can only use "/>" if nothing was written
      // since its start tag, i.e. it is still the pending lazy element.
      const bool close_briefly = (style == HtmlElement::BRIEF_CLOSE) &&
                                 (lazy_close_element_ == element);
      if (!close_briefly) {
        EmitBytes("</");
        EmitName(element->name());
        EmitBytes(">");
        break;
      }

      // Clear the pending element first so that the EmitBytes() calls below
      // do not complete the start tag with a plain ">".
      lazy_close_element_ = NULL;

      // If the last attribute was unquoted, or had no value, a space is
      // required so that HTML parsers do not read the '/' of "/>" as part
      // of that attribute.
      bool needs_space = false;
      if (!element->attributes().IsEmpty()) {
        const HtmlElement::Attribute& last_attr =
            *element->attributes().Last();
        needs_space = (last_attr.escaped_value() == NULL) ||
                      (last_attr.quote_style() == HtmlElement::NO_QUOTE);
      }
      if (needs_space) {
        EmitBytes(" ");
      }
      EmitBytes("/>");
      break;
    }
  }
}

// Writes the contents of a characters node through EmitBytes().  The bytes
// are passed along as-is; no escaping is applied at this point.
void HtmlWriterFilter::Characters(HtmlCharactersNode* chars) {
  EmitBytes(chars->contents());
}

// Writes a CDATA section: the node's contents wrapped in "<![CDATA[" and
// "]]>".  The three pieces are emitted as separate EmitBytes() calls.
void HtmlWriterFilter::Cdata(HtmlCdataNode* cdata) {
  static const char kCdataOpen[] = "<![CDATA[";
  static const char kCdataClose[] = "]]>";

  EmitBytes(kCdataOpen);
  EmitBytes(cdata->contents());
  EmitBytes(kCdataClose);
}

// Writes an HTML comment: the node's contents wrapped in "<!--" and "-->".
// The three pieces are emitted as separate EmitBytes() calls.
void HtmlWriterFilter::Comment(HtmlCommentNode* comment) {
  static const char kCommentOpen[] = "<!--";
  static const char kCommentClose[] = "-->";

  EmitBytes(kCommentOpen);
  EmitBytes(comment->contents());
  EmitBytes(kCommentClose);
}

// Writes an IE directive node.  It is emitted with the same "<!--" / "-->"
// delimiters that Comment() uses, with the directive's contents in between.
void HtmlWriterFilter::IEDirective(HtmlIEDirectiveNode* directive) {
  static const char kDirectiveOpen[] = "<!--";
  static const char kDirectiveClose[] = "-->";

  EmitBytes(kDirectiveOpen);
  EmitBytes(directive->contents());
  EmitBytes(kDirectiveClose);
}

// Writes a directive node: the node's contents wrapped in "<!" and ">".
// The three pieces are emitted as separate EmitBytes() calls.
void HtmlWriterFilter::Directive(HtmlDirectiveNode* directive) {
  static const char kDirectiveOpen[] = "<!";
  static const char kDirectiveClose[] = ">";

  EmitBytes(kDirectiveOpen);
  EmitBytes(directive->contents());
  EmitBytes(kDirectiveClose);
}

// Called at the start of a document.  Resets the pending lazy-close element,
// the column counter and the write-error count via Clear().
void HtmlWriterFilter::StartDocument() {
  Clear();
}

// Called at the end of a document.  Emitting an empty string makes
// EmitBytes() run TerminateLazyCloseElement(), which writes the ">" of a
// start tag that is still pending; the empty string itself is then handed
// to the writer as well.
void HtmlWriterFilter::EndDocument() {
  EmitBytes("");  // flushes any lazily closed elements at end of the document.
}

// Forwards a flush request to the output writer.  A failed flush is counted
// in write_errors_, the same counter used for failed writes.
void HtmlWriterFilter::Flush() {
  const bool flushed = writer_->Flush(html_parse_->message_handler());
  if (flushed) {
    return;
  }
  ++write_errors_;
}

// Unconditionally marks this filter as enabled.  |disabled_reason| is never
// written, since the filter is never disabled here.
void HtmlWriterFilter::DetermineEnabled(GoogleString* disabled_reason) {
  set_is_enabled(true);
}

}  // namespace net_instaweb
