// src/pagespeed/kernel/html/html_parse_test.cc - incubator-pagespeed-debian - Git at Google

 /*
  * Copyright 2010 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 // Author: jmarantz@google.com (Joshua Marantz)

 // Unit-test the html reader/writer to ensure that a few tricky
 // constructs come through without corruption.

 #include <vector>

 #include "pagespeed/kernel/base/basictypes.h"
 #include "pagespeed/kernel/base/gtest.h"
 #include "pagespeed/kernel/base/gmock.h"
 #include "pagespeed/kernel/base/message_handler.h"
 #include "pagespeed/kernel/base/mock_message_handler.h"
 #include "pagespeed/kernel/base/scoped_ptr.h"
 #include "pagespeed/kernel/base/string.h"
 #include "pagespeed/kernel/base/string_util.h"
 #include "pagespeed/kernel/base/string_writer.h"
 #include "pagespeed/kernel/html/disable_test_filter.h"
 #include "pagespeed/kernel/html/empty_html_filter.h"
 #include "pagespeed/kernel/html/explicit_close_tag.h"
 #include "pagespeed/kernel/html/html_element.h"
 #include "pagespeed/kernel/html/html_event.h"
 #include "pagespeed/kernel/html/html_filter.h"
 #include "pagespeed/kernel/html/html_name.h"
 #include "pagespeed/kernel/html/html_node.h"
 #include "pagespeed/kernel/html/html_parse.h"
 #include "pagespeed/kernel/html/html_parse_test_base.h"
 #include "pagespeed/kernel/html/html_testing_peer.h"
 #include "pagespeed/kernel/html/html_writer_filter.h"

 using testing::UnorderedElementsAre;

 namespace net_instaweb {

 class HtmlParseTest : public HtmlParseTestBase {
  protected:
   // Returns the contents wrapped in a Div.
   GoogleString Div(const StringPiece& text) {
     return StrCat("<div>", text, "</div>");
   }

   // For tag-pairs that auto-close, we expect the appearance
   // of tag2 to automatically close tag1.
   void ExpectAutoClose(const char* tag1, const char* tag2) {
     GoogleString test_case = StrCat("auto_close_", tag1, "_", tag2);
     ValidateExpected(
         test_case,
         Div(StrCat("<", tag1, ">x<", tag2, ">y")),
         Div(StrCat("<", tag1, ">x</", tag1, "><",
                    StrCat(tag2, ">y</", tag2, ">"))));
   }

   // For 2 tags that do not have a specified auto-close relationship,
   // we expect the appearance of tag2 to nest inside tag1.
   void ExpectNoAutoClose(const char* tag1, const char* tag2) {
     GoogleString test_case = StrCat("no_auto_close_", tag1, "_", tag2);
     ValidateExpected(
         test_case,
         Div(StrCat("<", tag1, ">x<", tag2, ">y")),
         Div(StrCat("<", tag1, ">x<", tag2, ">y</",
                    StrCat(tag2, "></", tag1, ">"))));
   }

   virtual bool AddBody() const { return true; }

   // Sends the input through the HtmlParse filter chain, flushing
   // at flush_index.  Leaves resulting output in output_buffer_.
   void ParseWithFlush(StringPiece input, int flush_index) {
     GoogleString this_id = StringPrintf("http://test.com/%d", flush_index);
     output_buffer_.clear();
     html_parse_.StartParse(this_id);
     html_parse_.ParseText(input.substr(0, flush_index));
     html_parse_.Flush();
     html_parse_.ParseText(input.substr(flush_index));
     html_parse_.FinishParse();
   }
 };

 // Fixture variant whose AddBody() returns false.
 class HtmlParseTestNoBody : public HtmlParseTestBase {
  private:
   virtual bool AddBody() const { return false; }
 };

 // Script text containing "<!--" inside a JavaScript line comment is
 // expected to pass through the parser unchanged.
 TEST_F(HtmlParseTest, AvoidFalseXmlComment) {
   static const char kScript[] =
       "<script type=\"text/javascript\">\n"
       "// <!-- this looks like a comment but is not\n"
       "</script>";
   ValidateNoChanges("avoid_false_xml_comment", kScript);
 }

 // An end tag that appears inside a JavaScript string literal within a
 // script element is expected to be retained as-is.
 TEST_F(HtmlParseTest, RetainBogusEndTag) {
   static const char kScript[] =
       "<script language=\"JavaScript\" type=\"text/javascript\">\n"
       "<!--\n"
       "var s = \"</retain_bogus_end_tag>\";\n"
       "// -->\n"
       "</script>";
   ValidateNoChanges("bogus_end_tag", kScript);
 }

 TEST_F(HtmlParseTest, AmpersandInHref) {
   // An href with raw "&" separators between query parameters.
   // NOTE(review): an earlier comment here said the "&" would be escaped,
   // but the test expects the markup to come through unchanged -- confirm.
   static const char kLink[] =
       "<a href=\"http://myhost.com/path?arg1=val1&arg2=val2\">Hello</a>";
   ValidateNoChanges("ampersand_in_href", kLink);
 }

 TEST_F(HtmlParseTest, BooleanSpaceCloseInTag) {
   // A space between the last attribute and the closing '>' is dropped,
   // whether the attribute is valueless, single-quoted, double-quoted or
   // unquoted.
   static const char kValueless[] = "<a b>foo</a>";
   static const char kSingleQuoted[] = "<a b='c'>foo</a>";
   static const char kDoubleQuoted[] = "<a b=\"c\">foo</a>";
   static const char kUnquoted[] = "<a b=c>foo</a>";

   ValidateExpected("bool_space_close", "<a b >foo</a>", kValueless);
   ValidateNoChanges("bool_close", kValueless);
   ValidateExpected("space_close_sq", "<a b='c' >foo</a>", kSingleQuoted);
   ValidateExpected("space_close_dq", "<a b=\"c\" >foo</a>", kDoubleQuoted);
   ValidateExpected("space_close_nq", "<a b=c >foo</a>", kUnquoted);

   // Distilled from http://www.gougou.com/
   // It is unclear exactly what should happen here; perhaps the input should
   // be left as it was, minus the space?
   static const char kSemicolonIn[] = "<a onclick='return m(this)'; >foo</a>";
   static const char kSemicolonOut[] = "<a onclick='return m(this)' ;>foo</a>";
   ValidateExpected("allow_semicolon", kSemicolonIn, kSemicolonOut);
 }

 TEST_F(HtmlParseTest, EmbeddedNuls) {
   // The string lengths come from STATIC_STRLEN, which does not stop at the
   // embedded NUL, so the bytes after it are part of the parsed input.
   const char kNulInAttrName[] = "<script att\0r></script>";
   const GoogleString nul_in_attr_name(kNulInAttrName,
                                       STATIC_STRLEN(kNulInAttrName));
   ValidateNoChanges("inner_mess", nul_in_attr_name);

   const char kNulAfterTagName[] = "<script\0y></script>";
   const GoogleString nul_after_tag_name(kNulAfterTagName,
                                         STATIC_STRLEN(kNulAfterTagName));
   ValidateNoChanges("inner_mess2", nul_after_tag_name);
 }

 // Test filter that concatenates the decoded values of every attribute of
 // every start element it sees into a single string.
 class AttrValuesSaverFilter : public EmptyHtmlFilter {
  public:
   AttrValuesSaverFilter() { }

   // For each attribute of 'element' appends, in order:
   //   "<ERROR>"  if the attribute reports a decoding error,
   //   "(null)"   if it has no decoded value,
   //   the decoded value otherwise.
   virtual void StartElement(HtmlElement* element) {
     const HtmlElement::AttributeList& attrs = element->attributes();
     for (HtmlElement::AttributeConstIterator i(attrs.begin());
          i != attrs.end(); ++i) {
       const char* value = i->DecodedValueOrNull();
       if (i->decoding_error()) {
         value_ += "<ERROR>";
       } else if (value == NULL) {
         value_ += "(null)";
       } else {
         value_ += value;
       }
     }
   }

   // Everything accumulated so far.  Read-only accessor, hence const.
   const GoogleString& value() const { return value_; }
   virtual const char* Name() const { return "attr_saver"; }

  private:
   GoogleString value_;

   DISALLOW_COPY_AND_ASSIGN(AttrValuesSaverFilter);
 };

 // The numeric entity &#39; inside a single-quoted attribute value decodes
 // to a literal single quote.
 TEST_F(HtmlParseTest, EscapedSingleQuote) {
   static const char kHtml[] = "<img src='my&#39;single_quoted_image.jpg'/>";
   AttrValuesSaverFilter saver;
   html_parse_.AddFilter(&saver);
   Parse("escaped_single_quote", kHtml);
   EXPECT_EQ("my'single_quoted_image.jpg", saver.value());
 }

 // An attribute value containing a non-ASCII character is expected to be
 // reported as a decoding error.
 TEST_F(HtmlParseTest, AttrDecodeError) {
   static const char kHtml[] = "<img src='muñecos'/>";
   AttrValuesSaverFilter saver;
   html_parse_.AddFilter(&saver);
   Parse("attr_not_decodable", kHtml);
   EXPECT_EQ("<ERROR>", saver.value());
 }

 TEST_F(HtmlParseTest, UnclosedQuote) {
   // The href's opening quote is never closed in the input, so the 'a' tag
   // is not really closed either; the system closes it automatically.  The
   // exact syntax of the expected results is not critical, as long as the
   // parser recovers and does not crash.
   //
   // TODO(jmarantz): test error reporting.
   static const char kHtml[] =
       "<div>\n"
       "  <a href=\"http://myhost.com/path?arg1=val1&arg2=val2>Hello</a>\n"
       "</div>\n"
       "<p>next token</p>"
       "</body></html>\n"
       "\"></a></div>";
   ValidateNoChanges("unclosed_quote", kHtml);
 }

 // A <div> placed between <br> and </br> is expected to round-trip unchanged.
 TEST_F(HtmlParseTest, NestedDivInBr) {
   static const char kHtml[] = "<br><div>hello</div></br>";
   ValidateNoChanges("nested_div_in_br", kHtml);
 }

 // bug 2465145 - Sequential defaulted attribute tags lost
 TEST_F(HtmlParseTest, SequentialDefaultedTagsLost) {
   // This cannot work with libxml (see
   // https://bugzilla.gnome.org/show_bug.cgi?id=611655), but since we use
   // our own parser we can make it work.
   static const char kSelect[] =
       "<select>\n"
       "  <option value=\"&amp;cat=244\">Other option</option>\n"
       "  <option value selected style=\"color: #ccc;\">Default option"
       "</option>\n"
       "</select>";
   ValidateNoChanges("sequential_defaulted_attribute_tags_lost", kSelect);

   // The illegal attribute "http://www.yahoo.com" is, per HTML5, two
   // attributes: "http:" and "www.yahoo.com", with the slashes going into
   // the ether.  (This is also how Chrome and Firefox parse it.)
   static const char kYahooIn[] =
       "<a href=\"#\" http://www.yahoo.com class=\"a b\">yahoo</a>";
   static const char kYahooOut[] =
       "<a href=\"#\" http: www.yahoo.com class=\"a b\">yahoo</a>";
   ValidateExpected("yahoo", kYahooIn, kYahooOut);

   // Another interesting thing from the bug testcase: a literal "&" that is
   // not followed by a recognized sequence.  The original note says libxml2
   // parses this correctly and that the writer re-encodes it as &amp;; this
   // check makes sure the markup does not change.
   static const char kAmpCat[] = "<option value=\"&cat=244\">other</option>";
   ValidateNoChanges("amp_cat", kAmpCat);
 }

 // bug 2465201 : some html constructs do not need ';' termination.
 // Fixed by providing own lexer.
 TEST_F(HtmlParseTest, UnterminatedTokens) {
   // "&nbsp" appears here without its terminating semicolon.
   // NOTE(review): an earlier comment said the semicolon should be added in
   // the output, yet the test expects the markup to be unchanged -- confirm
   // which is intended.
   static const char kHtml[] =
       "<p>Look at the non breaking space: \"&nbsp\"</p>";
   ValidateNoChanges("unterminated_tokens", kHtml);
 }

 // bug 2467040 : keep ampersands and quotes encoded
 TEST_F(HtmlParseTest, EncodeAmpersandsAndQuotes) {
   // Doubly-encoded ampersand in character data.
   static const char kAmpInText[] =
       "<p>This should be a string '&amp;amp;' not a single ampersand.</p>";
   ValidateNoChanges("ampersands_in_text", kAmpInText);

   // Doubly-encoded ampersand in an attribute value.
   static const char kAmpInValue[] =
       "<img alt=\"This should be a string '&amp;amp;' "
       "not a single ampersand.\"/>";
   ValidateNoChanges("ampersands_in_values", kAmpInValue);

   // &quot; entities inside an attribute value.
   static const char kQuotes[] =
       "<p>Clicking <a href=\"javascript: alert(&quot;Alert works!&quot;);\">"
       "here</a> should pop up an alert box.</p>";
   ValidateNoChanges("quotes", kQuotes);
 }

 // bug 2508334 : encoding unicode in general
 TEST_F(HtmlParseTest, EncodeUnicode) {
   // Named and numeric character references in text should stay as written.
   static const char kHtml[] =
       "<p>Non-breaking space: '&nbsp;'</p>\n"
       "<p>Alpha: '&alpha;'</p>\n"
       "<p>Unicode #54321: '&#54321;'</p>\n";
   ValidateNoChanges("unicode_in_text", kHtml);
 }

 TEST_F(HtmlParseTest, ImplicitExplicitClose) {
   // One input element is written without "/>" and one with it.  The
   // lexer/printer preserves the input syntax, which makes it easier to diff
   // inputs and outputs.
   //
   // TODO(jmarantz): But we can have a rewrite pass that eliminates
   // the superfluous "/>".
   static const char kInputs[] =
       "<input type=\"text\" name=\"username\">"
       "<input type=\"password\" name=\"password\"/>";
   ValidateNoChanges("one_brief_one_implicit_input", kInputs);
 }

 TEST_F(HtmlParseTest, OpenBracketAfterQuote) {
   // The first tag is missing its '>', so although this looks like two input
   // elements, in practice it is parsed as one.
   static const char kHtmlIn[] =
       "<input type=\"text\" name=\"username\""
       "<input type=\"password\" name=\"password\"/>";
   // The output gains an extra space 'between' the attributes.
   static const char kHtmlOut[] =
       "<input type=\"text\" name=\"username\""
       " <input type=\"password\" name=\"password\"/>";
   ValidateExpected("open_bracket_after_quote", kHtmlIn, kHtmlOut);
 }

 TEST_F(HtmlParseTest, OpenBracketUnquoted) {
   // A '<' directly after an unquoted attribute value.  Per the original
   // note this is just a malformed attribute name, not the start of a new
   // tag.
   ValidateNoChanges("open_bracket_unquoted",
                     "<input type=\"text\" name=username"
                     "<input type=\"password\" name=\"password\"/>");
 }

 TEST_F(HtmlParseTest, OpenBracketAfterEquals) {
   // A '<' directly after an equals sign: this is actually an attribute
   // value, not the start of a new tag.
   ValidateNoChanges("open_brack_after_equals",
                     "<input type=\"text\" name="
                     "<input type=\"password\" name=\"password\"/>");
 }

 TEST_F(HtmlParseTest, OpenBracketAfterName) {
   // A '<' directly after an attribute name.
   ValidateNoChanges("open_brack_after_name",
                     "<input type=\"text\" name"
                     "<input type=\"password\" name=\"password\"/>");
 }

 class HtmlParseTestNoBodyNoHtml : public HtmlParseTestNoBody {
  protected:
   virtual bool AddHtmlTags() const { return false; }

   void CheckOutput(int start_index, int end_index,
                    const GoogleString& input,
                    const GoogleString& expected_output) {
     for (int i = start_index; i < end_index; ++i) {
       SetupWriter();
       html_parse()->set_size_limit(i);
       html_parse()->StartParse("http://test.com/in.html");
       // Flush after every character.
       for (int j = 0; j < input.size(); ++j) {
         GoogleString x;
         x.push_back(input[j]);
         html_parse()->ParseText(StringPiece(x));
         html_parse()->Flush();
       }
       html_parse()->FinishParse();
       EXPECT_EQ(expected_output, output_buffer_);
     }
   }
 };

 // Exercises set_size_limit() for limits across the length of a small
 // document, first without flushes and then with a flush after every
 // character (see CheckOutput).
 TEST_F(HtmlParseTestNoBodyNoHtml, SizeLimit) {
   // The document under test: 150 characters in total.
   static const char kInput[] =
       "<html>"  // 6 chars
       "<input type=\"text\"/>"  // 20 chars
       "<script type=\"text/javascript\">alert('123');</script>"  // 53 chars
       "<!--[if IE]>...<![endif]-->"  // 27 chars
       "<table><tr><td>blah</td></tr></table>"  // 37 chars
       "</html>";  // 7 chars

   // Expected outputs, ordered from the smallest size limits to the largest.
   static const char kOnlyHtml[] =
       "<html></html>";
   static const char kThroughInput[] =
       "<html><input type=\"text\"/></html>";
   static const char kThroughScriptTag[] =
       "<html><input type=\"text\"/>"
       "<script type=\"text/javascript\"></script>"
       "</html>";
   static const char kThroughScriptText[] =
       "<html><input type=\"text\"/>"
       "<script type=\"text/javascript\">alert('123');</script>"
       "</html>";
   static const char kThroughTableTag[] =
       "<html><input type=\"text\"/>"
       "<script type=\"text/javascript\">alert('123');</script>"
       "<!--[if IE]>...<![endif]-->"
       "<table></table>"
       "</html>";
   static const char kThroughTr[] =
       "<html><input type=\"text\"/>"
       "<script type=\"text/javascript\">alert('123');</script>"
       "<!--[if IE]>...<![endif]-->"
       "<table><tr></tr></table>"
       "</html>";
   static const char kThroughTd[] =
       "<html><input type=\"text\"/>"
       "<script type=\"text/javascript\">alert('123');</script>"
       "<!--[if IE]>...<![endif]-->"
       "<table><tr><td></td></tr></table>"
       "</html>";
   static const char kWholeDocument[] =
       "<html><input type=\"text\"/>"
       "<script type=\"text/javascript\">alert('123');</script>"
       "<!--[if IE]>...<![endif]-->"
       "<table><tr><td>blah</td></tr></table>"
       "</html>";

   // Before any limit is set the document round-trips unchanged.
   ValidateNoChanges("no_limit", kInput);

   // With no flushes, every limit below the document length leaves just
   // <html></html>.
   for (int limit = 1; limit < 150; ++limit) {
     html_parse_.set_size_limit(limit);
     ValidateExpected("break_in_input", kInput, kOnlyHtml);
   }

   // Now test with flushes injected; each call covers the half-open range
   // of limits that produce the given output.
   CheckOutput(1, 6, kInput, kOnlyHtml);
   CheckOutput(6, 26, kInput, kThroughInput);
   CheckOutput(26, 57, kInput, kThroughScriptTag);
   CheckOutput(57, 79, kInput, kThroughScriptText);
   CheckOutput(79, 113, kInput, kThroughTableTag);
   CheckOutput(113, 117, kInput, kThroughTr);
   CheckOutput(117, 121, kInput, kThroughTd);
   CheckOutput(121, 160, kInput, kWholeDocument);
 }

 TEST_F(HtmlParseTest, OpenBracketAfterSpace) {
   // A '<' after the space that follows a quoted attribute value; the first
   // tag is never closed with '>'.  The markup is expected to come through
   // unchanged.
   //
   // The case id used to be "open_brack_after_name", copied from
   // OpenBracketAfterName; it now names this case so the two can be told
   // apart.
   const char input[] =
       "<input type=\"text\" "
       "<input type=\"password\" name=\"password\"/>";
   ValidateNoChanges("open_brack_after_space", input);
 }

 // Runs the auto-close expectations for a table of (open tag, following
 // tag) pairs, with an ExplicitCloseTag filter installed.
 TEST_F(HtmlParseTest, AutoClose) {
   ExplicitCloseTag close_tags;
   html_parse_.AddFilter(&close_tags);

   struct AutoCloseCase {
     const char* tag1;   // Tag that is open.
     const char* tag2;   // Tag that appears next.
     bool auto_closes;   // Whether tag2 is expected to close tag1.
   };

   static const AutoCloseCase kCases[] = {
     // Cover the simple cases.  E.g. dd is closed by dd, but not by tr.
     { "dd", "tr", false },
     { "dd", "dd", true },

     { "dt", "dd", true },
     { "dt", "dt", true },
     { "dt", "rp", false },

     { "li", "li", true },
     { "li", "dt", false },

     { "optgroup", "optgroup", true },
     { "optgroup", "rp", false },

     { "option", "optgroup", true },
     { "option", "option", true },
     { "option", "rp", false },

     // <p> has an outrageous number of tags that auto-close it.
     { "p", "tr", false },     // tr is not listed in the auto-closers for p.
     { "p", "address", true },  // first closer of 28.
     { "p", "h2", true },       // middle closer of 28.
     { "p", "ul", true },       // last closer of 28.

     // Cover the remainder of the cases.
     { "rp", "rt", true },
     { "rp", "rp", true },
     { "rp", "dd", false },

     { "rt", "rt", true },
     { "rt", "rp", true },
     { "rt", "dd", false },

     { "tbody", "tbody", true },
     { "tbody", "tfoot", true },
     { "tbody", "dd", false },

     { "td", "td", true },
     { "td", "th", true },
     { "td", "rt", false },

     { "tfoot", "tbody", true },
     { "tfoot", "dd", false },

     { "th", "td", true },
     { "th", "th", true },
     { "th", "rt", false },

     { "thead", "tbody", true },
     { "thead", "tfoot", true },
     { "thead", "dd", false },

     { "tr", "tr", true },
     { "tr", "td", false },
   };

   const size_t num_cases = sizeof(kCases) / sizeof(kCases[0]);
   for (size_t c = 0; c < num_cases; ++c) {
     const AutoCloseCase& test_case = kCases[c];
     if (test_case.auto_closes) {
       ExpectAutoClose(test_case.tag1, test_case.tag2);
     } else {
       ExpectNoAutoClose(test_case.tag1, test_case.tag2);
     }
   }

   // http://www.w3.org/TR/html5/the-end.html#misnested-tags:-b-i-b-i

   // TODO(jmarantz): add more tests related to formatting keywords.
 }

 // A PHP processing instruction and an end tag broken by a newline are both
 // expected to round-trip unchanged.
 TEST_F(HtmlParseTest, BogusComment) {
   static const char kPhp[] =
       "<?php include('includes/_pagebottom.tpl.php'); ?>";
   ValidateNoChanges("what_php", kPhp);

   static const char kBrokenEndTag[] = "</\na>";
   ValidateNoChanges("bad break", kBrokenEndTag);
 }

 namespace {

 class AnnotatingHtmlFilter : public EmptyHtmlFilter {
  public:
   AnnotatingHtmlFilter() : annotate_flush_(false) {}
   virtual ~AnnotatingHtmlFilter() {}

   virtual void StartElement(HtmlElement* element) {
     StrAppend(&buffer_, (buffer_.empty() ? "+" : " +"), element->name_str());

     bool first = true;
     const HtmlElement::AttributeList& attrs = element->attributes();
     for (HtmlElement::AttributeConstIterator i(attrs.begin());
          i != attrs.end(); ++i) {
       const HtmlElement::Attribute& attr = *i;
       StrAppend(&buffer_, (first ? ":" : ","), attr.name_str());
       const char* value = attr.DecodedValueOrNull();
       if (attr.decoding_error()) {
         StrAppend(&buffer_, "=<ERROR>");
       } else if (value != NULL) {
         StrAppend(&buffer_, "=", attr.quote_str(), value, attr.quote_str());
       }
       first = false;
     }
   }
   virtual void EndElement(HtmlElement* element) {
     StrAppend(&buffer_, " -", element->name_str());
     switch (element->style()) {
       case HtmlElement::AUTO_CLOSE:      buffer_ += "(a)"; break;
       case HtmlElement::IMPLICIT_CLOSE:  buffer_ += "(i)"; break;
       case HtmlElement::EXPLICIT_CLOSE:  buffer_ += "(e)"; break;
       case HtmlElement::BRIEF_CLOSE:     buffer_ += "(b)"; break;
       case HtmlElement::UNCLOSED:        buffer_ += "(u)"; break;
       case HtmlElement::INVISIBLE:       buffer_ += "(I)"; break;
     }
   }
   virtual void Characters(HtmlCharactersNode* characters) {
     StrAppend(&buffer_, (buffer_.empty() ? "'" : " '"), characters->contents(),
               "'");
   }

   virtual const char* Name() const { return "AnnotatingHtmlFilter"; }

   const GoogleString& buffer() const { return buffer_; }
   void Clear() { buffer_.clear(); }

   virtual void Flush() {
     if (annotate_flush_) {
       buffer_ += "[F]";
     }
   }

   void set_annotate_flush(bool x) { annotate_flush_ = x; }

  private:
   bool annotate_flush_;
   GoogleString buffer_;
 };

 }  // namespace

 // Fixture that installs an AnnotatingHtmlFilter on the parser in SetUp()
 // so that tests can compare the recorded annotation against an expected
 // string.  AddHtmlTags() returns false for this fixture.
 class HtmlAnnotationTest : public HtmlParseTestNoBody {
  protected:
   virtual bool AddHtmlTags() const { return false; }

   virtual void SetUp() {
     HtmlParseTestNoBody::SetUp();
     html_parse_.AddFilter(&annotation_);
   }

   // Clears the recorded annotation, e.g. between cases within one test.
   void ResetAnnotation() { annotation_.Clear(); }

   // The annotation recorded so far.
   const GoogleString& annotation() { return annotation_.buffer(); }

   AnnotatingHtmlFilter annotation_;
 };

 TEST_F(HtmlAnnotationTest, CorrectTaggify) {
   // Under HTML5 rules (and recent Chrome and FF practice), something like
   // <foo</bar> makes an element named foo<, with an attribute named bar.
   // (See 12.2.4.10 Tag name state).
   //
   // However, we must be careful not to turn just anything following '<'
   // into an element name, since sometimes a '<' is simply meant to be a
   // less-than sign.

   // '<' followed by a digit: stays text.
   static const char kDigit[] = "<p>1<2</p>";
   ValidateNoChanges("no_taggify_digit", kDigit);
   EXPECT_EQ("+p '1<2' -p(e)", annotation());
   ResetAnnotation();

   // '<' followed by a non-ASCII character: stays text.
   static const char kUnicode[] = "<p>☃<☕</p>";
   ValidateNoChanges("no_taggify_unicode", kUnicode);
   EXPECT_EQ("+p '☃<☕' -p(e)", annotation());
   ResetAnnotation();

   // '<' followed by a letter: becomes an element.  The '/' is lost since
   // 'p' is an attribute.
   static const char kLetterIn[] = "<p>x<y</p>";
   static const char kLetterOut[] = "<p>x<y< p>";
   ValidateExpected("letter", kLetterIn, kLetterOut);
   EXPECT_EQ("+p 'x' +y<:p -y<(u) -p(u)", annotation());
   ResetAnnotation();

   static const char kLetterDigitIn[] = "<p>x1<y2</p>";
   static const char kLetterDigitOut[] = "<p>x1<y2< p>";
   ValidateExpected("taggify_letter+digit", kLetterDigitIn, kLetterDigitOut);
   EXPECT_EQ("+p 'x1' +y2<:p -y2<(u) -p(u)", annotation());
   ResetAnnotation();

   // No '/' in the output since p is an attribute on a y☕< element.
   static const char kLetterUnicodeIn[] = "<p>x☃<y☕</p>";
   static const char kLetterUnicodeOut[] = "<p>x☃<y☕< p>";
   ValidateExpected("taggify_letter+unicode", kLetterUnicodeIn,
                    kLetterUnicodeOut);
   EXPECT_EQ("+p 'x☃' +y☕<:p -y☕<(u) -p(u)", annotation());
   ResetAnnotation();

   static const char kDigitLetter[] = "<p>1x<2y</p>";
   ValidateNoChanges("no_taggify_digit+letter", kDigitLetter);
   EXPECT_EQ("+p '1x<2y' -p(e)", annotation());
   ResetAnnotation();

   static const char kUnicodeLetter[] = "<p>☃x<☕y</p>";
   ValidateNoChanges("no_taggify_unicode+letter", kUnicodeLetter);
   EXPECT_EQ("+p '☃x<☕y' -p(e)", annotation());
   ResetAnnotation();

   // Found on http://www.taobao.com/
   // Don't turn <1... -> <1...>
   static const char kTaobao[] = "<a>1+1<1母婴全场加1元超值购</a>";
   ValidateNoChanges("taobao", kTaobao);
   EXPECT_EQ("+a '1+1<1母婴全场加1元超值购' -a(e)", annotation());
   ResetAnnotation();
 }

 TEST_F(HtmlAnnotationTest, WeirdAttributes) {
   // Just about everything can be an attribute name, e.g. a comma ...
   static const char kCommaAttr[] = "<a ,=\"foo\">";
   ValidateNoChanges("weird_attr", kCommaAttr);
   EXPECT_EQ("+a:,=\"foo\" -a(u)", annotation());
   ResetAnnotation();

   // ... and even an equal sign.
   static const char kEqualsAttr[] = "<a ==\"foo\">";
   ValidateNoChanges("weird_attr_equal", kEqualsAttr);
   EXPECT_EQ("+a:==\"foo\" -a(u)", annotation());
   ResetAnnotation();
 }

 TEST_F(HtmlAnnotationTest, WeirdCloseCase) {
   // </> is nothing useful, but it is preserved as a literal.
   static const char kCloseNothing[] = "</><foo>";
   ValidateNoChanges("close_nothing", kCloseNothing);
   EXPECT_EQ("'</>' +foo -foo(u)", annotation());
   ResetAnnotation();

   // <foo / > is not an attempt at self-close; it just has a stray '/'
   // that we cannot represent.
   static const char kStraySlash[] = "<foo / >";
   ValidateExpected("not_self_close", kStraySlash, "<foo>");
   EXPECT_EQ("+foo -foo(u)", annotation());
   ResetAnnotation();

   // <foo /> is a self-close.
   static const char kSelfClose[] = "<foo />";
   ValidateExpected("self_close", kSelfClose, "<foo/>");
   EXPECT_EQ("+foo -foo(b)", annotation());
   ResetAnnotation();
 }

 TEST_F(HtmlAnnotationTest, UnbalancedMarkup) {
   // The second 'tr' closes the first one.
   // NOTE(review): an earlier comment said the writer does not implicitly
   // close 'tr' and that the markup is therefore changed to add the missing
   // tr, but the test expects the markup to be unchanged -- confirm.
   static const char kHtml[] = "<font><tr><i><font></i></font><tr></font>";
   ValidateNoChanges("unbalanced_markup", kHtml);

   // The annotation format is (hopefully) self-explanatory and shows what
   // is going on in the parse.
   static const char kExpectedAnnotation[] =
       "+font -font(a) +tr +i +font -font(u) -i(e) '</font>' -tr(a) +tr "
       "'</font>' -tr(u)";
   EXPECT_EQ(kExpectedAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, StrayCloseTr) {
   // The first </tr> appears while the inner table is open and is reported
   // as literal characters, as the annotation below shows.
   static const char kHtml[] = "<table><tr><table></tr></table></tr></table>";
   ValidateNoChanges("stray_tr", kHtml);

   static const char kExpectedAnnotation[] =
       "+table +tr +table '</tr>' -table(e) -tr(e) -table(e)";
   EXPECT_EQ(kExpectedAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, IClosedByOpenTr) {
   static const char kHtml[] = "<tr><i>a<tr>b";
   static const char kExpectedAnnotation[] =
       "+tr +i 'a' -i(a) -tr(a) +tr 'b' -tr(u)";
   ValidateNoChanges("unclosed_i_tag", kHtml);
   EXPECT_EQ(kExpectedAnnotation, annotation());

   // TODO(jmarantz): morlovich points out that this is nowhere near
   // how a browser will handle this stuff... For a nightmarish testcase, try:
   //     data:text/html,<table><tr><td><i>a<tr>b
   //
   // The 'a' gets rendered in italics *after* the b.
   //
   // See also:
   // http://www.whatwg.org/specs/web-apps/current-work/multipage/
   // the-end.html#unexpected-markup-in-tables
   //
   // But note that these 2 are the same and do what I expect:
   //
   // data:text/html,<table><tr><td><i>a</td></tr></table>b
   // data:text/html,<table><tr><td><i>a</table>b
   //
   // The 'a' is italicized but the 'b' is not.  If the 'td' is omitted then
   // the 'b' gets italicized.  This implies, I suppose, that 'i' is closed
   // by td but is not closed by tr or table.  And it is indeed closed by the
   // *implicit* closing of td.

   // http://www.w3.org/TR/html5/the-end.html#misnested-tags:-b-i-b-i
 }

 // An open <i> stays open across a table whose <tr> is explicitly closed.
 TEST_F(HtmlAnnotationTest, INotClosedByOpenTableExplicit) {
   static const char kHtml[] = "<i>a<table><tr></tr></table>b";
   static const char kExpectedAnnotation[] =
       "+i 'a' +table +tr -tr(e) -table(e) 'b' -i(u)";
   ValidateNoChanges("explicit_close_tr", kHtml);
   EXPECT_EQ(kExpectedAnnotation, annotation());
 }

 // An open <i> stays open across a table whose <tr> is left unclosed.
 TEST_F(HtmlAnnotationTest, INotClosedByOpenTableImplicit) {
   static const char kHtml[] = "<i>a<table><tr></table>b";
   static const char kExpectedAnnotation[] =
       "+i 'a' +table +tr -tr(u) -table(e) 'b' -i(u)";
   ValidateNoChanges("implicit_close_tr", kHtml);
   EXPECT_EQ(kExpectedAnnotation, annotation());
 }

 // A stray </b> inside an open <a> is reported as literal characters; the
 // <a> and <li> end up unclosed.
 TEST_F(HtmlAnnotationTest, AClosedByBInLi) {
   static const char kHtml[] = "<li><a href='x'></b>";
   static const char kExpectedAnnotation[] =
       "+li +a:href='x' '</b>' -a(u) -li(u)";
   ValidateNoChanges("a_closed_by_b", kHtml);
   EXPECT_EQ(kExpectedAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, BClosedByTd) {
   static const char kHtml[] = "<table><tr><td><b>1</table></b>";
   ValidateNoChanges("b_closed_by_td", kHtml);

   // As the annotation below shows, "</table>" ends the open b, td and tr
   // (each reported as unclosed) before closing the table itself.  The
   // actual "</b>" that follows doesn't close any open tag, so it's rendered
   // as literal characters.
   //
   // TODO(jmarantz): consider adding a new event-type to represent bogus
   // tags rather than using Characters.
   static const char kExpectedAnnotation[] =
       "+table +tr +td +b '1' -b(u) -td(u) -tr(u) -table(e) '</b>'";
   EXPECT_EQ(kExpectedAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, BNotClosedByTable) {
   // The case id used to be "a_closed_by_b", copied from AClosedByBInLi; it
   // now names this case so the two can be told apart.
   ValidateNoChanges(
       "b_not_closed_by_table",
       "<table><tbody><tr><b><td>hello</tr></tbody></table>World</b>");

   // We do not create the same annotation Chrome does in this case.  Opening up
   // the inspector on
   // data:text/html,<table><tbody><tr><b><td>hello</tr></tbody></table>World</b>
   // shows us (ignoring html, head, and body tags for brevity):
   //      <b></b>
   //      <table>
   //        <tbody>
   //          <tr>
   //            <td>hello</td>
   //          </tr>
   //        </tbody>
   //      </table>
   //      <b>World</b>
   // For us to replicate this structure, we'd have to move the 'b' tag ahead of
   // the <table> opening tag.  To do this we would need to buffer tables until
   // they reached the end-table tag.  This does not appear to be a good
   // tradeoff as tables might be large and buffering them would impact
   // the UX for all sites, as a defense against bad markup and filters that
   // care deeply about the structure of formatting elements in illegal DOM
   // positions.
   //
   // But note that this malformed markup will in fact pass through
   // parsing & serialization with byte accuracy.
 }

 // A </tr> with no matching open <tr> inside a tbody is reported as literal
 // characters.
 TEST_F(HtmlAnnotationTest, StrayCloseTrInTable) {
   static const char kHtml[] =
       "<div><table><tbody><td>1</td></tr></tbody></table></div>";
   static const char kExpectedAnnotation[] =
       "+div +table +tbody +td '1' -td(e) '</tr>' -tbody(e) -table(e) "
       "-div(e)";
   ValidateNoChanges("stray_close_tr", kHtml);
   EXPECT_EQ(kExpectedAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, StrayCloseTrInTableWithUnclosedTd) {
   static const char kHtml[] = "<tr><table><td>1</tr></table>";
   static const char kExpectedAnnotation[] =
       "+tr +table +td '1</tr>' -td(u) -table(e) -tr(u)";
   ValidateNoChanges("stray_close_tr_unclosed_td", kHtml);
   EXPECT_EQ(kExpectedAnnotation, annotation());
   // TODO(jmarantz): the above is not quite DOM-accurate.  A 'tr' will
   // actually be synthesized around the <td>.  To solve this and maintain
   // byte accuracy we must synthesize an HtmlElement whose opening tag is
   // invisible, and create a map that requires <td> elements to be enclosed
   // in <tr> etc.  See, in Chrome,
   // data:text/html,<tr><table><td>1</tr></table>
 }

 TEST_F(HtmlAnnotationTest, OverlappingStyleTags) {
   static const char kHtml[] = "n<b>b<i>bi</b>i</i>n";
   ValidateNoChanges("overlapping_style_tags", kHtml);

   // TODO(jmarantz): The behavior of this sequence is well-specified, but
   // is not currently implemented by PSA.  We should have
   // EXPECT_EQ("'n' +b 'b' +i 'bi' -i(u) -b(e) +i* 'i' -i(e) 'n'",
   //           annotation());
   // Note that we will need to render a synthetic <i> that shows up in our
   // DOM tree but does not get serialized.  We have no current
   // representation for that, but we could easily add a bool to HtmlElement
   // to suppress the serialization of the open tag.  Above, that is
   // represented by "+i*".
   //
   // What we actually get is this, which does not have the 'i' in italics.
   static const char kExpectedAnnotation[] =
       "'n' +b 'b' +i 'bi' -i(u) -b(e) 'i</i>n'";
   EXPECT_EQ(kExpectedAnnotation, annotation());

   // There is no real drawback to implementing this; but at the moment
   // no filters are likely to care.
 }

 TEST_F(HtmlAnnotationTest, AClosedByP) {
   static const char kHtml[] = "<P>This is a <A>link<P>More";
   ValidateNoChanges("a_closed_by_p", kHtml);

   // According to Chrome("data:text/html,<P>This is a <A>link<P>More") the
   // structure should be something like this:
   //     "+p 'This is a' +a link -a -p +p +a more -a -p"
   // That is, a and p overlap in a fashion similar to bold and italic.
   //
   // But we actually produce this markup:
   static const char kExpectedAnnotation[] =
       "+P 'This is a ' +A 'link' +P 'More' -P(u) -A(u) -P(u)";
   EXPECT_EQ(kExpectedAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, PFont) {
   static const char kHtml[] = "<P><FONT>a<P>b</FONT>";
   ValidateNoChanges("p_font", kHtml);

   // TODO(jmarantz): The second <P> should force the close of the first
   // one, despite the intervening <font>.  In other words we need to keep
   // track of which formatting elements are active: <p> does not nest but I
   // suppose <font> likely does.
   //
   // Chrome("data:text/html,<P><FONT>a<P>b</FONT>") yields
   // "<p><font>a</font></p><p><font>b</font></p>"
   // NOTE(review): the line above was garbled in the earlier comment and
   // has been reconstructed -- confirm against Chrome.
   static const char kExpectedAnnotation[] =
       "+P +FONT 'a' +P 'b' -P(u) -FONT(e) -P(u)";
   EXPECT_EQ(kExpectedAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, HtmlTbodyCol) {
   // The spaces before the tag names are invalid.  Chrome parses these as
   // literals, and our behavior is consistent with that: the whole input is
   // reported as a single run of characters.
   static const char kHtml[] = "< HTML> < TBODY> < COL SPAN=999999999>";
   ValidateNoChanges("html_tbody_col", kHtml);
   EXPECT_EQ("'< HTML> < TBODY> < COL SPAN=999999999>'", annotation());
 }

 TEST_F(HtmlAnnotationTest, WeirdAttrQuotes) {
   // Attribute values whose closing quote is immediately followed by more
   // text.
   static const char kHtmlIn[] =
       "<DIV STYLE=\"top:214px; left:139px;\""
       "position:absolute; font-size:26px;\">"
       "<NOBR><SPAN STYLE=\"font-family:\"Wingdings 2\";\">"
       "</SPAN></NOBR></DIV>";

   // Note that in the expected results a space was inserted before
   // 'position:absolute' and before 'Wingdings'.  I think this is correct.
   //
   // TODO(jmarantz): check in Chrome.
   static const char kHtmlOut[] =
       "<DIV STYLE=\"top:214px; left:139px;\" "
       "position:absolute; font-size:26px;\">"
       "<NOBR><SPAN STYLE=\"font-family:\" Wingdings 2\";\">"
       "</SPAN></NOBR></DIV>";

   static const char kExpectedAnnotation[] =
       "+DIV:STYLE=\"top:214px; left:139px;\",position:absolute;,"
       "font-size:26px;\" +NOBR "
       "+SPAN:STYLE=\"font-family:\",Wingdings,2\";\" "
       "-SPAN(e) -NOBR(e) -DIV(e)";

   ValidateExpected("weird_attr_quotes", kHtmlIn, kHtmlOut);
   EXPECT_EQ(kExpectedAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, Misc) {
   // A list of tricky inputs.  Only the quote-balance case, a variant of
   // item 7 with a single '=', is exercised in this test.
   //
   // 1. This is <B>bold, <I>bold italic, </b>italic, </i>normal text
   // 2. <P>This is a <A>link<P>More
   // 3. <P><FONT>a<P>b</FONT>
   // 7. <img title=="><script>alert('foo')</script>">
   // 8. < HTML> < TBODY> < COL SPAN=999999999>
   // 9. <DIV STYLE="top:214px; left:139px; position:absolute; font-size:26px;">
   //    <NOBR><SPAN STYLE="font-family:"Wingdings 2";"></SPAN></NOBR></DIV>
   // 10. <a href="http://www.cnn.com/"' title="cnn.com">cnn</a>
   // 11. do <![if !supportLists]>not<![endif]> lose this text
   // 12. <table><tr><td>row1<tr><td>row2</td>
   // 13. <table><tr><td>foo<td>bar<tr><td>baz<td>boo</table>
   // 14. <p>The quick <strong>brown fox</strong></p>\njumped over the\n
   //     <p>lazy</strong> dog.</p>
   // 15. <p> paragraph <h1> heading </h1>
   // 16. <a href="h">1<a>2</a></a>
   static const char kHtml[] =
       "<img title=\"><script>alert('foo')"
       "</script>\">";
   static const char kExpectedAnnotation[] =
       "+img:title=\"><script>alert('foo')</script>\" -img(i)";
   ValidateNoChanges("quote_balance", kHtml);
   EXPECT_EQ(kExpectedAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, DoubleEquals) {
   // With 'title==' the attribute value is not a quoted string: the second
   // '=' begins the value, which is terminated by the first '>'.  The
   // <script> is therefore a real element rather than quoted text, and the
   // closing quote plus '>' come through as stray characters.  Output is
   // byte accurate.  The original author hand-confirmed on 12/13/2011 that
   // this is consistent with Chrome by typing
   //      data:text/html,<img title=="><script>alert('foo')</script>">
   // into the URL bar: the "alert" popped up, which agrees with the
   // annotation checked below.
   static const char kMarkup[] =
       "<img title==\"><script>alert('foo')</script>\">";
   static const char kAnnotation[] =
       "+img:title==\" -img(i) +script 'alert('foo')' -script(e) '\">'";
   ValidateNoChanges("double_equals", kMarkup);
   EXPECT_EQ(kAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, AttrEqStartWithSlash) {
   // The "/>" does *not* act as a brief end of 'body' here; the slash is the
   // attribute value.  Verified with chrome using
   // data:text/html,<body title=/>hello</body>
   static const char kMarkup[] = "<body title=/>1</body>";
   static const char kAnnotation[] = "+body:title=/ '1' -body(e)";
   ValidateNoChanges("attr_eq_starts_with_slash", kMarkup);
   EXPECT_EQ(kAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, AttrEqEndsWithSlash) {
   // As in the starts-with-slash case, the "/>" does *not* briefly end the
   // 'body'; the trailing slash stays part of the attribute value.  Verified
   // with chrome using
   // data:text/html,<body title=x/>hello</body>
   static const char kMarkup[] = "<body title=x/></body>";
   static const char kAnnotation[] = "+body:title=x/ -body(e)";
   ValidateNoChanges("attr_eq_ends_with_slash", kMarkup);
   EXPECT_EQ(kAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, TableForm) {
   // A <form> opened inside a <table> and closed after it.  Per the expected
   // annotation, the form ends (as unclosed) just before the table does, and
   // the later "</form>" is reported as plain characters.
   static const char kMarkup[] = "<table><form><input></table><input></form>";
   static const char kAnnotation[] =
       "+table +form +input -input(i) -form(u) -table(e)"
       " +input -input(i) '</form>'";
   ValidateNoChanges("table_form", kMarkup);
   EXPECT_EQ(kAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, ComplexQuotedAttribute) {
   // A backslash before the closing single quote does not escape it: per the
   // expected annotation, x's value is just the backslash, the <img> that
   // follows is a real element, and the trailing "'>" is character data.
   static const char kMarkup[] =
       "<div x='\\'><img onload=alert(42)"
       "src=http://json.org/img/json160.gif>'></div>";
   static const char kAnnotation[] =
       "+div:x='\\' "
       "+img:onload=alert(42)src=http://json.org/img/json160.gif "
       "-img(i) ''>' -div(e)";
   ValidateNoChanges("complex_quoted_attr", kMarkup);
   EXPECT_EQ(kAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, DivNbsp) {
   // "<div&nbsp ..." is expected to pass through unchanged and to be
   // annotated as one characters block rather than as a div element.
   static const char kMarkup[] =
       "<div&nbsp &nbsp style=\\-\\mo\\z\\-b\\i\\nd\\in\\g:\\url("
       "//business\\i\\nfo.co.uk\\/labs\\/xbl\\/xbl\\.xml\\#xss)"
       ">";
   static const char kAnnotation[] =
       "'<div&nbsp &nbsp style=\\-\\mo\\z\\-b\\i\\nd\\in\\g:\\"
       "url(//business\\i\\nfo.co.uk\\/labs\\/xbl\\/xbl\\.xml\\#xss)>'";
   ValidateNoChanges("div_nbsp", kMarkup);
   EXPECT_EQ(kAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, ExtraQuote) {
   // A stray single quote directly follows the closing double quote of the
   // href value.  The expected output differs from the input only by a
   // space inserted between those two quotes.
   static const char kInput[] =
       "<a href=\"http://www.cnn.com/\"' title=\"cnn.com\">cnn</a>";
   static const char kOutput[] =
       "<a href=\"http://www.cnn.com/\" ' title=\"cnn.com\">cnn</a>";
   ValidateExpected("extra_quote", kInput, kOutput);
 }

 TEST_F(HtmlAnnotationTest, TrNesting) {
   // A second <tr> arrives while the first <tr><td> is still open.  Per the
   // expected annotation, the first td and tr are ended (marked "(a)")
   // before the second row starts; the second row's tags close explicitly.
   static const char kMarkup[] = "<tr><td><tr a=b><td c=d></td></tr>";
   static const char kAnnotation[] =
       "+tr +td -td(a) -tr(a) +tr:a=b +td:c=d -td(e) -tr(e)";
   ValidateNoChanges("nesting", kMarkup);
   EXPECT_EQ(kAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, AttrEndingWithOpenAngle) {
   // A '<' inside an unquoted attribute value stays part of that value
   // ("foo<bar"); the script is never closed, hence "-script(u)".
   static const char kMarkup[] = "<script src=foo<bar>Content";
   static const char kAnnotation[] = "+script:src=foo<bar 'Content' -script(u)";
   ValidateNoChanges("weird_attr", kMarkup);
   EXPECT_EQ(kAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, ScriptQuirkBasic) {
   // Four variations on where a <script> body ends when it contains "<!--"
   // and/or a nested "<script>".  Each input must round-trip unchanged.

   // 1. "<!--" followed by "<script>": the first </script> stays inside the
   //    script text; the second one ends the element.
   static const char kQuirk1[] = "<script><!--<script></script>a</script>b";
   ValidateNoChanges("script_quirk_1", kQuirk1);
   EXPECT_EQ("+script '<!--<script></script>a' -script(e) 'b'", annotation());

   // 2. "<!--" alone: the first </script> ends the element.
   static const char kQuirk2[] = "<script><!--</script>a</script>b";
   ResetAnnotation();
   ValidateNoChanges("script_quirk_2", kQuirk2);
   EXPECT_EQ("+script '<!--' -script(e) 'a</script>b'", annotation());

   // 3. Nested "<script>" without "<!--": the first </script> ends it.
   static const char kQuirk3[] = "<script><script></script>a</script>b";
   ResetAnnotation();
   ValidateNoChanges("script_quirk_3", kQuirk3);
   EXPECT_EQ("+script '<script>' -script(e) 'a</script>b'", annotation());

   // 4. "<!--<script>-->" closed by "-->": the first </script> ends it.
   static const char kQuirk4[] = "<script><!--<script>--></script>a</script>b";
   ResetAnnotation();
   ValidateNoChanges("script_quirk_4", kQuirk4);
   EXPECT_EQ("+script '<!--<script>-->' -script(e) 'a</script>b'", annotation());
 }

 TEST_F(HtmlAnnotationTest, ScriptQuirkCloseAttr) {
   // HTML5 script parsing is weird in that </script> actually gets attribute
   // parsing, so a '>' inside a quoted "attribute" of the close tag does not
   // end the tag.  In every case below the attributes are dropped on output
   // and the resulting annotation is the same.
   static const char kRewritten[] = "<script></script>Bar";
   static const char kAnnotation[] = "+script -script(e) 'Bar'";

   // One double-quoted attribute containing '>'.
   ValidateExpected("script_quirk_close",
                    "<script></script a=\"foo>\">Bar",
                    kRewritten);
   EXPECT_EQ(kAnnotation, annotation());

   // Double-quoted, single-quoted and valueless attributes together.
   ResetAnnotation();
   ValidateExpected("script_quirk_close2",
                    "<script></script a=\"foo>\" bar='>' bax>Bar",
                    kRewritten);
   EXPECT_EQ(kAnnotation, annotation());

   // Quoted attribute followed by a brief-close slash.
   ResetAnnotation();
   ValidateExpected("script_quirk_close_slash",
                    "<script></script a=\"foo>\"/>Bar",
                    kRewritten);
   EXPECT_EQ(kAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, ScriptQuirkBriefClose) {
   // HTML5 script parsing --- a closing tag written in brief-close form,
   // e.g. "</script/>".  The expected output is a plain "</script>".
   ValidateExpected("script_quirk_close_brief",
                    "<script></script/>Bar",
                    "<script></script>Bar");
   EXPECT_EQ("+script -script(e) 'Bar'", annotation());

   // Same, with a space and junk after the tag name.  This sub-case gets its
   // own id so a failure can be told apart from the one above.
   ResetAnnotation();
   ValidateExpected("script_quirk_close_brief2",
                    "<script></script /foo>Bar",
                    "<script></script>Bar");
   EXPECT_EQ("+script -script(e) 'Bar'", annotation());
 }

 // TODO(jmarantz): fix this case; we lose the stray "=".
 // TEST_F(HtmlAnnotationTest, StrayEq) {
 //   ValidateNoChanges("stray_eq", "<a href='foo.html'=>b</a>");
 //   EXPECT_EQ("+a:href=foo.html -a(e)", annotation());
 // }

 TEST_F(HtmlAnnotationTest, FlushDoesNotBreakCharacterBlock) {
   // Text arriving across several Flush() calls must be reported as one
   // characters block.  Per the expected annotation, the three intermediate
   // flushes show up right after the div and the text appears only at the
   // end, followed by the final flush.
   static const char kUrl[] = "http://test.com/blank_flush.html";
   static const char kAnnotation[] =
       "+div -div(e)[F][F][F] 'bytes::more::still more::final bytes:'[F]";

   annotation_.set_annotate_flush(true);
   html_parse_.StartParse(kUrl);

   // The div will get flushed; the text will not, until the end.
   html_parse_.ParseText("<div></div>");
   html_parse_.ParseText("bytes:");
   html_parse_.Flush();

   html_parse_.ParseText(":more:");
   html_parse_.Flush();

   html_parse_.ParseText(":still more:");
   html_parse_.Flush();

   html_parse_.ParseText(":final bytes:");
   html_parse_.FinishParse();

   EXPECT_STREQ(kAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, FlushDoesNotBreakScriptTag) {
   // Two scripts are fed in small pieces with a Flush() between every pair
   // of consecutive pieces, including a split inside the "</script>" tag.
   // Each script body must still be reported as a single characters block.
   static const char* const kPieces[] = {
     "<script>",
     "a=b;",
     "c=d;",
     "</scr",
     "ipt><script>",
     "e=f;",
     "g=h;",
   };
   const size_t num_pieces = sizeof(kPieces) / sizeof(kPieces[0]);

   annotation_.set_annotate_flush(true);
   html_parse_.StartParse("http://test.com/blank_flush.html");
   for (size_t i = 0; i < num_pieces; ++i) {
     if (i != 0) {
       html_parse_.Flush();  // Separates this piece from the previous one.
     }
     html_parse_.ParseText(kPieces[i]);
   }
   // No explicit </script> but the lexer will help us close it.
   html_parse_.FinishParse();

   static const char kAnnotation[] =
       "[F][F][F][F] +script 'a=b;c=d;' -script(e)[F][F]"
       " +script 'e=f;g=h;' -script(u)[F]";  // "(u)" for unclosed.
   EXPECT_STREQ(kAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, FlushDoesNotBreakScriptTagWithComment) {
   // Comments are inserted right after an opening <script> has been fed and
   // before the following Flush().  Per the expected output, each comment
   // is written ahead of its script rather than inside it.
   static const char kUrl[] = "http://test.com/blank_flush.html";
   static const char kAnnotation[] =
       "[F][F] +script 'a=b;' -script(e)[F] +script -script(e)[F]";
   static const char kOutput[] =
       "<!--c1--><script>a=b;</script><!--c2--><script></script>";

   SetupWriter();
   annotation_.set_annotate_flush(true);
   html_parse_.StartParse(kUrl);

   // First script, with comment "c1".
   html_parse_.ParseText("<script>");
   html_parse_.InsertComment("c1");
   html_parse_.Flush();
   html_parse_.ParseText("a=b;");
   html_parse_.Flush();

   // End of first script, start of second, with comment "c2".
   html_parse_.ParseText("</script><script>");
   html_parse_.InsertComment("c2");
   html_parse_.Flush();
   html_parse_.ParseText("</script>");
   html_parse_.FinishParse();

   EXPECT_STREQ(kAnnotation, annotation());
   EXPECT_STREQ(kOutput, output_buffer_);
 }

 TEST_F(HtmlAnnotationTest, FlushDoesNotBreakStyleTag) {
   // The <style> body is split across flushes, yet the expected annotation
   // shows the CSS text as one characters block after the three flushes.
   static const char kUrl[] = "http://test.com/blank_flush.html";
   static const char kAnnotation[] =
       "[F][F][F] +style '.blue {color: blue;}' -style(e)[F]";

   annotation_.set_annotate_flush(true);
   html_parse_.StartParse(kUrl);

   html_parse_.ParseText("<style>");
   html_parse_.Flush();

   html_parse_.ParseText(".blue {color: ");
   html_parse_.Flush();

   html_parse_.ParseText("blue;}");
   html_parse_.Flush();

   html_parse_.ParseText("</style>");
   html_parse_.FinishParse();

   EXPECT_STREQ(kAnnotation, annotation());
 }

 TEST_F(HtmlAnnotationTest, UnclosedScriptOnly) {
   // The whole document is a lone "<script>" with nothing after it.
   static const char kMarkup[] = "<script>";

   SetupWriter();
   annotation_.set_annotate_flush(true);
   html_parse_.StartParse("http://test.com/blank_flush.html");
   html_parse_.ParseText(kMarkup);
   html_parse_.FinishParse();

   // An EndElement callback is still delivered; see -script(u) in the
   // annotation.
   EXPECT_STREQ("+script -script(u)[F]", annotation());
   // No </script> is inserted in the output, since there was none in the
   // input.
   EXPECT_STREQ(kMarkup, output_buffer_);
 }

 TEST_F(HtmlAnnotationTest, UnclosedScriptOnlyWithFlush) {
   // As UnclosedScriptOnly, but with a Flush() between the "<script>" and
   // the end of the parse.
   static const char kMarkup[] = "<script>";

   SetupWriter();
   annotation_.set_annotate_flush(true);
   html_parse_.StartParse("http://test.com/blank_flush.html");
   html_parse_.ParseText(kMarkup);
   html_parse_.Flush();
   html_parse_.FinishParse();

   // An EndElement callback is still delivered; see -script(u) in the
   // annotation.  The extra flush is annotated ahead of the script.
   EXPECT_STREQ("[F] +script -script(u)[F]", annotation());
   // No </script> is inserted in the output, since there was none in the
   // input.
   EXPECT_STREQ(kMarkup, output_buffer_);
 }

 TEST_F(HtmlAnnotationTest, NulInAttrName) {
   // Crash test only: an embedded NUL byte in the middle of an attribute
   // name must not bring the parser down.  Nothing is asserted about the
   // output.
   const StringPiece nul_byte("\0", 1);  // Explicit length keeps the NUL.

   SetupWriter();
   html_parse_.StartParse("http://test.com/nul_in_attr.html");
   html_parse_.ParseText("<img src");
   html_parse_.ParseText(nul_byte);
   html_parse_.ParseText("file:-1675375991 />");
   html_parse_.FinishParse();
 }

 TEST_F(HtmlParseTest, MakeName) {
   // The symbol table starts out empty.
   EXPECT_EQ(0, HtmlTestingPeer::symbol_table_size(&html_parse_));

   // The empty name is a corner case that we hope does not crash.
   // Empty-string atoms are special-cased in the symbol table and require
   // no new allocated bytes.
   {
     HtmlName blank = html_parse_.MakeName("");
     EXPECT_EQ(0, HtmlTestingPeer::symbol_table_size(&html_parse_));
     EXPECT_EQ(HtmlName::kNotAKeyword, blank.keyword());
     EXPECT_EQ("", blank.value());
   }

   // A name made from its keyword enum causes no symbol table growth.
   HtmlName from_enum = html_parse_.MakeName(HtmlName::kBody);
   EXPECT_EQ(0, HtmlTestingPeer::symbol_table_size(&html_parse_));
   EXPECT_EQ(HtmlName::kBody, from_enum.keyword());

   // Nor does the canonical (all-lower-case) spelling of a keyword.
   HtmlName from_lower_case = html_parse_.MakeName("body");
   EXPECT_EQ(0, HtmlTestingPeer::symbol_table_size(&html_parse_));
   EXPECT_EQ(HtmlName::kBody, from_lower_case.keyword());

   // A new capitalization still matches the keyword, but its case has to be
   // retained, so the new form is stored in the symbol table.  The size
   // asserted here equals the 4 characters of "Body".
   HtmlName from_mixed_case = html_parse_.MakeName("Body");
   EXPECT_EQ(4, HtmlTestingPeer::symbol_table_size(&html_parse_));
   EXPECT_EQ(HtmlName::kBody, from_mixed_case.keyword());

   // Something that is not a keyword at all also grows the table, here by
   // the 10 characters of the name (4 + 10 = 14).
   HtmlName not_a_keyword = html_parse_.MakeName("hiybbprqag");
   EXPECT_EQ(14, HtmlTestingPeer::symbol_table_size(&html_parse_));
   EXPECT_EQ(HtmlName::kNotAKeyword, not_a_keyword.keyword());

   // Repeat the empty-name check now that the table is non-empty: still no
   // crash and still no growth.
   {
     HtmlName blank_again = html_parse_.MakeName("");
     EXPECT_EQ(14, HtmlTestingPeer::symbol_table_size(&html_parse_));
     EXPECT_EQ(HtmlName::kNotAKeyword, blank_again.keyword());
     EXPECT_EQ("", blank_again.value());
   }
 }

 // bug 2508140 : <noscript> in <head>
 TEST_F(HtmlParseTestNoBody, NoscriptInHead) {
   // <noscript> inside <head> is technically illegal according to the HTML4
   // spec, yet some real websites (ex: google.com) do it.  We should support
   // the case in use, so the markup must pass through unchanged.
   static const char kMarkup[] =
       "<head><noscript><title>You don't have JS enabled :(</title>"
       "</noscript></head>";
   ValidateNoChanges("noscript_in_head", kMarkup);
 }

 TEST_F(HtmlParseTestNoBody, NoCaseFold) {
   // Case folding is off by default, so tag and attribute names keep their
   // capitalization.  The closing tag is not kept separately in the IR,
   // however, so on output it always takes the opening tag's spelling.
   static const char kInput[] =
       "<DiV><Other xY='AbC' Href='dEf'>Hello</OTHER></diV>";
   static const char kOutput[] =
       "<DiV><Other xY='AbC' Href='dEf'>Hello</Other></DiV>";
   ValidateExpected("no_case_fold", kInput, kOutput);

   // Even though case is retained in the IR and the opening and closing
   // tags were spelled differently, no unmatched-tag warnings (or any other
   // messages) should have been issued.
   EXPECT_EQ(0, message_handler_.TotalMessages());
 }

 TEST_F(HtmlParseTestNoBody, CaseFold) {
   // With case folding switched on in the writer, tag and attribute names
   // are expected in lower case while attribute values keep their case.
   static const char kInput[] =
       "<DiV><Other xY='AbC' Href='dEf'>Hello</OTHER></diV>";
   static const char kFolded[] =
       "<div><other xy='AbC' href='dEf'>Hello</other></div>";

   SetupWriter();
   html_writer_filter_->set_case_fold(true);
   ValidateExpected("case_fold", kInput, kFolded);
 }

 // Bool that is auto-initialized to false, so members of this type need no
 // explicit initialization in their owner's constructor.
 class Bool {
  public:
   Bool() : value_(false) {}
   // Converting constructor (not a copy constructor).  Deliberately left
   // implicit so that a plain "flag = true" assignment works.
   Bool(bool value) : value_(value) {}  // NOLINT
   // Returns the wrapped value.  Returned as plain 'bool': a top-level const
   // on a by-value return type has no effect.
   bool Test() const { return value_; }

  private:
   bool value_;
 };

 // HtmlFilter that records, with one public flag per callback, which of its
 // handlers have been invoked.  Flags are only ever set to true here.  The
 // value reported from DetermineEnabled() is controlled via SetEnabled() and
 // starts out as true.
 class HandlerCalledFilter : public HtmlFilter {
  public:
   HandlerCalledFilter() : enabled_value_(true) {}

   // Document boundaries.
   virtual void StartDocument() {
     called_start_document_ = true;
   }
   virtual void EndDocument() {
     called_end_document_ = true;
   }

   // Element boundaries.
   virtual void StartElement(HtmlElement* element) {
     called_start_element_ = true;
   }
   virtual void EndElement(HtmlElement* element) {
     called_end_element_ = true;
   }

   // Leaf nodes.
   virtual void Cdata(HtmlCdataNode* cdata) {
     called_cdata_ = true;
   }
   virtual void Comment(HtmlCommentNode* comment) {
     called_comment_ = true;
   }
   virtual void IEDirective(HtmlIEDirectiveNode* directive) {
     called_ie_directive_ = true;
   }
   virtual void Characters(HtmlCharactersNode* characters) {
     called_characters_ = true;
   }
   virtual void Directive(HtmlDirectiveNode* directive) {
     called_directive_ = true;
   }

   virtual void Flush() {
     called_flush_ = true;
   }

   // Reports whatever was last passed to SetEnabled(); disabled_reason is
   // left untouched.
   virtual void DetermineEnabled(GoogleString* disabled_reason) {
     set_is_enabled(enabled_value_);
   }

   virtual bool CanModifyUrls() {
     return false;
   }

   // Chooses the value DetermineEnabled() will report from now on.
   void SetEnabled(bool enabled_value) {
     enabled_value_ = enabled_value;
   }

   virtual const char* Name() const {
     return "HandlerCalled";
   }

   // One flag per callback above; each starts out false.
   Bool called_start_document_;
   Bool called_end_document_;
   Bool called_start_element_;
   Bool called_end_element_;
   Bool called_cdata_;
   Bool called_comment_;
   Bool called_ie_directive_;
   Bool called_characters_;
   Bool called_directive_;
   Bool called_flush_;

  private:
   bool enabled_value_;

   DISALLOW_COPY_AND_ASSIGN(HandlerCalledFilter);
 };

 class HandlerCalledTest : public HtmlParseTest {
  protected:
   HandlerCalledTest() {
     html_parse_.AddFilter(&handler_called_filter_);
     first_event_listener_ = new HandlerCalledFilter();
     second_event_listener_ = new HandlerCalledFilter();
     html_parse_.add_event_listener(first_event_listener_);
     html_parse_.add_event_listener(second_event_listener_);
   }

   HandlerCalledFilter handler_called_filter_;
   HandlerCalledFilter* first_event_listener_;
   HandlerCalledFilter* second_event_listener_;

  private:
   DISALLOW_COPY_AND_ASSIGN(HandlerCalledTest);
 };

 // Check that StartDocument and EndDocument are delivered to the filter and
 // to both event listeners, even for an empty document.
 TEST_F(HandlerCalledTest, StartEndDocumentCalled) {
   const HandlerCalledFilter& filter = handler_called_filter_;
   const HandlerCalledFilter& listener1 = *first_event_listener_;
   const HandlerCalledFilter& listener2 = *second_event_listener_;

   Parse("start_end_document_called", "");

   EXPECT_TRUE(filter.called_start_document_.Test());
   EXPECT_TRUE(filter.called_end_document_.Test());
   EXPECT_TRUE(listener1.called_start_document_.Test());
   EXPECT_TRUE(listener1.called_end_document_.Test());
   EXPECT_TRUE(listener2.called_start_document_.Test());
   EXPECT_TRUE(listener2.called_end_document_.Test());
 }

 // Check that a disabled filter gets neither StartDocument nor EndDocument
 // while the event listeners still get both, and that the filter gets them
 // again once it has been re-enabled.
 TEST_F(HandlerCalledTest, StartEndDocumentWithFilterDisabled) {
   const HandlerCalledFilter& filter = handler_called_filter_;
   const HandlerCalledFilter& listener1 = *first_event_listener_;
   const HandlerCalledFilter& listener2 = *second_event_listener_;

   // First parse: filter disabled.
   handler_called_filter_.SetEnabled(false);
   Parse("start_end_document_called", "");
   EXPECT_FALSE(filter.called_start_document_.Test());
   EXPECT_FALSE(filter.called_end_document_.Test());
   EXPECT_TRUE(listener1.called_start_document_.Test());
   EXPECT_TRUE(listener1.called_end_document_.Test());
   EXPECT_TRUE(listener2.called_start_document_.Test());
   EXPECT_TRUE(listener2.called_end_document_.Test());

   // Second parse: filter enabled again.  The listener flags were already
   // set by the first parse and are never cleared, so only the filter's own
   // flags are newly informative here.
   handler_called_filter_.SetEnabled(true);
   Parse("start_end_document_called", "");
   EXPECT_TRUE(filter.called_start_document_.Test());
   EXPECT_TRUE(filter.called_end_document_.Test());
   EXPECT_TRUE(listener1.called_start_document_.Test());
   EXPECT_TRUE(listener1.called_end_document_.Test());
   EXPECT_TRUE(listener2.called_start_document_.Test());
   EXPECT_TRUE(listener2.called_end_document_.Test());
 }

 TEST_F(HandlerCalledTest, StartEndElementCalled) {
   // A single <p> element must trigger StartElement and EndElement on the
   // filter and on both listeners.
   const HandlerCalledFilter& filter = handler_called_filter_;
   const HandlerCalledFilter& listener1 = *first_event_listener_;
   const HandlerCalledFilter& listener2 = *second_event_listener_;

   Parse("start_end_element_called", "<p>...</p>");

   EXPECT_TRUE(filter.called_start_element_.Test());
   EXPECT_TRUE(filter.called_end_element_.Test());
   EXPECT_TRUE(listener1.called_start_element_.Test());
   EXPECT_TRUE(listener1.called_end_element_.Test());
   EXPECT_TRUE(listener2.called_start_element_.Test());
   EXPECT_TRUE(listener2.called_end_element_.Test());
 }

 TEST_F(HandlerCalledTest, CdataCalled) {
   // A CDATA section looks like a directive, but isn't: it must reach the
   // Cdata handler and not the Directive handler.
   const HandlerCalledFilter& filter = handler_called_filter_;
   const HandlerCalledFilter& listener1 = *first_event_listener_;
   const HandlerCalledFilter& listener2 = *second_event_listener_;

   Parse("cdata_called", "<![CDATA[...]]>");

   EXPECT_FALSE(filter.called_directive_.Test());
   EXPECT_TRUE(filter.called_cdata_.Test());
   EXPECT_FALSE(listener1.called_directive_.Test());
   EXPECT_TRUE(listener1.called_cdata_.Test());
   EXPECT_FALSE(listener2.called_directive_.Test());
   EXPECT_TRUE(listener2.called_cdata_.Test());
 }

 TEST_F(HandlerCalledTest, CommentCalled) {
   // An ordinary comment must reach the Comment handler of the filter and
   // of both listeners.
   const HandlerCalledFilter& filter = handler_called_filter_;
   const HandlerCalledFilter& listener1 = *first_event_listener_;
   const HandlerCalledFilter& listener2 = *second_event_listener_;

   Parse("comment_called", "<!--...-->");

   EXPECT_TRUE(filter.called_comment_.Test());
   EXPECT_TRUE(listener1.called_comment_.Test());
   EXPECT_TRUE(listener2.called_comment_.Test());
 }

 TEST_F(HandlerCalledTest, IEDirectiveCalled1) {
   // "<!--[if IE]>" looks like a comment, but isn't: it must reach the
   // IEDirective handler and not the Comment handler.
   const HandlerCalledFilter& filter = handler_called_filter_;
   const HandlerCalledFilter& listener1 = *first_event_listener_;
   const HandlerCalledFilter& listener2 = *second_event_listener_;

   Parse("ie_directive_called", "<!--[if IE]>...<![endif]-->");

   EXPECT_FALSE(filter.called_comment_.Test());
   EXPECT_TRUE(filter.called_ie_directive_.Test());
   EXPECT_FALSE(listener1.called_comment_.Test());
   EXPECT_TRUE(listener1.called_ie_directive_.Test());
   EXPECT_FALSE(listener2.called_comment_.Test());
   EXPECT_TRUE(listener2.called_ie_directive_.Test());
 }

 TEST_F(HandlerCalledTest, IEDirectiveCalled2) {
   // A conditional with a version comparison ("lte IE 8") is an IE
   // directive too.  See
   // http://code.google.com/p/modpagespeed/issues/detail?id=136 and
   // http://msdn.microsoft.com/en-us/library/ms537512(VS.85).aspx#dlrevealed
   const HandlerCalledFilter& filter = handler_called_filter_;
   const HandlerCalledFilter& listener1 = *first_event_listener_;
   const HandlerCalledFilter& listener2 = *second_event_listener_;

   Parse("ie_directive_called", "<!--[if lte IE 8]>...<![endif]-->");

   EXPECT_FALSE(filter.called_comment_.Test());
   EXPECT_TRUE(filter.called_ie_directive_.Test());
   EXPECT_FALSE(listener1.called_comment_.Test());
   EXPECT_TRUE(listener1.called_ie_directive_.Test());
   EXPECT_FALSE(listener2.called_comment_.Test());
   EXPECT_TRUE(listener2.called_ie_directive_.Test());
 }

 TEST_F(HandlerCalledTest, IEDirectiveCalled3) {
   // The condition need not mention IE: "[if false]" is still reported via
   // the IEDirective handler rather than the Comment handler.
   const HandlerCalledFilter& filter = handler_called_filter_;
   const HandlerCalledFilter& listener1 = *first_event_listener_;
   const HandlerCalledFilter& listener2 = *second_event_listener_;

   Parse("ie_directive_called", "<!--[if false]>...<![endif]-->");

   EXPECT_FALSE(filter.called_comment_.Test());
   EXPECT_TRUE(filter.called_ie_directive_.Test());
   EXPECT_FALSE(listener1.called_comment_.Test());
   EXPECT_TRUE(listener1.called_ie_directive_.Test());
   EXPECT_FALSE(listener2.called_comment_.Test());
   EXPECT_TRUE(listener2.called_ie_directive_.Test());
 }

 // Downlevel-revealed comments normally look like <![if foo]>...<![endif]>.
 // However, although most (non-IE) browsers will ignore those, they're
 // technically not valid, so some sites use the below trick (which is valid
 // HTML, and still works for IE).  For an explanation, see
 // http://en.wikipedia.org/wiki/Conditional_comment#
 // Downlevel-revealed_conditional_comment
 TEST_F(HandlerCalledTest, IEDirectiveCalledRevealedOpen) {
   // Opening half of the trick: must be reported as an IE directive, not
   // as a comment.
   const HandlerCalledFilter& filter = handler_called_filter_;
   const HandlerCalledFilter& listener1 = *first_event_listener_;
   const HandlerCalledFilter& listener2 = *second_event_listener_;

   Parse("ie_directive_called", "<!--[if !IE]><!-->");

   EXPECT_FALSE(filter.called_comment_.Test());
   EXPECT_TRUE(filter.called_ie_directive_.Test());
   EXPECT_FALSE(listener1.called_comment_.Test());
   EXPECT_TRUE(listener1.called_ie_directive_.Test());
   EXPECT_FALSE(listener2.called_comment_.Test());
   EXPECT_TRUE(listener2.called_ie_directive_.Test());
 }
 TEST_F(HandlerCalledTest, IEDirectiveCalledRevealedClose) {
   // Closing half of the valid-HTML form of a downlevel-revealed
   // conditional: "<!--<![endif]-->" must be reported as an IE directive,
   // not as a comment.
   const HandlerCalledFilter& filter = handler_called_filter_;
   const HandlerCalledFilter& listener1 = *first_event_listener_;
   const HandlerCalledFilter& listener2 = *second_event_listener_;

   Parse("ie_directive_called", "<!--<![endif]-->");

   EXPECT_FALSE(filter.called_comment_.Test());
   EXPECT_TRUE(filter.called_ie_directive_.Test());
   EXPECT_FALSE(listener1.called_comment_.Test());
   EXPECT_TRUE(listener1.called_ie_directive_.Test());
   EXPECT_FALSE(listener2.called_comment_.Test());
   EXPECT_TRUE(listener2.called_ie_directive_.Test());
 }

 // Unit tests for event-list manipulation.  In these tests, we do not parse
 // HTML input text, but instead create two 'Characters' nodes and use the
 // event-list manipulation methods and make sure they render as expected.
 class EventListManipulationTest : public HtmlParseTest {
  protected:
   EventListManipulationTest() { }

   virtual void SetUp() {
     HtmlParseTest::SetUp();
     static const char kUrl[] = "http://html.parse.test/event_list_test.html";
     ASSERT_TRUE(html_parse_.StartParse(kUrl));
     node1_ = html_parse_.NewCharactersNode(NULL, "1");
     HtmlTestingPeer::AddEvent(&html_parse_,
                               new HtmlCharactersEvent(node1_, -1));
     node2_ = html_parse_.NewCharactersNode(NULL, "2");
     node3_ = html_parse_.NewCharactersNode(NULL, "3");
     // Note: the last 2 are not added in SetUp.
   }

   virtual void TearDown() {
     html_parse_.FinishParse();
     HtmlParseTest::TearDown();
   }

   void CheckExpected(const GoogleString& expected) {
     SetupWriter();
     html_parse()->ApplyFilter(html_writer_filter_.get());
     EXPECT_EQ(expected, output_buffer_);
   }

   HtmlCharactersNode* node1_;
   HtmlCharactersNode* node2_;
   HtmlCharactersNode* node3_;

  private:
   DISALLOW_COPY_AND_ASSIGN(EventListManipulationTest);
 };

 TEST_F(EventListManipulationTest, TestReplace) {
   // Swapping the only queued node ("1") for "2" must succeed and leave
   // just "2" in the output.
   const bool replaced = html_parse_.ReplaceNode(node1_, node2_);
   EXPECT_TRUE(replaced);
   CheckExpected("2");
 }

 TEST_F(EventListManipulationTest, TestInsertNodeBeforeNode) {
   // Character coalescing is turned off (presumably so that adjacent
   // characters nodes are not merged while nodes are inserted).
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);
   HtmlCharactersNode* const anchor = node1_;

   // "2" lands directly in front of "1".
   html_parse_.InsertNodeBeforeNode(anchor, node2_);
   CheckExpected("21");

   // "3" also lands directly in front of "1", i.e. after the "2".
   html_parse_.InsertNodeBeforeNode(anchor, node3_);
   CheckExpected("231");
 }

 TEST_F(EventListManipulationTest, TestInsertNodeAfterNode) {
   // Character coalescing is turned off (presumably so that adjacent
   // characters nodes are not merged while nodes are inserted).
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);
   HtmlCharactersNode* const anchor = node1_;

   // "2" lands directly behind "1".
   html_parse_.InsertNodeAfterNode(anchor, node2_);
   CheckExpected("12");

   // "3" also lands directly behind "1", i.e. ahead of the "2".
   html_parse_.InsertNodeAfterNode(anchor, node3_);
   CheckExpected("132");
 }

 TEST_F(EventListManipulationTest, TestInsertNodeBeforeCurrent) {
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);

   // After the AddEvent in SetUp(), current is left at queue_.end(), so
   // inserting before current puts "2" behind "1".
   html_parse_.InsertNodeBeforeCurrent(node2_);
   CheckExpected("12");

   // With current explicitly pointed at "1", the insertion goes in front
   // of it.
   HtmlCharactersNode* const new_current = node1_;
   HtmlTestingPeer::SetCurrent(&html_parse_, new_current);
   html_parse_.InsertNodeBeforeCurrent(node3_);
   CheckExpected("312");
 }

 TEST_F(EventListManipulationTest, TestInsertNodeAfterCurrent) {
   // Inserts "2" and then "3" after the current position, starting with
   // current pointed at "1".
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);
   HtmlTestingPeer::SetCurrent(&html_parse_, node1_);
   html_parse_.InsertNodeAfterCurrent(node2_);
   // Note that if we call CheckExpected here it will mutate current_.
   html_parse_.InsertNodeAfterCurrent(node3_);
   // The expected "123" (rather than "132") indicates the second insertion
   // lands behind node2_, i.e. current apparently moves to the node just
   // inserted -- confirm against HtmlParse.
   CheckExpected("123");
 }

 TEST_F(EventListManipulationTest, TestDeleteOnly) {
   // Deleting the single queued node must leave nothing to render.
   HtmlCharactersNode* const only_node = node1_;
   html_parse_.DeleteNode(only_node);
   CheckExpected("");
 }

 TEST_F(EventListManipulationTest, TestDeleteFirst) {
   // Queue "2" and "3" behind the "1" from SetUp(), then repeatedly delete
   // whichever node is at the front.
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);
   HtmlCharactersEvent* const event2 = new HtmlCharactersEvent(node2_, -1);
   HtmlTestingPeer::AddEvent(&html_parse_, event2);
   HtmlCharactersEvent* const event3 = new HtmlCharactersEvent(node3_, -1);
   HtmlTestingPeer::AddEvent(&html_parse_, event3);

   html_parse_.DeleteNode(node1_);
   CheckExpected("23");

   html_parse_.DeleteNode(node2_);
   CheckExpected("3");

   html_parse_.DeleteNode(node3_);
   CheckExpected("");
 }

 TEST_F(EventListManipulationTest, TestDeleteLast) {
   // Queue "2" and "3" behind the "1" from SetUp(), then repeatedly delete
   // whichever node is at the back.
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);
   HtmlCharactersEvent* const event2 = new HtmlCharactersEvent(node2_, -1);
   HtmlTestingPeer::AddEvent(&html_parse_, event2);
   HtmlCharactersEvent* const event3 = new HtmlCharactersEvent(node3_, -1);
   HtmlTestingPeer::AddEvent(&html_parse_, event3);

   html_parse_.DeleteNode(node3_);
   CheckExpected("12");

   html_parse_.DeleteNode(node2_);
   CheckExpected("1");

   html_parse_.DeleteNode(node1_);
   CheckExpected("");
 }

 TEST_F(EventListManipulationTest, TestDeleteMiddle) {
   // Queue "2" and "3" behind the "1" from SetUp(), then delete the middle
   // node only.
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);
   HtmlCharactersEvent* const event2 = new HtmlCharactersEvent(node2_, -1);
   HtmlTestingPeer::AddEvent(&html_parse_, event2);
   HtmlCharactersEvent* const event3 = new HtmlCharactersEvent(node3_, -1);
   HtmlTestingPeer::AddEvent(&html_parse_, event3);

   html_parse_.DeleteNode(node2_);
   CheckExpected("13");
 }

 // Note that a sanity check runs unconditionally after every filter,
 // verifying that all the parent-pointers are correct.  CheckExpected
 // applies the HtmlWriterFilter, so it runs the parent-pointer check.
 TEST_F(EventListManipulationTest, TestAddParentToSequence) {
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);
   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1));
   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1));

   // Wrap all three Characters nodes in a div.
   HtmlElement* outer_div = html_parse_.NewElement(NULL, HtmlName::kDiv);
   EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node3_, outer_div));
   CheckExpected("<div>123</div>");

   // Now interpose a span between the div and the first two Characters
   // nodes.
   HtmlElement* inner_span = html_parse_.NewElement(outer_div, HtmlName::kSpan);
   EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node2_, inner_span));
   CheckExpected("<div><span>12</span>3</div>");

   // Next, add an HTML block above the div.  Note that the div is passed in
   // as both 'first' and 'last'.
   HtmlElement* html_root = html_parse_.NewElement(NULL, HtmlName::kHtml);
   EXPECT_TRUE(html_parse_.AddParentToSequence(outer_div, outer_div, html_root));
   CheckExpected("<html><div><span>12</span>3</div></html>");
 }

 TEST_F(EventListManipulationTest, TestPrependChild) {
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);

   // Start with an empty div queued behind the "1" from SetUp().
   HtmlElement* container = html_parse_.NewElement(NULL, HtmlName::kDiv);
   html_parse_.InsertNodeBeforeCurrent(container);
   CheckExpected("1<div></div>");

   // Each prepended child becomes the first child of the div.
   html_parse_.PrependChild(container, node2_);
   CheckExpected("1<div>2</div>");

   html_parse_.PrependChild(container, node3_);
   CheckExpected("1<div>32</div>");

   // TODO(sligocki): Test with elements that don't explicitly end like image.
 }

 TEST_F(EventListManipulationTest, TestAppendChild) {
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);

   // Start with an empty div queued behind the "1" from SetUp().
   HtmlElement* container = html_parse_.NewElement(NULL, HtmlName::kDiv);
   html_parse_.InsertNodeBeforeCurrent(container);
   CheckExpected("1<div></div>");

   // Each appended child becomes the last child of the div.
   html_parse_.AppendChild(container, node2_);
   CheckExpected("1<div>2</div>");

   html_parse_.AppendChild(container, node3_);
   CheckExpected("1<div>23</div>");

   // TODO(sligocki): Test with elements that don't explicitly end like image.
 }

 TEST_F(EventListManipulationTest, TestAddParentToSequenceDifferentParents) {
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);
   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1));

   // Wrap "1" and "2" in a div.
   HtmlElement* wrapper = html_parse_.NewElement(NULL, HtmlName::kDiv);
   EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node2_, wrapper));
   CheckExpected("<div>12</div>");

   // "3" is queued afterwards and renders outside the div.
   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1));
   CheckExpected("<div>12</div>3");

   // A sequence running from "2" (inside the div) to "3" (outside it) must
   // be rejected.
   EXPECT_FALSE(html_parse_.AddParentToSequence(node2_, node3_, wrapper));
 }

 TEST_F(EventListManipulationTest, TestDeleteGroup) {
   // Wrap "1" and "2" in a div ...
   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1));
   HtmlElement* group = html_parse_.NewElement(NULL, HtmlName::kDiv);
   EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node2_, group));
   CheckExpected("<div>12</div>");

   // ... then delete the div: its children must disappear with it.
   html_parse_.DeleteNode(group);
   CheckExpected("");
 }

 TEST_F(EventListManipulationTest, TestMoveElementIntoParent1) {
   // Build <head>1</head><div>2</div>3 step by step.
   HtmlElement* head_element = html_parse_.NewElement(NULL, HtmlName::kHead);
   EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node1_, head_element));
   CheckExpected("<head>1</head>");

   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1));
   HtmlElement* div_element = html_parse_.NewElement(NULL, HtmlName::kDiv);
   EXPECT_TRUE(html_parse_.AddParentToSequence(node2_, node2_, div_element));
   CheckExpected("<head>1</head><div>2</div>");

   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1));
   CheckExpected("<head>1</head><div>2</div>3");

   // With current pointed at the div, move it into the head; it is expected
   // to end up as the head's last child.
   HtmlTestingPeer::SetCurrent(&html_parse_, div_element);
   EXPECT_TRUE(html_parse_.MoveCurrentInto(head_element));
   CheckExpected("<head>1<div>2</div></head>3");
 }

 TEST_F(EventListManipulationTest, TestMoveElementIntoParent2) {
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);

   // Build <head>1</head>2<div>3</div> step by step.
   HtmlElement* head_element = html_parse_.NewElement(NULL, HtmlName::kHead);
   EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node1_, head_element));
   CheckExpected("<head>1</head>");

   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1));
   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1));
   CheckExpected("<head>1</head>23");

   HtmlElement* div_element = html_parse_.NewElement(NULL, HtmlName::kDiv);
   EXPECT_TRUE(html_parse_.AddParentToSequence(node3_, node3_, div_element));
   CheckExpected("<head>1</head>2<div>3</div>");

   // Move the div (not adjacent to the head) into the head.
   HtmlTestingPeer::SetCurrent(&html_parse_, div_element);
   EXPECT_TRUE(html_parse_.MoveCurrentInto(head_element));
   CheckExpected("<head>1<div>3</div></head>2");

   // Strip the wrappers again, innermost first; the children stay put.
   EXPECT_TRUE(html_parse_.DeleteSavingChildren(div_element));
   CheckExpected("<head>13</head>2");

   EXPECT_TRUE(html_parse_.DeleteSavingChildren(head_element));
   CheckExpected("132");
 }

 // DeleteSavingChildren() on an element whose close tag is the last event
 // in the list must leave just the child behind.
 TEST_F(EventListManipulationTest, TestDeleteSavingChildrenEnd) {
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);

   HtmlElement* wrapper = html_parse_.NewElement(NULL, HtmlName::kDiv);
   EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node1_, wrapper));
   CheckExpected("<div>1</div>");

   EXPECT_TRUE(html_parse_.DeleteSavingChildren(wrapper));
   CheckExpected("1");
 }

 // Exercises MoveCurrentBefore(): a successful move, a call made while
 // current_ points to end() (non-debug builds only), and a rejected attempt
 // to move an element in front of a node it contains.
 TEST_F(EventListManipulationTest, TestMoveCurrentBefore) {
   // Build "<div>12</div>3" without coalescing and make "3" current.
   HtmlTestingPeer::set_coalesce_characters(&html_parse_, false);
   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1));
   HtmlElement* inner_div = html_parse_.NewElement(NULL, HtmlName::kDiv);
   EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node2_, inner_div));
   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1));
   CheckExpected("<div>12</div>3");
   HtmlTestingPeer::SetCurrent(&html_parse_, node3_);

   // Moving "3" in front of "2" pulls it inside the div.
   EXPECT_TRUE(html_parse_.MoveCurrentBefore(node2_));
   CheckExpected("<div>132</div>");

 #ifdef NDEBUG
   // With current_ pointing to end(), a non-debug build must fail gracefully
   // rather than crash.  A debug build hits a LOG(DFATAL) here, so this part
   // cannot run there.
   // NOTE: We do not expect this case ever to happen in normal code.
   EXPECT_FALSE(html_parse_.MoveCurrentBefore(node2_));
   CheckExpected("<div>132</div>");
 #endif

   // Wrap the div in a span and make the span current: moving a containing
   // element before one of the nodes inside it must be refused.
   HtmlElement* outer_span = html_parse_.NewElement(NULL, HtmlName::kSpan);
   EXPECT_TRUE(html_parse_.AddParentToSequence(inner_div, inner_div,
                                               outer_span));
   CheckExpected("<span><div>132</div></span>");
   HtmlTestingPeer::SetCurrent(&html_parse_, outer_span);

   EXPECT_FALSE(html_parse_.MoveCurrentBefore(node2_));
   CheckExpected("<span><div>132</div></span>");
 }

 // Adding a character node right after another one coalesces the two.
 TEST_F(EventListManipulationTest, TestCoalesceOnAdd) {
   CheckExpected("1");

   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1));
   CheckExpected("12");

   // node1_ and node2_ have been coalesced together: only node1_ remains,
   // holding "12", and node2_ is gone.  Deleting node1_ therefore leaves
   // nothing behind.
   html_parse_.DeleteNode(node1_);
   CheckExpected("");
 }

 // Removing an element that separates character nodes coalesces the
 // characters that become adjacent.
 TEST_F(EventListManipulationTest, TestCoalesceOnDelete) {
   CheckExpected("1");

   // Build "1<div>2</div>3" by hand, parenting "2" under the div.
   HtmlElement* separator = html_parse_.NewElement(NULL, HtmlName::kDiv);
   html_parse_.AddElement(separator, -1);
   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1));
   HtmlTestingPeer peer;
   peer.SetNodeParent(node2_, separator);
   html_parse_.CloseElement(separator, HtmlElement::EXPLICIT_CLOSE, -1);
   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1));
   CheckExpected("1<div>2</div>3");

   // Drop the div but keep what was inside it.
   EXPECT_TRUE(html_parse_.DeleteSavingChildren(separator));
   CheckExpected("123");

   // node1, node2 and node3 have now been coalesced automatically, so
   // removing node1 makes all of the content disappear.
   html_parse_.DeleteNode(node1_);
   CheckExpected("");
 }

 // Tracks HasChildrenInFlushWindow() for a div as it is opened, given a
 // child, closed, and finally emptied again.
 TEST_F(EventListManipulationTest, TestHasChildren) {
   CheckExpected("1");

   // A freshly opened div has no children.
   HtmlElement* container = html_parse_.NewElement(NULL, HtmlName::kDiv);
   html_parse_.AddElement(container, -1);
   EXPECT_FALSE(html_parse_.HasChildrenInFlushWindow(container));

   HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1));
   HtmlTestingPeer peer;
   peer.SetNodeParent(node2_, container);

   // Although a new node has been added to the stream, the div has not been
   // closed yet, so the node is not recognized as a child.
   EXPECT_FALSE(html_parse_.HasChildrenInFlushWindow(container));

   // Closing the div makes the child visible; deleting the child empties
   // the div again.
   html_parse_.CloseElement(container, HtmlElement::EXPLICIT_CLOSE, -1);
   EXPECT_TRUE(html_parse_.HasChildrenInFlushWindow(container));
   EXPECT_TRUE(html_parse_.DeleteNode(node2_));
   EXPECT_FALSE(html_parse_.HasChildrenInFlushWindow(container));
 }

 // A comment inserted into the initial event list is emitted after the
 // existing "1" text.
 TEST_F(EventListManipulationTest, AppendComment) {
   const StringPiece comment_text("hello");
   html_parse_.InsertComment(comment_text);
   CheckExpected("1<!--hello-->");
 }

 // Markup characters inside the inserted comment text are expected to come
 // out escaped, so the text cannot terminate the comment early.
 TEST_F(EventListManipulationTest, AppendCommentWithEscaping) {
   const StringPiece comment_text("<i>hello</i> <!--world-->");
   html_parse_.InsertComment(comment_text);
   CheckExpected("1<!--&lt;i&gt;hello&lt;/i&gt; &lt;!--world--&gt;-->");
 }

 // A comment inserted while a div is open (added but not yet closed) is
 // expected to be placed in front of the div's open tag.
 TEST_F(EventListManipulationTest, CommentBeforeDiv1) {
   HtmlElement* open_div = html_parse_.NewElement(NULL, HtmlName::kDiv);
   html_parse_.AddElement(open_div, -1);

   html_parse_.InsertComment("hello");

   html_parse_.CloseElement(open_div, HtmlElement::EXPLICIT_CLOSE, -1);
   CheckExpected("1<!--hello--><div></div>");
 }

 // A comment inserted after a div has been created, but before it is added
 // to the event list, precedes the div in the output.
 TEST_F(EventListManipulationTest, CommentBeforeDiv2) {
   HtmlElement* pending_div = html_parse_.NewElement(NULL, HtmlName::kDiv);

   html_parse_.InsertComment("hello");

   html_parse_.AddElement(pending_div, -1);
   html_parse_.CloseElement(pending_div, HtmlElement::EXPLICIT_CLOSE, -1);
   CheckExpected("1<!--hello--><div></div>");
 }

 // A comment inserted once the div has been fully closed follows the div.
 TEST_F(EventListManipulationTest, CommentAfterDiv) {
   HtmlElement* closed_div = html_parse_.NewElement(NULL, HtmlName::kDiv);
   html_parse_.AddElement(closed_div, -1);
   html_parse_.CloseElement(closed_div, HtmlElement::EXPLICIT_CLOSE, -1);

   html_parse_.InsertComment("hello");
   CheckExpected("1<div></div><!--hello-->");
 }

 // With two closed divs in the list and the first one made current, an
 // inserted comment is expected to land between the two divs.
 TEST_F(EventListManipulationTest, CommentAfterFirstDiv) {
   HtmlElement* first_div = html_parse_.NewElement(NULL, HtmlName::kDiv);
   html_parse_.AddElement(first_div, -1);
   html_parse_.CloseElement(first_div, HtmlElement::EXPLICIT_CLOSE, -1);

   HtmlElement* second_div = html_parse_.NewElement(NULL, HtmlName::kDiv);
   html_parse_.AddElement(second_div, -1);
   html_parse_.CloseElement(second_div, HtmlElement::EXPLICIT_CLOSE, -1);

   HtmlTestingPeer::SetCurrent(&html_parse_, first_div);
   html_parse_.InsertComment("hello");
   CheckExpected("1<div></div><!--hello--><div></div>");
 }

 // Filter that inserts a "hello" comment the first time it sees a <div> in
 // each document.  The at_start constructor argument selects whether the
 // insertion is triggered from the div's StartElement callback (true) or
 // from its EndElement callback (false).
 class InsertCommentOnFirstDivFilter : public EmptyHtmlFilter {
  public:
   InsertCommentOnFirstDivFilter(bool at_start, HtmlParse* parse)
       : html_parse_(parse), at_start_(at_start), first_(true) {}

   // Re-arms the filter so that each document gets its own comment.
   virtual void StartDocument() { first_ = true; }

   virtual void StartElement(HtmlElement* element) {
     Insert(true, element);
   }

   virtual void EndElement(HtmlElement* element) {
     Insert(false, element);
   }

   virtual const char* Name() const { return "InsertCommentOnFirstDivFilter"; }

  private:
   // Inserts the comment if nothing has been inserted yet in this document,
   // the callback kind (in_start_callback) is the configured one, and the
   // element is a div.
   void Insert(bool in_start_callback, HtmlElement* element) {
     if (!first_) {
       return;
     }
     if (in_start_callback != at_start_) {
       return;
     }
     if (element->keyword() != HtmlName::kDiv) {
       return;
     }
     html_parse_->InsertComment("hello");
     first_ = false;
   }

   HtmlParse* html_parse_;
   bool at_start_;  // Trigger on StartElement (true) or EndElement (false).
   bool first_;     // True until the comment has been inserted.

   DISALLOW_COPY_AND_ASSIGN(InsertCommentOnFirstDivFilter);
 };

 // Inserting a comment from the first div's StartElement callback places
 // the comment in front of that div's open tag.
 TEST_F(HtmlParseTestNoBody, CommentInsideFirstDiv) {
   InsertCommentOnFirstDivFilter comment_on_div_start(true, &html_parse_);
   html_parse_.AddFilter(&comment_on_div_start);
   SetupWriter();

   static const char kInput[] = "1<div>2</div>3<div>4</div>5";
   static const char kExpected[] = "1<!--hello--><div>2</div>3<div>4</div>5";
   ValidateExpected("comment_inside_first_div", kInput, kExpected);
 }

 // Inserting a comment from the first div's EndElement callback places the
 // comment right after that div's close tag.
 TEST_F(HtmlParseTestNoBody, CommentAfterFirstDiv) {
   InsertCommentOnFirstDivFilter insert_at_first_div(false, &html_parse_);
   html_parse_.AddFilter(&insert_at_first_div);
   SetupWriter();
   // Use a case id of its own: the original reused "comment_inside_first_div"
   // from the previous test, which made the two cases indistinguishable by id.
   ValidateExpected("comment_after_first_div",
                    "1<div>2</div>3<div>4</div>5",
                    "1<div>2</div><!--hello-->3<div>4</div>5");
 }

 // Inserting a comment before anything has been parsed yields an output
 // consisting of just that comment.
 TEST_F(HtmlParseTestNoBody, InsertCommentFromEmpty) {
   html_parse_.InsertComment("hello");

   // Serialize the queued events directly through the writer filter.
   SetupWriter();
   html_parse()->ApplyFilter(html_writer_filter_.get());

   EXPECT_EQ("<!--hello-->", output_buffer_);
 }

 // Inserts comments at several points while a <style> literal block is
 // streamed in across multiple flushes.
 TEST_F(HtmlParseTestNoBody, InsertCommentFromFlushInLargeCharactersBlock) {
   static const char kUrl[] = "http://test.com/blank_flush.html";

   SetupWriter();
   html_parse_.StartParse(kUrl);

   // Nothing has been flushed yet, so this comment should be inserted
   // before <style>.
   html_parse_.ParseText("<style>bytes:");
   EXPECT_TRUE(html_parse_.InsertComment("FLUSH1"));
   html_parse_.Flush();

   html_parse_.ParseText(":more:");
   html_parse_.Flush();

   // We are now inside a literal block whose start has been flushed, so it
   // is not safe to insert a comment here.  The insertion is refused and
   // FLUSH2 should not show up in output_buffer_.
   html_parse_.ParseText(":still more:");
   EXPECT_FALSE(html_parse_.InsertComment("FLUSH2"));
   html_parse_.Flush();

   // After the literal block has been closed, insertion works again.
   html_parse_.ParseText(":final bytes:</style>");
   EXPECT_TRUE(html_parse_.InsertComment("FLUSH3"));
   html_parse_.FinishParse();

   EXPECT_EQ("<!--FLUSH1--><style>bytes::more::still more::final bytes:</style>"
             "<!--FLUSH3-->",
             output_buffer_);
 }

 // Same idea as the large-characters-block test, but the <style> block is
 // empty and both early comments are inserted before the first flush.
 TEST_F(HtmlParseTestNoBody, InsertCommentFromFlushInEmptyCharactersBlock) {
   static const char kUrl[] = "http://test.com/blank_flush.html";

   SetupWriter();
   html_parse_.StartParse(kUrl);

   // Both comments should be inserted before <style>, in insertion order.
   html_parse_.ParseText("<style>");
   EXPECT_TRUE(html_parse_.InsertComment("FLUSH1"));
   EXPECT_TRUE(html_parse_.InsertComment("FLUSH2"));
   html_parse_.Flush();

   // Once the block is closed, a comment goes after </style>.
   html_parse_.ParseText("</style>");
   EXPECT_TRUE(html_parse_.InsertComment("FLUSH3"));
   html_parse_.FinishParse();

   EXPECT_EQ("<!--FLUSH1--><!--FLUSH2--><style></style><!--FLUSH3-->",
             output_buffer_);
 }

 // Unit tests for attribute manipulation.
 // Goal is to make sure we don't (eg) read deallocated storage
 // while manipulating attribute values.
 class AttributeManipulationTest : public HtmlParseTest {
  protected:
   AttributeManipulationTest() { }

   virtual void SetUp() {
     HtmlParseTest::SetUp();
     static const char kUrl[] =
         "http://html.parse.test/attribute_manipulation_test.html";
     ASSERT_TRUE(html_parse_.StartParse(kUrl));
     node_ = html_parse_.NewElement(NULL, HtmlName::kA);
     html_parse_.AddElement(node_, 0);
     html_parse_.AddAttribute(node_, HtmlName::kHref, "http://www.google.com/");
     node_->AddAttribute(html_parse_.MakeName(HtmlName::kId), "37",
                         HtmlElement::NO_QUOTE);
     node_->AddAttribute(html_parse_.MakeName(HtmlName::kClass), "search!",
                         HtmlElement::SINGLE_QUOTE);
     // Add a binary attribute (one without value).
     node_->AddAttribute(html_parse_.MakeName(HtmlName::kSelected), NULL,
                         HtmlElement::NO_QUOTE);
     html_parse_.CloseElement(node_, HtmlElement::BRIEF_CLOSE, 0);
   }

   virtual void TearDown() {
     html_parse_.FinishParse();
     HtmlParseTest::TearDown();
   }

   void CheckExpected(const GoogleString& expected) {
     SetupWriter();
     html_parse_.ApplyFilter(html_writer_filter_.get());
     EXPECT_EQ(expected, output_buffer_);
   }

   int NumAttributes(HtmlElement* element) {
     int size = 0;
     const HtmlElement::AttributeList& attrs = element->attributes();
     for (HtmlElement::AttributeConstIterator i(attrs.begin());
          i != attrs.end(); ++i) {
       ++size;
     }

     return size;
   }

   HtmlElement::Attribute* AttributeAt(HtmlElement* element, int index) {
     int pos = 0;
     HtmlElement::AttributeList* attrs = element->mutable_attributes();
     for (HtmlElement::AttributeIterator i(attrs->begin());
          i != attrs->end(); ++i) {
       if (pos == index) {
         return i.Get();
       }
       ++pos;
     }
     return NULL;
   }

   HtmlElement* node_;

  private:
   DISALLOW_COPY_AND_ASSIGN(AttributeManipulationTest);
 };

 // Checks the attribute accessors (AttributeValue, FindAttribute,
 // DecodedValueOrNull, escaped_value) on the fixture's <a> element and then
 // the serialized form of the element.
 TEST_F(AttributeManipulationTest, PropertiesAndDeserialize) {
   StringPiece google("http://www.google.com/");
   StringPiece number37("37");
   StringPiece search("search!");
   EXPECT_EQ(4, NumAttributes(node_));
   EXPECT_EQ(google, node_->AttributeValue(HtmlName::kHref));
   EXPECT_EQ(number37, node_->AttributeValue(HtmlName::kId));
   EXPECT_EQ(search, node_->AttributeValue(HtmlName::kClass));
   // AttributeValue returns NULL for attributes that do not exist ...
   EXPECT_TRUE(NULL == node_->AttributeValue(HtmlName::kNotAKeyword));
   // ... and for attributes which have no value.
   EXPECT_TRUE(NULL == node_->AttributeValue(HtmlName::kSelected));
   // FindAttribute returns NULL for attributes that do not exist.
   EXPECT_TRUE(NULL == node_->FindAttribute(HtmlName::kNotAKeyword));
   // FindAttribute returns an attribute for attributes without values.
   HtmlElement::Attribute* selected = node_->FindAttribute(HtmlName::kSelected);
   // Fatal check: selected is dereferenced on the next line, so a NULL
   // result must stop the test here instead of crashing it.
   ASSERT_TRUE(NULL != selected);
   EXPECT_TRUE(NULL == selected->DecodedValueOrNull());
   // The values read the same way a second time.
   EXPECT_EQ(google, node_->AttributeValue(HtmlName::kHref));
   EXPECT_EQ(number37, node_->AttributeValue(HtmlName::kId));
   EXPECT_EQ(search, node_->AttributeValue(HtmlName::kClass));
   // None of the values needs escaping, so escaped and decoded forms agree.
   EXPECT_EQ(google, node_->FindAttribute(HtmlName::kHref)->escaped_value());
   EXPECT_EQ(number37, node_->FindAttribute(HtmlName::kId)->escaped_value());
   EXPECT_EQ(search, node_->FindAttribute(HtmlName::kClass)->escaped_value());
   CheckExpected("<a href=\"http://www.google.com/\" id=37 class='search!'"
                 " selected />");
 }

 // An attribute added through HtmlParse::AddAttribute is appended after the
 // existing attributes and serialized with double quotes.
 TEST_F(AttributeManipulationTest, AddAttribute) {
   static const char kLanguage[] = "ENG-US";
   html_parse_.AddAttribute(node_, HtmlName::kLang, kLanguage);
   CheckExpected("<a href=\"http://www.google.com/\" id=37 class='search!'"
                 " selected lang=\"ENG-US\"/>");
 }

 // Deletes an attribute from the middle of the list and then the value-less
 // attribute at the end, checking the serialization after each step.
 TEST_F(AttributeManipulationTest, DeleteAttribute) {
   // Remove "id" from the middle.
   node_->DeleteAttribute(HtmlName::kId);
   CheckExpected("<a href=\"http://www.google.com/\" class='search!'"
                 " selected />");

   // Remove the trailing binary attribute.
   node_->DeleteAttribute(HtmlName::kSelected);
   CheckExpected("<a href=\"http://www.google.com/\" class='search!'/>");
 }

 // Changes the value, quote style and name of the href attribute in place
 // and checks that the element serializes with all three changes applied.
 TEST_F(AttributeManipulationTest, ModifyAttribute) {
   HtmlElement::Attribute* href =
       node_->FindAttribute(HtmlName::kHref);
   // Fatal check: href is dereferenced below, so a NULL result must stop the
   // test here rather than crash it (CloneElement does the same for "id").
   ASSERT_TRUE(href != NULL);
   href->SetValue("google");
   href->set_quote_style(HtmlElement::SINGLE_QUOTE);
   html_parse_.SetAttributeName(href, HtmlName::kSrc);
   CheckExpected("<a src='google' id=37 class='search!' selected />");
 }

 // Re-assigns the href attribute's value, quote style and name to what they
 // already are and checks that the serialized element is unchanged.
 TEST_F(AttributeManipulationTest, ModifyKeepAttribute) {
   HtmlElement::Attribute* href =
       node_->FindAttribute(HtmlName::kHref);
   // Fatal check: href is dereferenced below, so a NULL result must stop the
   // test here rather than crash it.
   ASSERT_TRUE(href != NULL);
   // This apparently do-nothing call to SetValue exposed an allocation bug:
   // the new value passed in is the attribute's own current value.
   href->SetValue(href->DecodedValueOrNull());
   href->set_quote_style(href->quote_style());
   href->set_name(href->name());
   CheckExpected("<a href=\"http://www.google.com/\" id=37 class='search!'"
                 " selected />");
 }

 // StartParse() must report failure for a URL that cannot be parsed.
 TEST_F(AttributeManipulationTest, BadUrl) {
   static const char kInvalidUrl[] = ")(*&)(*&(*";
   static const char kValidUrl[] = "http://www.example.com";

   EXPECT_FALSE(html_parse_.StartParse(kInvalidUrl));

   // Restart the parse with a usable URL so that TearDown does not crash.
   html_parse_.StartParse(kValidUrl);
 }

 // CloneElement() must produce a distinct element with the same keyword,
 // style and attributes.  The clone is detached until explicitly inserted,
 // and modifying it must not affect the original.
 TEST_F(AttributeManipulationTest, CloneElement) {
   HtmlElement* clone = html_parse_.CloneElement(node_);

   // The clone is identical (but not the same object).
   EXPECT_NE(clone, node_);
   EXPECT_EQ(HtmlName::kA, clone->keyword());
   EXPECT_EQ(node_->style(), clone->style());
   // Fatal check: AttributeAt() returns NULL for a missing position and its
   // results are dereferenced below, so stop here if the count is wrong.
   ASSERT_EQ(4, NumAttributes(clone));
   EXPECT_EQ(HtmlName::kHref, AttributeAt(clone, 0)->keyword());
   EXPECT_STREQ("http://www.google.com/",
                AttributeAt(clone, 0)->DecodedValueOrNull());
   EXPECT_EQ(HtmlName::kId, AttributeAt(clone, 1)->keyword());
   EXPECT_STREQ("37", AttributeAt(clone, 1)->DecodedValueOrNull());
   EXPECT_EQ(HtmlName::kClass, AttributeAt(clone, 2)->keyword());
   EXPECT_STREQ("search!", AttributeAt(clone, 2)->DecodedValueOrNull());
   EXPECT_EQ(HtmlName::kSelected, AttributeAt(clone, 3)->keyword());
   // Spelled as EXPECT_TRUE(NULL == ...) like the other NULL checks in this
   // file; EXPECT_EQ(NULL, pointer) does not compile with every gtest version.
   EXPECT_TRUE(NULL == AttributeAt(clone, 3)->DecodedValueOrNull());

   HtmlElement::Attribute* id = clone->FindAttribute(HtmlName::kId);
   ASSERT_TRUE(id != NULL);
   id->SetValue("38");

   // Clone is not added initially, and the original is not touched.
   CheckExpected("<a href=\"http://www.google.com/\" id=37 class='search!'"
                 " selected />");

   // Looks sane when added: the clone (id=38) goes in front of the original.
   html_parse_.InsertNodeBeforeNode(node_, clone);
   CheckExpected("<a href=\"http://www.google.com/\" id=38 class='search!'"
                 " selected />"
                 "<a href=\"http://www.google.com/\" id=37 class='search!'"
                 " selected />");
 }

 // When the only registered filter is constructed with a true second
 // argument, nothing is recorded in the dynamically-disabled filter list.
 TEST_F(HtmlParseTest, NoDisabledFilter) {
   std::vector<GoogleString> disabled_names;
   ASSERT_TRUE(disabled_names.empty());
   html_parse_.SetDynamicallyDisabledFilterList(&disabled_names);

   DisableTestFilter enabled_filter("not_disabled_filter", true,
                                    "Ignored reason");
   html_parse_.AddFilter(&enabled_filter);

   Parse("not_disabled_filter", "<!-- Empty body -->");

   EXPECT_TRUE(disabled_names.empty());
 }

 // Registers five filters, alternating between ones constructed with true
 // and with false as the second argument, and checks that exactly the two
 // constructed with false are reported in the disabled list.
 TEST_F(HtmlParseTest, DisabledFilters) {
   std::vector<GoogleString> disabled_names;
   ASSERT_TRUE(disabled_names.empty());
   html_parse_.SetDynamicallyDisabledFilterList(&disabled_names);

   DisableTestFilter enabled_a("not_disabled_filter1", true, "Ignored reason");
   html_parse_.AddFilter(&enabled_a);

   DisableTestFilter disabled_a("disabled_filter1", false, "");
   html_parse_.AddFilter(&disabled_a);

   DisableTestFilter enabled_b("not_disabled_filter2", true, "Ignored reason");
   html_parse_.AddFilter(&enabled_b);

   DisableTestFilter disabled_b("disabled_filter2", false, "");
   html_parse_.AddFilter(&disabled_b);

   DisableTestFilter enabled_c("not_disabled_filter3", true, "Ignored reason");
   html_parse_.AddFilter(&enabled_c);

   Parse("disabled_filter", "<!-- Empty body -->");

   // The order in which the disabled filters are reported is not checked.
   EXPECT_THAT(disabled_names,
               UnorderedElementsAre(disabled_a.ExpectedDisabledMessage(),
                                    disabled_b.ExpectedDisabledMessage()));
 }

 // A filter constructed with false and a non-empty reason is reported in
 // the disabled list with the message the filter itself says to expect.
 TEST_F(HtmlParseTest, DisabledFilterWithReason) {
   std::vector<GoogleString> disabled_names;
   ASSERT_TRUE(disabled_names.empty());
   html_parse_.SetDynamicallyDisabledFilterList(&disabled_names);

   const GoogleString reason("Some reason");
   DisableTestFilter disabled_filter("disabled_filter_with_reason", false,
                                     reason);
   html_parse_.AddFilter(&disabled_filter);

   Parse("disabled_filter_with_reason", "<!-- Empty body -->");

   EXPECT_THAT(disabled_names,
               UnorderedElementsAre(disabled_filter.ExpectedDisabledMessage()));
 }

 // Filter that counts the StartElement, EndElement and Characters callbacks
 // it receives.  All three counters are reset to zero by StartDocument().
 class CountingCallbacksFilter : public EmptyHtmlFilter {
  public:
   CountingCallbacksFilter()
       : num_start_elements_(0), num_end_elements_(0), num_char_elements_(0) {}

   // Number of callbacks of each kind seen since the last StartDocument().
   int num_start_elements() const { return num_start_elements_; }
   int num_end_elements() const { return num_end_elements_; }
   int num_char_elements() const { return num_char_elements_; }

  protected:
   virtual void StartDocument() {
     num_start_elements_ = 0;
     num_end_elements_ = 0;
     num_char_elements_ = 0;
   }

   virtual void StartElement(HtmlElement* element) { ++num_start_elements_; }

   virtual void EndElement(HtmlElement* element) { ++num_end_elements_; }

   virtual void Characters(HtmlCharactersNode* characters) {
     ++num_char_elements_;
   }

   virtual const char* Name() const { return "CountingCallbacksFilter"; }

  private:
   int num_start_elements_;
   int num_end_elements_;
   int num_char_elements_;

   DISALLOW_COPY_AND_ASSIGN(CountingCallbacksFilter);
 };

 // Checks that deleting nodes does not change the expected order of
 // HTML parse events. We remember every element whose keyword is
 // delete_node_type_, and remove all remembered elements when we see a
 // tag of type delete_from_type_ (delete_on_open_tag_ indicates whether
 // we do that on the start tag or the end tag of delete_from_type_).
 // Elements are removed with DeleteSavingChildren by default; the filter
 // can also be configured to use DeleteNode (set_save_children(false)) or
 // MakeElementInvisible (set_make_invisible(true), which takes precedence).
 class DeleteNodesFilter : public CountingCallbacksFilter {
  public:
   explicit DeleteNodesFilter(HtmlParse* html_parse)
       : html_parse_(html_parse),
         delete_node_type_(HtmlName::kNotAKeyword),
         delete_from_type_(HtmlName::kNotAKeyword),
         delete_on_open_tag_(false),
         save_children_(true),
         make_invisible_(false),
         num_deleted_elements_(0),
         flushes_preventing_delete_(0) {
   }

   // Keyword of the elements to be removed.
   void set_delete_node_type(HtmlName::Keyword keyword) {
     delete_node_type_ = keyword;
   }

   void set_save_children(bool x) { save_children_ = x; }
   void set_make_invisible(bool x) { make_invisible_ = x; }

   // Keyword of the tag that triggers the removal.
   void set_delete_from_type(HtmlName::Keyword keyword) {
     delete_from_type_ = keyword;
   }

   // True: trigger on the start tag of delete_from_type_; false: on its
   // end tag.
   void set_delete_on_open_tag(bool del_from_start) {
     delete_on_open_tag_ = del_from_start;
   }

   // Number of removals that succeeded since the last StartDocument().
   int num_deleted_elements() const { return num_deleted_elements_; }
   // Number of remembered elements that were dropped by a Flush() before
   // they could be removed, since the last StartDocument().
   int flushes_preventing_delete() const { return flushes_preventing_delete_; }

  protected:
   virtual void StartDocument() {
     // Reset the inherited callback counters as well, so that they do not
     // accumulate when the same filter instance sees several documents.
     CountingCallbacksFilter::StartDocument();
     pending_deletes_.clear();
     num_deleted_elements_ = 0;
     flushes_preventing_delete_ = 0;
     // Note: we do not clear save_children_ or make_invisible_ here because
     // we re-use these settings when repeating tests with different flush
     // windows.
   }

   virtual void StartElement(HtmlElement* element) {
     CountingCallbacksFilter::StartElement(element);
     // Remember the element first, so that an element which is both the
     // target and the trigger is removed from its own start tag.
     if (element->keyword() == delete_node_type_) {
       pending_deletes_.push_back(element);
     }
     if (delete_on_open_tag_ && element->keyword() == delete_from_type_) {
       DeleteElements();
     }
   }

   virtual void EndElement(HtmlElement* element) {
     CountingCallbacksFilter::EndElement(element);
     if (!delete_on_open_tag_ && element->keyword() == delete_from_type_) {
       DeleteElements();
     }
   }

   virtual void Flush() {
     // We can't delete an element that has been flushed, so every element
     // still pending at this point counts as a prevented delete.
     flushes_preventing_delete_ += static_cast<int>(pending_deletes_.size());
     pending_deletes_.clear();
   }

   virtual const char* Name() const { return "DeleteNodesFilter"; }

  private:
   // Removes one element using the configured strategy.  Returns whether
   // the parser reported success.
   bool RemoveElement(HtmlElement* element) {
     if (make_invisible_) {
       return html_parse_->MakeElementInvisible(element);
     }
     if (save_children_) {
       return html_parse_->DeleteSavingChildren(element);
     }
     return html_parse_->DeleteNode(element);
   }

   // Attempts to remove every remembered element, counting the successes,
   // and then forgets them all.
   void DeleteElements() {
     for (int i = 0, n = pending_deletes_.size(); i < n; ++i) {
       if (RemoveElement(pending_deletes_[i])) {
         ++num_deleted_elements_;
       }
     }
     pending_deletes_.clear();
   }

   HtmlParse* html_parse_;
   std::vector<HtmlElement*> pending_deletes_;
   HtmlName::Keyword delete_node_type_;
   HtmlName::Keyword delete_from_type_;
   bool delete_on_open_tag_;
   bool save_children_;
   bool make_invisible_;
   int num_deleted_elements_;
   int flushes_preventing_delete_;

   DISALLOW_COPY_AND_ASSIGN(DeleteNodesFilter);
 };

 // Fixture that runs a DeleteNodesFilter over an input repeatedly, once
 // per flush position, and tallies how often the filter managed to delete
 // something.
 class HtmlParseDeleteTest : public HtmlParseTest {
  protected:
   HtmlParseDeleteTest()
       : delete_filter_(html_parse()),
         total_successes_(0),
         total_failures_(0) {
     html_parse()->AddFilter(&delete_filter_);
     SetupWriter();
   }

   // Calls ParseWithFlush(input, i) for every i in [0, input.size()).  A run
   // in which the filter deleted at least one element must produce
   // expected_output_if_deletes_worked and counts as a success; any other
   // run must reproduce input unchanged and counts as a failure.
   void DeleteTest(StringPiece input,
                   StringPiece expected_output_if_deletes_worked) {
     const int num_flush_points = input.size();
     for (int flush_point = 0; flush_point < num_flush_points; ++flush_point) {
       ParseWithFlush(input, flush_point);
       const bool deleted_something =
           (delete_filter_.num_deleted_elements() != 0);
       if (deleted_something) {
         EXPECT_STREQ(expected_output_if_deletes_worked,
                      output_buffer_) << " flush " << flush_point;
         ++total_successes_;
       } else {
         EXPECT_STREQ(input, output_buffer_) << " flush " << flush_point;
         ++total_failures_;
       }
       // Start the next run with an empty output buffer.
       output_buffer_.clear();
     }
   }

   DeleteNodesFilter delete_filter_;
   int total_successes_;  // Runs in which at least one element was deleted.
   int total_failures_;   // Runs in which nothing was deleted.
 };

 // Deletes the div (children included) from its own StartElement callback,
 // for every possible flush position in the input.
 TEST_F(HtmlParseDeleteTest, DeleteAtStartAcrossFlush) {
   delete_filter_.set_delete_on_open_tag(true);
   delete_filter_.set_save_children(false);
   delete_filter_.set_delete_node_type(HtmlName::kDiv);
   delete_filter_.set_delete_from_type(HtmlName::kDiv);
   const StringPiece kInput("1<div id=a>hello</div>2");
   DeleteTest(kInput, "12");

   // We can utilize the infrastructure in DeferCurrentNode to make it
   // possible to delete nodes from their StartElement even if their
   // EndElement is not in the flush window.
   EXPECT_EQ(0, total_failures_);

   // The div and its contents should therefore be eliminated on every run,
   // i.e. once per flush position.  The cast keeps the comparison
   // signed-to-signed, since total_successes_ is an int.
   EXPECT_EQ(static_cast<int>(kInput.size()), total_successes_);
 }

 // Deletes the div (children included) from its EndElement callback, for
 // every possible flush position in the input.
 TEST_F(HtmlParseDeleteTest, DeleteAtEndAcrossFlush) {
   delete_filter_.set_delete_node_type(HtmlName::kDiv);
   delete_filter_.set_delete_from_type(HtmlName::kDiv);
   delete_filter_.set_delete_on_open_tag(false);
   delete_filter_.set_save_children(false);

   const StringPiece kInput("1<div id=a>hello</div>2");
   DeleteTest(kInput, "12");

   // When the flush lands in the middle of the div, the delete fails.
   // That must happen for at least some flush positions.
   EXPECT_LT(0, total_failures_);

   // When both the StartElement and the EndElement are visible, the div and
   // its contents are eliminated.  That must also happen at least sometimes.
   EXPECT_LT(0, total_successes_);
 }

 // Makes the div invisible from its own StartElement callback, for every
 // possible flush position in the input.  The div's tags disappear from the
 // output while its contents ("hello") are kept.
 TEST_F(HtmlParseDeleteTest, InvisibleAtStart) {
   delete_filter_.set_delete_on_open_tag(true);
   delete_filter_.set_make_invisible(true);
   delete_filter_.set_delete_node_type(HtmlName::kDiv);
   delete_filter_.set_delete_from_type(HtmlName::kDiv);
   const StringPiece kInput("1<div id=a>hello</div>2");
   DeleteTest(kInput, "1hello2");

   // It is always possible to make nodes invisible as long as their
   // StartElement has not been flushed.
   EXPECT_EQ(0, total_failures_);
   // One success per flush position.  The cast keeps the comparison
   // signed-to-signed, since total_successes_ is an int.
   EXPECT_EQ(static_cast<int>(kInput.size()), total_successes_);
 }

 // Makes the div invisible from its EndElement callback, for every possible
 // flush position in the input.
 TEST_F(HtmlParseDeleteTest, InvisibleAtEnd) {
   delete_filter_.set_delete_node_type(HtmlName::kDiv);
   delete_filter_.set_delete_from_type(HtmlName::kDiv);
   delete_filter_.set_delete_on_open_tag(false);
   delete_filter_.set_make_invisible(true);

   const StringPiece kInput("1<div id=a>hello</div>2");
   DeleteTest(kInput, "1hello2");

   // When the flush lands in the middle of the div, the operation fails.
   // That must happen for at least some flush positions.
   EXPECT_LT(0, total_failures_);

   // When both the StartElement and the EndElement are visible, the div's
   // tags are dropped while its contents stay in the output.  That must
   // also happen at least sometimes.
   EXPECT_LT(0, total_successes_);
 }

 // Fixture that registers a DeleteNodesFilter with the parser and turns off
 // both the AddBody() and AddHtmlTags() options of the base fixture.
 class EventListOrderTest : public HtmlParseTest {
  protected:
   EventListOrderTest() : delete_nodes_filter_(&html_parse_) {
     html_parse_.AddFilter(&delete_nodes_filter_);
   }

   virtual bool AddBody() const {
     return false;
   }

   virtual bool AddHtmlTags() const {
     return false;
   }

   DeleteNodesFilter delete_nodes_filter_;

  private:
   DISALLOW_COPY_AND_ASSIGN(EventListOrderTest);
 };

 // Removes the <div>, keeping its children, from the div's own StartElement
 // callback.
 TEST_F(EventListOrderTest, DeleteSavingChildrenCalledOnOpen) {
   delete_nodes_filter_.set_delete_on_open_tag(true);
   delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv);
   delete_nodes_filter_.set_delete_from_type(HtmlName::kDiv);
   ValidateExpected("delete_saving_children_open",
                    "<div><p>1</p></div><span>2</span>",
                    "<p>1</p><span>2</span>");
   // Expected value first, matching the EXPECT_EQ(expected, actual) order
   // used by the other tests in this file.  The div's EndElement is not
   // delivered, hence one end callback fewer than start callbacks.
   EXPECT_EQ(3, delete_nodes_filter_.num_start_elements());
   EXPECT_EQ(2, delete_nodes_filter_.num_end_elements());
   EXPECT_EQ(2, delete_nodes_filter_.num_char_elements());
   EXPECT_EQ(1, delete_nodes_filter_.num_deleted_elements());
 }

 // Removes the <div>, keeping its children, from the div's own EndElement
 // callback.
 TEST_F(EventListOrderTest, DeleteSavingChildrenCalledOnClose) {
   delete_nodes_filter_.set_delete_on_open_tag(false);
   delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv);
   delete_nodes_filter_.set_delete_from_type(HtmlName::kDiv);
   ValidateExpected("delete_saving_children_close",
                    "<div><p>1</p></div><span>2</span>",
                    "<p>1</p><span>2</span>");
   // Expected value first, matching the EXPECT_EQ(expected, actual) order
   // used by the other tests in this file.
   EXPECT_EQ(3, delete_nodes_filter_.num_start_elements());
   EXPECT_EQ(3, delete_nodes_filter_.num_end_elements());
   EXPECT_EQ(2, delete_nodes_filter_.num_char_elements());
   EXPECT_EQ(1, delete_nodes_filter_.num_deleted_elements());
 }

 // Removes the <div>, keeping its children, from the StartElement callback
 // of the <p> nested inside it.
 TEST_F(EventListOrderTest, DeleteSavingChildrenCalledInner) {
   delete_nodes_filter_.set_delete_on_open_tag(true);
   delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv);
   delete_nodes_filter_.set_delete_from_type(HtmlName::kP);
   ValidateExpected("delete_saving_children_inner",
                    "<div><p>1</p></div><span>2</span>",
                    "<p>1</p><span>2</span>");
   // Expected value first, matching the EXPECT_EQ(expected, actual) order
   // used by the other tests in this file.
   EXPECT_EQ(3, delete_nodes_filter_.num_start_elements());
   EXPECT_EQ(2, delete_nodes_filter_.num_end_elements());
   EXPECT_EQ(2, delete_nodes_filter_.num_char_elements());
   EXPECT_EQ(1, delete_nodes_filter_.num_deleted_elements());
 }

 // Removes the <div>, keeping its children, from the StartElement callback
 // of the <span> that follows the div.
 TEST_F(EventListOrderTest, DeleteSavingChildrenCalledOuter) {
   delete_nodes_filter_.set_delete_on_open_tag(true);
   delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv);
   delete_nodes_filter_.set_delete_from_type(HtmlName::kSpan);
   ValidateExpected("delete_saving_children_outer",
                    "<div><p>1</p></div><span>2</span>",
                    "<p>1</p><span>2</span>");
   // Expected value first, matching the EXPECT_EQ(expected, actual) order
   // used by the other tests in this file.
   EXPECT_EQ(3, delete_nodes_filter_.num_start_elements());
   EXPECT_EQ(3, delete_nodes_filter_.num_end_elements());
   EXPECT_EQ(2, delete_nodes_filter_.num_char_elements());
   EXPECT_EQ(1, delete_nodes_filter_.num_deleted_elements());
 }

 // Removes the <div>, keeping its children, from the EndElement callback of
 // a nested <p> that is followed by further children of the div.
 TEST_F(EventListOrderTest, DeleteSavingChildrenCalledInnerMiddle) {
   delete_nodes_filter_.set_delete_on_open_tag(false);
   delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv);
   delete_nodes_filter_.set_delete_from_type(HtmlName::kP);
   ValidateExpected("delete_saving_children_inner_middle",
                    "<div><p>1</p>2<span>3</span></div><span>4</span>",
                    "<p>1</p>2<span>3</span><span>4</span>");
   // Expected value first, matching the EXPECT_EQ(expected, actual) order
   // used by the other tests in this file.
   EXPECT_EQ(4, delete_nodes_filter_.num_start_elements());
   EXPECT_EQ(3, delete_nodes_filter_.num_end_elements());
   EXPECT_EQ(4, delete_nodes_filter_.num_char_elements());
   EXPECT_EQ(1, delete_nodes_filter_.num_deleted_elements());
 }

 // Removes the <div>, keeping its children, from the EndElement callback of
 // a nested <p> that is the div's last child.
 TEST_F(EventListOrderTest, DeleteSavingChildrenCalledInnerEnd) {
   delete_nodes_filter_.set_delete_on_open_tag(false);
   delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv);
   delete_nodes_filter_.set_delete_from_type(HtmlName::kP);
   ValidateExpected("delete_saving_children_inner_end",
                    "<div><p>1</p></div><span>2</span>",
                    "<p>1</p><span>2</span>");
   // Expected value first, matching the EXPECT_EQ(expected, actual) order
   // used by the other tests in this file.
   EXPECT_EQ(3, delete_nodes_filter_.num_start_elements());
   EXPECT_EQ(2, delete_nodes_filter_.num_end_elements());
   EXPECT_EQ(2, delete_nodes_filter_.num_char_elements());
   EXPECT_EQ(1, delete_nodes_filter_.num_deleted_elements());
 }

 // Removes the <div>, keeping its children, from the EndElement callback of
 // a <p> nested two levels down (inside an <a> inside the div).
 TEST_F(EventListOrderTest, DeleteSavingChildrenCalledInnerDeep) {
   delete_nodes_filter_.set_delete_on_open_tag(false);
   delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv);
   delete_nodes_filter_.set_delete_from_type(HtmlName::kP);
   ValidateExpected("delete_saving_children_inner_deep",
                    "<div><a><p>1</p>2<span>3</span></a></div><span>4</span>",
                    "<a><p>1</p>2<span>3</span></a><span>4</span>");
   // Expected value first, matching the EXPECT_EQ(expected, actual) order
   // used by the other tests in this file.
   EXPECT_EQ(5, delete_nodes_filter_.num_start_elements());
   EXPECT_EQ(4, delete_nodes_filter_.num_end_elements());
   EXPECT_EQ(4, delete_nodes_filter_.num_char_elements());
   EXPECT_EQ(1, delete_nodes_filter_.num_deleted_elements());
 }

 // Removes the <div>, keeping its children, from the EndElement callback of
 // an <a> that comes after the div with a <span> in between.
 TEST_F(EventListOrderTest, DeleteSavingChildrenCalledOuterDistant) {
   delete_nodes_filter_.set_delete_on_open_tag(false);
   delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv);
   delete_nodes_filter_.set_delete_from_type(HtmlName::kA);
   ValidateExpected("delete_saving_children_outer_distant",
                    "<div><p>1</p></div><span>2</span><a>3</a>",
                    "<p>1</p><span>2</span><a>3</a>");
   // Expected value first, matching the EXPECT_EQ(expected, actual) order
   // used by the other tests in this file.
   EXPECT_EQ(4, delete_nodes_filter_.num_start_elements());
   EXPECT_EQ(4, delete_nodes_filter_.num_end_elements());
   EXPECT_EQ(3, delete_nodes_filter_.num_char_elements());
   EXPECT_EQ(1, delete_nodes_filter_.num_deleted_elements());
 }

 // Filter to remove nodes during parsing and restore them sometime later.
 class RestoreNodesFilter : public CountingCallbacksFilter {
  public:
   explicit RestoreNodesFilter(HtmlParse* html_parse)
       : html_parse_(html_parse),
         outstanding_deferred_elements_(0),
         num_deletes_(0),
         restore_on_open_(false) {
   }

   // Establishes the ID or text of an element to defer, and the ID of an
   // element to move after.
   void MoveOnStart(const char* id_or_text, const char* restore_point) {
     remove_map_[id_or_text] = restore_point;
   }

   void DeleteOnStart(const char* id_or_text) {
     delete_set_.insert(id_or_text);
   }

   void set_restore_on_open(bool restore) {
     restore_on_open_ = restore;
   }

   // Returns the number of nodes that have been deferred, but not yet restored.
   bool AllRestored() const { return restore_map_.empty(); }
   int outstanding_deferred_elements() const {
     return outstanding_deferred_elements_;
   }
   int num_deletes() const { return num_deletes_; }

  protected:
   virtual void StartDocument() {
     CountingCallbacksFilter::StartDocument();
     restore_map_.clear();
     outstanding_deferred_elements_ = 0;
     num_deletes_ = 0;
   }

   virtual void Characters(HtmlCharactersNode* node) {
     CountingCallbacksFilter::Characters(node);
     const GoogleString& text = node->contents();
     if (!MaybeRemoveNode(text, node) &&
         !MaybeDeleteNode(text, node)) {
       MaybeRestoreNode(text);
     }
   }

   virtual void StartElement(HtmlElement* element) {
     CountingCallbacksFilter::StartElement(element);
     const char* id = FindId(element);
     if (id != NULL) {
       if (!MaybeRemoveNode(id, element)) {
         MaybeDeleteNode(id, element);
       }
       if (restore_on_open_) {
         MaybeRestoreNode(id);
       }
     }
   }

   virtual void EndElement(HtmlElement* element) {
     CountingCallbacksFilter::EndElement(element);
     const char* id = FindId(element);
     if (id != NULL && !restore_on_open_) {
       MaybeRestoreNode(id);
     }
   }
   virtual const char* Name() const { return "RestoreNodesFilter"; }

  private:
   typedef std::map<GoogleString, HtmlNode*> RestoreMap;

   const char* FindId(HtmlElement* element) {
     const HtmlElement::Attribute* attr = element->FindAttribute("id");
     if (attr == NULL) {
       return NULL;
     }
     return attr->DecodedValueOrNull();
   }

   bool MaybeRemoveNode(const GoogleString& id, HtmlNode* node) {
     StringStringMap::iterator p = remove_map_.find(id);
     if (p != remove_map_.end()) {
       const GoogleString& restore_id = p->second;
       EXPECT_TRUE(restore_map_[restore_id] == NULL);
       restore_map_[restore_id] = node;
       html_parse_->DeferCurrentNode();
       if (dynamic_cast<HtmlElement*>(node) != NULL) {
         ++outstanding_deferred_elements_;
       }
       return true;
     }
     return false;
   }

   bool MaybeDeleteNode(const GoogleString& id, HtmlNode* node) {
     if (delete_set_.find(id) != delete_set_.end() &&
         html_parse_->DeleteNode(node)) {
       ++num_deletes_;
       return true;
     }
     return false;
   }

   void MaybeRestoreNode(const GoogleString& id) {
     RestoreMap::iterator p = restore_map_.find(id);
     if (p != restore_map_.end()) {
       HtmlNode* restore_node = p->second;
       html_parse_->RestoreDeferredNode(restore_node);
       restore_map_.erase(p);
       if (dynamic_cast<HtmlElement*>(restore_node) != NULL) {
         --outstanding_deferred_elements_;
       }
     }
   }

   HtmlParse* html_parse_;
   StringStringMap remove_map_;
   StringSet delete_set_;
   RestoreMap restore_map_;
   int outstanding_deferred_elements_;
   int num_deletes_;
   bool restore_on_open_;

   DISALLOW_COPY_AND_ASSIGN(RestoreNodesFilter);
 };

 // Fixture for testing deferral, restoration and deletion of nodes via
 // RestoreNodesFilter.  The constructor installs, in order: an upstream
 // writer filter, a counting filter, the RestoreNodesFilter, and a second
 // counting filter, so that callback counts can be compared before and after
 // the deferring filter.
 class HtmlRestoreTest : public HtmlParseTest {
  protected:
   HtmlRestoreTest()
       : upstream_writer_filter_(&html_parse_),
         upstream_writer_(&upstream_buffer_),
         restore_nodes_filter_(&html_parse_),
         expect_restored_(true) {
     // We are interested in the effect on deferring nodes on (a) the
     // filter that does the deferring, (b) upstream filters and (c)
     // downstream filters.  Downstream is covered by the normal
     // HtmlWriterFilter filter that gets installed by SetupWriter.
     // But Upstream is interesting too, especially when a FLUSH occurs
     // while a deferred node is open.  So we simply install another
     // writer filter before the RestoreNodesFilter, which should see
     // the input unmodified.
     html_parse_.AddFilter(&upstream_writer_filter_);
     html_parse_.AddFilter(&pre_counts_filter_);
     upstream_writer_filter_.set_writer(&upstream_writer_);
     html_parse_.AddFilter(&restore_nodes_filter_);
     html_parse_.AddFilter(&post_counts_filter_);
   }

   virtual bool AddBody() const { return false; }
   virtual bool AddHtmlTags() const { return false; }

   // Runs a test like ValidateExpected, but puts one or two Flush
   // calls at arbitrary points in the text, covering all n^2 places
   // to put the two flushes.
   //
   // For each flush placement this checks the downstream output against
   // 'expected', the upstream output against 'before', and the start/end/char
   // callback counts seen by the counting filters and the RestoreNodesFilter.
   // Which count checks apply depends on expect_restored_.
   //
   // Don't call this with an especially large 'before', otherwise the
   // time taken will grow quadratically.  Calling this with 70 byte
   // inputs appears to be OK, taking <300ms to run even in a debug build.
   void RunTestsWithManyFlushWindows(StringPiece before,
                                     StringPiece expected) {
     SetupWriter();
     int before_size = before.size();
     for (int flush1 = 0; flush1 < before_size; ++flush1) {
       for (int flush2 = flush1; flush2 < before_size; ++flush2) {
         // The URL encodes the flush positions so that failures are
         // attributable to a specific flush window.
         GoogleString this_id =
             StringPrintf("http://test.com/%d_%d", flush1, flush2);
         html_parse_.StartParse(this_id);
         if (flush1 != 0) {
           html_parse_.ParseText(before.substr(0, flush1));
         }
         if (flush2 != flush1) {
           html_parse_.Flush();
           html_parse_.ParseText(before.substr(flush1, flush2 - flush1));
         }
         // Given the loop bounds, flush2 is always less than before_size, so
         // the remainder of the text is always parsed after a Flush.
         if (flush2 != before_size) {
           html_parse_.Flush();
           html_parse_.ParseText(before.substr(flush2));
         }
         html_parse_.FinishParse();
         ASSERT_STREQ(expected, output_buffer_) << this_id;
         output_buffer_.clear();
         // The upstream writer must see the input unmodified.
         ASSERT_STREQ(before, upstream_buffer_) << this_id;
         upstream_buffer_.clear();

         // If we expect that everything that was removed was restored, then the
         // start/end/char-counts should all match before and during
         // the RestoreNodes filter.
         if (expect_restored_) {
           EXPECT_TRUE(restore_nodes_filter_.AllRestored()) << this_id;
           // Element counts are only compared when nothing was deleted.
           if (restore_nodes_filter_.num_deletes() == 0) {
             ASSERT_EQ(pre_counts_filter_.num_start_elements(),
                       restore_nodes_filter_.num_start_elements()) << this_id;
             ASSERT_EQ(pre_counts_filter_.num_end_elements(),
                       restore_nodes_filter_.num_end_elements()) << this_id;
             ASSERT_EQ(pre_counts_filter_.num_start_elements(),
                       post_counts_filter_.num_start_elements()) << this_id;
             ASSERT_EQ(pre_counts_filter_.num_end_elements(),
                       post_counts_filter_.num_end_elements()) << this_id;
           }
           ASSERT_EQ(pre_counts_filter_.num_char_elements(),
                     restore_nodes_filter_.num_char_elements()) << this_id;

           // We use ASSERT_GE here because some of the tests will result in
           // characters being coalesced on the defer or on the restore.
           ASSERT_GE(pre_counts_filter_.num_char_elements(),
                     post_counts_filter_.num_char_elements()) << this_id;

           // Of course, start and end element count must be balanced,
           // as long as all deferred nodes were restored.
           ASSERT_EQ(restore_nodes_filter_.num_start_elements(),
                     (restore_nodes_filter_.num_end_elements() +
                      restore_nodes_filter_.num_deletes())) << this_id;
         } else {
           // Otherwise there will be an extra Start tag for every
           // unrestored element.
           EXPECT_FALSE(restore_nodes_filter_.AllRestored()) << this_id;
           ASSERT_EQ(
               restore_nodes_filter_.num_start_elements(),
               (restore_nodes_filter_.num_end_elements() +
                restore_nodes_filter_.outstanding_deferred_elements()))
               << this_id;
         }

         // Note that only the restore_nodes_filter itself can have mismatched
         // start/end callback-counts.  Filters running before or after that one
         // see a balanced set of callbacks.
         ASSERT_EQ(pre_counts_filter_.num_start_elements(),
                   pre_counts_filter_.num_end_elements()) << this_id;
         ASSERT_EQ(post_counts_filter_.num_start_elements(),
                   post_counts_filter_.num_end_elements()) << this_id;
       }
     }
   }

   // Adds a second, local RestoreNodesFilter after the fixture's filters and
   // runs RunTestsWithManyFlushWindows on 'input'.  The fixture's filter
   // moves 'src1' to 'dest1' and, if 'node_to_delete' is non-NULL, deletes
   // that node; the second filter moves 'src2' to 'dest2'.
   void TestTwoFilters(const char* src1, const char* dest1,
                       const char* src2, const char* dest2,
                       const char* node_to_delete,
                       StringPiece input,
                       StringPiece expected) {
     RestoreNodesFilter restore_nodes_filter2(&html_parse_);
     html_parse_.AddFilter(&restore_nodes_filter2);
     SetupWriter();
     restore_nodes_filter_.MoveOnStart(src1, dest1);
     if (node_to_delete != NULL) {
       restore_nodes_filter_.DeleteOnStart(node_to_delete);
     }
     restore_nodes_filter2.MoveOnStart(src2, dest2);
     RunTestsWithManyFlushWindows(input, expected);
   }

   HtmlWriterFilter upstream_writer_filter_;  // Writes to upstream_buffer_.
   CountingCallbacksFilter pre_counts_filter_;  // Before the deferring filter.
   StringWriter upstream_writer_;
   GoogleString upstream_buffer_;
   RestoreNodesFilter restore_nodes_filter_;
   CountingCallbacksFilter post_counts_filter_;  // After the deferring filter.
   // Whether RunTestsWithManyFlushWindows should expect every deferred node
   // to have been restored by the end of the parse.
   bool expect_restored_;

  private:
   DISALLOW_COPY_AND_ASSIGN(HtmlRestoreTest);
 };

 // Div 'a' (with all of its contents) is deferred and re-emitted immediately
 // after div 'b', for every flush placement.
 TEST_F(HtmlRestoreTest, MoveAAfterB) {
   const char kInput[] =
       "0<div id=a>1<span>2</span>3</div>"
       "4<div id=b>5<span>6</span></div>7";
   const char kExpected[] =
       "04<div id=b>5<span>6</span></div>"
       "<div id=a>1<span>2</span>3</div>7";
   restore_nodes_filter_.MoveOnStart("a", "b");
   RunTestsWithManyFlushWindows(kInput, kExpected);
 }

 // Same as MoveAAfterB, except that div 'b' is never explicitly closed in the
 // input; the lexer auto-closes it, and 'a' still lands after it.
 TEST_F(HtmlRestoreTest, MoveAAfterBUnclosed) {
   const char kInput[] =
       "0<div id=a>1<span>2</span>3</div>"
       "4<div id=b>5<span>6</span>7";
   const char kExpected[] =
       "04<div id=b>5<span>6</span>7"
       "<div id=a>1<span>2</span>3</div>";
   restore_nodes_filter_.MoveOnStart("a", "b");
   RunTestsWithManyFlushWindows(kInput, kExpected);
 }

 // Div 'b' is nested inside another div; div 'a' is re-emitted right after
 // 'b' closes, i.e. inside b's parent.
 TEST_F(HtmlRestoreTest, MoveAAfterNestedB) {
   const char kInput[] =
       "0<div id=a>1<span>2</span>3</div>"
       "4<div><div id=b>5<span>6</span></div>7</div>";
   const char kExpected[] =
       "04<div><div id=b>5<span>6</span></div>"
       "<div id=a>1<span>2</span>3</div>7</div>";
   restore_nodes_filter_.MoveOnStart("a", "b");
   RunTestsWithManyFlushWindows(kInput, kExpected);
 }


 // Chained moves: 'a' is restored after 'c', and 'b' is restored after 'a',
 // so both end up following 'c' in their original relative order.
 TEST_F(HtmlRestoreTest, MoveABAfterC) {
   const char kInput[] = "0<img id=a />1<img id=b />2<img id=c />3";
   const char kExpected[] = "012<img id=c /><img id=a /><img id=b />3";
   restore_nodes_filter_.MoveOnStart("a", "c");
   restore_nodes_filter_.MoveOnStart("b", "a");
   RunTestsWithManyFlushWindows(kInput, kExpected);
 }

 // A Characters node ("start") is deferred and restored after div 'a'.
 TEST_F(HtmlRestoreTest, MoveTextAfterDiv) {
   const char kInput[] = "start<div id=a></div>";
   const char kExpected[] = "<div id=a></div>start";
   restore_nodes_filter_.MoveOnStart("start", "a");
   RunTestsWithManyFlushWindows(kInput, kExpected);
 }

 // Div 'a' is deferred and restored after the Characters node "hello".
 TEST_F(HtmlRestoreTest, MoveDivAfterText) {
   const char kInput[] = "<div id=a></div>hello";
   const char kExpected[] = "hello<div id=a></div>";
   restore_nodes_filter_.MoveOnStart("a", "hello");
   RunTestsWithManyFlushWindows(kInput, kExpected);
 }

 // The Characters node "one" is deferred and restored after the Characters
 // node "two", which sits inside an unclosed <p>.
 TEST_F(HtmlRestoreTest, MoveTextfterText) {
   const char kInput[] = "one<p>two";
   const char kExpected[] = "<p>twoone";
   restore_nodes_filter_.MoveOnStart("one", "two");
   RunTestsWithManyFlushWindows(kInput, kExpected);
 }

 // Div 'a' is never closed and its restore point 'b' is nested inside it, so
 // 'a' is never restored: the output is empty and a warning is reported.
 TEST_F(HtmlRestoreTest, MoveStartWithEndNotVisibleAUnclosed) {
   const char kSkippedWarning[] = "*Removed node <div id=a> (unclosed)*";
   const char kInput[] = "<div id=a>1<div id=b>2</div>";

   expect_restored_ = false;
   message_handler_.AddPatternToSkipPrinting(kSkippedWarning);
   SetupWriter();
   restore_nodes_filter_.MoveOnStart("a", "b");
   RunTestsWithManyFlushWindows(kInput, "");
   EXPECT_LT(0, message_handler_.MessagesOfType(kWarning));
 }

 // The restore point 'b' never appears in the document, so the deferred div
 // is dropped from the output and a warning is reported.
 TEST_F(HtmlRestoreTest, MoveDivWithMissingDestination) {
   const char kSkippedWarning[] = "*Removed node <div id=a></div>*";
   const char kInput[] = "<div id=a>1</div>";

   expect_restored_ = false;
   message_handler_.AddPatternToSkipPrinting(kSkippedWarning);
   SetupWriter();
   restore_nodes_filter_.MoveOnStart("a", "b");
   RunTestsWithManyFlushWindows(kInput, "");
   EXPECT_LT(0, message_handler_.MessagesOfType(kWarning));
 }

 // A deferred Characters node whose restore point never appears is dropped
 // from the output, and a warning is reported.
 TEST_F(HtmlRestoreTest, MoveCharsWithMissingDestination) {
   const char kSkippedWarning[] =
       "*Removed node Characters text never replaced*";
   const char kInput[] = "text";

   expect_restored_ = false;
   message_handler_.AddPatternToSkipPrinting(kSkippedWarning);
   SetupWriter();
   restore_nodes_filter_.MoveOnStart("text", "no_such_destination");
   RunTestsWithManyFlushWindows(kInput, "");
   EXPECT_LT(0, message_handler_.MessagesOfType(kWarning));
 }

 // Deleting div 'a' on its start tag yields the same output no matter where
 // the flushes fall.
 TEST_F(HtmlRestoreTest, TwoDeleteAcrossFlush) {
   const char kInput[] = "1<div id=a></div>2";
   const char kExpected[] = "12";
   SetupWriter();
   restore_nodes_filter_.DeleteOnStart("a");
   RunTestsWithManyFlushWindows(kInput, kExpected);
 }

 // With restore-on-open enabled, div 'a' is restored when 'b' opens rather
 // than when it closes, so 'a' ends up as the first child of 'b'.
 TEST_F(HtmlRestoreTest, RestoreOnOpenTag) {
   const char kInput[] = "<div id=a>abc</div><div id=b>def</div>";
   const char kExpected[] = "<div id=b><div id=a>abc</div>def</div>";
   SetupWriter();
   restore_nodes_filter_.set_restore_on_open(true);
   restore_nodes_filter_.MoveOnStart("a", "b");
   RunTestsWithManyFlushWindows(kInput, kExpected);
 }

 // Exercises two filters in the chain that each defer nodes.  The
 // interesting case is when the second filter defers a node first and,
 // before that node has been restored, the other filter defers a different
 // node.
 TEST_F(HtmlRestoreTest, TwoDeferringFilters) {
   const char kInput[] = "<img id=a /><img id=b /><img id=c /><img id=d />";
   const char kExpected[] = "<img id=c /><img id=b /><img id=d /><img id=a />";
   TestTwoFilters(
       "b", "c",  // First filter: move 'b' after 'c'.
       "a", "d",  // Second filter: move 'a' after 'd'.
       NULL,      // Nothing is deleted.
       kInput, kExpected);
 }

 // As TwoDeferringFilters, but the first filter also deletes 'a', so 'a' is
 // absent from the output even though the second filter is set to move it.
 TEST_F(HtmlRestoreTest, TwoDeferringFiltersWithDelete) {
   const char kInput[] = "<img id=a /><img id=b /><img id=c /><img id=d />";
   const char kExpected[] = "<img id=c /><img id=b /><img id=d />";
   TestTwoFilters(
       "b", "c",  // First filter: move 'b' after 'c'.
       "a", "d",  // Second filter: move 'a' after 'd'.
       "a",       // First filter: delete 'a'.
       kInput, kExpected);
 }

 // The first filter moves the outer div 'a' after 'd'; the second filter
 // swaps the nested divs by moving 'b' after 'c'.
 TEST_F(HtmlRestoreTest, TwoDeferringFiltersNestingOuterFirst) {
   const char kInput[] =
       "<div id=a><div id=b></div><div id=c></div></div><div id=d></div>";
   const char kExpected[] =
       "<div id=d></div><div id=a><div id=c></div><div id=b></div></div>";
   TestTwoFilters(
       "a", "d",  // First filter: move 'a' after 'd'.
       "b", "c",  // Second filter: move 'b' after 'c'.
       NULL,      // Nothing is deleted.
       kInput, kExpected);
 }

 // As TwoDeferringFiltersNestingOuterFirst, but the first filter also
 // deletes the nested div 'b', which therefore never reappears.
 TEST_F(HtmlRestoreTest, TwoDeferringFiltersNestingOuterFirstWithDelete) {
   const char kInput[] =
       "<div id=a><div id=b></div><div id=c></div></div><div id=d></div>";
   const char kExpected[] =
       "<div id=d></div><div id=a><div id=c></div></div>";
   TestTwoFilters(
       "a", "d",  // First filter: move 'a' after 'd'.
       "b", "c",  // Second filter: move 'b' after 'c'.
       "b",       // First filter: delete 'b'.
       kInput, kExpected);
 }

 // The first filter swaps the nested divs by moving 'b' after 'c'; the
 // second filter moves the outer div 'a' after 'd'.
 TEST_F(HtmlRestoreTest, TwoDeferringFiltersNestingInnerFirst) {
   const char kInput[] =
       "<div id=a><div id=b></div><div id=c></div></div><div id=d></div>";
   const char kExpected[] =
       "<div id=d></div><div id=a><div id=c></div><div id=b></div></div>";
   TestTwoFilters(
       "b", "c",  // First filter: move 'b' after 'c'.
       "a", "d",  // Second filter: move 'a' after 'd'.
       NULL,      // Nothing is deleted.
       kInput, kExpected);
 }

 // As TwoDeferringFiltersNestingInnerFirst, but the first filter also
 // deletes the outer div 'a'; only div 'd' remains in the output.
 TEST_F(HtmlRestoreTest, TwoDeferringFiltersNestingInnerFirstWithDelete) {
   const char kInput[] =
       "<div id=a><div id=b></div><div id=c></div></div><div id=d></div>";
   const char kExpected[] = "<div id=d></div>";
   TestTwoFilters(
       "b", "c",  // First filter: move 'b' after 'c'.
       "a", "d",  // Second filter: move 'a' after 'd'.
       "a",       // First filter: delete 'a'.
       kInput, kExpected);
 }

 // Chains a deleting filter upstream of a deferring filter, both targeting
 // the same <span>, and checks the outcome with and without a mid-document
 // Flush.
 TEST_F(HtmlRestoreTest, DeferringAndDeletingFilters) {
   DeleteNodesFilter deleter(&html_parse_);
   RestoreNodesFilter deferrer(&html_parse_);
   html_parse_.AddFilter(&deleter);   // Upstream
   html_parse_.AddFilter(&deferrer);  // Downstream
   SetupWriter();

   // The fixture's own restore_nodes_filter_ is left unconfigured.
   deleter.set_delete_node_type(HtmlName::kSpan);
   deleter.set_delete_from_type(HtmlName::kDiv);
   deleter.set_delete_on_open_tag(true);
   deferrer.MoveOnStart("a", "d");

   const StringPiece kInput(
       "<span id=a><div id=b></div><div id=c></div></span><div id=d></div>");
   const char kExpectedNoFlush[] =
       "<div id=b></div><div id=c></div><div id=d></div>";
   ValidateExpected("defer_and_delete", kInput, kExpectedNoFlush);
   EXPECT_EQ(1, deleter.num_deleted_elements());

   // Re-parse with the same filters, flushing halfway through the input.
   output_buffer_.clear();
   const StringPiece first_half = kInput.substr(0, kInput.size() / 2);
   const StringPiece second_half = kInput.substr(kInput.size() / 2);
   html_parse_.StartParse("http://test.com/with_flush");
   html_parse_.ParseText(first_half);
   html_parse_.Flush();
   html_parse_.ParseText(second_half);
   html_parse_.FinishParse();

   // The flush prevents the 'span' from being deleted.  The span, together
   // with all of its contents, is nevertheless moved after 'd'.
   //
   // TODO(jmarantz): consider making DeleteSavingChildren work even if
   // the EndElement is not yet parsed, in which case we can switch to
   // using RunTestsWithManyFlushWindows and expect the same results
   // regardless of when the flush occurs.
   EXPECT_STREQ(
       "<div id=d></div><span id=a><div id=b></div><div id=c></div></span>",
       output_buffer_);
   EXPECT_EQ(0, deleter.num_deleted_elements());
 }

 // As DeferringAndDeletingFilters, but with an empty <span>, and additionally
 // checking the deleting filter's count of flushes that prevented a delete.
 TEST_F(HtmlRestoreTest, DeleteDeferredNode) {
   DeleteNodesFilter deleter(&html_parse_);
   RestoreNodesFilter deferrer(&html_parse_);
   html_parse_.AddFilter(&deleter);   // Upstream
   html_parse_.AddFilter(&deferrer);  // Downstream
   SetupWriter();

   // The fixture's own restore_nodes_filter_ is left unconfigured.
   deleter.set_delete_node_type(HtmlName::kSpan);
   deleter.set_delete_from_type(HtmlName::kDiv);
   deleter.set_delete_on_open_tag(true);
   deferrer.MoveOnStart("a", "d");

   const StringPiece kInput("<span id=a></span><div id=d></div>");
   const char kExpectedNoFlush[] = "<div id=d></div>";
   ValidateExpected("delete_deferred", kInput, kExpectedNoFlush);
   EXPECT_EQ(1, deleter.num_deleted_elements());
   EXPECT_EQ(0, deleter.flushes_preventing_delete());

   // Re-parse with the same filters, flushing halfway through the input.
   output_buffer_.clear();
   const StringPiece first_half = kInput.substr(0, kInput.size() / 2);
   const StringPiece second_half = kInput.substr(kInput.size() / 2);
   html_parse_.StartParse("http://test.com/with_flush");
   html_parse_.ParseText(first_half);
   html_parse_.Flush();
   html_parse_.ParseText(second_half);
   html_parse_.FinishParse();

   // The flush prevents the 'span' from being deleted.  The span is
   // nevertheless moved after 'd'.
   //
   // TODO(jmarantz): consider making DeleteSavingChildren work even if
   // the EndElement is not yet parsed, in which case we can switch to
   // using RunTestsWithManyFlushWindows and expect the same results
   // regardless of when the flush occurs.
   EXPECT_STREQ("<div id=d></div><span id=a></span>", output_buffer_);
   EXPECT_EQ(0, deleter.num_deleted_elements());
   EXPECT_EQ(1, deleter.flushes_preventing_delete());
 }

 // Restoring the Characters node "1" after the <img> places it next to "2".
 // Downstream filters may then see the two as one coalesced Characters node,
 // depending on the offset passed to ParseWithFlush.
 TEST_F(HtmlRestoreTest, CoalesceCharsAfterRestore) {
   restore_nodes_filter_.MoveOnStart("1", "a");
   SetupWriter();

   const StringPiece kInput("1<img id=a />2");
   const int input_size = kInput.size();
   int coalesced_runs = 0;
   int uncoalesced_runs = 0;
   for (int offset = 0; offset < input_size; ++offset) {
     ParseWithFlush(kInput, offset);
     EXPECT_STREQ("<img id=a />12", output_buffer_) << offset;

     // Upstream of the deferral there are two Characters nodes.
     EXPECT_EQ(2, pre_counts_filter_.num_char_elements()) << offset;

     // The deferring filter itself also sees two Characters nodes.
     EXPECT_EQ(2, restore_nodes_filter_.num_char_elements()) << offset;

     // Downstream of the restore, the Characters nodes may or may not have
     // been coalesced, depending on the flush window.
     const int downstream_chars = post_counts_filter_.num_char_elements();
     EXPECT_TRUE(downstream_chars == 1 || downstream_chars == 2) << offset;
     if (downstream_chars == 1) {
       ++coalesced_runs;
     } else {
       ++uncoalesced_runs;
     }
   }

   // Both outcomes must occur for at least one offset.
   EXPECT_LT(0, coalesced_runs);
   EXPECT_LT(0, uncoalesced_runs);
 }

 // Deferring the <img> that separates "1" from "2" leaves the two Characters
 // nodes adjacent.  Downstream filters may then see them as one coalesced
 // node, depending on the offset passed to ParseWithFlush.
 TEST_F(HtmlRestoreTest, CoalesceCharsOnDefer) {
   restore_nodes_filter_.MoveOnStart("a", "b");
   SetupWriter();

   const StringPiece kInput("1<img id=a />2<p id=b />");
   int num_times_chars_are_coalesced = 0;
   int num_times_chars_are_not_coalesced = 0;
   for (int i = 0, n = kInput.size(); i < n; ++i) {
     ParseWithFlush(kInput, i);
     // Expected value first and failing offset streamed, matching
     // CoalesceCharsAfterRestore, so a failure identifies the flush window.
     EXPECT_STREQ("12<p id=b /><img id=a />", output_buffer_) << i;

     // Before the deferral, we had two Characters nodes.
     EXPECT_EQ(2, pre_counts_filter_.num_char_elements()) << i;

     // The filter that does the deferring also sees two Characters nodes.
     EXPECT_EQ(2, restore_nodes_filter_.num_char_elements()) << i;

     // After the deferral, the Characters nodes may be coalesced,
     // depending on the flush window.
     EXPECT_TRUE((post_counts_filter_.num_char_elements() == 1) ||
                 (post_counts_filter_.num_char_elements() == 2)) << i;
     if (post_counts_filter_.num_char_elements() == 1) {
       ++num_times_chars_are_coalesced;
     } else {
       ++num_times_chars_are_not_coalesced;
     }
   }

   // Both outcomes must occur for at least one offset.
   EXPECT_LT(0, num_times_chars_are_coalesced);
   EXPECT_LT(0, num_times_chars_are_not_coalesced);
 }

 // This test just shows that the lexer will, in the absence of Defer or
 // Delete calls, coalesce Characters nodes across Flush.  It does this
 // by being lazy and not emitting literals until it sees some HTML syntax.
 TEST_F(HtmlRestoreTest, CoalesceCharsAcrossFlush) {
   SetupWriter();

   const StringPiece kInput("12");
   const int input_size = kInput.size();
   for (int offset = 0; offset < input_size; ++offset) {
     ParseWithFlush(kInput, offset);
     EXPECT_STREQ("12", output_buffer_) << offset;
     // Only a single Characters node is seen, whatever the offset.
     EXPECT_EQ(1, pre_counts_filter_.num_char_elements()) << offset;
   }
 }

 // Test filter that inserts a script named "inserted" next to the <head>
 // element.  The setters choose whether the insertion is triggered by the
 // start or the end of <head>, whether the script goes before or after the
 // current event, and whether it is external or inline.
 class InsertScriptsFilter : public EmptyHtmlFilter {
  public:
   explicit InsertScriptsFilter(HtmlParse* parse)
       : html_parse_(parse),
         at_start_(false),
         before_(false),
         external_(false) {
   }

   // Insert before (true) or after (false) the current event.
   void set_insert_before(bool before) { before_ = before; }
   // Trigger on <head>'s StartElement (true) or EndElement (false).
   void set_at_start(bool at_start) { at_start_ = at_start; }
   // Passed through as the second argument of the InsertScript* calls.
   void set_external(bool external) { external_ = external; }

  protected:
   virtual void StartElement(HtmlElement* element) { Insert(true, element); }
   virtual void EndElement(HtmlElement* element) { Insert(false, element); }
   virtual const char* Name() const { return "InsertScriptsFilter"; }

  private:
   // Inserts the script if 'element' is <head> and this event (start vs. end,
   // as given by 'at_start') is the one the filter was configured for.
   void Insert(bool at_start, HtmlElement* element) {
     const bool is_head = (element->keyword() == HtmlName::kHead);
     if (!is_head || at_start != at_start_) {
       return;
     }
     if (before_) {
       html_parse_->InsertScriptBeforeCurrent("inserted", external_);
     } else {
       html_parse_->InsertScriptAfterCurrent("inserted", external_);
     }
   }

   HtmlParse* html_parse_;
   bool at_start_;
   bool before_;
   bool external_;

   DISALLOW_COPY_AND_ASSIGN(InsertScriptsFilter);
 };

 // Inline script inserted after the <head> start tag becomes the first child
 // of <head>.
 TEST_F(HtmlParseTestNoBody, InsertInlineScriptAfterStartOfHead) {
   const char kInput[] = "<head>text</head>";
   const char kExpected[] = "<head><script>inserted</script>text</head>";

   InsertScriptsFilter inserter(&html_parse_);
   inserter.set_external(false);
   inserter.set_at_start(true);
   inserter.set_insert_before(false);
   html_parse_.AddFilter(&inserter);
   SetupWriter();
   ValidateExpected("1", kInput, kExpected);
 }

 // Inline script inserted before the </head> end tag becomes the last child
 // of <head>.
 TEST_F(HtmlParseTestNoBody, InsertInlineScriptBeforeEndOfHead) {
   const char kInput[] = "<head>text</head>";
   const char kExpected[] = "<head>text<script>inserted</script></head>";

   InsertScriptsFilter inserter(&html_parse_);
   inserter.set_external(false);
   inserter.set_at_start(false);
   inserter.set_insert_before(true);
   html_parse_.AddFilter(&inserter);
   SetupWriter();
   ValidateExpected("1", kInput, kExpected);
 }

 // Inline script inserted before the <head> start tag precedes <head>.
 TEST_F(HtmlParseTestNoBody, InsertInlineScriptBeforeStartOfHead) {
   const char kInput[] = "<head>text</head>";
   const char kExpected[] = "<script>inserted</script><head>text</head>";

   InsertScriptsFilter inserter(&html_parse_);
   inserter.set_external(false);
   inserter.set_at_start(true);
   inserter.set_insert_before(true);
   html_parse_.AddFilter(&inserter);
   SetupWriter();
   ValidateExpected("1", kInput, kExpected);
 }

 // Inline script inserted after the </head> end tag follows <head>.
 TEST_F(HtmlParseTestNoBody, InsertInlineScriptAfterEndOfHead) {
   const char kInput[] = "<head>text</head>";
   const char kExpected[] = "<head>text</head><script>inserted</script>";

   InsertScriptsFilter inserter(&html_parse_);
   inserter.set_external(false);
   inserter.set_at_start(false);
   inserter.set_insert_before(false);
   html_parse_.AddFilter(&inserter);
   SetupWriter();
   ValidateExpected("1", kInput, kExpected);
 }

 // External script inserted after the <head> start tag becomes the first
 // child of <head>, with "inserted" as its src.
 TEST_F(HtmlParseTestNoBody, InsertExternalScriptAfterStartOfHead) {
   const char kInput[] = "<head>text</head>";
   const char kExpected[] =
       "<head><script src=\"inserted\"></script>text</head>";

   InsertScriptsFilter inserter(&html_parse_);
   inserter.set_external(true);
   inserter.set_at_start(true);
   inserter.set_insert_before(false);
   html_parse_.AddFilter(&inserter);
   SetupWriter();
   ValidateExpected("1", kInput, kExpected);
 }

 // External script inserted before the </head> end tag becomes the last
 // child of <head>, with "inserted" as its src.
 TEST_F(HtmlParseTestNoBody, InsertExternalScriptBeforeEndOfHead) {
   const char kInput[] = "<head>text</head>";
   const char kExpected[] =
       "<head>text<script src=\"inserted\"></script></head>";

   InsertScriptsFilter inserter(&html_parse_);
   inserter.set_external(true);
   inserter.set_at_start(false);
   inserter.set_insert_before(true);
   html_parse_.AddFilter(&inserter);
   SetupWriter();
   ValidateExpected("1", kInput, kExpected);
 }

 // External script inserted before the <head> start tag precedes <head>.
 TEST_F(HtmlParseTestNoBody, InsertExternalScriptBeforeStartOfHead) {
   const char kInput[] = "<head>text</head>";
   const char kExpected[] =
       "<script src=\"inserted\"></script><head>text</head>";

   InsertScriptsFilter inserter(&html_parse_);
   inserter.set_external(true);
   inserter.set_at_start(true);
   inserter.set_insert_before(true);
   html_parse_.AddFilter(&inserter);
   SetupWriter();
   ValidateExpected("1", kInput, kExpected);
 }

 // External script inserted after the </head> end tag follows <head>.
 TEST_F(HtmlParseTestNoBody, InsertExternalScriptAfterEndOfHead) {
   const char kInput[] = "<head>text</head>";
   const char kExpected[] =
       "<head>text</head><script src=\"inserted\"></script>";

   InsertScriptsFilter inserter(&html_parse_);
   inserter.set_external(true);
   inserter.set_at_start(false);
   inserter.set_insert_before(false);
   html_parse_.AddFilter(&inserter);
   SetupWriter();
   ValidateExpected("1", kInput, kExpected);
 }

 }  // namespace net_instaweb