| /* |
| * Copyright 2010 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Author: jmarantz@google.com (Joshua Marantz) |
| |
| // Unit-test the html reader/writer to ensure that a few tricky |
| // constructs come through without corruption. |
| |
| #include <vector> |
| |
| #include "pagespeed/kernel/base/basictypes.h" |
| #include "pagespeed/kernel/base/gtest.h" |
| #include "pagespeed/kernel/base/gmock.h" |
| #include "pagespeed/kernel/base/message_handler.h" |
| #include "pagespeed/kernel/base/mock_message_handler.h" |
| #include "pagespeed/kernel/base/scoped_ptr.h" |
| #include "pagespeed/kernel/base/string.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/base/string_writer.h" |
| #include "pagespeed/kernel/html/disable_test_filter.h" |
| #include "pagespeed/kernel/html/empty_html_filter.h" |
| #include "pagespeed/kernel/html/explicit_close_tag.h" |
| #include "pagespeed/kernel/html/html_element.h" |
| #include "pagespeed/kernel/html/html_event.h" |
| #include "pagespeed/kernel/html/html_filter.h" |
| #include "pagespeed/kernel/html/html_name.h" |
| #include "pagespeed/kernel/html/html_node.h" |
| #include "pagespeed/kernel/html/html_parse.h" |
| #include "pagespeed/kernel/html/html_parse_test_base.h" |
| #include "pagespeed/kernel/html/html_testing_peer.h" |
| #include "pagespeed/kernel/html/html_writer_filter.h" |
| |
| using testing::UnorderedElementsAre; |
| |
| namespace net_instaweb { |
| |
| class HtmlParseTest : public HtmlParseTestBase { |
| protected: |
| // Returns the contents wrapped in a Div. |
| GoogleString Div(const StringPiece& text) { |
| return StrCat("<div>", text, "</div>"); |
| } |
| |
| // For tag-pairs that auto-close, we expect the appearance |
| // of tag2 to automatically close tag1. |
| void ExpectAutoClose(const char* tag1, const char* tag2) { |
| GoogleString test_case = StrCat("auto_close_", tag1, "_", tag2); |
| ValidateExpected( |
| test_case, |
| Div(StrCat("<", tag1, ">x<", tag2, ">y")), |
| Div(StrCat("<", tag1, ">x</", tag1, "><", |
| StrCat(tag2, ">y</", tag2, ">")))); |
| } |
| |
| // For 2 tags that do not have a specified auto-close relationship, |
| // we expect the appearance of tag2 to nest inside tag1. |
| void ExpectNoAutoClose(const char* tag1, const char* tag2) { |
| GoogleString test_case = StrCat("no_auto_close_", tag1, "_", tag2); |
| ValidateExpected( |
| test_case, |
| Div(StrCat("<", tag1, ">x<", tag2, ">y")), |
| Div(StrCat("<", tag1, ">x<", tag2, ">y</", |
| StrCat(tag2, "></", tag1, ">")))); |
| } |
| |
| virtual bool AddBody() const { return true; } |
| |
| // Sends the input through the HtmlParse filter chain, flushing |
| // at flush_index. Leaves resulting output in output_buffer_. |
| void ParseWithFlush(StringPiece input, int flush_index) { |
| GoogleString this_id = StringPrintf("http://test.com/%d", flush_index); |
| output_buffer_.clear(); |
| html_parse_.StartParse(this_id); |
| html_parse_.ParseText(input.substr(0, flush_index)); |
| html_parse_.Flush(); |
| html_parse_.ParseText(input.substr(flush_index)); |
| html_parse_.FinishParse(); |
| } |
| }; |
| |
| class HtmlParseTestNoBody : public HtmlParseTestBase { |
| virtual bool AddBody() const { return false; } |
| }; |
| |
| TEST_F(HtmlParseTest, AvoidFalseXmlComment) { |
| ValidateNoChanges("avoid_false_xml_comment", |
| "<script type=\"text/javascript\">\n" |
| "// <!-- this looks like a comment but is not\n" |
| "</script>"); |
| } |
| |
| TEST_F(HtmlParseTest, RetainBogusEndTag) { |
| ValidateNoChanges("bogus_end_tag", |
| "<script language=\"JavaScript\" type=\"text/javascript\">\n" |
| "<!--\n" |
| "var s = \"</retain_bogus_end_tag>\";\n" |
| "// -->\n" |
| "</script>"); |
| } |
| |
| TEST_F(HtmlParseTest, AmpersandInHref) { |
| // Note that we will escape the "&" in the href. |
| ValidateNoChanges("ampersand_in_href", |
| "<a href=\"http://myhost.com/path?arg1=val1&arg2=val2\">Hello</a>"); |
| } |
| |
| TEST_F(HtmlParseTest, BooleanSpaceCloseInTag) { |
| ValidateExpected("bool_space_close", "<a b >foo</a>", "<a b>foo</a>"); |
| ValidateNoChanges("bool_close", "<a b>foo</a>"); |
| ValidateExpected("space_close_sq", "<a b='c' >foo</a>", "<a b='c'>foo</a>"); |
| ValidateExpected("space_close_dq", |
| "<a b=\"c\" >foo</a>", "<a b=\"c\">foo</a>"); |
| ValidateExpected("space_close_nq", "<a b=c >foo</a>", "<a b=c>foo</a>"); |
| // Distilled from http://www.gougou.com/ |
| // Unclear exactly what we should do here, maybe leave it as it was without |
| // the space? |
| ValidateExpected("allow_semicolon", |
| "<a onclick='return m(this)'; >foo</a>", |
| "<a onclick='return m(this)' ;>foo</a>"); |
| } |
| |
| TEST_F(HtmlParseTest, EmbeddedNuls) { |
| const char kHtml[] = "<script att\0r></script>"; |
| // Note: STATIC_STRLEN won't stop at embedded null. |
| ValidateNoChanges("inner_mess", GoogleString(kHtml, STATIC_STRLEN(kHtml))); |
| |
| const char kHtml2[] = "<script\0y></script>"; |
| // Note: STATIC_STRLEN won't stop at embedded null. |
| ValidateNoChanges("inner_mess2", |
| GoogleString(kHtml2, STATIC_STRLEN(kHtml2))); |
| } |
| |
| class AttrValuesSaverFilter : public EmptyHtmlFilter { |
| public: |
| AttrValuesSaverFilter() { } |
| |
| virtual void StartElement(HtmlElement* element) { |
| const HtmlElement::AttributeList& attrs = element->attributes(); |
| for (HtmlElement::AttributeConstIterator i(attrs.begin()); |
| i != attrs.end(); ++i) { |
| const char* value = i->DecodedValueOrNull(); |
| if (i->decoding_error()) { |
| value_ += "<ERROR>"; |
| } else if (value == NULL) { |
| value_ += "(null)"; |
| } else { |
| value_ += value; |
| } |
| } |
| } |
| |
| const GoogleString& value() { return value_; } |
| virtual const char* Name() const { return "attr_saver"; } |
| |
| private: |
| GoogleString value_; |
| |
| DISALLOW_COPY_AND_ASSIGN(AttrValuesSaverFilter); |
| }; |
| |
| TEST_F(HtmlParseTest, EscapedSingleQuote) { |
| AttrValuesSaverFilter attr_saver; |
| html_parse_.AddFilter(&attr_saver); |
| Parse("escaped_single_quote", |
| "<img src='my'single_quoted_image.jpg'/>"); |
| EXPECT_EQ("my'single_quoted_image.jpg", attr_saver.value()); |
| } |
| |
| TEST_F(HtmlParseTest, AttrDecodeError) { |
| AttrValuesSaverFilter attr_saver; |
| html_parse_.AddFilter(&attr_saver); |
| Parse("attr_not_decodable", "<img src='muñecos'/>"); |
| EXPECT_EQ("<ERROR>", attr_saver.value()); |
| } |
| |
| TEST_F(HtmlParseTest, UnclosedQuote) { |
| // In this test, the system automatically closes the 'a' tag, which |
| // didn't really get closed in the input text. The exact syntax |
| // of the expected results not critical, as long as the parser recovers |
| // and does not crash. |
| // |
| // TODO(jmarantz): test error reporting. |
| ValidateNoChanges("unclosed_quote", |
| "<div>\n" |
| " <a href=\"http://myhost.com/path?arg1=val1&arg2=val2>Hello</a>\n" |
| "</div>\n" |
| "<p>next token</p>" |
| "</body></html>\n" |
| "\"></a></div>"); |
| } |
| |
| TEST_F(HtmlParseTest, NestedDivInBr) { |
| ValidateNoChanges("nested_div_in_br", |
| "<br><div>hello</div></br>"); |
| } |
| |
| // bug 2465145 - Sequential defaulted attribute tags lost |
| TEST_F(HtmlParseTest, SequentialDefaultedTagsLost) { |
| // This test cannot work with libxml, but since we use our own |
| // parser we can make it work. See |
| // https://bugzilla.gnome.org/show_bug.cgi?id=611655 |
| ValidateNoChanges("sequential_defaulted_attribute_tags_lost", |
| "<select>\n" |
| " <option value=\"&cat=244\">Other option</option>\n" |
| " <option value selected style=\"color: #ccc;\">Default option" |
| "</option>\n" |
| "</select>"); |
| |
| // Illegal attribute "http://www.yahoo.com", per HTML5, is two attributes: |
| // http: and "yahoo.com", with the slashes going into the ether. |
| // (This is also how Chrome and Firefox parse it.) |
| ValidateExpected( |
| "yahoo", |
| "<a href=\"#\" http://www.yahoo.com class=\"a b\">yahoo</a>", |
| "<a href=\"#\" http: www.yahoo.com class=\"a b\">yahoo</a>"); |
| |
| // Here's another interesting thing from the bug testcase. |
| // Specifying a literal "&" without a recognized sequence |
| // following it gets parsed correctly by libxml2, and then |
| // re-encoded by our writer as &. That's fine; let's |
| // make sure that doesn't change. |
| ValidateNoChanges("amp_cat", |
| "<option value=\"&cat=244\">other</option>"); |
| } |
| |
| // bug 2465201 : some html constructs do not need ';' termination. |
| // Fixed by providing own lexer. |
| TEST_F(HtmlParseTest, UnterminatedTokens) { |
| // the termination semicolons should be added in the output. |
| ValidateNoChanges("unterminated_tokens", |
| "<p>Look at the non breaking space: \" \"</p>"); |
| } |
| |
| // bug 2467040 : keep ampersands and quotes encoded |
| TEST_F(HtmlParseTest, EncodeAmpersandsAndQuotes) { |
| ValidateNoChanges("ampersands_in_text", |
| "<p>This should be a string '&amp;' not a single ampersand.</p>"); |
| ValidateNoChanges("ampersands_in_values", |
| "<img alt=\"This should be a string '&amp;' " |
| "not a single ampersand.\"/>"); |
| ValidateNoChanges("quotes", |
| "<p>Clicking <a href=\"javascript: alert("Alert works!");\">" |
| "here</a> should pop up an alert box.</p>"); |
| } |
| |
| // bug 2508334 : encoding unicode in general |
| TEST_F(HtmlParseTest, EncodeUnicode) { |
| ValidateNoChanges("unicode_in_text", |
| "<p>Non-breaking space: ' '</p>\n" |
| "<p>Alpha: 'α'</p>\n" |
| "<p>Unicode #54321: '퐱'</p>\n"); |
| } |
| |
| TEST_F(HtmlParseTest, ImplicitExplicitClose) { |
| // The lexer/printer preserves the input syntax, making it easier |
| // to diff inputs & outputs. |
| // |
| // TODO(jmarantz): But we can have a rewrite pass that eliminates |
| // the superfluous "/>". |
| ValidateNoChanges("one_brief_one_implicit_input", |
| "<input type=\"text\" name=\"username\">" |
| "<input type=\"password\" name=\"password\"/>"); |
| } |
| |
| TEST_F(HtmlParseTest, OpenBracketAfterQuote) { |
| // Note: even though it looks like two input elements, in practice |
| // it's parsed as one. |
| const char input[] = |
| "<input type=\"text\" name=\"username\"" |
| "<input type=\"password\" name=\"password\"/>"; |
| const char expected[] = |
| "<input type=\"text\" name=\"username\"" |
| " <input type=\"password\" name=\"password\"/>"; |
| // Extra space 'between' attributes' |
| ValidateExpected("open_bracket_after_quote", input, expected); |
| } |
| |
| TEST_F(HtmlParseTest, OpenBracketUnquoted) { |
| // '<' after unquoted attr value. |
| // This is just a malformed attribute name, not a start of a new tag. |
| const char input[] = |
| "<input type=\"text\" name=username" |
| "<input type=\"password\" name=\"password\"/>"; |
| ValidateNoChanges("open_bracket_unquoted", input); |
| } |
| |
| TEST_F(HtmlParseTest, OpenBracketAfterEquals) { |
| // '<' after equals sign. This is actually an attribute value, |
| // not a start of a new tag. |
| const char input[] = |
| "<input type=\"text\" name=" |
| "<input type=\"password\" name=\"password\"/>"; |
| ValidateNoChanges("open_brack_after_equals", input); |
| } |
| |
| TEST_F(HtmlParseTest, OpenBracketAfterName) { |
| // '<' after after attr name. |
| const char input[] = |
| "<input type=\"text\" name" |
| "<input type=\"password\" name=\"password\"/>"; |
| ValidateNoChanges("open_brack_after_name", input); |
| } |
| |
| class HtmlParseTestNoBodyNoHtml : public HtmlParseTestNoBody { |
| protected: |
| virtual bool AddHtmlTags() const { return false; } |
| |
| void CheckOutput(int start_index, int end_index, |
| const GoogleString& input, |
| const GoogleString& expected_output) { |
| for (int i = start_index; i < end_index; ++i) { |
| SetupWriter(); |
| html_parse()->set_size_limit(i); |
| html_parse()->StartParse("http://test.com/in.html"); |
| // Flush after every character. |
| for (int j = 0; j < input.size(); ++j) { |
| GoogleString x; |
| x.push_back(input[j]); |
| html_parse()->ParseText(StringPiece(x)); |
| html_parse()->Flush(); |
| } |
| html_parse()->FinishParse(); |
| EXPECT_EQ(expected_output, output_buffer_); |
| } |
| } |
| }; |
| |
| TEST_F(HtmlParseTestNoBodyNoHtml, SizeLimit) { |
| static const char input[] = |
| "<html>" // 6 chars |
| "<input type=\"text\"/>" // 20 chars |
| "<script type=\"text/javascript\">alert('123');</script>" // 53 chars |
| "<!--[if IE]>...<![endif]-->" // 27 chars |
| "<table><tr><td>blah</td></tr></table>" // 37 chars |
| "</html>"; // 7 chars |
| ValidateNoChanges("no_limit", input); |
| |
| static const char output_when_break_in_html[] = |
| "<html></html>"; |
| |
| for (int i = 1; i < 150; ++i) { |
| // With no flushes, the output is just <html></html> |
| html_parse_.set_size_limit(i); |
| ValidateExpected("break_in_input", input, |
| output_when_break_in_html); |
| } |
| |
| // Now test with flushes injected. |
| |
| CheckOutput(1, 6, input, output_when_break_in_html); |
| |
| static const char output_when_break_in_input[] = |
| "<html><input type=\"text\"/></html>"; |
| CheckOutput(6, 26, input, output_when_break_in_input); |
| |
| static const char output_with_break_in_script_tag[] = |
| "<html><input type=\"text\"/>" |
| "<script type=\"text/javascript\"></script>" |
| "</html>"; |
| CheckOutput(26, 57, input, output_with_break_in_script_tag); |
| |
| static const char output_with_break_in_script_text_or_later[] = |
| "<html><input type=\"text\"/>" |
| "<script type=\"text/javascript\">alert('123');</script>" |
| "</html>"; |
| CheckOutput(57, 79, input, output_with_break_in_script_text_or_later); |
| |
| static const char output_with_break_in_comment[] = |
| "<html><input type=\"text\"/>" |
| "<script type=\"text/javascript\">alert('123');</script>" |
| "<!--[if IE]>...<![endif]-->" |
| "<table></table>" |
| "</html>"; |
| CheckOutput(79, 113, input, output_with_break_in_comment); |
| |
| static const char output_with_break_in_tr[] = |
| "<html><input type=\"text\"/>" |
| "<script type=\"text/javascript\">alert('123');</script>" |
| "<!--[if IE]>...<![endif]-->" |
| "<table><tr></tr></table>" |
| "</html>"; |
| CheckOutput(113, 117, input, output_with_break_in_tr); |
| |
| static const char output_with_break_in_td[] = |
| "<html><input type=\"text\"/>" |
| "<script type=\"text/javascript\">alert('123');</script>" |
| "<!--[if IE]>...<![endif]-->" |
| "<table><tr><td></td></tr></table>" |
| "</html>"; |
| CheckOutput(117, 121, input, output_with_break_in_td); |
| |
| static const char output_with_break_in_td_text[] = |
| "<html><input type=\"text\"/>" |
| "<script type=\"text/javascript\">alert('123');</script>" |
| "<!--[if IE]>...<![endif]-->" |
| "<table><tr><td>blah</td></tr></table>" |
| "</html>"; |
| CheckOutput(121, 160, input, output_with_break_in_td_text); |
| } |
| |
| TEST_F(HtmlParseTest, OpenBracketAfterSpace) { |
| // '<' after after unquoted attr value. Here name<input is an attribute |
| // name. |
| const char input[] = |
| "<input type=\"text\" " |
| "<input type=\"password\" name=\"password\"/>"; |
| ValidateNoChanges("open_brack_after_name", input); |
| } |
| |
| TEST_F(HtmlParseTest, AutoClose) { |
| ExplicitCloseTag close_tags; |
| html_parse_.AddFilter(&close_tags); |
| |
| // Cover the simple cases. E.g. dd is closed by tr, but not dd. |
| ExpectNoAutoClose("dd", "tr"); |
| ExpectAutoClose("dd", "dd"); |
| |
| ExpectAutoClose("dt", "dd"); |
| ExpectAutoClose("dt", "dt"); |
| ExpectNoAutoClose("dt", "rp"); |
| |
| ExpectAutoClose("li", "li"); |
| ExpectNoAutoClose("li", "dt"); |
| |
| ExpectAutoClose("optgroup", "optgroup"); |
| ExpectNoAutoClose("optgroup", "rp"); |
| |
| ExpectAutoClose("option", "optgroup"); |
| ExpectAutoClose("option", "option"); |
| ExpectNoAutoClose("option", "rp"); |
| |
| // <p> has an outrageous number of tags that auto-close it. |
| ExpectNoAutoClose("p", "tr"); // tr is not listed in the auto-closers for p. |
| ExpectAutoClose("p", "address"); // first closer of 28. |
| ExpectAutoClose("p", "h2"); // middle closer of 28. |
| ExpectAutoClose("p", "ul"); // last closer of 28. |
| |
| // Cover the remainder of the cases. |
| ExpectAutoClose("rp", "rt"); |
| ExpectAutoClose("rp", "rp"); |
| ExpectNoAutoClose("rp", "dd"); |
| |
| ExpectAutoClose("rt", "rt"); |
| ExpectAutoClose("rt", "rp"); |
| ExpectNoAutoClose("rt", "dd"); |
| |
| ExpectAutoClose("tbody", "tbody"); |
| ExpectAutoClose("tbody", "tfoot"); |
| ExpectNoAutoClose("tbody", "dd"); |
| |
| ExpectAutoClose("td", "td"); |
| ExpectAutoClose("td", "th"); |
| ExpectNoAutoClose("td", "rt"); |
| |
| ExpectAutoClose("tfoot", "tbody"); |
| ExpectNoAutoClose("tfoot", "dd"); |
| |
| ExpectAutoClose("th", "td"); |
| ExpectAutoClose("th", "th"); |
| ExpectNoAutoClose("th", "rt"); |
| |
| ExpectAutoClose("thead", "tbody"); |
| ExpectAutoClose("thead", "tfoot"); |
| ExpectNoAutoClose("thead", "dd"); |
| |
| ExpectAutoClose("tr", "tr"); |
| ExpectNoAutoClose("tr", "td"); |
| |
| // http://www.w3.org/TR/html5/the-end.html#misnested-tags:-b-i-b-i |
| |
| |
| // TODO(jmarantz): add more tests related to formatting keywords. |
| } |
| |
| TEST_F(HtmlParseTest, BogusComment) { |
| ValidateNoChanges("what_php", |
| "<?php include('includes/_pagebottom.tpl.php'); ?>"); |
| |
| ValidateNoChanges("bad break", "</\na>"); |
| } |
| |
| namespace { |
| |
| class AnnotatingHtmlFilter : public EmptyHtmlFilter { |
| public: |
| AnnotatingHtmlFilter() : annotate_flush_(false) {} |
| virtual ~AnnotatingHtmlFilter() {} |
| |
| virtual void StartElement(HtmlElement* element) { |
| StrAppend(&buffer_, (buffer_.empty() ? "+" : " +"), element->name_str()); |
| |
| bool first = true; |
| const HtmlElement::AttributeList& attrs = element->attributes(); |
| for (HtmlElement::AttributeConstIterator i(attrs.begin()); |
| i != attrs.end(); ++i) { |
| const HtmlElement::Attribute& attr = *i; |
| StrAppend(&buffer_, (first ? ":" : ","), attr.name_str()); |
| const char* value = attr.DecodedValueOrNull(); |
| if (attr.decoding_error()) { |
| StrAppend(&buffer_, "=<ERROR>"); |
| } else if (value != NULL) { |
| StrAppend(&buffer_, "=", attr.quote_str(), value, attr.quote_str()); |
| } |
| first = false; |
| } |
| } |
| virtual void EndElement(HtmlElement* element) { |
| StrAppend(&buffer_, " -", element->name_str()); |
| switch (element->style()) { |
| case HtmlElement::AUTO_CLOSE: buffer_ += "(a)"; break; |
| case HtmlElement::IMPLICIT_CLOSE: buffer_ += "(i)"; break; |
| case HtmlElement::EXPLICIT_CLOSE: buffer_ += "(e)"; break; |
| case HtmlElement::BRIEF_CLOSE: buffer_ += "(b)"; break; |
| case HtmlElement::UNCLOSED: buffer_ += "(u)"; break; |
| case HtmlElement::INVISIBLE: buffer_ += "(I)"; break; |
| } |
| } |
| virtual void Characters(HtmlCharactersNode* characters) { |
| StrAppend(&buffer_, (buffer_.empty() ? "'" : " '"), characters->contents(), |
| "'"); |
| } |
| |
| virtual const char* Name() const { return "AnnotatingHtmlFilter"; } |
| |
| const GoogleString& buffer() const { return buffer_; } |
| void Clear() { buffer_.clear(); } |
| |
| virtual void Flush() { |
| if (annotate_flush_) { |
| buffer_ += "[F]"; |
| } |
| } |
| |
| void set_annotate_flush(bool x) { annotate_flush_ = x; } |
| |
| private: |
| bool annotate_flush_; |
| GoogleString buffer_; |
| }; |
| |
| } // namespace |
| |
| class HtmlAnnotationTest : public HtmlParseTestNoBody { |
| protected: |
| virtual void SetUp() { |
| HtmlParseTestNoBody::SetUp(); |
| html_parse_.AddFilter(&annotation_); |
| } |
| |
| const GoogleString& annotation() { return annotation_.buffer(); } |
| void ResetAnnotation() { annotation_.Clear(); } |
| virtual bool AddHtmlTags() const { return false; } |
| |
| protected: |
| AnnotatingHtmlFilter annotation_; |
| }; |
| |
| TEST_F(HtmlAnnotationTest, CorrectTaggify) { |
| // Under HTML5 rules (and recent Chrome and FF practice), something like |
| // <foo</bar> makes an element named foo<, with attribute named bar. |
| // (See 12.2.4.10 Tag name state). |
| // |
| // However, we have to be careful not to turn just anything following < |
| // into an element name, since sometimes there are <'s which are |
| // meant to just be less than signs. |
| // |
| ValidateNoChanges("no_taggify_digit", "<p>1<2</p>"); |
| EXPECT_EQ("+p '1<2' -p(e)", annotation()); |
| ResetAnnotation(); |
| |
| ValidateNoChanges("no_taggify_unicode", "<p>☃<☕</p>"); |
| EXPECT_EQ("+p '☃<☕' -p(e)", annotation()); |
| ResetAnnotation(); |
| |
| ValidateExpected("letter", |
| "<p>x<y</p>", "<p>x<y< p>"); // lost the / since 'p' is attr. |
| EXPECT_EQ("+p 'x' +y<:p -y<(u) -p(u)", annotation()); |
| ResetAnnotation(); |
| |
| ValidateExpected("taggify_letter+digit", |
| "<p>x1<y2</p>", "<p>x1<y2< p>"); |
| EXPECT_EQ("+p 'x1' +y2<:p -y2<(u) -p(u)", annotation()); |
| ResetAnnotation(); |
| |
| ValidateExpected("taggify_letter+unicode", "<p>x☃<y☕</p>", |
| "<p>x☃<y☕< p>"); // no / since p is attr on a y☕< element. |
| EXPECT_EQ("+p 'x☃' +y☕<:p -y☕<(u) -p(u)", annotation()); |
| ResetAnnotation(); |
| |
| ValidateNoChanges("no_taggify_digit+letter", "<p>1x<2y</p>"); |
| EXPECT_EQ("+p '1x<2y' -p(e)", annotation()); |
| ResetAnnotation(); |
| |
| ValidateNoChanges("no_taggify_unicode+letter", "<p>☃x<☕y</p>"); |
| EXPECT_EQ("+p '☃x<☕y' -p(e)", annotation()); |
| ResetAnnotation(); |
| |
| // Found on http://www.taobao.com/ |
| // Don't turn <1... -> <1...> |
| ValidateNoChanges("taobao", "<a>1+1<1母婴全场加1元超值购</a>"); |
| EXPECT_EQ("+a '1+1<1母婴全场加1元超值购' -a(e)", annotation()); |
| ResetAnnotation(); |
| } |
| |
| TEST_F(HtmlAnnotationTest, WeirdAttributes) { |
| // Just about everything can be an attribute |
| ValidateNoChanges("weird_attr", "<a ,=\"foo\">"); |
| EXPECT_EQ("+a:,=\"foo\" -a(u)", annotation()); |
| ResetAnnotation(); |
| |
| // ... even an equal sign |
| ValidateNoChanges("weird_attr_equal", "<a ==\"foo\">"); |
| EXPECT_EQ("+a:==\"foo\" -a(u)", annotation()); |
| ResetAnnotation(); |
| } |
| |
| TEST_F(HtmlAnnotationTest, WeirdCloseCase) { |
| // </> is nothing useful, but we preserve it as a literal. |
| ValidateNoChanges("close_nothing", "</><foo>"); |
| EXPECT_EQ("'</>' +foo -foo(u)", annotation()); |
| ResetAnnotation(); |
| |
| // <foo / > isn't an attempt at self-close, it just has a stray / |
| // we can't represent. |
| ValidateExpected("not_self_close", "<foo / >", "<foo>"); |
| EXPECT_EQ("+foo -foo(u)", annotation()); |
| ResetAnnotation(); |
| |
| // <foo /> is a self-close. |
| ValidateExpected("self_close", "<foo />", "<foo/>"); |
| EXPECT_EQ("+foo -foo(b)", annotation()); |
| ResetAnnotation(); |
| } |
| |
| TEST_F(HtmlAnnotationTest, UnbalancedMarkup) { |
| // The second 'tr' closes the first one, and our HtmlWriter will not |
| // implicitly close 'tr' because IsImplicitlyClosedTag is false, so |
| // the markup is changed to add the missing tr. |
| ValidateNoChanges("unbalanced_markup", |
| "<font><tr><i><font></i></font><tr></font>"); |
| |
| // We use this (hopefully) self-explanatory annotation format to indicate |
| // what's going on in the parse. |
| EXPECT_EQ("+font -font(a) +tr +i +font -font(u) -i(e) '</font>' -tr(a) +tr " |
| "'</font>' -tr(u)", |
| annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, StrayCloseTr) { |
| ValidateNoChanges("stray_tr", |
| "<table><tr><table></tr></table></tr></table>"); |
| |
| // We use this (hopefully) self-explanatory annotation format to indicate |
| // what's going on in the parse. |
| EXPECT_EQ("+table +tr +table '</tr>' -table(e) -tr(e) -table(e)", |
| annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, IClosedByOpenTr) { |
| ValidateNoChanges("unclosed_i_tag", "<tr><i>a<tr>b"); |
| EXPECT_EQ("+tr +i 'a' -i(a) -tr(a) +tr 'b' -tr(u)", annotation()); |
| |
| // TODO(jmarantz): morlovich points out that this is nowhere near |
| // how a browser will handle this stuff... For a nighmarish testcase, try: |
| // data:text/html,<table><tr><td><i>a<tr>b |
| // |
| // The 'a' gets rendered in italics *after* the b. |
| // |
| // See also: |
| // http://www.whatwg.org/specs/web-apps/current-work/multipage/ |
| // the-end.html#unexpected-markup-in-tables |
| // |
| // But note that these 2 are the same and do what I expect: |
| // |
| // data:text/html,<table><tr><td><i>a</td></tr></table>b |
| // data:text/html,<table><tr><td><i>a</table>b |
| // |
| // the 'a' is italicized but the 'b' is not. If I omit the 'td' |
| // then the 'b' gets italicized. This implies I suppose that 'i' is |
| // closed by td but is not closed by tr or table. And it is indeed |
| // closed by the *implicit* closing of td. |
| |
| // http://www.w3.org/TR/html5/the-end.html#misnested-tags:-b-i-b-i |
| } |
| |
| TEST_F(HtmlAnnotationTest, INotClosedByOpenTableExplicit) { |
| ValidateNoChanges("explicit_close_tr", "<i>a<table><tr></tr></table>b"); |
| EXPECT_EQ("+i 'a' +table +tr -tr(e) -table(e) 'b' -i(u)", annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, INotClosedByOpenTableImplicit) { |
| ValidateNoChanges("implicit_close_tr", "<i>a<table><tr></table>b"); |
| EXPECT_EQ("+i 'a' +table +tr -tr(u) -table(e) 'b' -i(u)", annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, AClosedByBInLi) { |
| ValidateNoChanges("a_closed_by_b", "<li><a href='x'></b>"); |
| EXPECT_EQ("+li +a:href='x' '</b>' -a(u) -li(u)", annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, BClosedByTd) { |
| ValidateNoChanges("b_closed_by_td", "<table><tr><td><b>1</table></b>"); |
| |
| // The <b> gets closed by the </td>, which is automatically closed by |
| // the td, which is automatically closed by the tr, which is automatically |
| // closed by the tbody, which is automatically closed by the "</table>". |
| // The actual "</b>" that appears here doesn't close any open tags, so |
| // its rendered as literal characters. |
| // |
| // TODO(jmarantz): consider adding a new event-type to represent bogus |
| // tags rather than using Characters. |
| EXPECT_EQ("+table +tr +td +b '1' -b(u) -td(u) -tr(u) -table(e) '</b>'", |
| annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, BNotClosedByTable) { |
| ValidateNoChanges( |
| "a_closed_by_b", |
| "<table><tbody><tr><b><td>hello</tr></tbody></table>World</b>"); |
| |
| // We do not create the same annotation Chrome does in this case. Opening up |
| // the inspector on |
| // data:text/html,<table><tbody><tr><b><td>hello</tr></tbody></table>World</b> |
| // shows us (ignoring html, head, and body tags for brevity): |
| // <b></b> |
| // <table> |
| // <tbody> |
| // <tr> |
| // <td>hello</td> |
| // </td> |
| // </tbody> |
| // </table> |
| // <b>World</b> |
| // For us to replicate this structure, we'd have to move the 'b' tag ahead of |
| // the <table> opening tag. To do this we would need to buffer tables until |
| // they reached the end-table tag. This does not appear to be a good |
| // tradeoff as tables might be large and buffering them would impact |
| // the UX for all sites, as a defense against bad markup and filters that |
| // care deeply about the structure of formatting elements in illegal DOM |
| // positions. |
| // |
| // But note that this malformed markup will in fact pass through |
| // parsing & serialization with byte accuracy. |
| } |
| |
| TEST_F(HtmlAnnotationTest, StrayCloseTrInTable) { |
| ValidateNoChanges("stray_close_tr", |
| "<div><table><tbody><td>1</td></tr></tbody></table></div>"); |
| EXPECT_EQ("+div +table +tbody +td '1' -td(e) '</tr>' -tbody(e) -table(e) " |
| "-div(e)", annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, StrayCloseTrInTableWithUnclosedTd) { |
| ValidateNoChanges("stray_close_tr_unclosed_td", |
| "<tr><table><td>1</tr></table>"); |
| EXPECT_EQ("+tr +table +td '1</tr>' -td(u) -table(e) -tr(u)", annotation()); |
| // TODO(jmarantz): the above is not quite DOM-accurate. A 'tr' will |
| // actually be synthesized around the <td>. To solve this and |
| // maintain byte accuracy we must synthesize an HtmlElement whose |
| // opening-tag is invisible, and create a map that requires <td> |
| // elements to be enclosed in <tr> etc. See, in Chrome, |
| // data:text/html,<tr><table><td>1</tr></table> |
| } |
| |
| TEST_F(HtmlAnnotationTest, OverlappingStyleTags) { |
| ValidateNoChanges("overlapping_style_tags", "n<b>b<i>bi</b>i</i>n"); |
| |
| // TODO(jmarantz): The behavior of this sequence is well-specified, but |
| // is not currently implemented by PSA. We should have |
| // EXPECT_EQ("'n' +b 'b' +i 'bi' -i(u) -b(e) +i* 'i' -i(e) 'n'", |
| // annotation()); |
| // Note that we will need to render a synthetic <i> that shows up in our |
| // DOM tree but does not get serialized. We have no current representation |
| // for that, but we could easily add a bool to HtmlElement to suppress the |
| // serialization of the open tag. Above that's represented by "+i*". |
| // |
| // But we actually get this, which does not have the 'i' in italics. |
| EXPECT_EQ("'n' +b 'b' +i 'bi' -i(u) -b(e) 'i</i>n'", annotation()); |
| |
| // There is no real drawback to implementing this; but at the moment |
| // no filters are likely to care. |
| } |
| |
| TEST_F(HtmlAnnotationTest, AClosedByP) { |
| ValidateNoChanges("a_closed_by_p", "<P>This is a <A>link<P>More"); |
| |
| // According to Chrome("data:text/html,<P>This is a <A>link<P>More") the |
| // structure should be something like this: |
| // "+p 'This is a' +a link -a -p +p +a more -a -p" |
| // In this fashion a&p overlap together in a fashion similar to bold and |
| // italic. |
| // |
| // But we actually product this markup: |
| EXPECT_EQ("+P 'This is a ' +A 'link' +P 'More' -P(u) -A(u) -P(u)", |
| annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, PFont) { |
| ValidateNoChanges("p_font", "<P><FONT>a<P>b</FONT>"); |
| |
| // TODO(jmarantz): The second <P> should force the close of |
| // the first one, despite the intervening <font>. In other words |
| // we need to keep track of which formatting elements are active: |
| // <p> does not nest but I suppose <font> likely does. |
| // |
| // Chrome("data:text/html,<P><FONT>a<P>b</FONT>") yields |
| // "<p><font>a</font</p><p><font><b></font></p>" |
| EXPECT_EQ("+P +FONT 'a' +P 'b' -P(u) -FONT(e) -P(u)", annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, HtmlTbodyCol) { |
| // The spaces before the tag names are invalid. Chrome parses these as |
| // literals; our behavior is consistent. |
| ValidateNoChanges("html_tbody_col", "< HTML> < TBODY> < COL SPAN=999999999>"); |
| EXPECT_EQ("'< HTML> < TBODY> < COL SPAN=999999999>'", annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, WeirdAttrQuotes) { |
| // Note that in the expected results, a space was inserted before |
| // 'position:absolute' and before 'Windings'. I think this is correct. |
| // |
| // TODO(jmarantz): check in Chrome. |
| ValidateExpected("weird_attr_quotes", |
| "<DIV STYLE=\"top:214px; left:139px;\"" |
| "position:absolute; font-size:26px;\">" |
| "<NOBR><SPAN STYLE=\"font-family:\"Wingdings 2\";\">" |
| "</SPAN></NOBR></DIV>", |
| "<DIV STYLE=\"top:214px; left:139px;\" " |
| "position:absolute; font-size:26px;\">" |
| "<NOBR><SPAN STYLE=\"font-family:\" Wingdings 2\";\">" |
| "</SPAN></NOBR></DIV>"); |
| EXPECT_EQ("+DIV:STYLE=\"top:214px; left:139px;\",position:absolute;," |
| "font-size:26px;\" +NOBR " |
| "+SPAN:STYLE=\"font-family:\",Wingdings,2\";\" " |
| "-SPAN(e) -NOBR(e) -DIV(e)", annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, Misc) { |
| // |
| // 1. This is <B>bold, <I>bold italic, </b>italic, </i>normal text |
| // 2. <P>This is a <A>link<P>More |
| // 3. <P><FONT>a<P>b</FONT> |
| // 7. <img title=="><script>alert('foo')</script>"> |
| // 8. < HTML> < TBODY> < COL SPAN=999999999> |
| // 9. <DIV STYLE="top:214px; left:139px; position:absolute; font-size:26px;"> |
| // <NOBR><SPAN STYLE="font-family:"Wingdings 2";"></SPAN></NOBR></DIV> |
| // 10. <a href="http://www.cnn.com/"' title="cnn.com">cnn</a> |
| // 11. do <![if !supportLists]>not<![endif]> lose this text |
| // 12. <table><tr><td>row1<tr><td>row2</td> |
| // 13. <table><tr><td>foo<td>bar<tr><td>baz<td>boo</table> |
| // 14. <p>The quick <strong>brown fox</strong></p>\njumped over the\n |
| // <p>lazy</strong> dog.</p> |
| // 15. <p> paragraph <h1> heading </h1> |
| // 16. <a href="h">1<a>2</a></a> |
| ValidateNoChanges("quote_balance", "<img title=\"><script>alert('foo')" |
| "</script>\">"); |
| EXPECT_EQ("+img:title=\"><script>alert('foo')</script>\" -img(i)", |
| annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, DoubleEquals) { |
| // Note that the attr-value is not in fact a quoted string. The second |
| // "=" begins the attr-value and its terminated by the ">". The script |
| // is not in the quote. The closing quote and > are stray and rendered |
| // as characters in our DOM. We are byte accurate. This behavior |
| // was hand-confirmed as consistent with Chrome by typing |
| // data:text/html,<img title=="><script>alert('foo')</script>"> |
| // into the URL bar on 12/13/2011. The "alert" popped up which is |
| // consistent with the dom annotation below. |
| ValidateNoChanges("double_equals", |
| "<img title==\"><script>alert('foo')</script>\">"); |
| EXPECT_EQ("+img:title==\" -img(i) +script 'alert('foo')' -script(e) '\">'", |
| annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, AttrEqStartWithSlash) { |
| // Note the "/>" here does *not* briefly end the 'body'; it's part of the |
| // attribute. Verified with chrome using |
| // data:text/html,<body title=/>hello</body> |
| ValidateNoChanges("attr_eq_starts_with_slash", "<body title=/>1</body>"); |
| EXPECT_EQ("+body:title=/ '1' -body(e)", annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, AttrEqEndsWithSlash) { |
| // Note again the "/>" here does *not* briefly end the 'body'; it's part of |
| // the attribute. Verified with chrome using |
| // data:text/html,<body title=x/>hello</body> |
| ValidateNoChanges("attr_eq_ends_with_slash", "<body title=x/></body>"); |
| EXPECT_EQ("+body:title=x/ -body(e)", annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, TableForm) { |
| ValidateNoChanges("table_form", "<table><form><input></table><input></form>"); |
| EXPECT_EQ("+table +form +input -input(i) -form(u) -table(e)" |
| " +input -input(i) '</form>'", |
| annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, ComplexQuotedAttribute) { |
| ValidateNoChanges("complex_quoted_attr", |
| "<div x='\\'><img onload=alert(42)" |
| "src=http://json.org/img/json160.gif>'></div>"); |
| EXPECT_EQ("+div:x='\\' " |
| "+img:onload=alert(42)src=http://json.org/img/json160.gif " |
| "-img(i) ''>' -div(e)", annotation()); |
| } |
| |
// Checks that this input round-trips unchanged and, per the expected
// annotation, is reported as a single characters node rather than as a <div>
// element.
// NOTE(review): the run of blank-looking characters after "<div" is
// presumably made of non-breaking spaces (going by the test name) rather than
// plain ASCII spaces -- confirm the exact bytes before editing these literals.
TEST_F(HtmlAnnotationTest, DivNbsp) {
  ValidateNoChanges("div_nbsp",
                    "<div    style=\\-\\mo\\z\\-b\\i\\nd\\in\\g:\\url("
                    "//business\\i\\nfo.co.uk\\/labs\\/xbl\\/xbl\\.xml\\#xss)"
                    ">");
  EXPECT_EQ("'<div    style=\\-\\mo\\z\\-b\\i\\nd\\in\\g:\\"
            "url(//business\\i\\nfo.co.uk\\/labs\\/xbl\\/xbl\\.xml\\#xss)>'",
            annotation());
}
| |
| TEST_F(HtmlAnnotationTest, ExtraQuote) { |
| ValidateExpected( |
| "extra_quote", |
| "<a href=\"http://www.cnn.com/\"' title=\"cnn.com\">cnn</a>", |
| "<a href=\"http://www.cnn.com/\" ' title=\"cnn.com\">cnn</a>"); |
| } |
| |
| TEST_F(HtmlAnnotationTest, TrNesting) { |
| ValidateNoChanges("nesting", "<tr><td><tr a=b><td c=d></td></tr>"); |
| EXPECT_EQ("+tr +td -td(a) -tr(a) +tr:a=b +td:c=d -td(e) -tr(e)", |
| annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, AttrEndingWithOpenAngle) { |
| ValidateNoChanges("weird_attr", "<script src=foo<bar>Content"); |
| EXPECT_EQ("+script:src=foo<bar 'Content' -script(u)", annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, ScriptQuirkBasic) { |
| ValidateNoChanges("script_quirk_1", |
| "<script><!--<script></script>a</script>b"); |
| EXPECT_EQ("+script '<!--<script></script>a' -script(e) 'b'", annotation()); |
| |
| ResetAnnotation(); |
| ValidateNoChanges("script_quirk_2", |
| "<script><!--</script>a</script>b"); |
| EXPECT_EQ("+script '<!--' -script(e) 'a</script>b'", annotation()); |
| |
| ResetAnnotation(); |
| ValidateNoChanges("script_quirk_3", |
| "<script><script></script>a</script>b"); |
| EXPECT_EQ("+script '<script>' -script(e) 'a</script>b'", annotation()); |
| |
| ResetAnnotation(); |
| ValidateNoChanges("script_quirk_4", |
| "<script><!--<script>--></script>a</script>b"); |
| EXPECT_EQ("+script '<!--<script>-->' -script(e) 'a</script>b'", annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, ScriptQuirkCloseAttr) { |
| // HTML5 script parsing is weird in that </script> actually gets attribute |
| // parsing. |
| ValidateExpected("script_quirk_close", |
| "<script></script a=\"foo>\">Bar", |
| "<script></script>Bar"); |
| EXPECT_EQ("+script -script(e) 'Bar'", annotation()); |
| |
| ResetAnnotation(); |
| ValidateExpected("script_quirk_close2", |
| "<script></script a=\"foo>\" bar=\'>' bax>Bar", |
| "<script></script>Bar"); |
| EXPECT_EQ("+script -script(e) 'Bar'", annotation()); |
| |
| |
| ResetAnnotation(); |
| ValidateExpected("script_quirk_close_slash", |
| "<script></script a=\"foo>\"/>Bar", |
| "<script></script>Bar"); |
| EXPECT_EQ("+script -script(e) 'Bar'", annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, ScriptQuirkBriefClose) { |
| // HTML5 script parsing --- closing </style /> |
| ValidateExpected("script_quirk_close_brief", |
| "<script></script/>Bar", |
| "<script></script>Bar"); |
| EXPECT_EQ("+script -script(e) 'Bar'", annotation()); |
| |
| ResetAnnotation(); |
| ValidateExpected("script_quirk_close_brief", |
| "<script></script /foo>Bar", |
| "<script></script>Bar"); |
| EXPECT_EQ("+script -script(e) 'Bar'", annotation()); |
| } |
| |
| // TODO(jmarantz): fix this case; we lose the stray "=". |
| // TEST_F(HtmlAnnotationTest, StrayEq) { |
| // ValidateNoChanges("stray_eq", "<a href='foo.html'=>b</a>"); |
| // EXPECT_EQ("+a:href=foo.html -a(e)", annotation()); |
| // } |
| |
| TEST_F(HtmlAnnotationTest, FlushDoesNotBreakCharacterBlock) { |
| annotation_.set_annotate_flush(true); |
| html_parse_.StartParse("http://test.com/blank_flush.html"); |
| html_parse_.ParseText("<div></div>"); // will get flushed. |
| html_parse_.ParseText("bytes:"); // will not get flushed till the end. |
| html_parse_.Flush(); |
| html_parse_.ParseText(":more:"); |
| html_parse_.Flush(); |
| html_parse_.ParseText(":still more:"); |
| html_parse_.Flush(); |
| html_parse_.ParseText(":final bytes:"); |
| html_parse_.FinishParse(); |
| EXPECT_STREQ( |
| "+div -div(e)[F][F][F] 'bytes::more::still more::final bytes:'[F]", |
| annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, FlushDoesNotBreakScriptTag) { |
| annotation_.set_annotate_flush(true); |
| html_parse_.StartParse("http://test.com/blank_flush.html"); |
| html_parse_.ParseText("<script>"); |
| html_parse_.Flush(); |
| html_parse_.ParseText("a=b;"); |
| html_parse_.Flush(); |
| html_parse_.ParseText("c=d;"); |
| html_parse_.Flush(); |
| html_parse_.ParseText("</scr"); |
| html_parse_.Flush(); |
| html_parse_.ParseText("ipt><script>"); |
| html_parse_.Flush(); |
| html_parse_.ParseText("e=f;"); |
| html_parse_.Flush(); |
| html_parse_.ParseText("g=h;"); |
| // No explicit </script> but the lexer will help us close it. |
| html_parse_.FinishParse(); |
| EXPECT_STREQ("[F][F][F][F] +script 'a=b;c=d;' -script(e)[F][F]" |
| " +script 'e=f;g=h;' -script(u)[F]", // "(u)" for unclosed. |
| annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, FlushDoesNotBreakScriptTagWithComment) { |
| SetupWriter(); |
| annotation_.set_annotate_flush(true); |
| html_parse_.StartParse("http://test.com/blank_flush.html"); |
| html_parse_.ParseText("<script>"); |
| html_parse_.InsertComment("c1"); |
| html_parse_.Flush(); |
| html_parse_.ParseText("a=b;"); |
| html_parse_.Flush(); |
| html_parse_.ParseText("</script><script>"); |
| html_parse_.InsertComment("c2"); |
| html_parse_.Flush(); |
| html_parse_.ParseText("</script>"); |
| html_parse_.FinishParse(); |
| EXPECT_STREQ("[F][F] +script 'a=b;' -script(e)[F] +script -script(e)[F]", |
| annotation()); |
| EXPECT_STREQ("<!--c1--><script>a=b;</script><!--c2--><script></script>", |
| output_buffer_); |
| } |
| |
| TEST_F(HtmlAnnotationTest, FlushDoesNotBreakStyleTag) { |
| annotation_.set_annotate_flush(true); |
| html_parse_.StartParse("http://test.com/blank_flush.html"); |
| html_parse_.ParseText("<style>"); |
| html_parse_.Flush(); |
| html_parse_.ParseText(".blue {color: "); |
| html_parse_.Flush(); |
| html_parse_.ParseText("blue;}"); |
| html_parse_.Flush(); |
| html_parse_.ParseText("</style>"); |
| html_parse_.FinishParse(); |
| EXPECT_STREQ("[F][F][F] +style '.blue {color: blue;}' -style(e)[F]", |
| annotation()); |
| } |
| |
| TEST_F(HtmlAnnotationTest, UnclosedScriptOnly) { |
| SetupWriter(); |
| annotation_.set_annotate_flush(true); |
| html_parse_.StartParse("http://test.com/blank_flush.html"); |
| html_parse_.ParseText("<script>"); |
| html_parse_.FinishParse(); |
| |
| // Note that we will get an EndElement callback. See -script(u) in annotation. |
| // However we will not insert a </script> in the output, since there was none |
| // in the input. |
| EXPECT_STREQ("+script -script(u)[F]", annotation()); |
| EXPECT_STREQ("<script>", output_buffer_); |
| } |
| |
| TEST_F(HtmlAnnotationTest, UnclosedScriptOnlyWithFlush) { |
| SetupWriter(); |
| annotation_.set_annotate_flush(true); |
| html_parse_.StartParse("http://test.com/blank_flush.html"); |
| html_parse_.ParseText("<script>"); |
| html_parse_.Flush(); |
| html_parse_.FinishParse(); |
| |
| // Note that we will get an EndElement callback. See -script(u) in annotation. |
| // However we will not insert a </script> in the output, since there was none |
| // in the input. |
| EXPECT_STREQ("[F] +script -script(u)[F]", annotation()); |
| EXPECT_STREQ("<script>", output_buffer_); |
| } |
| |
| TEST_F(HtmlAnnotationTest, NulInAttrName) { |
| // Tests that we don't crash with an embedded NUL in an attribute name. |
| SetupWriter(); |
| html_parse_.StartParse("http://test.com/nul_in_attr.html"); |
| html_parse_.ParseText("<img src"); |
| html_parse_.ParseText(StringPiece("\0", 1)); |
| html_parse_.ParseText("file:-1675375991 />"); |
| html_parse_.FinishParse(); |
| } |
| |
| TEST_F(HtmlParseTest, MakeName) { |
| EXPECT_EQ(0, HtmlTestingPeer::symbol_table_size(&html_parse_)); |
| |
| // Empty names are a corner case that we hope does not crash. Note |
| // that empty-string atoms are special-cased in the symbol table |
| // and require no new allocated bytes. |
| { |
| HtmlName empty = html_parse_.MakeName(""); |
| EXPECT_EQ(0, HtmlTestingPeer::symbol_table_size(&html_parse_)); |
| EXPECT_EQ(HtmlName::kNotAKeyword, empty.keyword()); |
| EXPECT_EQ("", empty.value()); |
| } |
| |
| // When we make a name using its enum, there should be no symbol table growth. |
| HtmlName body_symbol = html_parse_.MakeName(HtmlName::kBody); |
| EXPECT_EQ(0, HtmlTestingPeer::symbol_table_size(&html_parse_)); |
| EXPECT_EQ(HtmlName::kBody, body_symbol.keyword()); |
| |
| // When we make a name using the canonical form (all-lower-case) there |
| // should still be no symbol table growth. |
| HtmlName body_canonical = html_parse_.MakeName("body"); |
| EXPECT_EQ(0, HtmlTestingPeer::symbol_table_size(&html_parse_)); |
| EXPECT_EQ(HtmlName::kBody, body_canonical.keyword()); |
| |
| // But when we introduce a new capitalization, we want to retain the |
| // case, even though we do html keyword matching. We will have to |
| // store the new form in the symbol table so we'll be allocating |
| // some bytes, including the nul terminator. |
| HtmlName body_new_capitalization = html_parse_.MakeName("Body"); |
| EXPECT_EQ(4, HtmlTestingPeer::symbol_table_size(&html_parse_)); |
| EXPECT_EQ(HtmlName::kBody, body_new_capitalization.keyword()); |
| |
| // Make a name out of something that is not a keyword. |
| // This should also increase the symbol-table size. |
| HtmlName non_keyword = html_parse_.MakeName("hiybbprqag"); |
| EXPECT_EQ(14, HtmlTestingPeer::symbol_table_size(&html_parse_)); |
| EXPECT_EQ(HtmlName::kNotAKeyword, non_keyword.keyword()); |
| |
| // Empty names are a corner case that we hope does not crash. Note |
| // that empty-string atoms are special-cased in the symbol table |
| // and require no new allocated bytes. |
| { |
| HtmlName empty = html_parse_.MakeName(""); |
| EXPECT_EQ(14, HtmlTestingPeer::symbol_table_size(&html_parse_)); |
| EXPECT_EQ(HtmlName::kNotAKeyword, empty.keyword()); |
| EXPECT_EQ("", empty.value()); |
| } |
| } |
| |
| // bug 2508140 : <noscript> in <head> |
| TEST_F(HtmlParseTestNoBody, NoscriptInHead) { |
| // Some real websites (ex: google.com) have <noscript> in the <head> even |
| // though this is technically illegal according to the HTML4 spec. |
| // We should support the case in use. |
| ValidateNoChanges("noscript_in_head", |
| "<head><noscript><title>You don't have JS enabled :(</title>" |
| "</noscript></head>"); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, NoCaseFold) { |
| // Case folding is off by default. However, we don't keep the |
| // closing-tag separate in the IR so we will always make that |
| // match. |
| ValidateExpected("no_case_fold", |
| "<DiV><Other xY='AbC' Href='dEf'>Hello</OTHER></diV>", |
| "<DiV><Other xY='AbC' Href='dEf'>Hello</Other></DiV>"); |
| // Despite the fact that we retain case, in our IR, and the cases did not |
| // match between opening and closing tags, there should be no messages |
| // warning about unmatched tags. |
| EXPECT_EQ(0, message_handler_.TotalMessages()); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, CaseFold) { |
| SetupWriter(); |
| html_writer_filter_->set_case_fold(true); |
| ValidateExpected("case_fold", |
| "<DiV><Other xY='AbC' Href='dEf'>Hello</OTHER></diV>", |
| "<div><other xy='AbC' href='dEf'>Hello</other></div>"); |
| } |
| |
// Thin wrapper around bool whose default constructor initializes the value
// to false, so flag members need no explicit initialization.
class Bool {
 public:
  // Default state is false.
  Bool() : value_(false) {}

  // Converting constructor (not a copy constructor).  It is intentionally
  // implicit so that a plain bool can be assigned to a Bool, as in
  // "flag = true".
  Bool(bool value) : value_(value) {}  // NOLINT

  // Returns the wrapped value.  Returned as plain bool: a top-level const on
  // a by-value scalar return has no effect and only provokes warnings.
  bool Test() const { return value_; }

 private:
  bool value_;
};
| |
| // Class simply keeps track of which handlers have been called. |
| class HandlerCalledFilter : public HtmlFilter { |
| public: |
| HandlerCalledFilter() : enabled_value_(true) {} |
| |
| virtual void StartDocument() { called_start_document_ = true; } |
| virtual void EndDocument() { called_end_document_ = true;} |
| virtual void StartElement(HtmlElement* element) { |
| called_start_element_ = true; |
| } |
| virtual void EndElement(HtmlElement* element) { |
| called_end_element_ = true; |
| } |
| virtual void Cdata(HtmlCdataNode* cdata) { called_cdata_ = true; } |
| virtual void Comment(HtmlCommentNode* comment) { called_comment_ = true; } |
| virtual void IEDirective(HtmlIEDirectiveNode* directive) { |
| called_ie_directive_ = true; |
| } |
| virtual void Characters(HtmlCharactersNode* characters) { |
| called_characters_ = true; |
| } |
| virtual void Directive(HtmlDirectiveNode* directive) { |
| called_directive_ = true; |
| } |
| virtual void Flush() { called_flush_ = true; } |
| |
| virtual void DetermineEnabled(GoogleString* disabled_reason) { |
| set_is_enabled(enabled_value_); |
| } |
| |
| virtual bool CanModifyUrls() { return false; } |
| |
| void SetEnabled(bool enabled_value) { |
| enabled_value_ = enabled_value; |
| } |
| virtual const char* Name() const { return "HandlerCalled"; } |
| |
| Bool called_start_document_; |
| Bool called_end_document_; |
| Bool called_start_element_; |
| Bool called_end_element_; |
| Bool called_cdata_; |
| Bool called_comment_; |
| Bool called_ie_directive_; |
| Bool called_characters_; |
| Bool called_directive_; |
| Bool called_flush_; |
| |
| private: |
| bool enabled_value_; |
| |
| DISALLOW_COPY_AND_ASSIGN(HandlerCalledFilter); |
| }; |
| |
| class HandlerCalledTest : public HtmlParseTest { |
| protected: |
| HandlerCalledTest() { |
| html_parse_.AddFilter(&handler_called_filter_); |
| first_event_listener_ = new HandlerCalledFilter(); |
| second_event_listener_ = new HandlerCalledFilter(); |
| html_parse_.add_event_listener(first_event_listener_); |
| html_parse_.add_event_listener(second_event_listener_); |
| } |
| |
| HandlerCalledFilter handler_called_filter_; |
| HandlerCalledFilter* first_event_listener_; |
| HandlerCalledFilter* second_event_listener_; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(HandlerCalledTest); |
| }; |
| |
| // Check that StartDocument and EndDocument were called for filters. |
| TEST_F(HandlerCalledTest, StartEndDocumentCalled) { |
| Parse("start_end_document_called", ""); |
| EXPECT_TRUE(handler_called_filter_.called_start_document_.Test()); |
| EXPECT_TRUE(handler_called_filter_.called_end_document_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_start_document_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_end_document_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_start_document_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_end_document_.Test()); |
| } |
| |
| // Check that StartDocument and EndDocument were called for filters. |
| TEST_F(HandlerCalledTest, StartEndDocumentWithFilterDisabled) { |
| handler_called_filter_.SetEnabled(false); |
| Parse("start_end_document_called", ""); |
| EXPECT_FALSE(handler_called_filter_.called_start_document_.Test()); |
| EXPECT_FALSE(handler_called_filter_.called_end_document_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_start_document_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_end_document_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_start_document_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_end_document_.Test()); |
| |
| handler_called_filter_.SetEnabled(true); |
| Parse("start_end_document_called", ""); |
| EXPECT_TRUE(handler_called_filter_.called_start_document_.Test()); |
| EXPECT_TRUE(handler_called_filter_.called_end_document_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_start_document_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_end_document_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_start_document_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_end_document_.Test()); |
| } |
| |
| TEST_F(HandlerCalledTest, StartEndElementCalled) { |
| Parse("start_end_element_called", "<p>...</p>"); |
| EXPECT_TRUE(handler_called_filter_.called_start_element_.Test()); |
| EXPECT_TRUE(handler_called_filter_.called_end_element_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_start_element_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_end_element_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_start_element_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_end_element_.Test()); |
| } |
| |
| TEST_F(HandlerCalledTest, CdataCalled) { |
| Parse("cdata_called", "<![CDATA[...]]>"); |
| // Looks like a directive, but isn't. |
| EXPECT_FALSE(handler_called_filter_.called_directive_.Test()); |
| EXPECT_TRUE(handler_called_filter_.called_cdata_.Test()); |
| EXPECT_FALSE(first_event_listener_->called_directive_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_cdata_.Test()); |
| EXPECT_FALSE(second_event_listener_->called_directive_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_cdata_.Test()); |
| } |
| |
| TEST_F(HandlerCalledTest, CommentCalled) { |
| Parse("comment_called", "<!--...-->"); |
| EXPECT_TRUE(handler_called_filter_.called_comment_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_comment_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_comment_.Test()); |
| } |
| |
| TEST_F(HandlerCalledTest, IEDirectiveCalled1) { |
| Parse("ie_directive_called", "<!--[if IE]>...<![endif]-->"); |
| // Looks like a comment, but isn't. |
| EXPECT_FALSE(handler_called_filter_.called_comment_.Test()); |
| EXPECT_TRUE(handler_called_filter_.called_ie_directive_.Test()); |
| EXPECT_FALSE(first_event_listener_->called_comment_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_ie_directive_.Test()); |
| EXPECT_FALSE(second_event_listener_->called_comment_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_ie_directive_.Test()); |
| } |
| |
| TEST_F(HandlerCalledTest, IEDirectiveCalled2) { |
| // See http://code.google.com/p/modpagespeed/issues/detail?id=136 and |
| // http://msdn.microsoft.com/en-us/library/ms537512(VS.85).aspx#dlrevealed |
| Parse("ie_directive_called", "<!--[if lte IE 8]>...<![endif]-->"); |
| EXPECT_FALSE(handler_called_filter_.called_comment_.Test()); |
| EXPECT_TRUE(handler_called_filter_.called_ie_directive_.Test()); |
| EXPECT_FALSE(first_event_listener_->called_comment_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_ie_directive_.Test()); |
| EXPECT_FALSE(second_event_listener_->called_comment_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_ie_directive_.Test()); |
| } |
| |
| TEST_F(HandlerCalledTest, IEDirectiveCalled3) { |
| Parse("ie_directive_called", "<!--[if false]>...<![endif]-->"); |
| EXPECT_FALSE(handler_called_filter_.called_comment_.Test()); |
| EXPECT_TRUE(handler_called_filter_.called_ie_directive_.Test()); |
| EXPECT_FALSE(first_event_listener_->called_comment_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_ie_directive_.Test()); |
| EXPECT_FALSE(second_event_listener_->called_comment_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_ie_directive_.Test()); |
| } |
| |
| // Downlevel-revealed commments normally look like <![if foo]>...<![endif]>. |
| // However, although most (non-IE) browsers will ignore those, they're |
| // technically not valid, so some sites use the below trick (which is valid |
| // HTML, and still works for IE). For an explanation, see |
| // http://en.wikipedia.org/wiki/Conditional_comment# |
| // Downlevel-revealed_conditional_comment |
| TEST_F(HandlerCalledTest, IEDirectiveCalledRevealedOpen) { |
| Parse("ie_directive_called", "<!--[if !IE]><!-->"); |
| EXPECT_FALSE(handler_called_filter_.called_comment_.Test()); |
| EXPECT_TRUE(handler_called_filter_.called_ie_directive_.Test()); |
| EXPECT_FALSE(first_event_listener_->called_comment_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_ie_directive_.Test()); |
| EXPECT_FALSE(second_event_listener_->called_comment_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_ie_directive_.Test()); |
| } |
| TEST_F(HandlerCalledTest, IEDirectiveCalledRevealedClose) { |
| Parse("ie_directive_called", "<!--<![endif]-->"); |
| EXPECT_FALSE(handler_called_filter_.called_comment_.Test()); |
| EXPECT_TRUE(handler_called_filter_.called_ie_directive_.Test()); |
| EXPECT_FALSE(first_event_listener_->called_comment_.Test()); |
| EXPECT_TRUE(first_event_listener_->called_ie_directive_.Test()); |
| EXPECT_FALSE(second_event_listener_->called_comment_.Test()); |
| EXPECT_TRUE(second_event_listener_->called_ie_directive_.Test()); |
| } |
| |
| // Unit tests for event-list manipulation. In these tests, we do not parse |
| // HTML input text, but instead create two 'Characters' nodes and use the |
| // event-list manipulation methods and make sure they render as expected. |
| class EventListManipulationTest : public HtmlParseTest { |
| protected: |
| EventListManipulationTest() { } |
| |
| virtual void SetUp() { |
| HtmlParseTest::SetUp(); |
| static const char kUrl[] = "http://html.parse.test/event_list_test.html"; |
| ASSERT_TRUE(html_parse_.StartParse(kUrl)); |
| node1_ = html_parse_.NewCharactersNode(NULL, "1"); |
| HtmlTestingPeer::AddEvent(&html_parse_, |
| new HtmlCharactersEvent(node1_, -1)); |
| node2_ = html_parse_.NewCharactersNode(NULL, "2"); |
| node3_ = html_parse_.NewCharactersNode(NULL, "3"); |
| // Note: the last 2 are not added in SetUp. |
| } |
| |
| virtual void TearDown() { |
| html_parse_.FinishParse(); |
| HtmlParseTest::TearDown(); |
| } |
| |
| void CheckExpected(const GoogleString& expected) { |
| SetupWriter(); |
| html_parse()->ApplyFilter(html_writer_filter_.get()); |
| EXPECT_EQ(expected, output_buffer_); |
| } |
| |
| HtmlCharactersNode* node1_; |
| HtmlCharactersNode* node2_; |
| HtmlCharactersNode* node3_; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(EventListManipulationTest); |
| }; |
| |
| TEST_F(EventListManipulationTest, TestReplace) { |
| EXPECT_TRUE(html_parse_.ReplaceNode(node1_, node2_)); |
| CheckExpected("2"); |
| } |
| |
| TEST_F(EventListManipulationTest, TestInsertNodeBeforeNode) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| html_parse_.InsertNodeBeforeNode(node1_, node2_); |
| CheckExpected("21"); |
| html_parse_.InsertNodeBeforeNode(node1_, node3_); |
| CheckExpected("231"); |
| } |
| |
| TEST_F(EventListManipulationTest, TestInsertNodeAfterNode) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| html_parse_.InsertNodeAfterNode(node1_, node2_); |
| CheckExpected("12"); |
| html_parse_.InsertNodeAfterNode(node1_, node3_); |
| CheckExpected("132"); |
| } |
| |
| TEST_F(EventListManipulationTest, TestInsertNodeBeforeCurrent) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| html_parse_.InsertNodeBeforeCurrent(node2_); |
| // Current is left at queue_.end() after the AddEvent. |
| CheckExpected("12"); |
| |
| HtmlTestingPeer::SetCurrent(&html_parse_, node1_); |
| html_parse_.InsertNodeBeforeCurrent(node3_); |
| CheckExpected("312"); |
| } |
| |
| TEST_F(EventListManipulationTest, TestInsertNodeAfterCurrent) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| HtmlTestingPeer::SetCurrent(&html_parse_, node1_); |
| html_parse_.InsertNodeAfterCurrent(node2_); |
| // Note that if we call CheckExpected here it will mutate current_. |
| html_parse_.InsertNodeAfterCurrent(node3_); |
| CheckExpected("123"); |
| } |
| |
| TEST_F(EventListManipulationTest, TestDeleteOnly) { |
| html_parse_.DeleteNode(node1_); |
| CheckExpected(""); |
| } |
| |
| TEST_F(EventListManipulationTest, TestDeleteFirst) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1)); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1)); |
| html_parse_.DeleteNode(node1_); |
| CheckExpected("23"); |
| html_parse_.DeleteNode(node2_); |
| CheckExpected("3"); |
| html_parse_.DeleteNode(node3_); |
| CheckExpected(""); |
| } |
| |
| TEST_F(EventListManipulationTest, TestDeleteLast) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1)); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1)); |
| html_parse_.DeleteNode(node3_); |
| CheckExpected("12"); |
| html_parse_.DeleteNode(node2_); |
| CheckExpected("1"); |
| html_parse_.DeleteNode(node1_); |
| CheckExpected(""); |
| } |
| |
| TEST_F(EventListManipulationTest, TestDeleteMiddle) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1)); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1)); |
| html_parse_.DeleteNode(node2_); |
| CheckExpected("13"); |
| } |
| |
| // Note that an unconditionally sanity check runs after every |
| // filter, verifying that all the parent-pointers are correct. |
| // CheckExpected applies the HtmlWriterFilter, so it runs the |
| // parent-pointer check. |
| TEST_F(EventListManipulationTest, TestAddParentToSequence) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1)); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1)); |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node3_, div)); |
| CheckExpected("<div>123</div>"); |
| |
| // Now interpose a span between the div and the Characters nodes. |
| HtmlElement* span = html_parse_.NewElement(div, HtmlName::kSpan); |
| EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node2_, span)); |
| CheckExpected("<div><span>12</span>3</div>"); |
| |
| // Next, add an HTML block above the div. Note that we pass 'div' in as |
| // both 'first' and 'last'. |
| HtmlElement* html = html_parse_.NewElement(NULL, HtmlName::kHtml); |
| EXPECT_TRUE(html_parse_.AddParentToSequence(div, div, html)); |
| CheckExpected("<html><div><span>12</span>3</div></html>"); |
| } |
| |
| TEST_F(EventListManipulationTest, TestPrependChild) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| html_parse_.InsertNodeBeforeCurrent(div); |
| CheckExpected("1<div></div>"); |
| |
| html_parse_.PrependChild(div, node2_); |
| CheckExpected("1<div>2</div>"); |
| html_parse_.PrependChild(div, node3_); |
| CheckExpected("1<div>32</div>"); |
| |
| // TODO(sligocki): Test with elements that don't explicitly end like image. |
| } |
| |
| TEST_F(EventListManipulationTest, TestAppendChild) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| html_parse_.InsertNodeBeforeCurrent(div); |
| CheckExpected("1<div></div>"); |
| |
| html_parse_.AppendChild(div, node2_); |
| CheckExpected("1<div>2</div>"); |
| html_parse_.AppendChild(div, node3_); |
| CheckExpected("1<div>23</div>"); |
| |
| // TODO(sligocki): Test with elements that don't explicitly end like image. |
| } |
| |
| TEST_F(EventListManipulationTest, TestAddParentToSequenceDifferentParents) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1)); |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node2_, div)); |
| CheckExpected("<div>12</div>"); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1)); |
| CheckExpected("<div>12</div>3"); |
| EXPECT_FALSE(html_parse_.AddParentToSequence(node2_, node3_, div)); |
| } |
| |
| TEST_F(EventListManipulationTest, TestDeleteGroup) { |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1)); |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node2_, div)); |
| CheckExpected("<div>12</div>"); |
| html_parse_.DeleteNode(div); |
| CheckExpected(""); |
| } |
| |
| TEST_F(EventListManipulationTest, TestMoveElementIntoParent1) { |
| HtmlElement* head = html_parse_.NewElement(NULL, HtmlName::kHead); |
| EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node1_, head)); |
| CheckExpected("<head>1</head>"); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1)); |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| EXPECT_TRUE(html_parse_.AddParentToSequence(node2_, node2_, div)); |
| CheckExpected("<head>1</head><div>2</div>"); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1)); |
| CheckExpected("<head>1</head><div>2</div>3"); |
| HtmlTestingPeer::SetCurrent(&html_parse_, div); |
| EXPECT_TRUE(html_parse_.MoveCurrentInto(head)); |
| CheckExpected("<head>1<div>2</div></head>3"); |
| } |
| |
| TEST_F(EventListManipulationTest, TestMoveElementIntoParent2) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| HtmlElement* head = html_parse_.NewElement(NULL, HtmlName::kHead); |
| EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node1_, head)); |
| CheckExpected("<head>1</head>"); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1)); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1)); |
| CheckExpected("<head>1</head>23"); |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| EXPECT_TRUE(html_parse_.AddParentToSequence(node3_, node3_, div)); |
| CheckExpected("<head>1</head>2<div>3</div>"); |
| HtmlTestingPeer::SetCurrent(&html_parse_, div); |
| EXPECT_TRUE(html_parse_.MoveCurrentInto(head)); |
| CheckExpected("<head>1<div>3</div></head>2"); |
| EXPECT_TRUE(html_parse_.DeleteSavingChildren(div)); |
| CheckExpected("<head>13</head>2"); |
| EXPECT_TRUE(html_parse_.DeleteSavingChildren(head)); |
| CheckExpected("132"); |
| } |
| |
| TEST_F(EventListManipulationTest, TestDeleteSavingChildrenEnd) { |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node1_, div)); |
| CheckExpected("<div>1</div>"); |
| EXPECT_TRUE(html_parse_.DeleteSavingChildren(div)); |
| CheckExpected("1"); |
| } |
| |
| TEST_F(EventListManipulationTest, TestMoveCurrentBefore) { |
| // Setup events. |
| HtmlTestingPeer::set_coalesce_characters(&html_parse_, false); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1)); |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| EXPECT_TRUE(html_parse_.AddParentToSequence(node1_, node2_, div)); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1)); |
| CheckExpected("<div>12</div>3"); |
| HtmlTestingPeer::SetCurrent(&html_parse_, node3_); |
| |
| // Test MoveCurrentBefore(). |
| EXPECT_TRUE(html_parse_.MoveCurrentBefore(node2_)); |
| CheckExpected("<div>132</div>"); |
| |
| #ifdef NDEBUG |
| // Test that current_ pointing to end() does not crash in non-debug build. |
| // In debug build, there is a LOG(DFATAL), so we cannot run this. |
| // NOTE: We do not expect this case ever to happen in normal code. |
| EXPECT_FALSE(html_parse_.MoveCurrentBefore(node2_)); |
| CheckExpected("<div>132</div>"); |
| #endif |
| |
| // Test that current_ pointing to a containing object will not work. |
| HtmlElement* span = html_parse_.NewElement(NULL, HtmlName::kSpan); |
| EXPECT_TRUE(html_parse_.AddParentToSequence(div, div, span)); |
| CheckExpected("<span><div>132</div></span>"); |
| HtmlTestingPeer::SetCurrent(&html_parse_, span); |
| |
| EXPECT_FALSE(html_parse_.MoveCurrentBefore(node2_)); |
| CheckExpected("<span><div>132</div></span>"); |
| } |
| |
| TEST_F(EventListManipulationTest, TestCoalesceOnAdd) { |
| CheckExpected("1"); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1)); |
| CheckExpected("12"); |
| |
| // this will coalesce node1 and node2 togethers. So there is only |
| // one node1_="12", and node2_ is gone. Deleting node1_ will now |
| // leave us empty |
| html_parse_.DeleteNode(node1_); |
| CheckExpected(""); |
| } |
| |
| TEST_F(EventListManipulationTest, TestCoalesceOnDelete) { |
| CheckExpected("1"); |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| html_parse_.AddElement(div, -1); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1)); |
| HtmlTestingPeer testing_peer; |
| testing_peer.SetNodeParent(node2_, div); |
| html_parse_.CloseElement(div, HtmlElement::EXPLICIT_CLOSE, -1); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node3_, -1)); |
| CheckExpected("1<div>2</div>3"); |
| |
| // Removing the div, leaving the children intact... |
| EXPECT_TRUE(html_parse_.DeleteSavingChildren(div)); |
| CheckExpected("123"); |
| |
| // At this point, node1, node2, and node3 are automatically coalesced. |
| // This means when we remove node1, all the content will disappear. |
| html_parse_.DeleteNode(node1_); |
| CheckExpected(""); |
| } |
| |
| TEST_F(EventListManipulationTest, TestHasChildren) { |
| CheckExpected("1"); |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| html_parse_.AddElement(div, -1); |
| EXPECT_FALSE(html_parse_.HasChildrenInFlushWindow(div)); |
| HtmlTestingPeer::AddEvent(&html_parse_, new HtmlCharactersEvent(node2_, -1)); |
| HtmlTestingPeer testing_peer; |
| testing_peer.SetNodeParent(node2_, div); |
| |
| // Despite having added a new element into the stream, the div is not |
| // closed yet, so it's not recognized as a child. |
| EXPECT_FALSE(html_parse_.HasChildrenInFlushWindow(div)); |
| |
| html_parse_.CloseElement(div, HtmlElement::EXPLICIT_CLOSE, -1); |
| EXPECT_TRUE(html_parse_.HasChildrenInFlushWindow(div)); |
| EXPECT_TRUE(html_parse_.DeleteNode(node2_)); |
| EXPECT_FALSE(html_parse_.HasChildrenInFlushWindow(div)); |
| } |
| |
| TEST_F(EventListManipulationTest, AppendComment) { |
| html_parse_.InsertComment("hello"); |
| CheckExpected("1<!--hello-->"); |
| } |
| |
// Inserts a comment whose text itself contains markup and a nested comment,
// and checks how it is serialized after the initial "1".
// NOTE(review): the expected string below shows the comment text verbatim,
// with no entity escaping, although the test name says "WithEscaping".
// Confirm against HtmlParse::InsertComment whether this literal is what the
// writer really emits or whether entities in it were decoded somewhere.
TEST_F(EventListManipulationTest, AppendCommentWithEscaping) {
html_parse_.InsertComment("<i>hello</i> <!--world-->");
CheckExpected("1<!--<i>hello</i> <!--world-->-->");
}
| |
| TEST_F(EventListManipulationTest, CommentBeforeDiv1) { |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| html_parse_.AddElement(div, -1); |
| html_parse_.InsertComment("hello"); |
| html_parse_.CloseElement(div, HtmlElement::EXPLICIT_CLOSE, -1); |
| CheckExpected("1<!--hello--><div></div>"); |
| } |
| |
| TEST_F(EventListManipulationTest, CommentBeforeDiv2) { |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| html_parse_.InsertComment("hello"); |
| html_parse_.AddElement(div, -1); |
| html_parse_.CloseElement(div, HtmlElement::EXPLICIT_CLOSE, -1); |
| CheckExpected("1<!--hello--><div></div>"); |
| } |
| |
| TEST_F(EventListManipulationTest, CommentAfterDiv) { |
| HtmlElement* div = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| html_parse_.AddElement(div, -1); |
| html_parse_.CloseElement(div, HtmlElement::EXPLICIT_CLOSE, -1); |
| html_parse_.InsertComment("hello"); |
| CheckExpected("1<div></div><!--hello-->"); |
| } |
| |
| TEST_F(EventListManipulationTest, CommentAfterFirstDiv) { |
| HtmlElement* div1 = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| html_parse_.AddElement(div1, -1); |
| html_parse_.CloseElement(div1, HtmlElement::EXPLICIT_CLOSE, -1); |
| HtmlElement* div2 = html_parse_.NewElement(NULL, HtmlName::kDiv); |
| html_parse_.AddElement(div2, -1); |
| html_parse_.CloseElement(div2, HtmlElement::EXPLICIT_CLOSE, -1); |
| HtmlTestingPeer::SetCurrent(&html_parse_, div1); |
| html_parse_.InsertComment("hello"); |
| CheckExpected("1<div></div><!--hello--><div></div>"); |
| } |
| |
| class InsertCommentOnFirstDivFilter : public EmptyHtmlFilter { |
| public: |
| InsertCommentOnFirstDivFilter(bool at_start, HtmlParse* parse) |
| : html_parse_(parse), |
| at_start_(at_start), |
| first_(true) { |
| } |
| |
| virtual void StartDocument() { first_ = true; } |
| virtual void StartElement(HtmlElement* element) { Insert(true, element); } |
| virtual void EndElement(HtmlElement* element) { Insert(false, element); } |
| virtual const char* Name() const { return "InsertCommentOnFirstDivFilter"; } |
| |
| private: |
| void Insert(bool at_start, HtmlElement* element) { |
| if (first_ && (at_start == at_start_) && |
| (element->keyword() == HtmlName::kDiv)) { |
| html_parse_->InsertComment("hello"); |
| first_ = false; |
| } |
| } |
| |
| |
| private: |
| HtmlParse* html_parse_; |
| bool at_start_; |
| bool first_; |
| |
| DISALLOW_COPY_AND_ASSIGN(InsertCommentOnFirstDivFilter); |
| }; |
| |
| TEST_F(HtmlParseTestNoBody, CommentInsideFirstDiv) { |
| InsertCommentOnFirstDivFilter insert_at_first_div(true, &html_parse_); |
| html_parse_.AddFilter(&insert_at_first_div); |
| SetupWriter(); |
| ValidateExpected("comment_inside_first_div", |
| "1<div>2</div>3<div>4</div>5", |
| "1<!--hello--><div>2</div>3<div>4</div>5"); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, CommentAfterFirstDiv) { |
| InsertCommentOnFirstDivFilter insert_at_first_div(false, &html_parse_); |
| html_parse_.AddFilter(&insert_at_first_div); |
| SetupWriter(); |
| ValidateExpected("comment_inside_first_div", |
| "1<div>2</div>3<div>4</div>5", |
| "1<div>2</div><!--hello-->3<div>4</div>5"); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, InsertCommentFromEmpty) { |
| html_parse_.InsertComment("hello"); |
| SetupWriter(); |
| html_parse()->ApplyFilter(html_writer_filter_.get()); |
| EXPECT_EQ("<!--hello-->", output_buffer_); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, InsertCommentFromFlushInLargeCharactersBlock) { |
| SetupWriter(); |
| html_parse_.StartParse("http://test.com/blank_flush.html"); |
| html_parse_.ParseText("<style>bytes:"); |
| // This should be inserted before <style>. |
| EXPECT_TRUE(html_parse_.InsertComment("FLUSH1")); |
| html_parse_.Flush(); |
| html_parse_.ParseText(":more:"); |
| html_parse_.Flush(); |
| html_parse_.ParseText(":still more:"); |
| // We are inside a literal block, so it's not safe to insert a comment here. |
| // This should not show up in output_buffer_. |
| EXPECT_FALSE(html_parse_.InsertComment("FLUSH2")); |
| html_parse_.Flush(); |
| html_parse_.ParseText(":final bytes:</style>"); |
| EXPECT_TRUE(html_parse_.InsertComment("FLUSH3")); |
| html_parse_.FinishParse(); |
| |
| EXPECT_EQ("<!--FLUSH1--><style>bytes::more::still more::final bytes:</style>" |
| "<!--FLUSH3-->", |
| output_buffer_); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, InsertCommentFromFlushInEmptyCharactersBlock) { |
| SetupWriter(); |
| html_parse_.StartParse("http://test.com/blank_flush.html"); |
| html_parse_.ParseText("<style>"); |
| // This should be inserted before <style>. |
| EXPECT_TRUE(html_parse_.InsertComment("FLUSH1")); |
| EXPECT_TRUE(html_parse_.InsertComment("FLUSH2")); |
| html_parse_.Flush(); |
| html_parse_.ParseText("</style>"); |
| EXPECT_TRUE(html_parse_.InsertComment("FLUSH3")); |
| html_parse_.FinishParse(); |
| |
| EXPECT_EQ("<!--FLUSH1--><!--FLUSH2--><style></style><!--FLUSH3-->", |
| output_buffer_); |
| } |
| |
| // Unit tests for attribute manipulation. |
| // Goal is to make sure we don't (eg) read deallocated storage |
| // while manipulating attribute values. |
| class AttributeManipulationTest : public HtmlParseTest { |
| protected: |
| AttributeManipulationTest() { } |
| |
| virtual void SetUp() { |
| HtmlParseTest::SetUp(); |
| static const char kUrl[] = |
| "http://html.parse.test/attribute_manipulation_test.html"; |
| ASSERT_TRUE(html_parse_.StartParse(kUrl)); |
| node_ = html_parse_.NewElement(NULL, HtmlName::kA); |
| html_parse_.AddElement(node_, 0); |
| html_parse_.AddAttribute(node_, HtmlName::kHref, "http://www.google.com/"); |
| node_->AddAttribute(html_parse_.MakeName(HtmlName::kId), "37", |
| HtmlElement::NO_QUOTE); |
| node_->AddAttribute(html_parse_.MakeName(HtmlName::kClass), "search!", |
| HtmlElement::SINGLE_QUOTE); |
| // Add a binary attribute (one without value). |
| node_->AddAttribute(html_parse_.MakeName(HtmlName::kSelected), NULL, |
| HtmlElement::NO_QUOTE); |
| html_parse_.CloseElement(node_, HtmlElement::BRIEF_CLOSE, 0); |
| } |
| |
| virtual void TearDown() { |
| html_parse_.FinishParse(); |
| HtmlParseTest::TearDown(); |
| } |
| |
| void CheckExpected(const GoogleString& expected) { |
| SetupWriter(); |
| html_parse_.ApplyFilter(html_writer_filter_.get()); |
| EXPECT_EQ(expected, output_buffer_); |
| } |
| |
| int NumAttributes(HtmlElement* element) { |
| int size = 0; |
| const HtmlElement::AttributeList& attrs = element->attributes(); |
| for (HtmlElement::AttributeConstIterator i(attrs.begin()); |
| i != attrs.end(); ++i) { |
| ++size; |
| } |
| |
| return size; |
| } |
| |
| HtmlElement::Attribute* AttributeAt(HtmlElement* element, int index) { |
| int pos = 0; |
| HtmlElement::AttributeList* attrs = element->mutable_attributes(); |
| for (HtmlElement::AttributeIterator i(attrs->begin()); |
| i != attrs->end(); ++i) { |
| if (pos == index) { |
| return i.Get(); |
| } |
| ++pos; |
| } |
| return NULL; |
| } |
| |
| HtmlElement* node_; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(AttributeManipulationTest); |
| }; |
| |
| TEST_F(AttributeManipulationTest, PropertiesAndDeserialize) { |
| StringPiece google("http://www.google.com/"); |
| StringPiece number37("37"); |
| StringPiece search("search!"); |
| EXPECT_EQ(4, NumAttributes(node_)); |
| EXPECT_EQ(google, node_->AttributeValue(HtmlName::kHref)); |
| EXPECT_EQ(number37, node_->AttributeValue(HtmlName::kId)); |
| EXPECT_EQ(search, node_->AttributeValue(HtmlName::kClass)); |
| // Returns NULL for attributes that do not exist ... |
| EXPECT_TRUE(NULL == node_->AttributeValue(HtmlName::kNotAKeyword)); |
| // ... and for attributes which have no value. |
| EXPECT_TRUE(NULL == node_->AttributeValue(HtmlName::kSelected)); |
| // Returns NULL for attributes that do not exist. |
| EXPECT_TRUE(NULL == node_->FindAttribute(HtmlName::kNotAKeyword)); |
| // Returns an attribute reference for attributes without values. |
| HtmlElement::Attribute* selected = node_->FindAttribute(HtmlName::kSelected); |
| EXPECT_TRUE(NULL != selected); |
| EXPECT_TRUE(NULL == selected->DecodedValueOrNull()); |
| EXPECT_EQ(google, node_->AttributeValue(HtmlName::kHref)); |
| EXPECT_EQ(number37, node_->AttributeValue(HtmlName::kId)); |
| EXPECT_EQ(search, node_->AttributeValue(HtmlName::kClass)); |
| EXPECT_EQ(google, node_->FindAttribute(HtmlName::kHref)->escaped_value()); |
| EXPECT_EQ(number37, node_->FindAttribute(HtmlName::kId)->escaped_value()); |
| EXPECT_EQ(search, node_->FindAttribute(HtmlName::kClass)->escaped_value()); |
| CheckExpected("<a href=\"http://www.google.com/\" id=37 class='search!'" |
| " selected />"); |
| } |
| |
| TEST_F(AttributeManipulationTest, AddAttribute) { |
| html_parse_.AddAttribute(node_, HtmlName::kLang, "ENG-US"); |
| CheckExpected("<a href=\"http://www.google.com/\" id=37 class='search!'" |
| " selected lang=\"ENG-US\"/>"); |
| } |
| |
| TEST_F(AttributeManipulationTest, DeleteAttribute) { |
| node_->DeleteAttribute(HtmlName::kId); |
| CheckExpected("<a href=\"http://www.google.com/\" class='search!'" |
| " selected />"); |
| node_->DeleteAttribute(HtmlName::kSelected); |
| CheckExpected("<a href=\"http://www.google.com/\" class='search!'/>"); |
| } |
| |
| TEST_F(AttributeManipulationTest, ModifyAttribute) { |
| HtmlElement::Attribute* href = |
| node_->FindAttribute(HtmlName::kHref); |
| EXPECT_TRUE(href != NULL); |
| href->SetValue("google"); |
| href->set_quote_style(HtmlElement::SINGLE_QUOTE); |
| html_parse_.SetAttributeName(href, HtmlName::kSrc); |
| CheckExpected("<a src='google' id=37 class='search!' selected />"); |
| } |
| |
| TEST_F(AttributeManipulationTest, ModifyKeepAttribute) { |
| HtmlElement::Attribute* href = |
| node_->FindAttribute(HtmlName::kHref); |
| EXPECT_TRUE(href != NULL); |
| // This apparently do-nothing call to SetValue exposed an allocation bug. |
| href->SetValue(href->DecodedValueOrNull()); |
| href->set_quote_style(href->quote_style()); |
| href->set_name(href->name()); |
| CheckExpected("<a href=\"http://www.google.com/\" id=37 class='search!'" |
| " selected />"); |
| } |
| |
| TEST_F(AttributeManipulationTest, BadUrl) { |
| EXPECT_FALSE(html_parse_.StartParse(")(*&)(*&(*")); |
| |
| // To avoid having the TearDown crash, restart the parse. |
| html_parse_.StartParse("http://www.example.com"); |
| } |
| |
| TEST_F(AttributeManipulationTest, CloneElement) { |
| HtmlElement* clone = html_parse_.CloneElement(node_); |
| |
| // The clone is identical (but not the same object). |
| EXPECT_NE(clone, node_); |
| EXPECT_EQ(HtmlName::kA, clone->keyword()); |
| EXPECT_EQ(node_->style(), clone->style()); |
| EXPECT_EQ(4, NumAttributes(clone)); |
| EXPECT_EQ(HtmlName::kHref, AttributeAt(clone, 0)->keyword()); |
| EXPECT_STREQ("http://www.google.com/", |
| AttributeAt(clone, 0)->DecodedValueOrNull()); |
| EXPECT_EQ(HtmlName::kId, AttributeAt(clone, 1)->keyword()); |
| EXPECT_STREQ("37", AttributeAt(clone, 1)->DecodedValueOrNull()); |
| EXPECT_EQ(HtmlName::kClass, AttributeAt(clone, 2)->keyword()); |
| EXPECT_STREQ("search!", AttributeAt(clone, 2)->DecodedValueOrNull()); |
| EXPECT_EQ(HtmlName::kSelected, AttributeAt(clone, 3)->keyword()); |
| EXPECT_EQ(NULL, AttributeAt(clone, 3)->DecodedValueOrNull()); |
| |
| HtmlElement::Attribute* id = clone->FindAttribute(HtmlName::kId); |
| ASSERT_TRUE(id != NULL); |
| id->SetValue("38"); |
| |
| // Clone is not added initially, and the original is not touched. |
| CheckExpected("<a href=\"http://www.google.com/\" id=37 class='search!'" |
| " selected />"); |
| |
| // Looks sane when added. |
| html_parse_.InsertNodeBeforeNode(node_, clone); |
| CheckExpected("<a href=\"http://www.google.com/\" id=38 class='search!'" |
| " selected />" |
| "<a href=\"http://www.google.com/\" id=37 class='search!'" |
| " selected />"); |
| } |
| |
| TEST_F(HtmlParseTest, NoDisabledFilter) { |
| std::vector<GoogleString> disabled_filters; |
| ASSERT_TRUE(disabled_filters.empty()); |
| |
| html_parse_.SetDynamicallyDisabledFilterList(&disabled_filters); |
| |
| DisableTestFilter filter("not_disabled_filter", true, "Ignored reason"); |
| html_parse_.AddFilter(&filter); |
| |
| Parse("not_disabled_filter", "<!-- Empty body -->"); |
| |
| EXPECT_TRUE(disabled_filters.empty()); |
| } |
| |
| TEST_F(HtmlParseTest, DisabledFilters) { |
| std::vector<GoogleString> disabled_filters; |
| ASSERT_TRUE(disabled_filters.empty()); |
| |
| html_parse_.SetDynamicallyDisabledFilterList(&disabled_filters); |
| |
| DisableTestFilter filter1("not_disabled_filter1", true, "Ignored reason"); |
| html_parse_.AddFilter(&filter1); |
| |
| DisableTestFilter disabled_filter1("disabled_filter1", false, ""); |
| html_parse_.AddFilter(&disabled_filter1); |
| |
| DisableTestFilter filter2("not_disabled_filter2", true, "Ignored reason"); |
| html_parse_.AddFilter(&filter2); |
| |
| DisableTestFilter disabled_filter2("disabled_filter2", false, ""); |
| html_parse_.AddFilter(&disabled_filter2); |
| |
| DisableTestFilter filter3("not_disabled_filter3", true, "Ignored reason"); |
| html_parse_.AddFilter(&filter3); |
| |
| Parse("disabled_filter", "<!-- Empty body -->"); |
| |
| EXPECT_THAT(disabled_filters, |
| UnorderedElementsAre(disabled_filter1.ExpectedDisabledMessage(), |
| disabled_filter2.ExpectedDisabledMessage())); |
| } |
| |
| TEST_F(HtmlParseTest, DisabledFilterWithReason) { |
| std::vector<GoogleString> disabled_filters; |
| ASSERT_TRUE(disabled_filters.empty()); |
| html_parse_.SetDynamicallyDisabledFilterList(&disabled_filters); |
| |
| const GoogleString disabled_reason("Some reason"); |
| DisableTestFilter filter("disabled_filter_with_reason", false, |
| disabled_reason); |
| html_parse_.AddFilter(&filter); |
| |
| Parse("disabled_filter_with_reason", "<!-- Empty body -->"); |
| |
| EXPECT_THAT(disabled_filters, |
| UnorderedElementsAre(filter.ExpectedDisabledMessage())); |
| } |
| |
| class CountingCallbacksFilter : public EmptyHtmlFilter { |
| public: |
| CountingCallbacksFilter() |
| : num_start_elements_(0), |
| num_end_elements_(0), |
| num_char_elements_(0) { |
| } |
| int num_start_elements() const { return num_start_elements_; } |
| int num_end_elements() const { return num_end_elements_; } |
| int num_char_elements() const { return num_char_elements_; } |
| |
| protected: |
| virtual void StartDocument() { |
| num_start_elements_ = 0; |
| num_end_elements_ = 0; |
| num_char_elements_ = 0; |
| } |
| |
| virtual void StartElement(HtmlElement* element) { |
| ++num_start_elements_; |
| } |
| |
| virtual void EndElement(HtmlElement* element) { |
| ++num_end_elements_; |
| } |
| |
| virtual void Characters(HtmlCharactersNode* characters) { |
| ++num_char_elements_; |
| } |
| |
| virtual const char* Name() const { return "CountingCallbacksFilter"; } |
| |
| private: |
| int num_start_elements_; |
| int num_end_elements_; |
| int num_char_elements_; |
| |
| DISALLOW_COPY_AND_ASSIGN(CountingCallbacksFilter); |
| }; |
| |
| // Checks that deleting nodes does not change the expected order of |
| // HTML parse events. We delete any node of del_node_type_, but we |
| // only delete it when we see a tag of type del_from_type_ (and |
| // del_from_start_tag indicates whether we do it when we see the start |
| // tag or the end tag of del_from_type). Can be configured to remove |
| // nodes using DeleteSavingChildren, DeleteNode, or |
| // MakeElementInvisible. |
| class DeleteNodesFilter : public CountingCallbacksFilter { |
| public: |
| explicit DeleteNodesFilter(HtmlParse* html_parse) |
| : html_parse_(html_parse), |
| delete_node_type_(HtmlName::kNotAKeyword), |
| delete_from_type_(HtmlName::kNotAKeyword), |
| delete_on_open_tag_(false), |
| save_children_(true), |
| make_invisible_(false), |
| num_deleted_elements_(0), |
| flushes_preventing_delete_(0) { |
| } |
| |
| void set_delete_node_type(HtmlName::Keyword keyword) { |
| delete_node_type_ = keyword; |
| } |
| |
| void set_save_children(bool x) { save_children_ = x; } |
| void set_make_invisible(bool x) { make_invisible_ = x; } |
| |
| void set_delete_from_type(HtmlName::Keyword keyword) { |
| delete_from_type_ = keyword; |
| } |
| |
| void set_delete_on_open_tag(bool del_from_start) { |
| delete_on_open_tag_ = del_from_start; |
| } |
| |
| int num_deleted_elements() const { return num_deleted_elements_; } |
| int flushes_preventing_delete() const { return flushes_preventing_delete_; } |
| |
| protected: |
| virtual void StartDocument() { |
| pending_deletes_.clear(); |
| num_deleted_elements_ = 0; |
| flushes_preventing_delete_ = 0; |
| // Note: we do not clear save_children_ or make_invisible_ here because |
| // we re-use these settings when repeating tests with different flush |
| // windows. |
| } |
| |
| virtual void StartElement(HtmlElement* element) { |
| CountingCallbacksFilter::StartElement(element); |
| if (element->keyword() == delete_node_type_) { |
| pending_deletes_.push_back(element); |
| } |
| if (delete_on_open_tag_ && element->keyword() == delete_from_type_) { |
| DeleteElements(); |
| } |
| } |
| |
| virtual void EndElement(HtmlElement* element) { |
| CountingCallbacksFilter::EndElement(element); |
| if (!delete_on_open_tag_ && element->keyword() == delete_from_type_) { |
| DeleteElements(); |
| } |
| } |
| |
| virtual void Flush() { |
| // We can't delete an element that has been flushed. |
| for (int i = 0, n = pending_deletes_.size(); i < n; ++i) { |
| ++flushes_preventing_delete_; |
| } |
| pending_deletes_.clear(); |
| } |
| |
| virtual const char* Name() const { return "DeleteNodesFilter"; } |
| |
| private: |
| void DeleteElements() { |
| for (int i = 0, n = pending_deletes_.size(); i < n; ++i) { |
| HtmlElement* element = pending_deletes_[i]; |
| bool success = make_invisible_ |
| ? html_parse_->MakeElementInvisible(element) |
| : (save_children_ |
| ? html_parse_->DeleteSavingChildren(element) |
| : html_parse_->DeleteNode(element)); |
| if (success) { |
| ++num_deleted_elements_; |
| } |
| } |
| pending_deletes_.clear(); |
| } |
| |
| HtmlParse* html_parse_; |
| std::vector<HtmlElement*> pending_deletes_; |
| HtmlName::Keyword delete_node_type_; |
| HtmlName::Keyword delete_from_type_; |
| bool delete_on_open_tag_; |
| bool save_children_; |
| bool make_invisible_; |
| int num_deleted_elements_; |
| int flushes_preventing_delete_; |
| |
| DISALLOW_COPY_AND_ASSIGN(DeleteNodesFilter); |
| }; |
| |
| class HtmlParseDeleteTest : public HtmlParseTest { |
| protected: |
| HtmlParseDeleteTest() |
| : delete_filter_(html_parse()), |
| total_successes_(0), |
| total_failures_(0) { |
| html_parse()->AddFilter(&delete_filter_); |
| SetupWriter(); |
| } |
| |
| void DeleteTest(StringPiece input, |
| StringPiece expected_output_if_deletes_worked) { |
| for (int i = 0, n = input.size(); i < n; ++i) { |
| ParseWithFlush(input, i); |
| if (delete_filter_.num_deleted_elements() != 0) { |
| EXPECT_STREQ(expected_output_if_deletes_worked, |
| output_buffer_) << " flush " << i; |
| ++total_successes_; |
| } else { |
| EXPECT_STREQ(input, output_buffer_) << " flush " << i; |
| ++total_failures_; |
| } |
| output_buffer_.clear(); |
| } |
| } |
| |
| DeleteNodesFilter delete_filter_; |
| int total_successes_; |
| int total_failures_; |
| }; |
| |
| TEST_F(HtmlParseDeleteTest, DeleteAtStartAcrossFlush) { |
| delete_filter_.set_delete_on_open_tag(true); |
| delete_filter_.set_save_children(false); |
| delete_filter_.set_delete_node_type(HtmlName::kDiv); |
| delete_filter_.set_delete_from_type(HtmlName::kDiv); |
| const StringPiece kInput("1<div id=a>hello</div>2"); |
| DeleteTest(kInput, "12"); |
| |
| // We can utilize the infrastructure in DeferCurrentNode to make it |
| // possible to delete nodes from their StartElement even if their |
| // EndElement is not in the flush window |
| EXPECT_EQ(0, total_failures_); |
| |
| // If the both the StartElement and EndElement are visible, then |
| // we should successfully eliminate the div and its contents. That |
| // will happen every time. |
| EXPECT_EQ(kInput.size(), total_successes_); |
| } |
| |
| TEST_F(HtmlParseDeleteTest, DeleteAtEndAcrossFlush) { |
| delete_filter_.set_delete_on_open_tag(false); |
| delete_filter_.set_save_children(false); |
| delete_filter_.set_delete_node_type(HtmlName::kDiv); |
| delete_filter_.set_delete_from_type(HtmlName::kDiv); |
| DeleteTest("1<div id=a>hello</div>2", "12"); |
| |
| // If the flush happened in the middle of the div, then we will |
| // fail. That will happen at least sometimes. |
| EXPECT_LT(0, total_failures_); |
| |
| // If the both the StartElement and EndElement are visible, then |
| // we should successfully eliminate the div and its contents. That |
| // will happen at least sometimes. |
| EXPECT_LT(0, total_successes_); |
| } |
| |
| TEST_F(HtmlParseDeleteTest, InvisibleAtStart) { |
| delete_filter_.set_delete_on_open_tag(true); |
| delete_filter_.set_make_invisible(true); |
| delete_filter_.set_delete_node_type(HtmlName::kDiv); |
| delete_filter_.set_delete_from_type(HtmlName::kDiv); |
| const StringPiece kInput("1<div id=a>hello</div>2"); |
| DeleteTest(kInput, "1hello2"); |
| |
| // It is always possible to make nodes invisible as long as their |
| // StartElement has not been flushed. |
| EXPECT_EQ(0, total_failures_); |
| EXPECT_EQ(kInput.size(), total_successes_); |
| } |
| |
| TEST_F(HtmlParseDeleteTest, InvisibleAtEnd) { |
| delete_filter_.set_delete_on_open_tag(false); |
| delete_filter_.set_make_invisible(true); |
| delete_filter_.set_delete_node_type(HtmlName::kDiv); |
| delete_filter_.set_delete_from_type(HtmlName::kDiv); |
| DeleteTest("1<div id=a>hello</div>2", "1hello2"); |
| |
| // If the flush happened in the middle of the div, then we will |
| // fail. That will happen at least sometimes. |
| EXPECT_LT(0, total_failures_); |
| |
| // If the both the StartElement and EndElement are visible, then |
| // we should successfully eliminate the div and its contents. That |
| // will happen at least sometimes. |
| EXPECT_LT(0, total_successes_); |
| } |
| |
| class EventListOrderTest : public HtmlParseTest { |
| protected: |
| EventListOrderTest() |
| : delete_nodes_filter_(&html_parse_) { |
| html_parse_.AddFilter(&delete_nodes_filter_); |
| } |
| |
| virtual bool AddBody() const { return false; } |
| virtual bool AddHtmlTags() const { return false; } |
| |
| DeleteNodesFilter delete_nodes_filter_; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(EventListOrderTest); |
| }; |
| |
| TEST_F(EventListOrderTest, DeleteSavingChildrenCalledOnOpen) { |
| delete_nodes_filter_.set_delete_on_open_tag(true); |
| delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv); |
| delete_nodes_filter_.set_delete_from_type(HtmlName::kDiv); |
| ValidateExpected("delete_saving_children_open", |
| "<div><p>1</p></div><span>2</span>", |
| "<p>1</p><span>2</span>"); |
| EXPECT_EQ(delete_nodes_filter_.num_start_elements(), 3); |
| EXPECT_EQ(delete_nodes_filter_.num_end_elements(), 2); |
| EXPECT_EQ(delete_nodes_filter_.num_char_elements(), 2); |
| EXPECT_EQ(delete_nodes_filter_.num_deleted_elements(), 1); |
| } |
| |
| TEST_F(EventListOrderTest, DeleteSavingChildrenCalledOnClose) { |
| delete_nodes_filter_.set_delete_on_open_tag(false); |
| delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv); |
| delete_nodes_filter_.set_delete_from_type(HtmlName::kDiv); |
| ValidateExpected("delete_saving_children_close", |
| "<div><p>1</p></div><span>2</span>", |
| "<p>1</p><span>2</span>"); |
| EXPECT_EQ(delete_nodes_filter_.num_start_elements(), 3); |
| EXPECT_EQ(delete_nodes_filter_.num_end_elements(), 3); |
| EXPECT_EQ(delete_nodes_filter_.num_char_elements(), 2); |
| EXPECT_EQ(delete_nodes_filter_.num_deleted_elements(), 1); |
| } |
| |
| TEST_F(EventListOrderTest, DeleteSavingChildrenCalledInner) { |
| delete_nodes_filter_.set_delete_on_open_tag(true); |
| delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv); |
| delete_nodes_filter_.set_delete_from_type(HtmlName::kP); |
| ValidateExpected("delete_saving_children_inner", |
| "<div><p>1</p></div><span>2</span>", |
| "<p>1</p><span>2</span>"); |
| EXPECT_EQ(delete_nodes_filter_.num_start_elements(), 3); |
| EXPECT_EQ(delete_nodes_filter_.num_end_elements(), 2); |
| EXPECT_EQ(delete_nodes_filter_.num_char_elements(), 2); |
| EXPECT_EQ(delete_nodes_filter_.num_deleted_elements(), 1); |
| } |
| |
| TEST_F(EventListOrderTest, DeleteSavingChildrenCalledOuter) { |
| delete_nodes_filter_.set_delete_on_open_tag(true); |
| delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv); |
| delete_nodes_filter_.set_delete_from_type(HtmlName::kSpan); |
| ValidateExpected("delete_saving_children_outer", |
| "<div><p>1</p></div><span>2</span>", |
| "<p>1</p><span>2</span>"); |
| EXPECT_EQ(delete_nodes_filter_.num_start_elements(), 3); |
| EXPECT_EQ(delete_nodes_filter_.num_end_elements(), 3); |
| EXPECT_EQ(delete_nodes_filter_.num_char_elements(), 2); |
| EXPECT_EQ(delete_nodes_filter_.num_deleted_elements(), 1); |
| } |
| |
| TEST_F(EventListOrderTest, DeleteSavingChildrenCalledInnerMiddle) { |
| delete_nodes_filter_.set_delete_on_open_tag(false); |
| delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv); |
| delete_nodes_filter_.set_delete_from_type(HtmlName::kP); |
| ValidateExpected("delete_saving_children_inner_middle", |
| "<div><p>1</p>2<span>3</span></div><span>4</span>", |
| "<p>1</p>2<span>3</span><span>4</span>"); |
| EXPECT_EQ(delete_nodes_filter_.num_start_elements(), 4); |
| EXPECT_EQ(delete_nodes_filter_.num_end_elements(), 3); |
| EXPECT_EQ(delete_nodes_filter_.num_char_elements(), 4); |
| EXPECT_EQ(delete_nodes_filter_.num_deleted_elements(), 1); |
| } |
| |
| TEST_F(EventListOrderTest, DeleteSavingChildrenCalledInnerEnd) { |
| delete_nodes_filter_.set_delete_on_open_tag(false); |
| delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv); |
| delete_nodes_filter_.set_delete_from_type(HtmlName::kP); |
| ValidateExpected("delete_saving_children_inner_end", |
| "<div><p>1</p></div><span>2</span>", |
| "<p>1</p><span>2</span>"); |
| EXPECT_EQ(delete_nodes_filter_.num_start_elements(), 3); |
| EXPECT_EQ(delete_nodes_filter_.num_end_elements(), 2); |
| EXPECT_EQ(delete_nodes_filter_.num_char_elements(), 2); |
| EXPECT_EQ(delete_nodes_filter_.num_deleted_elements(), 1); |
| } |
| |
| TEST_F(EventListOrderTest, DeleteSavingChildrenCalledInnerDeep) { |
| delete_nodes_filter_.set_delete_on_open_tag(false); |
| delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv); |
| delete_nodes_filter_.set_delete_from_type(HtmlName::kP); |
| ValidateExpected("delete_saving_children_inner_deep", |
| "<div><a><p>1</p>2<span>3</span></a></div><span>4</span>", |
| "<a><p>1</p>2<span>3</span></a><span>4</span>"); |
| EXPECT_EQ(delete_nodes_filter_.num_start_elements(), 5); |
| EXPECT_EQ(delete_nodes_filter_.num_end_elements(), 4); |
| EXPECT_EQ(delete_nodes_filter_.num_char_elements(), 4); |
| EXPECT_EQ(delete_nodes_filter_.num_deleted_elements(), 1); |
| } |
| |
| TEST_F(EventListOrderTest, DeleteSavingChildrenCalledOuterDistant) { |
| delete_nodes_filter_.set_delete_on_open_tag(false); |
| delete_nodes_filter_.set_delete_node_type(HtmlName::kDiv); |
| delete_nodes_filter_.set_delete_from_type(HtmlName::kA); |
| ValidateExpected("delete_saving_children_outer_distant", |
| "<div><p>1</p></div><span>2</span><a>3</a>", |
| "<p>1</p><span>2</span><a>3</a>"); |
| EXPECT_EQ(delete_nodes_filter_.num_start_elements(), 4); |
| EXPECT_EQ(delete_nodes_filter_.num_end_elements(), 4); |
| EXPECT_EQ(delete_nodes_filter_.num_char_elements(), 3); |
| EXPECT_EQ(delete_nodes_filter_.num_deleted_elements(), 1); |
| } |
| |
| // Filter to remove nodes during parsing and restore them sometime later. |
| class RestoreNodesFilter : public CountingCallbacksFilter { |
| public: |
| explicit RestoreNodesFilter(HtmlParse* html_parse) |
| : html_parse_(html_parse), |
| outstanding_deferred_elements_(0), |
| num_deletes_(0), |
| restore_on_open_(false) { |
| } |
| |
| // Establishes the ID or text of an element to defer, and the ID of an |
| // element to move after. |
| void MoveOnStart(const char* id_or_text, const char* restore_point) { |
| remove_map_[id_or_text] = restore_point; |
| } |
| |
| void DeleteOnStart(const char* id_or_text) { |
| delete_set_.insert(id_or_text); |
| } |
| |
| void set_restore_on_open(bool restore) { |
| restore_on_open_ = restore; |
| } |
| |
| // Returns the number of nodes that have been deferred, but not yet restored. |
| bool AllRestored() const { return restore_map_.empty(); } |
| int outstanding_deferred_elements() const { |
| return outstanding_deferred_elements_; |
| } |
| int num_deletes() const { return num_deletes_; } |
| |
| protected: |
| virtual void StartDocument() { |
| CountingCallbacksFilter::StartDocument(); |
| restore_map_.clear(); |
| outstanding_deferred_elements_ = 0; |
| num_deletes_ = 0; |
| } |
| |
| virtual void Characters(HtmlCharactersNode* node) { |
| CountingCallbacksFilter::Characters(node); |
| const GoogleString& text = node->contents(); |
| if (!MaybeRemoveNode(text, node) && |
| !MaybeDeleteNode(text, node)) { |
| MaybeRestoreNode(text); |
| } |
| } |
| |
| virtual void StartElement(HtmlElement* element) { |
| CountingCallbacksFilter::StartElement(element); |
| const char* id = FindId(element); |
| if (id != NULL) { |
| if (!MaybeRemoveNode(id, element)) { |
| MaybeDeleteNode(id, element); |
| } |
| if (restore_on_open_) { |
| MaybeRestoreNode(id); |
| } |
| } |
| } |
| |
| virtual void EndElement(HtmlElement* element) { |
| CountingCallbacksFilter::EndElement(element); |
| const char* id = FindId(element); |
| if (id != NULL && !restore_on_open_) { |
| MaybeRestoreNode(id); |
| } |
| } |
| virtual const char* Name() const { return "RestoreNodesFilter"; } |
| |
| private: |
| typedef std::map<GoogleString, HtmlNode*> RestoreMap; |
| |
| const char* FindId(HtmlElement* element) { |
| const HtmlElement::Attribute* attr = element->FindAttribute("id"); |
| if (attr == NULL) { |
| return NULL; |
| } |
| return attr->DecodedValueOrNull(); |
| } |
| |
| bool MaybeRemoveNode(const GoogleString& id, HtmlNode* node) { |
| StringStringMap::iterator p = remove_map_.find(id); |
| if (p != remove_map_.end()) { |
| const GoogleString& restore_id = p->second; |
| EXPECT_TRUE(restore_map_[restore_id] == NULL); |
| restore_map_[restore_id] = node; |
| html_parse_->DeferCurrentNode(); |
| if (dynamic_cast<HtmlElement*>(node) != NULL) { |
| ++outstanding_deferred_elements_; |
| } |
| return true; |
| } |
| return false; |
| } |
| |
| bool MaybeDeleteNode(const GoogleString& id, HtmlNode* node) { |
| if (delete_set_.find(id) != delete_set_.end() && |
| html_parse_->DeleteNode(node)) { |
| ++num_deletes_; |
| return true; |
| } |
| return false; |
| } |
| |
| void MaybeRestoreNode(const GoogleString& id) { |
| RestoreMap::iterator p = restore_map_.find(id); |
| if (p != restore_map_.end()) { |
| HtmlNode* restore_node = p->second; |
| html_parse_->RestoreDeferredNode(restore_node); |
| restore_map_.erase(p); |
| if (dynamic_cast<HtmlElement*>(restore_node) != NULL) { |
| --outstanding_deferred_elements_; |
| } |
| } |
| } |
| |
| HtmlParse* html_parse_; |
| StringStringMap remove_map_; |
| StringSet delete_set_; |
| RestoreMap restore_map_; |
| int outstanding_deferred_elements_; |
| int num_deletes_; |
| bool restore_on_open_; |
| |
| DISALLOW_COPY_AND_ASSIGN(RestoreNodesFilter); |
| }; |
| |
| class HtmlRestoreTest : public HtmlParseTest { |
| protected: |
| HtmlRestoreTest() |
| : upstream_writer_filter_(&html_parse_), |
| upstream_writer_(&upstream_buffer_), |
| restore_nodes_filter_(&html_parse_), |
| expect_restored_(true) { |
| // We are interested in the effect on deferring nodes on (a) the |
| // filter that does the deferring, (b) upstream filters and (c) |
| // downstream filters. Downstream is covered by the normal |
| // HtmlWriterFilter filter that gets installed by SetupWriter. |
| // But Upstream is interesting too, especially when a FLUSH occurs |
| // while a deferred node is open. Sowe simply install another |
| // writer filter before the RestoreNodesFilter, which should see |
| // the input unmodified. |
| html_parse_.AddFilter(&upstream_writer_filter_); |
| html_parse_.AddFilter(&pre_counts_filter_); |
| upstream_writer_filter_.set_writer(&upstream_writer_); |
| html_parse_.AddFilter(&restore_nodes_filter_); |
| html_parse_.AddFilter(&post_counts_filter_); |
| } |
| |
| virtual bool AddBody() const { return false; } |
| virtual bool AddHtmlTags() const { return false; } |
| |
| // Runs a test like ValidateExpected, but puts one or two Flush |
| // calls at aribtrary points in the text, covering all n^2 places |
| // to put the two flushes. |
| // |
| // Don't call this with an especially large 'before', otherwise the |
| // time taken will grow quadratically. Calling this with 70 byte |
| // inputs appears to be OK, taking <300ms to run even in a debug build. |
| void RunTestsWithManyFlushWindows(StringPiece before, |
| StringPiece expected) { |
| SetupWriter(); |
| int before_size = before.size(); |
| for (int flush1 = 0; flush1 < before_size; ++flush1) { |
| for (int flush2 = flush1; flush2 < before_size; ++flush2) { |
| GoogleString this_id = |
| StringPrintf("http://test.com/%d_%d", flush1, flush2); |
| html_parse_.StartParse(this_id); |
| if (flush1 != 0) { |
| html_parse_.ParseText(before.substr(0, flush1)); |
| } |
| if (flush2 != flush1) { |
| html_parse_.Flush(); |
| html_parse_.ParseText(before.substr(flush1, flush2 - flush1)); |
| } |
| if (flush2 != before_size) { |
| html_parse_.Flush(); |
| html_parse_.ParseText(before.substr(flush2)); |
| } |
| html_parse_.FinishParse(); |
| ASSERT_STREQ(expected, output_buffer_) << this_id; |
| output_buffer_.clear(); |
| ASSERT_STREQ(before, upstream_buffer_) << this_id; |
| upstream_buffer_.clear(); |
| |
| // If we expect that everything that was removed was restored, then the |
| // start/end/char-counts should all match before and during |
| // the RestoreNodes filter. |
| if (expect_restored_) { |
| EXPECT_TRUE(restore_nodes_filter_.AllRestored()) << this_id; |
| if (restore_nodes_filter_.num_deletes() == 0) { |
| ASSERT_EQ(pre_counts_filter_.num_start_elements(), |
| restore_nodes_filter_.num_start_elements()) << this_id; |
| ASSERT_EQ(pre_counts_filter_.num_end_elements(), |
| restore_nodes_filter_.num_end_elements()) << this_id; |
| ASSERT_EQ(pre_counts_filter_.num_start_elements(), |
| post_counts_filter_.num_start_elements()) << this_id; |
| ASSERT_EQ(pre_counts_filter_.num_end_elements(), |
| post_counts_filter_.num_end_elements()) << this_id; |
| } |
| ASSERT_EQ(pre_counts_filter_.num_char_elements(), |
| restore_nodes_filter_.num_char_elements()) << this_id; |
| |
| // We use ASSERT_GE here because some of the tests will result in |
| // characters being coalesced on the defer or on the restore. |
| ASSERT_GE(pre_counts_filter_.num_char_elements(), |
| post_counts_filter_.num_char_elements()) << this_id; |
| |
| // Of course, start and end element count must be balanced, |
| // as long as all deferred nodes were restored. |
| ASSERT_EQ(restore_nodes_filter_.num_start_elements(), |
| (restore_nodes_filter_.num_end_elements() + |
| restore_nodes_filter_.num_deletes())) << this_id; |
| } else { |
| // Otherwise there will be an extra Start tag for every |
| // unrestored element. |
| EXPECT_FALSE(restore_nodes_filter_.AllRestored()) << this_id; |
| ASSERT_EQ( |
| restore_nodes_filter_.num_start_elements(), |
| (restore_nodes_filter_.num_end_elements() + |
| restore_nodes_filter_.outstanding_deferred_elements())) |
| << this_id; |
| } |
| |
| // Note that only the restore_nodes_filter itself can have mismatched |
| // start/end callback-counts. Filters running before or after that one |
| // see a balanced set of callbacks. |
| ASSERT_EQ(pre_counts_filter_.num_start_elements(), |
| pre_counts_filter_.num_end_elements()) << this_id; |
| ASSERT_EQ(post_counts_filter_.num_start_elements(), |
| post_counts_filter_.num_end_elements()) << this_id; |
| } |
| } |
| } |
| |
| void TestTwoFilters(const char* src1, const char* dest1, |
| const char* src2, const char* dest2, |
| const char* node_to_delete, |
| StringPiece input, |
| StringPiece expected) { |
| RestoreNodesFilter restore_nodes_filter2(&html_parse_); |
| html_parse_.AddFilter(&restore_nodes_filter2); |
| SetupWriter(); |
| restore_nodes_filter_.MoveOnStart(src1, dest1); |
| if (node_to_delete != NULL) { |
| restore_nodes_filter_.DeleteOnStart(node_to_delete); |
| } |
| restore_nodes_filter2.MoveOnStart(src2, dest2); |
| RunTestsWithManyFlushWindows(input, expected); |
| } |
| |
| HtmlWriterFilter upstream_writer_filter_; |
| CountingCallbacksFilter pre_counts_filter_; |
| StringWriter upstream_writer_; |
| GoogleString upstream_buffer_; |
| RestoreNodesFilter restore_nodes_filter_; |
| CountingCallbacksFilter post_counts_filter_; |
| bool expect_restored_; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(HtmlRestoreTest); |
| }; |
| |
| TEST_F(HtmlRestoreTest, MoveAAfterB) { |
| restore_nodes_filter_.MoveOnStart("a", "b"); // moves div 'a' after div 'b' |
| RunTestsWithManyFlushWindows( |
| ("0<div id=a>1<span>2</span>3</div>" |
| "4<div id=b>5<span>6</span></div>7"), |
| ("04<div id=b>5<span>6</span></div>" |
| "<div id=a>1<span>2</span>3</div>7")); |
| } |
| |
| TEST_F(HtmlRestoreTest, MoveAAfterBUnclosed) { |
| restore_nodes_filter_.MoveOnStart("a", "b"); // moves div 'a' after div 'b' |
| RunTestsWithManyFlushWindows( |
| ("0<div id=a>1<span>2</span>3</div>" |
| "4<div id=b>5<span>6</span>7"), // b unclosed, but lexer auto-closes it. |
| ("04<div id=b>5<span>6</span>7" |
| "<div id=a>1<span>2</span>3</div>")); |
| } |
| |
| TEST_F(HtmlRestoreTest, MoveAAfterNestedB) { |
| restore_nodes_filter_.MoveOnStart("a", "b"); // moves div 'a' after div 'b' |
| RunTestsWithManyFlushWindows( |
| ("0<div id=a>1<span>2</span>3</div>" |
| "4<div><div id=b>5<span>6</span></div>7</div>"), |
| ("04<div><div id=b>5<span>6</span></div>" |
| "<div id=a>1<span>2</span>3</div>7</div>")); |
| } |
| |
| |
| TEST_F(HtmlRestoreTest, MoveABAfterC) { |
| restore_nodes_filter_.MoveOnStart("a", "c"); |
| restore_nodes_filter_.MoveOnStart("b", "a"); |
| RunTestsWithManyFlushWindows( |
| "0<img id=a />1<img id=b />2<img id=c />3", |
| "012<img id=c /><img id=a /><img id=b />3"); |
| } |
| |
| TEST_F(HtmlRestoreTest, MoveTextAfterDiv) { |
| restore_nodes_filter_.MoveOnStart("start", "a"); |
| RunTestsWithManyFlushWindows( |
| "start<div id=a></div>", |
| "<div id=a></div>start"); |
| } |
| |
| TEST_F(HtmlRestoreTest, MoveDivAfterText) { |
| restore_nodes_filter_.MoveOnStart("a", "hello"); |
| RunTestsWithManyFlushWindows( |
| "<div id=a></div>hello", |
| "hello<div id=a></div>"); |
| } |
| |
| TEST_F(HtmlRestoreTest, MoveTextfterText) { |
| restore_nodes_filter_.MoveOnStart("one", "two"); |
| RunTestsWithManyFlushWindows("one<p>two", "<p>twoone"); |
| } |
| |
| TEST_F(HtmlRestoreTest, MoveStartWithEndNotVisibleAUnclosed) { |
| message_handler_.AddPatternToSkipPrinting( |
| "*Removed node <div id=a> (unclosed)*"); |
| SetupWriter(); |
| restore_nodes_filter_.MoveOnStart("a", "b"); |
| expect_restored_ = false; |
| RunTestsWithManyFlushWindows("<div id=a>1<div id=b>2</div>", ""); |
| EXPECT_LT(0, message_handler_.MessagesOfType(kWarning)); |
| } |
| |
| TEST_F(HtmlRestoreTest, MoveDivWithMissingDestination) { |
| message_handler_.AddPatternToSkipPrinting("*Removed node <div id=a></div>*"); |
| SetupWriter(); |
| restore_nodes_filter_.MoveOnStart("a", "b"); |
| expect_restored_ = false; |
| RunTestsWithManyFlushWindows("<div id=a>1</div>", ""); |
| EXPECT_LT(0, message_handler_.MessagesOfType(kWarning)); |
| } |
| |
| TEST_F(HtmlRestoreTest, MoveCharsWithMissingDestination) { |
| message_handler_.AddPatternToSkipPrinting( |
| "*Removed node Characters text never replaced*"); |
| SetupWriter(); |
| restore_nodes_filter_.MoveOnStart("text", "no_such_destination"); |
| expect_restored_ = false; |
| RunTestsWithManyFlushWindows("text", ""); |
| EXPECT_LT(0, message_handler_.MessagesOfType(kWarning)); |
| } |
| |
| TEST_F(HtmlRestoreTest, TwoDeleteAcrossFlush) { |
| SetupWriter(); |
| restore_nodes_filter_.DeleteOnStart("a"); |
| RunTestsWithManyFlushWindows("1<div id=a></div>2", "12"); |
| } |
| |
| TEST_F(HtmlRestoreTest, RestoreOnOpenTag) { |
| SetupWriter(); |
| restore_nodes_filter_.MoveOnStart("a", "b"); |
| restore_nodes_filter_.set_restore_on_open(true); |
| RunTestsWithManyFlushWindows("<div id=a>abc</div><div id=b>def</div>", |
| "<div id=b><div id=a>abc</div>def</div>"); |
| } |
| |
| // This tests having two filters that each do deferrals. The |
| // interesting case is where the second filter in the chain defers a |
| // node first, and then, before restoring first deferred node, another |
| // filter defers a different node. |
| TEST_F(HtmlRestoreTest, TwoDeferringFilters) { |
| TestTwoFilters( |
| "b", "c", |
| "a", "d", |
| NULL, // Node to delete |
| "<img id=a /><img id=b /><img id=c /><img id=d />", |
| "<img id=c /><img id=b /><img id=d /><img id=a />"); |
| } |
| |
| TEST_F(HtmlRestoreTest, TwoDeferringFiltersWithDelete) { |
| TestTwoFilters( |
| "b", "c", // In first filter, mov div b to after to div c. |
| "a", "d", // In second filter, move div a to after div d. |
| "a", // In first filter, delete node "a" |
| "<img id=a /><img id=b /><img id=c /><img id=d />", |
| "<img id=c /><img id=b /><img id=d />"); |
| } |
| |
| TEST_F(HtmlRestoreTest, TwoDeferringFiltersNestingOuterFirst) { |
| TestTwoFilters( |
| "a", "d", |
| "b", "c", |
| NULL, // Node to delete |
| "<div id=a><div id=b></div><div id=c></div></div><div id=d></div>", |
| "<div id=d></div><div id=a><div id=c></div><div id=b></div></div>"); |
| } |
| |
| TEST_F(HtmlRestoreTest, TwoDeferringFiltersNestingOuterFirstWithDelete) { |
| TestTwoFilters( |
| "a", "d", |
| "b", "c", |
| "b", |
| "<div id=a><div id=b></div><div id=c></div></div><div id=d></div>", |
| "<div id=d></div><div id=a><div id=c></div></div>"); |
| } |
| |
| TEST_F(HtmlRestoreTest, TwoDeferringFiltersNestingInnerFirst) { |
| TestTwoFilters( |
| "b", "c", |
| "a", "d", |
| NULL, // Node to delete |
| "<div id=a><div id=b></div><div id=c></div></div><div id=d></div>", |
| "<div id=d></div><div id=a><div id=c></div><div id=b></div></div>"); |
| } |
| |
| TEST_F(HtmlRestoreTest, TwoDeferringFiltersNestingInnerFirstWithDelete) { |
| TestTwoFilters( |
| "b", "c", |
| "a", "d", |
| "a", |
| "<div id=a><div id=b></div><div id=c></div></div><div id=d></div>", |
| "<div id=d></div>"); |
| } |
| |
| TEST_F(HtmlRestoreTest, DeferringAndDeletingFilters) { |
| DeleteNodesFilter delete_nodes_filter(&html_parse_); |
| RestoreNodesFilter restore_nodes_filter2(&html_parse_); |
| html_parse_.AddFilter(&delete_nodes_filter); // Upstream |
| html_parse_.AddFilter(&restore_nodes_filter2); // Downstream |
| SetupWriter(); |
| // Don't do anything with restore_nodes_filter_ |
| delete_nodes_filter.set_delete_node_type(HtmlName::kSpan); |
| delete_nodes_filter.set_delete_from_type(HtmlName::kDiv); |
| delete_nodes_filter.set_delete_on_open_tag(true); |
| restore_nodes_filter2.MoveOnStart("a", "d"); |
| const StringPiece kInput( |
| "<span id=a><div id=b></div><div id=c></div></span><div id=d></div>"); |
| ValidateExpected("defer_and_delete", kInput, |
| "<div id=b></div><div id=c></div><div id=d></div>"); |
| EXPECT_EQ(1, delete_nodes_filter.num_deleted_elements()); |
| |
| // With the same filter setup, put a flush in the middle. |
| output_buffer_.clear(); |
| html_parse_.StartParse("http://test.com/with_flush"); |
| html_parse_.ParseText(kInput.substr(0, kInput.size() / 2)); |
| html_parse_.Flush(); |
| html_parse_.ParseText(kInput.substr(kInput.size() / 2)); |
| html_parse_.FinishParse(); |
| |
| // Because of the flush, deleting the 'span' did not work. However, |
| // moving the span (and all its contents) after the 'd' did. |
| // |
| // TODO(jmarantz): consider making DeleteSavingChildren work even if |
| // the EndElement is not yet parsed, in which case we can switch to |
| // using RunTestsWithManyFlushWindows and expect the same results |
| // regardless of when the flush occurs. |
| EXPECT_STREQ( |
| "<div id=d></div><span id=a><div id=b></div><div id=c></div></span>", |
| output_buffer_); |
| EXPECT_EQ(0, delete_nodes_filter.num_deleted_elements()); |
| } |
| |
| TEST_F(HtmlRestoreTest, DeleteDeferredNode) { |
| DeleteNodesFilter delete_nodes_filter(&html_parse_); |
| RestoreNodesFilter restore_nodes_filter2(&html_parse_); |
| html_parse_.AddFilter(&delete_nodes_filter); // Upstream |
| html_parse_.AddFilter(&restore_nodes_filter2); // Downstream |
| SetupWriter(); |
| // Don't do anything with restore_nodes_filter_ |
| delete_nodes_filter.set_delete_node_type(HtmlName::kSpan); |
| delete_nodes_filter.set_delete_from_type(HtmlName::kDiv); |
| delete_nodes_filter.set_delete_on_open_tag(true); |
| restore_nodes_filter2.MoveOnStart("a", "d"); |
| const StringPiece kInput("<span id=a></span><div id=d></div>"); |
| ValidateExpected("delete_deferred", kInput, "<div id=d></div>"); |
| EXPECT_EQ(1, delete_nodes_filter.num_deleted_elements()); |
| EXPECT_EQ(0, delete_nodes_filter.flushes_preventing_delete()); |
| |
| // With the same filter setup, put a flush in the middle. |
| output_buffer_.clear(); |
| html_parse_.StartParse("http://test.com/with_flush"); |
| html_parse_.ParseText(kInput.substr(0, kInput.size() / 2)); |
| html_parse_.Flush(); |
| html_parse_.ParseText(kInput.substr(kInput.size() / 2)); |
| html_parse_.FinishParse(); |
| |
| // Because of the flush, deleting the 'span' did not work. However, |
| // moving the span after the 'd' did. |
| // |
| // TODO(jmarantz): consider making DeleteSavingChildren work even if |
| // the EndElement is not yet parsed, in which case we can switch to |
| // using RunTestsWithManyFlushWindows and expect the same results |
| // regardless of when the flush occurs. |
| EXPECT_STREQ("<div id=d></div><span id=a></span>", output_buffer_); |
| EXPECT_EQ(0, delete_nodes_filter.num_deleted_elements()); |
| EXPECT_EQ(1, delete_nodes_filter.flushes_preventing_delete()); |
| } |
| |
| TEST_F(HtmlRestoreTest, CoalesceCharsAfterRestore) { |
| restore_nodes_filter_.MoveOnStart("1", "a"); |
| SetupWriter(); |
| |
| const StringPiece kInput("1<img id=a />2"); |
| int num_times_chars_are_coalesced = 0; |
| int num_times_chars_are_not_coalesced = 0; |
| for (int i = 0, n = kInput.size(); i < n; ++i) { |
| ParseWithFlush(kInput, i); |
| EXPECT_STREQ("<img id=a />12", output_buffer_) << i; |
| |
| // Before the deferral, we had two Characters nodes. |
| EXPECT_EQ(2, pre_counts_filter_.num_char_elements()) << i; |
| |
| // The filter that does the deferring also sees two Characters nodes. |
| EXPECT_EQ(2, restore_nodes_filter_.num_char_elements()) << i; |
| |
| // After the restore, the Characters nodes may be coalesced, |
| // depending on the flush window. |
| EXPECT_TRUE((post_counts_filter_.num_char_elements() == 1) || |
| (post_counts_filter_.num_char_elements() == 2)) << i; |
| if (post_counts_filter_.num_char_elements() == 1) { |
| ++num_times_chars_are_coalesced; |
| } else { |
| ++num_times_chars_are_not_coalesced; |
| } |
| } |
| EXPECT_LT(0, num_times_chars_are_coalesced); |
| EXPECT_LT(0, num_times_chars_are_not_coalesced); |
| } |
| |
| TEST_F(HtmlRestoreTest, CoalesceCharsOnDefer) { |
| restore_nodes_filter_.MoveOnStart("a", "b"); |
| SetupWriter(); |
| |
| const StringPiece kInput("1<img id=a />2<p id=b />"); |
| int num_times_chars_are_coalesced = 0; |
| int num_times_chars_are_not_coalesced = 0; |
| for (int i = 0, n = kInput.size(); i < n; ++i) { |
| ParseWithFlush(kInput, i); |
| EXPECT_STREQ(output_buffer_, "12<p id=b /><img id=a />"); |
| |
| // Before the deferral, we had two Characters nodes. |
| EXPECT_EQ(2, pre_counts_filter_.num_char_elements()) << i; |
| |
| // The filter that does the deferring also sees two Characters nodes. |
| EXPECT_EQ(2, restore_nodes_filter_.num_char_elements()) << i; |
| |
| // After the restore, the Characters nodes may be coalesced, |
| // depending on the flush window. |
| |
| // After the deferral, the Characters nodes may be coalesced, |
| // depending on the flush window. |
| EXPECT_TRUE((post_counts_filter_.num_char_elements() == 1) || |
| (post_counts_filter_.num_char_elements() == 2)) << i; |
| if (post_counts_filter_.num_char_elements() == 1) { |
| ++num_times_chars_are_coalesced; |
| } else { |
| ++num_times_chars_are_not_coalesced; |
| } |
| } |
| EXPECT_LT(0, num_times_chars_are_coalesced); |
| EXPECT_LT(0, num_times_chars_are_not_coalesced); |
| } |
| |
| // This test just shows that the lexer will, in the absense of Defer or |
| // Delete calls, coalesce Characters nodes across Flush. It does this |
| // by being lazy and not emitting literals until it seems some HTML syntax. |
| TEST_F(HtmlRestoreTest, CoalesceCharsAcrossFlush) { |
| SetupWriter(); |
| |
| const StringPiece kInput("12"); |
| for (int i = 0, n = kInput.size(); i < n; ++i) { |
| ParseWithFlush(kInput, i); |
| EXPECT_STREQ("12", output_buffer_) << i; |
| EXPECT_EQ(1, pre_counts_filter_.num_char_elements()) << i; |
| } |
| } |
| |
| class InsertScriptsFilter : public EmptyHtmlFilter { |
| public: |
| explicit InsertScriptsFilter(HtmlParse* parse) |
| : html_parse_(parse), |
| at_start_(false), |
| before_(false), |
| external_(false) { |
| } |
| |
| void set_insert_before(bool before) { before_ = before; } |
| void set_at_start(bool at_start) { at_start_ = at_start; } |
| void set_external(bool external) { external_ = external; } |
| |
| protected: |
| virtual void StartElement(HtmlElement* element) { Insert(true, element); } |
| virtual void EndElement(HtmlElement* element) { Insert(false, element); } |
| virtual const char* Name() const { return "InsertScriptsFilter"; } |
| |
| private: |
| void Insert(bool at_start, HtmlElement* element) { |
| if (element->keyword() == HtmlName::kHead) { |
| if (at_start == at_start_) { |
| if (before_) { |
| html_parse_->InsertScriptBeforeCurrent("inserted", external_); |
| } else { |
| html_parse_->InsertScriptAfterCurrent("inserted", external_); |
| } |
| } |
| } |
| } |
| |
| |
| private: |
| HtmlParse* html_parse_; |
| bool at_start_; |
| bool before_; |
| bool external_; |
| |
| DISALLOW_COPY_AND_ASSIGN(InsertScriptsFilter); |
| }; |
| |
| TEST_F(HtmlParseTestNoBody, InsertInlineScriptAfterStartOfHead) { |
| InsertScriptsFilter insert_scripts(&html_parse_); |
| insert_scripts.set_insert_before(false); |
| insert_scripts.set_at_start(true); |
| insert_scripts.set_external(false); |
| html_parse_.AddFilter(&insert_scripts); |
| SetupWriter(); |
| ValidateExpected("1", |
| "<head>text</head>", |
| "<head><script>inserted</script>text</head>"); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, InsertInlineScriptBeforeEndOfHead) { |
| InsertScriptsFilter insert_scripts(&html_parse_); |
| insert_scripts.set_insert_before(true); |
| insert_scripts.set_at_start(false); |
| insert_scripts.set_external(false); |
| html_parse_.AddFilter(&insert_scripts); |
| SetupWriter(); |
| ValidateExpected("1", |
| "<head>text</head>", |
| "<head>text<script>inserted</script></head>"); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, InsertInlineScriptBeforeStartOfHead) { |
| InsertScriptsFilter insert_scripts(&html_parse_); |
| insert_scripts.set_insert_before(true); |
| insert_scripts.set_at_start(true); |
| insert_scripts.set_external(false); |
| html_parse_.AddFilter(&insert_scripts); |
| SetupWriter(); |
| ValidateExpected("1", |
| "<head>text</head>", |
| "<script>inserted</script><head>text</head>"); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, InsertInlineScriptAfterEndOfHead) { |
| InsertScriptsFilter insert_scripts(&html_parse_); |
| insert_scripts.set_insert_before(false); |
| insert_scripts.set_at_start(false); |
| insert_scripts.set_external(false); |
| html_parse_.AddFilter(&insert_scripts); |
| SetupWriter(); |
| ValidateExpected("1", |
| "<head>text</head>", |
| "<head>text</head><script>inserted</script>"); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, InsertExternalScriptAfterStartOfHead) { |
| InsertScriptsFilter insert_scripts(&html_parse_); |
| insert_scripts.set_insert_before(false); |
| insert_scripts.set_at_start(true); |
| insert_scripts.set_external(true); |
| html_parse_.AddFilter(&insert_scripts); |
| SetupWriter(); |
| ValidateExpected("1", |
| "<head>text</head>", |
| "<head><script src=\"inserted\"></script>text</head>"); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, InsertExternalScriptBeforeEndOfHead) { |
| InsertScriptsFilter insert_scripts(&html_parse_); |
| insert_scripts.set_insert_before(true); |
| insert_scripts.set_at_start(false); |
| insert_scripts.set_external(true); |
| html_parse_.AddFilter(&insert_scripts); |
| SetupWriter(); |
| ValidateExpected("1", |
| "<head>text</head>", |
| "<head>text<script src=\"inserted\"></script></head>"); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, InsertExternalScriptBeforeStartOfHead) { |
| InsertScriptsFilter insert_scripts(&html_parse_); |
| insert_scripts.set_insert_before(true); |
| insert_scripts.set_at_start(true); |
| insert_scripts.set_external(true); |
| html_parse_.AddFilter(&insert_scripts); |
| SetupWriter(); |
| ValidateExpected("1", |
| "<head>text</head>", |
| "<script src=\"inserted\"></script><head>text</head>"); |
| } |
| |
| TEST_F(HtmlParseTestNoBody, InsertExternalScriptAfterEndOfHead) { |
| InsertScriptsFilter insert_scripts(&html_parse_); |
| insert_scripts.set_insert_before(false); |
| insert_scripts.set_at_start(false); |
| insert_scripts.set_external(true); |
| html_parse_.AddFilter(&insert_scripts); |
| SetupWriter(); |
| ValidateExpected("1", |
| "<head>text</head>", |
| "<head>text</head><script src=\"inserted\"></script>"); |
| } |
| |
| } // namespace net_instaweb |