| /* |
| * Copyright 2014 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Author: jmaessen@google.com (Jan-Willem Maessen) |
| |
| #include "net/instaweb/rewriter/public/mobilize_label_filter.h" |
| |
| #include "base/logging.h" |
| #include "net/instaweb/rewriter/public/add_ids_filter.h" |
| #include "net/instaweb/rewriter/public/rewrite_driver.h" |
| #include "net/instaweb/rewriter/public/rewrite_options.h" |
| #include "net/instaweb/rewriter/public/rewrite_test_base.h" |
| #include "net/instaweb/rewriter/public/server_context.h" |
| #include "pagespeed/kernel/base/gtest.h" |
| #include "pagespeed/kernel/base/mock_message_handler.h" |
| #include "pagespeed/kernel/base/scoped_ptr.h" |
| #include "pagespeed/kernel/base/statistics.h" |
| #include "pagespeed/kernel/base/stdio_file_system.h" |
| #include "pagespeed/kernel/base/string.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/html/html_parse_test_base.h" |
| #include "pagespeed/kernel/http/user_agent_matcher_test_base.h" |
| #include "pagespeed/opt/http/mock_property_page.h" |
| #include "pagespeed/opt/http/property_cache.h" |
| |
| namespace net_instaweb { |
| |
| namespace { |
| |
| const char kTestDataDir[] = "/net/instaweb/rewriter/testdata/"; |
| const char kOriginal[] = "mobilize_test.html"; |
| const char kOriginalHtml5[] = "mobilize_test_html5.html"; |
| const char kOriginalLabeled[] = "mobilize_test_labeled.html"; |
| const char kOriginalHtml5Labeled[] = "mobilize_test_html5_labeled.html"; |
| |
| class MobilizeLabelFilterTest : public RewriteTestBase { |
| protected: |
| MobilizeLabelFilterTest() {} |
| |
| virtual void SetUp() { |
| RewriteTestBase::SetUp(); |
| add_ids_filter_.reset(new AddIdsFilter(rewrite_driver())); |
| label_filter_.reset( |
| new MobilizeLabelFilter(false /* is_menu_subfetch */, |
| rewrite_driver())); |
| options()->set_mob_always(true); |
| html_parse()->AddFilter(add_ids_filter_.get()); |
| html_parse()->AddFilter(label_filter_.get()); |
| const PropertyCache::Cohort* dom_cohort = |
| SetupCohort(rewrite_driver()->server_context()->page_property_cache(), |
| RewriteDriver::kDomCohort); |
| server_context()->set_dom_cohort(dom_cohort); |
| SetHtmlMimetype(); |
| Statistics* stats = statistics(); |
| pages_labeled_ = |
| stats->GetVariable(MobilizeLabelFilter::kPagesLabeled); |
| pages_role_added_ = |
| stats->GetVariable(MobilizeLabelFilter::kPagesRoleAdded); |
| navigational_roles_ = |
| stats->GetVariable(MobilizeLabelFilter::kNavigationalRoles); |
| header_roles_ = |
| stats->GetVariable(MobilizeLabelFilter::kHeaderRoles); |
| content_roles_ = |
| stats->GetVariable(MobilizeLabelFilter::kContentRoles); |
| marginal_roles_ = |
| stats->GetVariable(MobilizeLabelFilter::kMarginalRoles); |
| divs_unlabeled_ = |
| stats->GetVariable(MobilizeLabelFilter::kDivsUnlabeled); |
| ambiguous_role_labels_ = |
| stats->GetVariable(MobilizeLabelFilter::kAmbiguousRoleLabels); |
| } |
| |
| // Remove data-mobile-role labeling from a labeled document |
| GoogleString Unlabel(StringPiece labeled) { |
| GoogleString result; |
| labeled.CopyToString(&result); |
| GlobalEraseBracketedSubstring(" data-mobile-role=\"", "\"", &result); |
| GlobalEraseBracketedSubstring("<!--id: ", "-->", &result); |
| GlobalReplaceSubstring("<!--No nodes labeled for mobilization-->", "", |
| &result); |
| GlobalEraseBracketedSubstring(" id=\"PageSpeed-", "\"", &result); |
| GlobalEraseBracketedSubstring("<script type=\"text/javascript\">", |
| "</script>", &result); |
| return result; |
| } |
| |
| // Remove percentages and previous content bytes, which are very |
| // input-sensitive, from output buffer so that we just check raw statistics |
| // counts. |
| void RemoveRedundantDataFromOutputBuffer() { |
| GlobalEraseBracketedSubstring( |
| "PreviousTagPercent:", ", ", &output_buffer_); |
| GlobalEraseBracketedSubstring( |
| "PreviousContentBytes:", ", ", &output_buffer_); |
| GlobalEraseBracketedSubstring( |
| "PreviousContentPercent:", ", ", &output_buffer_); |
| GlobalEraseBracketedSubstring( |
| "PreviousNonBlankBytes:", ", ", &output_buffer_); |
| GlobalEraseBracketedSubstring( |
| "PreviousNonBlankPercent:", ", ", &output_buffer_); |
| GlobalEraseBracketedSubstring( |
| "ContainedTagPercent:", ", ", &output_buffer_); |
| GlobalEraseBracketedSubstring( |
| "ContainedContentPercent:", ", ", &output_buffer_); |
| GlobalEraseBracketedSubstring( |
| "ContainedNonBlankPercent", ", ", &output_buffer_); |
| GlobalReplaceSubstring("-->", ", -->", &output_buffer_); |
| GlobalEraseBracketedSubstring("div percent:", ", ", &output_buffer_); |
| GlobalEraseBracketedSubstring("h1 percent:", ", ", &output_buffer_); |
| GlobalEraseBracketedSubstring("section percent:", ", ", &output_buffer_); |
| GlobalReplaceSubstring(", -->", "-->", &output_buffer_); |
| } |
| |
| void ResetStats() { |
| pages_labeled_->Clear(); |
| pages_role_added_->Clear(); |
| navigational_roles_->Clear(); |
| header_roles_->Clear(); |
| content_roles_->Clear(); |
| marginal_roles_->Clear(); |
| divs_unlabeled_->Clear(); |
| ambiguous_role_labels_->Clear(); |
| } |
| |
| void SetupPCache(StringPiece url) { |
| PropertyPage* page = NewMockPage(url); |
| rewrite_driver()->set_property_page(page); |
| rewrite_driver()->server_context()->page_property_cache()->Read(page); |
| } |
| |
| bool ExpectTwoRuns( |
| StringPiece case_id, StringPiece html_input, StringPiece html_output) { |
| GoogleString url = StrCat(kTestDomain, case_id, ".html"); |
| SetupPCache(url); |
| bool status1 = ValidateExpectedUrl(url, html_input, html_output); |
| int labeled = pages_labeled_->Get(); |
| int role_added = pages_role_added_->Get(); |
| int navigational = navigational_roles_->Get(); |
| int header = header_roles_->Get(); |
| int content = content_roles_->Get(); |
| int marginal = marginal_roles_->Get(); |
| int ambiguous = ambiguous_role_labels_->Get(); |
| int unlabeled = divs_unlabeled_->Get(); |
| int multiplier = 2; |
| if (rewrite_driver()->DebugMode()) { |
| multiplier = 1; |
| ResetStats(); |
| } |
| LOG(INFO) << "Second go. Multiplier " << multiplier; |
| bool status2 = ValidateExpectedUrl(url, html_input, html_output); |
| EXPECT_EQ(multiplier * labeled, pages_labeled_->Get()); |
| EXPECT_EQ(role_added, pages_role_added_->Get()); |
| EXPECT_EQ(navigational, navigational_roles_->Get()); |
| EXPECT_EQ(header, header_roles_->Get()); |
| EXPECT_EQ(content, content_roles_->Get()); |
| EXPECT_EQ(marginal, marginal_roles_->Get()); |
| EXPECT_EQ(ambiguous, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(unlabeled, divs_unlabeled_->Get()); |
| return (status1 && status2); |
| } |
| |
| scoped_ptr<AddIdsFilter> add_ids_filter_; |
| scoped_ptr<MobilizeLabelFilter> label_filter_; |
| Variable* pages_labeled_; |
| Variable* pages_role_added_; |
| Variable* navigational_roles_; |
| Variable* header_roles_; |
| Variable* content_roles_; |
| Variable* marginal_roles_; |
| Variable* divs_unlabeled_; |
| Variable* ambiguous_role_labels_; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(MobilizeLabelFilterTest); |
| }; |
| |
| TEST_F(MobilizeLabelFilterTest, AlreadyLabeled) { |
| StdioFileSystem filesystem; |
| GoogleString html5_filename = |
| StrCat(GTestSrcDir(), kTestDataDir, kOriginalHtml5); |
| GoogleString html5_contents; |
| ASSERT_TRUE(filesystem.ReadFile( |
| html5_filename.c_str(), &html5_contents, message_handler())); |
| // Classify fully, compare against gold labeling. |
| // Note that changes are fairly minimal. |
| GoogleString labeled_filename = |
| StrCat(GTestSrcDir(), kTestDataDir, kOriginalHtml5Labeled); |
| GoogleString labeled_contents; |
| ASSERT_TRUE(filesystem.ReadFile( |
| labeled_filename.c_str(), &labeled_contents, message_handler())); |
| ExpectTwoRuns("already_labeled_adding_labels", |
| html5_contents, labeled_contents); |
| EXPECT_EQ(2, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(2, navigational_roles_->Get()); |
| EXPECT_EQ(2, header_roles_->Get()); |
| EXPECT_EQ(3, content_roles_->Get()); |
| EXPECT_EQ(2, marginal_roles_->Get()); |
| EXPECT_EQ(0, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(11, divs_unlabeled_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, Html5TagsInHead) { |
| const char kOutputHtml[] = |
| "<head>\n" |
| "<menu id=\"PageSpeed-0-0\">Now treated as a menu</menu>\n" |
| "<header id=\"PageSpeed-0-1\"><h1>Also labeled</h1></header>\n" |
| "<article id=\"PageSpeed-0-2\">Still labeled</article>\n" |
| "<footer id=\"PageSpeed-0-3\">Also labeled</footer>\n" |
| "</head>\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=['PageSpeed-0-1'];\n" |
| "pagespeedNavigationalIds=['PageSpeed-0-0'];\n" |
| "pagespeedContentIds=['PageSpeed-0-2'];\n" |
| "pagespeedMarginalIds=['PageSpeed-0-3'];\n" |
| "</script>"; |
| ExpectTwoRuns("html5_tags_in_head", |
| Unlabel(kOutputHtml), kOutputHtml); |
| EXPECT_EQ(2, pages_labeled_->Get()); |
| EXPECT_EQ(0, pages_role_added_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, NoLabelableContent) { |
| EnableDebug(); |
| const char kOutputHtml[] = |
| "<body><p>\n" |
| "Just a paragraph of plain old content. " |
| "Nothing to label here!" |
| "</p>\n" |
| "<!--No nodes labeled for mobilization-->"; |
| ExpectTwoRuns("no_labelable_content", |
| Unlabel(kOutputHtml), kOutputHtml); |
| EXPECT_EQ(1, pages_labeled_->Get()); |
| EXPECT_EQ(0, pages_role_added_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, TinyCount) { |
| EnableDebug(); |
| const char kOutputHtml[] = |
| "<div role='header' id=\"PageSpeed-0\" data-mobile-role=\"header\">" |
| " Hello there," |
| " <a href='http://theworld.com/'>World</a></div>" |
| "<!--id: PageSpeed-0," |
| " role: header," |
| " ElementTagDepth: 1," |
| " ContainedTagDepth: 2," // <a> tag |
| " ContainedTagRelativeDepth: 1," |
| " ContainedTagCount: 2," // Includes <div> itself. |
| " ContainedTagPercent: 100.00," |
| " ContainedContentBytes: 17," // Whitespace before <a> ignored. |
| " ContainedContentPercent: 100.00," |
| " ContainedNonBlankBytes: 16," |
| " ContainedNonBlankPercent: 100.00," |
| " ContainedAContentBytes: 5," |
| " ContainedAContentLocalPercent: 29.41," |
| " ContainedNonAContentBytes: 12," |
| " head: 1," |
| " a count: 1," |
| " a percent: 100.00," |
| " div count: 1," |
| " div percent: 100.00-->\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=['PageSpeed-0'];\n" |
| "</script>"; |
| ExpectTwoRuns("Small_count_nav", |
| Unlabel(kOutputHtml), kOutputHtml); |
| EXPECT_EQ(1, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(0, navigational_roles_->Get()); |
| EXPECT_EQ(1, header_roles_->Get()); |
| EXPECT_EQ(0, content_roles_->Get()); |
| EXPECT_EQ(0, marginal_roles_->Get()); |
| EXPECT_EQ(0, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(0, divs_unlabeled_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, TinyCountNbsp) { |
| EnableDebug(); |
| const char kOutputHtml[] = |
| "<div role='header' id=\"PageSpeed-0\" data-mobile-role=\"header\">" |
| " Hello there, " |
| " <a href='http://theworld.com/'>World</a></div>" |
| "<!--id: PageSpeed-0," |
| " role: header," |
| " ElementTagDepth: 1," |
| " ContainedTagDepth: 2," // <a> tag |
| " ContainedTagRelativeDepth: 1," |
| " ContainedTagCount: 2," // Includes <div> itself. |
| " ContainedTagPercent: 100.00," |
| " ContainedContentBytes: 17," // Whitespace before <a> ignored. |
| " ContainedContentPercent: 100.00," |
| " ContainedNonBlankBytes: 16," |
| " ContainedNonBlankPercent: 100.00," |
| " ContainedAContentBytes: 5," |
| " ContainedAContentLocalPercent: 29.41," |
| " ContainedNonAContentBytes: 12," |
| " head: 1," |
| " a count: 1," |
| " a percent: 100.00," |
| " div count: 1," |
| " div percent: 100.00-->\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=['PageSpeed-0'];\n" |
| "</script>"; |
| ExpectTwoRuns("Small_count_nav_nbsp", |
| Unlabel(kOutputHtml), kOutputHtml); |
| EXPECT_EQ(1, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(0, navigational_roles_->Get()); |
| EXPECT_EQ(1, header_roles_->Get()); |
| EXPECT_EQ(0, content_roles_->Get()); |
| EXPECT_EQ(0, marginal_roles_->Get()); |
| EXPECT_EQ(0, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(0, divs_unlabeled_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, ImgInsideAndOutsideA) { |
| EnableDebug(); |
| const char kOutputHtml[] = |
| "<div role='content' id=\"PageSpeed-0\" data-mobile-role=\"header\">" |
| " <img src='a.png'>" |
| " <img src='b.jpg'>" |
| " <a href='http://theworld.com/'><img src='world.gif'></a></div>" |
| "<!--id: PageSpeed-0," |
| " role: header," |
| " ElementTagDepth: 1," |
| " ContainedTagDepth: 3," // <a><img></a> |
| " ContainedTagRelativeDepth: 2," |
| " ContainedTagCount: 5," // Includes <div> itself. |
| " ContainedTagPercent: 100.00," |
| " ContainedAImgTag: 1," |
| " ContainedAImgLocalPercent: 33.33," |
| " ContainedNonAImgTag: 2," |
| " content: 1," |
| " a count: 1," |
| " a percent: 100.00," |
| " div count: 1," |
| " div percent: 100.00," |
| " img count: 3," |
| " img percent: 100.00-->\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=['PageSpeed-0'];\n" |
| "</script>"; |
| ExpectTwoRuns("Small count nav", |
| Unlabel(kOutputHtml), kOutputHtml); |
| EXPECT_EQ(1, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(0, navigational_roles_->Get()); |
| EXPECT_EQ(1, header_roles_->Get()); |
| EXPECT_EQ(0, content_roles_->Get()); |
| EXPECT_EQ(0, marginal_roles_->Get()); |
| EXPECT_EQ(0, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(0, divs_unlabeled_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, DontCrashWithUnicodeId) { |
| const char kOutputHtml[] = |
| "<header id='g\xc5\x82\xc3\xb3wna'>Header</header>\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=['g\xc5\x82\xc3\xb3wna'];\n" |
| "</script>"; |
| ExpectTwoRuns("Unicode id", Unlabel(kOutputHtml), kOutputHtml); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, DontCrashWithEmptyId) { |
| const char kOutputHtml[] = |
| "<header id=''>Header</header>\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=[''];\n" |
| "</script>"; |
| ExpectTwoRuns("Empty id", Unlabel(kOutputHtml), kOutputHtml); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, DontCrashWithBlankId) { |
| const char kOutputHtml[] = |
| "<header id>Header</header>\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=[''];\n" |
| "</script>"; |
| ExpectTwoRuns("Blank id", Unlabel(kOutputHtml), kOutputHtml); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, InternalQuotesAndSpacesInId) { |
| const char kOutputHtml[] = |
| "<header id=\"'Quotes'\\slashes\">Header</header>\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=['\\'Quotes\\'\\\\slashes'];\n" |
| "</script>"; |
| ExpectTwoRuns("Quotes\\slashes in id", Unlabel(kOutputHtml), kOutputHtml); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, CloseScriptInId) { |
| const char kOutputHtml[] = |
| "<header id='</script>'>Header</header>\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=['<\\/script>'];\n" |
| "</script>"; |
| ExpectTwoRuns("Close script in id", Unlabel(kOutputHtml), kOutputHtml); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, DontCrashWithFlush) { |
| // Note that we cannot remove unused ids inserted before the flush. |
| const char kBody1[] = |
| "<html><head></head><body>\n" |
| "<div id=\"PageSpeed-1\">\n" |
| "<div role='nav' id=\"PageSpeed-1-0\"><a href='http://theworld.com/'>\n" |
| "Hello, World\n" |
| "</a></div>"; |
| const char kBody2[] = |
| "</div>\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedNavigationalIds=['PageSpeed-1'];\n" |
| "</script>" |
| "</body></html>"; |
| SetupWriter(); |
| rewrite_driver()->StartParse(kTestDomain); |
| rewrite_driver()->ParseText(Unlabel(kBody1)); |
| rewrite_driver()->Flush(); |
| rewrite_driver()->ParseText(Unlabel(kBody2)); |
| rewrite_driver()->FinishParse(); |
| GoogleString expected = StrCat(kBody1, kBody2); |
| EXPECT_STREQ(expected, output_buffer_); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, DontCrashWithFlushAndDebug) { |
| EnableDebug(); |
| // We can't insert helpful comments because the tags aren't rewritable |
| // anymore. Note that this is true even for the spanning <div>, where we |
| // arguably ought to be able to insert *after* the closing tag as it's still |
| // in the flush window. |
| const char kBody1[] = |
| "<html><head></head><body>\n" |
| "<div id=\"PageSpeed-1\">\n" |
| "<div role='nav' id=\"PageSpeed-1-0\"><a href='http://theworld.com/'>\n" |
| "Hello, World\n" |
| "</a></div>"; |
| const char kBody2[] = |
| "</div>\n" |
| "<!--id: PageSpeed-1," |
| " role: navigational," |
| " ElementTagDepth: 1," |
| " ContainedTagDepth: 3," |
| " ContainedTagRelativeDepth: 2," |
| " ContainedTagCount: 3," |
| " ContainedTagPercent: 100.00," |
| " ContainedContentBytes: 12," |
| " ContainedContentPercent: 100.00," |
| " ContainedNonBlankBytes: 11," |
| " ContainedNonBlankPercent: 100.00," |
| " ContainedAContentBytes: 12," |
| " ContainedAContentLocalPercent: 100.00," |
| " a count: 1," |
| " a percent: 100.00," |
| " div count: 2," |
| " div percent: 100.00-->" |
| "<!--id: PageSpeed-1-0," |
| " ElementTagDepth: 2," |
| " PreviousTagCount: 1," |
| " PreviousTagPercent: 33.33," |
| " ContainedTagDepth: 3," |
| " ContainedTagRelativeDepth: 1," |
| " ContainedTagCount: 2," |
| " ContainedTagPercent: 66.67," |
| " ContainedContentBytes: 12," |
| " ContainedContentPercent: 100.00," |
| " ContainedNonBlankBytes: 11," |
| " ContainedNonBlankPercent: 100.00," |
| " ContainedAContentBytes: 12," |
| " ContainedAContentLocalPercent: 100.00," |
| " nav: 1," |
| " a count: 1," |
| " a percent: 100.00," |
| " div count: 1," |
| " div percent: 50.00," |
| " parent role is navigational-->" |
| "<script type=\"text/javascript\">" |
| "pagespeedNavigationalIds=['PageSpeed-1'];\n" |
| "</script>" |
| "</body></html>"; |
| SetupWriter(); |
| rewrite_driver()->StartParse(kTestDomain); |
| rewrite_driver()->ParseText(Unlabel(kBody1)); |
| rewrite_driver()->Flush(); |
| rewrite_driver()->ParseText(Unlabel(kBody2)); |
| rewrite_driver()->FinishParse(); |
| GoogleString expected = StrCat(kBody1, kBody2); |
| EXPECT_STREQ(expected, output_buffer_); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, DontCrashWithMarginalChildOfNav) { |
| const char kOutputHtml[] = |
| "<div id='a'>\n" |
| " <ul id='b'>\n" |
| " <li><a href='/'>Drive</a></li>\n" |
| " </ul>\n" |
| " <ul id='c'>\n" |
| " <li><a href='R8'>R8</a></li>\n" |
| " </ul>\n" |
| "</div>\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedNavigationalIds=['a'];\n" |
| "pagespeedMarginalIds=['c'];\n" |
| "</script>"; |
| ExpectTwoRuns("DontCrashWithMarginalChild", |
| Unlabel(kOutputHtml), kOutputHtml); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, MarginalPropagation) { |
| // Test that marginal content gets labeled as such, and the |
| // labels get propagated up the DOM (but only as far as the |
| // outermost parent that isn't otherwise labeled). |
| const char kOutputHtml[] = |
| "<div>\n" |
| " <div data-mobile-role='header' id=\"PageSpeed-0-0\">header</div>\n" |
| " <div id=\"PageSpeed-0-1\">\n" |
| " <p>Content</p>\n" |
| " <p>More content</p>\n" |
| " <p>Still more content</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " <p>Are we still here? This is really quite a lot of content.</p>\n" |
| " </div>\n" |
| " <div id=\"PageSpeed-0-2\">\n" |
| " A Marginal Title\n" |
| " <div role='footer'><a>footer</a></div>\n" |
| " <div role='junk'><a>junk</a></div>\n" |
| " <div><a>more junk</a></div>\n" |
| " </div>\n" |
| "</div>" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=['PageSpeed-0-0'];\n" |
| "pagespeedContentIds=['PageSpeed-0-1'];\n" |
| "pagespeedMarginalIds=['PageSpeed-0-2'];\n" |
| "</script>"; |
| ExpectTwoRuns("Marginal propagation", |
| Unlabel(kOutputHtml), kOutputHtml); |
| EXPECT_EQ(2, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(0, navigational_roles_->Get()); |
| EXPECT_EQ(1, header_roles_->Get()); |
| EXPECT_EQ(1, content_roles_->Get()); |
| EXPECT_EQ(1, marginal_roles_->Get()); |
| EXPECT_EQ(0, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(4, divs_unlabeled_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, ParentPropagation) { |
| options()->set_log_mobilization_samples(true); |
| // Make sure an element all of whose children are labeled inherits the label, |
| // and an element whose children's labels conflict does not. |
| const char kOutputHtml[] = |
| "<div>\n" // One nav, one header, one content -> no label. |
| " <header id=\"PageSpeed-0-0\"></header>\n" |
| " <nav id=\"PageSpeed-0-1\"></nav>\n" |
| "</div>\n" |
| "<div id=\"PageSpeed-1\">\n" // Both children nav. |
| " <div>\n" // Only child is nav, so nav. |
| " <nav></nav>\n" |
| " </div>\n" |
| " <nav></nav>\n" |
| "</div>\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=['PageSpeed-0-0'];\n" |
| "pagespeedNavigationalIds=['PageSpeed-0-1','PageSpeed-1'];\n" |
| "</script>"; |
| ExpectTwoRuns("Parent propagation", |
| Unlabel(kOutputHtml), kOutputHtml); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, SmallCountNav) { |
| EnableDebug(); |
| const char kOutputHtml[] = |
| "<head></head><body>\n" |
| "<div class='container' id=\"PageSpeed-1\"" |
| " data-mobile-role=\"navigational\">\n" |
| " <a href='a'>a</a>\n" |
| " <div class='menu' id='hdr' role='nav'>\n" |
| " <ul id=\"PageSpeed-hdr-0\">\n" |
| " <li><a href='n1'>nav 1</a></li>\n" |
| " <li><a href='n2'>nav 2</a></li>\n" |
| " <li><a href='n3'>nav 3</a></li>\n" |
| " </ul>" |
| "<!--id: PageSpeed-hdr-0," |
| " ElementTagDepth: 3," |
| " PreviousTagCount: 3," |
| " PreviousTagPercent: 30.00," |
| " PreviousContentBytes: 1," |
| " PreviousContentPercent: 6.25," |
| " PreviousNonBlankBytes: 1," |
| " PreviousNonBlankPercent: 7.69," |
| " ContainedTagDepth: 5," |
| " ContainedTagRelativeDepth: 2," |
| " ContainedTagCount: 7," |
| " ContainedTagPercent: 70.00," |
| " ContainedContentBytes: 15," |
| " ContainedContentPercent: 93.75," |
| " ContainedNonBlankBytes: 12," |
| " ContainedNonBlankPercent: 92.31," |
| " ContainedAContentBytes: 15," |
| " ContainedAContentLocalPercent: 100.00," |
| " a count: 3," |
| " a percent: 75.00," |
| " li count: 3," |
| " li percent: 100.00," |
| " ul count: 1," |
| " ul percent: 100.00-->\n" |
| " </div>" |
| "<!--id: hdr," |
| " ElementTagDepth: 2," |
| " PreviousTagCount: 2," |
| " PreviousTagPercent: 20.00," |
| " PreviousContentBytes: 1," |
| " PreviousContentPercent: 6.25," |
| " PreviousNonBlankBytes: 1," |
| " PreviousNonBlankPercent: 7.69," |
| " ContainedTagDepth: 5," |
| " ContainedTagRelativeDepth: 3," |
| " ContainedTagCount: 8," |
| " ContainedTagPercent: 80.00," |
| " ContainedContentBytes: 15," |
| " ContainedContentPercent: 93.75," |
| " ContainedNonBlankBytes: 12," |
| " ContainedNonBlankPercent: 92.31," |
| " ContainedAContentBytes: 15," |
| " ContainedAContentLocalPercent: 100.00," |
| " hdr: 1," |
| " menu: 1," |
| " nav: 1," |
| " a count: 3," |
| " a percent: 75.00," |
| " div count: 1," |
| " div percent: 50.00," |
| " li count: 3," |
| " li percent: 100.00," |
| " ul count: 1," |
| " ul percent: 100.00-->\n" |
| "</div>" |
| "<!--id: PageSpeed-1," |
| " role: navigational," |
| " ElementTagDepth: 1," |
| " ContainedTagDepth: 5," |
| " ContainedTagRelativeDepth: 4," |
| " ContainedTagCount: 10," |
| " ContainedTagPercent: 100.00," |
| " ContainedContentBytes: 16," |
| " ContainedContentPercent: 100.00," |
| " ContainedNonBlankBytes: 13," |
| " ContainedNonBlankPercent: 100.00," |
| " ContainedAContentBytes: 16," |
| " ContainedAContentLocalPercent: 100.00," |
| " a count: 4," |
| " a percent: 100.00," |
| " div count: 2," |
| " div percent: 100.00," |
| " li count: 3," |
| " li percent: 100.00," |
| " ul count: 1," |
| " ul percent: 100.00-->\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedNavigationalIds=['PageSpeed-1'];\n" |
| "</script></body>"; |
| ExpectTwoRuns("Small count nav", |
| Unlabel(kOutputHtml), kOutputHtml); |
| EXPECT_EQ(1, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(1, navigational_roles_->Get()); |
| EXPECT_EQ(0, header_roles_->Get()); |
| EXPECT_EQ(0, content_roles_->Get()); |
| EXPECT_EQ(0, marginal_roles_->Get()); |
| EXPECT_EQ(0, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(2, divs_unlabeled_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, SmallCountTabularNav) { |
| EnableDebug(); |
| const char kOutputHtml[] = |
| "<head></head><body>\n" |
| "<div class='container' id=\"PageSpeed-1\"" |
| " data-mobile-role=\"navigational\">\n" |
| " <a href='a'>a</a>\n" |
| " <table class='menu' id='hdr' role='nav'>\n" |
| " <tr id=\"PageSpeed-hdr-0\">\n" |
| " <td><a href='n1'>nav 1</a></td>\n" |
| " <td><a href='n2'>nav 2</a></td>\n" |
| " <td><a href='n3'>nav 3</a></td>\n" |
| " </tr>" |
| "<!--id: PageSpeed-hdr-0," |
| " ElementTagDepth: 3," |
| " PreviousTagCount: 3," |
| " PreviousTagPercent: 30.00," |
| " PreviousContentBytes: 1," |
| " PreviousContentPercent: 6.25," |
| " PreviousNonBlankBytes: 1," |
| " PreviousNonBlankPercent: 7.69," |
| " ContainedTagDepth: 5," |
| " ContainedTagRelativeDepth: 2," |
| " ContainedTagCount: 7," |
| " ContainedTagPercent: 70.00," |
| " ContainedContentBytes: 15," |
| " ContainedContentPercent: 93.75," |
| " ContainedNonBlankBytes: 12," |
| " ContainedNonBlankPercent: 92.31," |
| " ContainedAContentBytes: 15," |
| " ContainedAContentLocalPercent: 100.00," |
| " a count: 3," |
| " a percent: 75.00," |
| " td count: 3," |
| " td percent: 100.00," |
| " tr count: 1," |
| " tr percent: 100.00," |
| " parent role is navigational-->\n" |
| " </table>" |
| "<!--id: hdr," |
| " ElementTagDepth: 2," |
| " PreviousTagCount: 2," |
| " PreviousTagPercent: 20.00," |
| " PreviousContentBytes: 1," |
| " PreviousContentPercent: 6.25," |
| " PreviousNonBlankBytes: 1," |
| " PreviousNonBlankPercent: 7.69," |
| " ContainedTagDepth: 5," |
| " ContainedTagRelativeDepth: 3," |
| " ContainedTagCount: 8," |
| " ContainedTagPercent: 80.00," |
| " ContainedContentBytes: 15," |
| " ContainedContentPercent: 93.75," |
| " ContainedNonBlankBytes: 12," |
| " ContainedNonBlankPercent: 92.31," |
| " ContainedAContentBytes: 15," |
| " ContainedAContentLocalPercent: 100.00," |
| " hdr: 1," |
| " menu: 1," |
| " nav: 1," |
| " a count: 3," |
| " a percent: 75.00," |
| " table count: 1," |
| " table percent: 100.00," |
| " td count: 3," |
| " td percent: 100.00," |
| " tr count: 1," |
| " tr percent: 100.00," |
| " parent role is navigational-->\n" |
| "</div>" |
| "<!--id: PageSpeed-1," |
| " role: navigational," |
| " ElementTagDepth: 1," |
| " ContainedTagDepth: 5," |
| " ContainedTagRelativeDepth: 4," |
| " ContainedTagCount: 10," |
| " ContainedTagPercent: 100.00," |
| " ContainedContentBytes: 16," |
| " ContainedContentPercent: 100.00," |
| " ContainedNonBlankBytes: 13," |
| " ContainedNonBlankPercent: 100.00," |
| " ContainedAContentBytes: 16," |
| " ContainedAContentLocalPercent: 100.00," |
| " a count: 4," |
| " a percent: 100.00," |
| " div count: 1," |
| " div percent: 100.00," |
| " table count: 1," |
| " table percent: 100.00," |
| " td count: 3," |
| " td percent: 100.00," |
| " tr count: 1," |
| " tr percent: 100.00-->\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedNavigationalIds=['PageSpeed-1'];\n" |
| "</script></body>"; |
| ExpectTwoRuns("Small tabular nav", |
| Unlabel(kOutputHtml), kOutputHtml); |
| EXPECT_EQ(1, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(1, navigational_roles_->Get()); |
| EXPECT_EQ(0, header_roles_->Get()); |
| EXPECT_EQ(0, content_roles_->Get()); |
| EXPECT_EQ(0, marginal_roles_->Get()); |
| EXPECT_EQ(0, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(2, divs_unlabeled_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, NoLabelInsideA) { |
| // First, make sure we identify things correctly without <a> |
| const char kOutputHtmlNoA[] = |
| "<head></head><body>\n" |
| " <div role='header' id=\"PageSpeed-1\">\n" |
| " <h1><img src='logo.gif'>Header!</h1></div>\n" |
| " <div class='container' id=\"PageSpeed-2\">\n" |
| " <a href='a'>a</a>\n" |
| " <div class='menu' id='hdr' role='nav'>\n" |
| " <ul>\n" |
| " <li><a href='n1'>nav 1</a></li>\n" |
| " <li><a href='n2'>nav 2</a></li>\n" |
| " <li><a href='n3'>nav 3</a></li>\n" |
| " </ul>\n" |
| " </div>\n" |
| " </div>\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=['PageSpeed-1'];\n" |
| "pagespeedNavigationalIds=['PageSpeed-2'];\n" |
| "</script></body>"; |
| ExpectTwoRuns("Label not inside <a>", |
| Unlabel(kOutputHtmlNoA), kOutputHtmlNoA); |
| EXPECT_EQ(2, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(1, navigational_roles_->Get()); |
| EXPECT_EQ(1, header_roles_->Get()); |
| EXPECT_EQ(0, content_roles_->Get()); |
| EXPECT_EQ(0, marginal_roles_->Get()); |
| EXPECT_EQ(0, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(2, divs_unlabeled_->Get()); |
| // Now make sure that inside <a> we don't identify the header, but we do |
| // identify the nav because it in turn contains nested links indicating that |
| // the outer <a> was an error. |
| const char kOutputHtmlWithA[] = |
| "<head></head><body>\n" |
| "<a href=top.html>\n" // Now enclosed by a |
| " <div role='header'>\n" // Not labeled => no id |
| " <h1><img src='logo.gif'>Header!</h1></div></a>\n" |
| "<a href=menu.html>\n" // Also enclosed by a |
| " <div class='container' id=\"PageSpeed-2-0\">\n" |
| " <a href='a'>a</a>\n" // But contains lots of a, so still label. |
| " <div class='menu' id='hdr' role='nav'>\n" |
| " <ul>\n" |
| " <li><a href='n1'>nav 1</a></li>\n" |
| " <li><a href='n2'>nav 2</a></li>\n" |
| " <li><a href='n3'>nav 3</a></li>\n" |
| " </ul>\n" |
| " </div>\n" |
| " </div>\n" |
| "</a>\n" |
| "<script type=\"text/javascript\">" // No header divs remain. |
| "pagespeedNavigationalIds=['PageSpeed-2-0'];\n" |
| "</script></body>"; |
| ResetStats(); |
| ExpectTwoRuns("No label inside <a>", |
| Unlabel(kOutputHtmlWithA), kOutputHtmlWithA); |
| EXPECT_EQ(2, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(1, navigational_roles_->Get()); |
| EXPECT_EQ(0, header_roles_->Get()); |
| EXPECT_EQ(0, content_roles_->Get()); |
| EXPECT_EQ(0, marginal_roles_->Get()); |
| EXPECT_EQ(0, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(3, divs_unlabeled_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, ConfiguredInclusionAndExclusion) { |
| // Here we use HTML5 tags to create a document that would normally classify a |
| // particular way, and use configuration to change the classification of those |
| // tags. |
| options()->set_mob_nav_classes("+ok,-no,yes"); |
| const char kOutputHtml[] = |
| "<head></head><body>\n" |
| " <header class='maybe ok yup' id=\"PageSpeed-1\">\n" |
| " <ul><li><a href='n1'>Actually navigational</a></ul>\n" |
| " </header>\n" |
| " <nav class='maybe ok yup' id='no'>\n" |
| " <nav id='no-a'>Nested forced nav</nav>\n" |
| " <header class='yes' id='no-b'>Overridden</header>\n" |
| " <ul><li><a href='n2'>Inherited non-navigational</a></ul>\n" |
| " </nav>\n" |
| " <nav class='yes no ok' id=\"PageSpeed-3\">\n" |
| " <ul><li><a href='n1'>Not navigational</a></ul>\n" |
| " </nav>\n" |
| " <em class='no' id='yes'>\n" |
| " Navigational\n" |
| " </em>\n" |
| " <span class='no' id=\"PageSpeed-5\">\n" |
| " Non-navigational\n" |
| " </span>\n" |
| " <script type=\"text/javascript\">" |
| "pagespeedNavigationalIds=['PageSpeed-1','no-a','no-b','yes'];\n" |
| "pagespeedMarginalIds=['no','PageSpeed-3','PageSpeed-5'];\n" |
| "</script>"; |
| ExpectTwoRuns("Configured inclusion and exclusion", |
| Unlabel(kOutputHtml), kOutputHtml); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, NavInsideHeader) { |
| // A common pattern in sites is to have a header area with a logo and some |
| // navigational content. We'd like to flag the navigational content! |
| EnableDebug(); |
| const char kOutputHtml[] = |
| "<head></head><body>\n" |
| " <header id=\"PageSpeed-1\" data-mobile-role=\"header\">\n" |
| " <img src='logo.gif'>\n" |
| " <ul id='nav_menu' data-mobile-role=\"navigational\">\n" |
| " <li><a href='about.html'>About us</a>\n" |
| " <li><a href='contact.html'>Contact</a>\n" |
| " <li><a href='faq.html'>FAQ</a>\n" |
| " </ul>" |
| "<!--id: nav_menu," |
| " role: navigational," |
| " ElementTagDepth: 2," |
| " PreviousTagCount: 2," |
| " PreviousTagPercent: 22.22," |
| " ContainedTagDepth: 4," |
| " ContainedTagRelativeDepth: 2," |
| " ContainedTagCount: 7," |
| " ContainedTagPercent: 77.78," |
| " ContainedContentBytes: 18," |
| " ContainedContentPercent: 100.00," |
| " ContainedNonBlankBytes: 17," |
| " ContainedNonBlankPercent: 100.00," |
| " ContainedAContentBytes: 18," |
| " ContainedAContentLocalPercent: 100.00," |
| " menu: 1," |
| " nav: 1," |
| " a count: 3," |
| " a percent: 100.00," |
| " li count: 3," |
| " li percent: 100.00," |
| " ul count: 1," |
| " ul percent: 100.00," |
| " parent role is header-->\n" |
| " </header>" |
| "<!--id: PageSpeed-1," |
| " role: header," |
| " ElementTagDepth: 1," |
| " ContainedTagDepth: 4," |
| " ContainedTagRelativeDepth: 3," |
| " ContainedTagCount: 9," |
| " ContainedTagPercent: 100.00," |
| " ContainedContentBytes: 18," |
| " ContainedContentPercent: 100.00," |
| " ContainedNonBlankBytes: 17," |
| " ContainedNonBlankPercent: 100.00," |
| " ContainedAContentBytes: 18," |
| " ContainedAContentLocalPercent: 100.00," |
| " ContainedNonAImgTag: 1," |
| " a count: 3," |
| " a percent: 100.00," |
| " div count: 1," |
| " div percent: 100.00," |
| " img count: 1," |
| " img percent: 100.00," |
| " li count: 3," |
| " li percent: 100.00," |
| " ul count: 1," |
| " ul percent: 100.00-->\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=['PageSpeed-1'];\n" |
| "pagespeedNavigationalIds=['nav_menu'];\n" |
| "</script></body>"; |
| ExpectTwoRuns("Nav inside header", |
| Unlabel(kOutputHtml), kOutputHtml); |
| EXPECT_EQ(1, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(1, navigational_roles_->Get()); |
| EXPECT_EQ(1, header_roles_->Get()); |
| EXPECT_EQ(0, content_roles_->Get()); |
| EXPECT_EQ(0, marginal_roles_->Get()); |
| EXPECT_EQ(0, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(0, divs_unlabeled_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, Html5TagsInBody) { |
| EnableDebug(); |
| // Just for clarity we include the labeled HTML without the sample comments |
| // emitted by debug. The input HTML is this with the data-mobile-role |
| // annotations stripped out. |
| const char kLabeledHtml[] = |
| "<head></head><body>\n" |
| "<nav data-mobile-role=\"navigational\">Labeled\n" |
| " <menu>unlabeled</menu>\n" |
| "</nav>\n" |
| "<menu data-mobile-role=\"navigational\">\n" |
| " Labeled</menu>\n" |
| "<header data-mobile-role=\"header\">\n" |
| " <h1>Labeled</h1></header>\n" |
| "<div id='body' data-mobile-role=\"content\">\n" |
| " <main>labeled\n" |
| " <article>\n" |
| " <section>unlabeled</section>\n" |
| " </article>\n" |
| " </main>\n" |
| " <article data-mobile-role=\"content\">also labeled</article>\n" |
| " <section data-mobile-role=\"content\">this too\n" |
| " <aside data-mobile-role=\"marginal\">\n" |
| " and this, it differs.</aside>\n" |
| " </section>\n" |
| "</div>\n" |
| "<aside data-mobile-role=\"marginal\">Labeled</aside>\n" |
| "<footer data-mobile-role=\"marginal\">labeled\n" |
| " <menu data-mobile-role=\"navigational\">\n" |
| " navvy</menu>\n" |
| "</footer>\n" |
| "</body>"; |
| // Note how the HTML5 tags used for training / instant classification are |
| // treated as divs in the instrumented data. |
| const char kOutputHtml[] = |
| "<head></head><body>\n" |
| "<nav id=\"PageSpeed-1\" data-mobile-role=\"navigational\">Labeled\n" |
| " <menu id=\"PageSpeed-1-0\">unlabeled</menu>" |
| "<!--id: PageSpeed-1-0," |
| " ElementTagDepth: 2," |
| " PreviousTagCount: 1," |
| " ContainedTagDepth: 2," |
| " ContainedTagRelativeDepth: 0," |
| " ContainedTagCount: 1," |
| " ContainedContentBytes: 9," |
| " ContainedNonBlankBytes: 9," |
| " ContainedNonAContentBytes: 9," |
| " div count: 1," |
| " parent role is navigational-->\n" |
| "</nav>" |
| "<!--id: PageSpeed-1," |
| " role: navigational," |
| " ElementTagDepth: 1," |
| " ContainedTagDepth: 2," |
| " ContainedTagRelativeDepth: 1," |
| " ContainedTagCount: 2," |
| " ContainedContentBytes: 16," |
| " ContainedNonBlankBytes: 16," |
| " ContainedNonAContentBytes: 16," |
| " div count: 2-->\n" |
| "<menu id=\"PageSpeed-2\" data-mobile-role=\"navigational\">\n" |
| " Labeled</menu>" |
| "<!--id: PageSpeed-2," |
| " role: navigational," |
| " ElementTagDepth: 1," |
| " PreviousTagCount: 2," |
| " ContainedTagDepth: 1," |
| " ContainedTagRelativeDepth: 0," |
| " ContainedTagCount: 1," |
| " ContainedContentBytes: 7," |
| " ContainedNonBlankBytes: 7," |
| " ContainedNonAContentBytes: 7," |
| " div count: 1-->\n" |
| "<header id=\"PageSpeed-3\" data-mobile-role=\"header\">\n" |
| " <h1>Labeled</h1></header>" |
| "<!--id: PageSpeed-3," |
| " role: header," |
| " ElementTagDepth: 1," |
| " PreviousTagCount: 3," |
| " ContainedTagDepth: 2," |
| " ContainedTagRelativeDepth: 1," |
| " ContainedTagCount: 2," |
| " ContainedContentBytes: 7," |
| " ContainedNonBlankBytes: 7," |
| " ContainedNonAContentBytes: 7," |
| " div count: 1," |
| " h1 count: 1-->\n" |
| "<div id='body' data-mobile-role=\"content\">\n" |
| " <main id=\"PageSpeed-body-0\">labeled\n" |
| " <article id=\"PageSpeed-body-0-0\">\n" |
| " <section id=\"PageSpeed-body-0-0-0\">unlabeled</section>" |
| "<!--id: PageSpeed-body-0-0-0," |
| " ElementTagDepth: 4," |
| " PreviousTagCount: 8," |
| " ContainedTagDepth: 4," |
| " ContainedTagRelativeDepth: 0," |
| " ContainedTagCount: 1," |
| " ContainedContentBytes: 9," |
| " ContainedNonBlankBytes: 9," |
| " ContainedNonAContentBytes: 9," |
| " section count: 1," |
| " parent role is content-->\n" |
| " </article>" |
| "<!--id: PageSpeed-body-0-0," |
| " ElementTagDepth: 3," |
| " PreviousTagCount: 7," |
| " ContainedTagDepth: 4," |
| " ContainedTagRelativeDepth: 1," |
| " ContainedTagCount: 2," |
| " ContainedContentBytes: 9," |
| " ContainedNonBlankBytes: 9," |
| " ContainedNonAContentBytes: 9," |
| " div count: 1," |
| " section count: 1," |
| " parent role is content-->\n" |
| " </main>" |
| "<!--id: PageSpeed-body-0," |
| " ElementTagDepth: 2," |
| " PreviousTagCount: 6," |
| " ContainedTagDepth: 4," |
| " ContainedTagRelativeDepth: 2," |
| " ContainedTagCount: 3," |
| " ContainedContentBytes: 16," |
| " ContainedNonBlankBytes: 16," |
| " ContainedNonAContentBytes: 16," |
| " div count: 2," |
| " section count: 1," |
| " parent role is content-->\n" |
| " <article id=\"PageSpeed-body-1\">also labeled</article>" |
| "<!--id: PageSpeed-body-1," |
| " ElementTagDepth: 2," |
| " PreviousTagCount: 9," |
| " ContainedTagDepth: 2," |
| " ContainedTagRelativeDepth: 0," |
| " ContainedTagCount: 1," |
| " ContainedContentBytes: 12," |
| " ContainedNonBlankBytes: 11," |
| " ContainedNonAContentBytes: 12," |
| " div count: 1," |
| " parent role is content-->\n" |
| " <section id=\"PageSpeed-body-2\">this too\n" |
| " <aside id=\"PageSpeed-body-2-0\" data-mobile-role=\"marginal\">\n" |
| " and this, it differs.</aside>" |
| "<!--id: PageSpeed-body-2-0," |
| " role: marginal," |
| " ElementTagDepth: 3," |
| " PreviousTagCount: 11," |
| " ContainedTagDepth: 3," |
| " ContainedTagRelativeDepth: 0," |
| " ContainedTagCount: 1," |
| " ContainedContentBytes: 21," |
| " ContainedNonBlankBytes: 18," |
| " ContainedNonAContentBytes: 21," |
| " div count: 1," |
| " parent role is content-->\n" |
| " </section>" |
| "<!--id: PageSpeed-body-2," |
| " ElementTagDepth: 2," |
| " PreviousTagCount: 10," |
| " ContainedTagDepth: 3," |
| " ContainedTagRelativeDepth: 1," |
| " ContainedTagCount: 2," |
| " ContainedContentBytes: 29," |
| " ContainedNonBlankBytes: 25," |
| " ContainedNonAContentBytes: 29," |
| " div count: 1," |
| " section count: 1," |
| " parent role is content-->\n" |
| "</div>" |
| "<!--id: body," |
| " role: content," |
| " ElementTagDepth: 1," |
| " PreviousTagCount: 5," |
| " ContainedTagDepth: 4," |
| " ContainedTagRelativeDepth: 3," |
| " ContainedTagCount: 7," |
| " ContainedContentBytes: 57," |
| " ContainedNonBlankBytes: 52," |
| " ContainedNonAContentBytes: 57," |
| " body: 1," |
| " div count: 5," |
| " section count: 2-->\n" |
| "<aside id=\"PageSpeed-5\" data-mobile-role=\"marginal\">Labeled</aside>" |
| "<!--id: PageSpeed-5," |
| " role: marginal," |
| " ElementTagDepth: 1," |
| " PreviousTagCount: 12," |
| " ContainedTagDepth: 1," |
| " ContainedTagRelativeDepth: 0," |
| " ContainedTagCount: 1," |
| " ContainedContentBytes: 7," |
| " ContainedNonBlankBytes: 7," |
| " ContainedNonAContentBytes: 7," |
| " div count: 1-->\n" |
| "<footer id=\"PageSpeed-6\" data-mobile-role=\"marginal\">labeled\n" |
| " <menu id=\"PageSpeed-6-0\" data-mobile-role=\"navigational\">\n" |
| " navvy</menu>" |
| "<!--id: PageSpeed-6-0," |
| " role: navigational," |
| " ElementTagDepth: 2," |
| " PreviousTagCount: 14," |
| " ContainedTagDepth: 2," |
| " ContainedTagRelativeDepth: 0," |
| " ContainedTagCount: 1," |
| " ContainedContentBytes: 5," |
| " ContainedNonBlankBytes: 5," |
| " ContainedNonAContentBytes: 5," |
| " div count: 1-->\n" |
| "</footer>" |
| "<!--id: PageSpeed-6," |
| " role: marginal," |
| " ElementTagDepth: 1," |
| " PreviousTagCount: 13," |
| " ContainedTagDepth: 2," |
| " ContainedTagRelativeDepth: 1," |
| " ContainedTagCount: 2," |
| " ContainedContentBytes: 12," |
| " ContainedNonBlankBytes: 12," |
| " ContainedNonAContentBytes: 12," |
| " div count: 2-->\n" |
| "<script type=\"text/javascript\">" |
| "pagespeedHeaderIds=['PageSpeed-3'];\n" |
| "pagespeedNavigationalIds=" |
| "['PageSpeed-1','PageSpeed-2','PageSpeed-6-0'];\n" |
| "pagespeedContentIds=['body'];\n" |
| "pagespeedMarginalIds=" |
| "['PageSpeed-body-2-0','PageSpeed-5','PageSpeed-6'];\n" |
| "</script></body>"; |
| Parse("html5_tags_in_body", Unlabel(kLabeledHtml)); |
| GoogleString xbody = StrCat(doctype_string_, AddHtmlBody(kOutputHtml)); |
| RemoveRedundantDataFromOutputBuffer(); |
| EXPECT_STREQ(xbody, output_buffer_) << "html5_tags_in_body"; |
| EXPECT_EQ(1, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(3, navigational_roles_->Get()); |
| EXPECT_EQ(1, header_roles_->Get()); |
| EXPECT_EQ(1, content_roles_->Get()); |
| EXPECT_EQ(3, marginal_roles_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, LargeUnlabeled) { |
| options()->set_mob_always(false); |
| SetCurrentUserAgent( |
| UserAgentMatcherTestBase::kAndroidChrome21UserAgent); |
| StdioFileSystem filesystem; |
| GoogleString original_filename = |
| StrCat(GTestSrcDir(), kTestDataDir, kOriginal); |
| GoogleString original_contents; |
| ASSERT_TRUE(filesystem.ReadFile( |
| original_filename.c_str(), &original_contents, message_handler())); |
| GoogleString unlabeled_contents = Unlabel(original_contents); |
| // Classify fully, compare against gold labeling. |
| // Note that we don't necessarily match the labeling of the original! |
| GoogleString labeled_filename = |
| StrCat(GTestSrcDir(), kTestDataDir, kOriginalLabeled); |
| GoogleString labeled_contents; |
| ASSERT_TRUE(filesystem.ReadFile( |
| labeled_filename.c_str(), &labeled_contents, message_handler())); |
| ExpectTwoRuns("unlabeled_mobile", |
| unlabeled_contents, labeled_contents); |
| EXPECT_EQ(2, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(2, navigational_roles_->Get()); |
| EXPECT_EQ(2, header_roles_->Get()); |
| EXPECT_EQ(3, content_roles_->Get()); |
| EXPECT_EQ(1, marginal_roles_->Get()); |
| EXPECT_EQ(1, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(12, divs_unlabeled_->Get()); |
| } |
| |
| TEST_F(MobilizeLabelFilterTest, LargeUnlabeledDesktop) { |
| options()->set_mob_always(false); |
| SetCurrentUserAgent(UserAgentMatcherTestBase::kChrome37UserAgent); |
| StdioFileSystem filesystem; |
| GoogleString original_filename = |
| StrCat(GTestSrcDir(), kTestDataDir, kOriginal); |
| GoogleString original_contents; |
| ASSERT_TRUE(filesystem.ReadFile( |
| original_filename.c_str(), &original_contents, message_handler())); |
| GoogleString unlabeled_contents = Unlabel(original_contents); |
| SetupPCache(StrCat(kTestDomain, "unlabeled_desktop.html")); |
| ValidateNoChanges("unlabeled_desktop", unlabeled_contents); |
| // The stats here reflect the fact that we compute the labeling on desktop but |
| // don't alter the page. |
| EXPECT_EQ(0, pages_labeled_->Get()); |
| EXPECT_EQ(1, pages_role_added_->Get()); |
| EXPECT_EQ(2, navigational_roles_->Get()); |
| EXPECT_EQ(2, header_roles_->Get()); |
| EXPECT_EQ(3, content_roles_->Get()); |
| EXPECT_EQ(1, marginal_roles_->Get()); |
| EXPECT_EQ(1, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(12, divs_unlabeled_->Get()); |
| ResetStats(); |
| ValidateNoChanges("unlabeled_desktop", unlabeled_contents); |
| // And thereafter we should do no further computation for desktop. |
| EXPECT_EQ(0, pages_labeled_->Get()); |
| EXPECT_EQ(0, pages_role_added_->Get()); |
| EXPECT_EQ(0, navigational_roles_->Get()); |
| EXPECT_EQ(0, header_roles_->Get()); |
| EXPECT_EQ(0, content_roles_->Get()); |
| EXPECT_EQ(0, marginal_roles_->Get()); |
| EXPECT_EQ(0, ambiguous_role_labels_->Get()); |
| EXPECT_EQ(0, divs_unlabeled_->Get()); |
| } |
| |
| } // namespace |
| |
| } // namespace net_instaweb |