/*
 * Copyright 2014 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Author: jmaessen@google.com (Jan-Willem Maessen)

#include "net/instaweb/rewriter/public/mobilize_label_filter.h"

#include "base/logging.h"
#include "net/instaweb/rewriter/public/add_ids_filter.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/rewrite_test_base.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "pagespeed/kernel/base/gtest.h"
#include "pagespeed/kernel/base/mock_message_handler.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/statistics.h"
#include "pagespeed/kernel/base/stdio_file_system.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/html/html_parse_test_base.h"
#include "pagespeed/kernel/http/user_agent_matcher_test_base.h"
#include "pagespeed/opt/http/mock_property_page.h"
#include "pagespeed/opt/http/property_cache.h"

namespace net_instaweb {

namespace {

// Directory holding the test data files; tests append it to GTestSrcDir().
const char kTestDataDir[] = "/net/instaweb/rewriter/testdata/";
// Test data file names, appended to kTestDataDir.  The "*_labeled" HTML5 file
// is the expected ("gold") output for the unlabeled HTML5 file.
// NOTE(review): kOriginal / kOriginalLabeled are presumably the analogous pair
// for the non-HTML5 page; their uses are outside the part of the file seen
// here -- confirm.
const char kOriginal[] = "mobilize_test.html";
const char kOriginalHtml5[] = "mobilize_test_html5.html";
const char kOriginalLabeled[] = "mobilize_test_labeled.html";
const char kOriginalHtml5Labeled[] = "mobilize_test_html5_labeled.html";

class MobilizeLabelFilterTest : public RewriteTestBase {
 protected:
  MobilizeLabelFilterTest() {}

  virtual void SetUp() {
    RewriteTestBase::SetUp();
    add_ids_filter_.reset(new AddIdsFilter(rewrite_driver()));
    label_filter_.reset(
        new MobilizeLabelFilter(false /* is_menu_subfetch */,
                                rewrite_driver()));
    options()->set_mob_always(true);
    html_parse()->AddFilter(add_ids_filter_.get());
    html_parse()->AddFilter(label_filter_.get());
    const PropertyCache::Cohort* dom_cohort =
        SetupCohort(rewrite_driver()->server_context()->page_property_cache(),
                    RewriteDriver::kDomCohort);
    server_context()->set_dom_cohort(dom_cohort);
    SetHtmlMimetype();
    Statistics* stats = statistics();
    pages_labeled_ =
        stats->GetVariable(MobilizeLabelFilter::kPagesLabeled);
    pages_role_added_ =
        stats->GetVariable(MobilizeLabelFilter::kPagesRoleAdded);
    navigational_roles_ =
        stats->GetVariable(MobilizeLabelFilter::kNavigationalRoles);
    header_roles_ =
        stats->GetVariable(MobilizeLabelFilter::kHeaderRoles);
    content_roles_ =
        stats->GetVariable(MobilizeLabelFilter::kContentRoles);
    marginal_roles_ =
        stats->GetVariable(MobilizeLabelFilter::kMarginalRoles);
    divs_unlabeled_ =
        stats->GetVariable(MobilizeLabelFilter::kDivsUnlabeled);
    ambiguous_role_labels_ =
        stats->GetVariable(MobilizeLabelFilter::kAmbiguousRoleLabels);
  }

  // Remove data-mobile-role labeling from a labeled document
  GoogleString Unlabel(StringPiece labeled) {
    GoogleString result;
    labeled.CopyToString(&result);
    GlobalEraseBracketedSubstring(" data-mobile-role=\"", "\"", &result);
    GlobalEraseBracketedSubstring("<!--id: ", "-->", &result);
    GlobalReplaceSubstring("<!--No nodes labeled for mobilization-->", "",
                           &result);
    GlobalEraseBracketedSubstring(" id=\"PageSpeed-", "\"", &result);
    GlobalEraseBracketedSubstring("<script type=\"text/javascript\">",
                                  "</script>", &result);
    return result;
  }

  // Remove percentages and previous content bytes, which are very
  // input-sensitive, from output buffer so that we just check raw statistics
  // counts.
  void RemoveRedundantDataFromOutputBuffer() {
    GlobalEraseBracketedSubstring(
        "PreviousTagPercent:", ", ", &output_buffer_);
    GlobalEraseBracketedSubstring(
        "PreviousContentBytes:", ", ", &output_buffer_);
    GlobalEraseBracketedSubstring(
        "PreviousContentPercent:", ", ", &output_buffer_);
    GlobalEraseBracketedSubstring(
        "PreviousNonBlankBytes:", ", ", &output_buffer_);
    GlobalEraseBracketedSubstring(
        "PreviousNonBlankPercent:", ", ", &output_buffer_);
    GlobalEraseBracketedSubstring(
        "ContainedTagPercent:", ", ", &output_buffer_);
    GlobalEraseBracketedSubstring(
        "ContainedContentPercent:", ", ", &output_buffer_);
    GlobalEraseBracketedSubstring(
        "ContainedNonBlankPercent", ", ", &output_buffer_);
    GlobalReplaceSubstring("-->", ", -->", &output_buffer_);
    GlobalEraseBracketedSubstring("div percent:", ", ", &output_buffer_);
    GlobalEraseBracketedSubstring("h1 percent:", ", ", &output_buffer_);
    GlobalEraseBracketedSubstring("section percent:", ", ", &output_buffer_);
    GlobalReplaceSubstring(", -->", "-->", &output_buffer_);
  }

  void ResetStats() {
    pages_labeled_->Clear();
    pages_role_added_->Clear();
    navigational_roles_->Clear();
    header_roles_->Clear();
    content_roles_->Clear();
    marginal_roles_->Clear();
    divs_unlabeled_->Clear();
    ambiguous_role_labels_->Clear();
  }

  void SetupPCache(StringPiece url) {
    PropertyPage* page = NewMockPage(url);
    rewrite_driver()->set_property_page(page);
    rewrite_driver()->server_context()->page_property_cache()->Read(page);
  }

  bool ExpectTwoRuns(
      StringPiece case_id, StringPiece html_input, StringPiece html_output) {
    GoogleString url = StrCat(kTestDomain, case_id, ".html");
    SetupPCache(url);
    bool status1 = ValidateExpectedUrl(url, html_input, html_output);
    int labeled = pages_labeled_->Get();
    int role_added = pages_role_added_->Get();
    int navigational = navigational_roles_->Get();
    int header = header_roles_->Get();
    int content = content_roles_->Get();
    int marginal = marginal_roles_->Get();
    int ambiguous = ambiguous_role_labels_->Get();
    int unlabeled = divs_unlabeled_->Get();
    int multiplier = 2;
    if (rewrite_driver()->DebugMode()) {
      multiplier = 1;
      ResetStats();
    }
    LOG(INFO) << "Second go.  Multiplier " << multiplier;
    bool status2 = ValidateExpectedUrl(url, html_input, html_output);
    EXPECT_EQ(multiplier * labeled, pages_labeled_->Get());
    EXPECT_EQ(role_added, pages_role_added_->Get());
    EXPECT_EQ(navigational, navigational_roles_->Get());
    EXPECT_EQ(header, header_roles_->Get());
    EXPECT_EQ(content, content_roles_->Get());
    EXPECT_EQ(marginal, marginal_roles_->Get());
    EXPECT_EQ(ambiguous, ambiguous_role_labels_->Get());
    EXPECT_EQ(unlabeled, divs_unlabeled_->Get());
    return (status1 && status2);
  }

  scoped_ptr<AddIdsFilter> add_ids_filter_;
  scoped_ptr<MobilizeLabelFilter> label_filter_;
  Variable* pages_labeled_;
  Variable* pages_role_added_;
  Variable* navigational_roles_;
  Variable* header_roles_;
  Variable* content_roles_;
  Variable* marginal_roles_;
  Variable* divs_unlabeled_;
  Variable* ambiguous_role_labels_;

 private:
  DISALLOW_COPY_AND_ASSIGN(MobilizeLabelFilterTest);
};

// Runs the filter over the unlabeled HTML5 test page and compares the result
// against the checked-in labeled version of the same page.
TEST_F(MobilizeLabelFilterTest, AlreadyLabeled) {
  const GoogleString test_data_dir = StrCat(GTestSrcDir(), kTestDataDir);
  StdioFileSystem file_system;

  // Load the input page.
  const GoogleString input_path = StrCat(test_data_dir, kOriginalHtml5);
  GoogleString input_html;
  ASSERT_TRUE(file_system.ReadFile(
      input_path.c_str(), &input_html, message_handler()));

  // Load the gold labeling.  It differs from the input only minimally.
  const GoogleString gold_path = StrCat(test_data_dir, kOriginalHtml5Labeled);
  GoogleString gold_html;
  ASSERT_TRUE(file_system.ReadFile(
      gold_path.c_str(), &gold_html, message_handler()));

  // Classify the whole page and check both the output and the statistics.
  ExpectTwoRuns("already_labeled_adding_labels", input_html, gold_html);
  EXPECT_EQ(2, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(2, navigational_roles_->Get());
  EXPECT_EQ(2, header_roles_->Get());
  EXPECT_EQ(3, content_roles_->Get());
  EXPECT_EQ(2, marginal_roles_->Get());
  EXPECT_EQ(0, ambiguous_role_labels_->Get());
  EXPECT_EQ(11, divs_unlabeled_->Get());
}

// HTML5 sectioning tags that appear inside <head> still get ids and show up
// in the id lists of the generated script.
TEST_F(MobilizeLabelFilterTest, Html5TagsInHead) {
  const char kLabeledHtml[] =
      "<head>\n"
      "<menu id=\"PageSpeed-0-0\">Now treated as a menu</menu>\n"
      "<header id=\"PageSpeed-0-1\"><h1>Also labeled</h1></header>\n"
      "<article id=\"PageSpeed-0-2\">Still labeled</article>\n"
      "<footer id=\"PageSpeed-0-3\">Also labeled</footer>\n"
      "</head>\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=['PageSpeed-0-1'];\n"
      "pagespeedNavigationalIds=['PageSpeed-0-0'];\n"
      "pagespeedContentIds=['PageSpeed-0-2'];\n"
      "pagespeedMarginalIds=['PageSpeed-0-3'];\n"
      "</script>";
  const GoogleString unlabeled_html = Unlabel(kLabeledHtml);
  ExpectTwoRuns("html5_tags_in_head", unlabeled_html, kLabeledHtml);
  // Debug mode is off, so both runs contribute to pages_labeled_.
  EXPECT_EQ(2, pages_labeled_->Get());
  EXPECT_EQ(0, pages_role_added_->Get());
}

// A page with nothing worth labeling gets only a debug comment saying so, and
// no role is recorded as added.
TEST_F(MobilizeLabelFilterTest, NoLabelableContent) {
  EnableDebug();
  const char kExpectedHtml[] =
      "<body><p>\n"
      "Just a paragraph of plain old content.  "
      "Nothing to label here!"
      "</p>\n"
      "<!--No nodes labeled for mobilization-->";
  const GoogleString plain_html = Unlabel(kExpectedHtml);
  ExpectTwoRuns("no_labelable_content", plain_html, kExpectedHtml);
  EXPECT_EQ(1, pages_labeled_->Get());
  EXPECT_EQ(0, pages_role_added_->Get());
}

// Debug-mode check of the statistics comment emitted for a minimal document:
// a single <div role='header'> holding a little text and one <a>.  The div is
// expected to be labeled as a header, and the counters are expected to show
// exactly one header role and nothing else.
TEST_F(MobilizeLabelFilterTest, TinyCount) {
  EnableDebug();
  // Expected output; the test input is derived from it with Unlabel().
  const char kOutputHtml[] =
      "<div role='header' id=\"PageSpeed-0\" data-mobile-role=\"header\">"
      "  Hello there,"
      " <a href='http://theworld.com/'>World</a></div>"
      "<!--id: PageSpeed-0,"
      " role: header,"
      " ElementTagDepth: 1,"
      " ContainedTagDepth: 2,"       // <a> tag
      " ContainedTagRelativeDepth: 1,"
      " ContainedTagCount: 2,"       // Includes <div> itself.
      " ContainedTagPercent: 100.00,"
      " ContainedContentBytes: 17,"  // Whitespace before <a> ignored.
      " ContainedContentPercent: 100.00,"
      " ContainedNonBlankBytes: 16,"
      " ContainedNonBlankPercent: 100.00,"
      " ContainedAContentBytes: 5,"
      " ContainedAContentLocalPercent: 29.41,"
      " ContainedNonAContentBytes: 12,"
      " head: 1,"
      " a count: 1,"
      " a percent: 100.00,"
      " div count: 1,"
      " div percent: 100.00-->\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=['PageSpeed-0'];\n"
      "</script>";
  ExpectTwoRuns("Small_count_nav",
                Unlabel(kOutputHtml), kOutputHtml);
  // In debug mode ExpectTwoRuns resets the statistics between runs, so these
  // are the counts from a single run.
  EXPECT_EQ(1, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(0, navigational_roles_->Get());
  EXPECT_EQ(1, header_roles_->Get());
  EXPECT_EQ(0, content_roles_->Get());
  EXPECT_EQ(0, marginal_roles_->Get());
  EXPECT_EQ(0, ambiguous_role_labels_->Get());
  EXPECT_EQ(0, divs_unlabeled_->Get());
}

// Variant of TinyCount whose text is padded with &nbsp; entities and extra
// spaces.  The expected byte counts in the debug comment are the same as in
// TinyCount (17 content bytes, 16 non-blank bytes).
TEST_F(MobilizeLabelFilterTest, TinyCountNbsp) {
  EnableDebug();
  // Expected output; the test input is derived from it with Unlabel().
  const char kOutputHtml[] =
      "<div role='header' id=\"PageSpeed-0\" data-mobile-role=\"header\">"
      "  &nbsp;Hello&nbsp;there,&nbsp;&nbsp;  "
      " <a href='http://theworld.com/'>World</a></div>"
      "<!--id: PageSpeed-0,"
      " role: header,"
      " ElementTagDepth: 1,"
      " ContainedTagDepth: 2,"       // <a> tag
      " ContainedTagRelativeDepth: 1,"
      " ContainedTagCount: 2,"       // Includes <div> itself.
      " ContainedTagPercent: 100.00,"
      " ContainedContentBytes: 17,"  // Whitespace before <a> ignored.
      " ContainedContentPercent: 100.00,"
      " ContainedNonBlankBytes: 16,"
      " ContainedNonBlankPercent: 100.00,"
      " ContainedAContentBytes: 5,"
      " ContainedAContentLocalPercent: 29.41,"
      " ContainedNonAContentBytes: 12,"
      " head: 1,"
      " a count: 1,"
      " a percent: 100.00,"
      " div count: 1,"
      " div percent: 100.00-->\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=['PageSpeed-0'];\n"
      "</script>";
  ExpectTwoRuns("Small_count_nav_nbsp",
                Unlabel(kOutputHtml), kOutputHtml);
  // In debug mode ExpectTwoRuns resets the statistics between runs, so these
  // are the counts from a single run.
  EXPECT_EQ(1, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(0, navigational_roles_->Get());
  EXPECT_EQ(1, header_roles_->Get());
  EXPECT_EQ(0, content_roles_->Get());
  EXPECT_EQ(0, marginal_roles_->Get());
  EXPECT_EQ(0, ambiguous_role_labels_->Get());
  EXPECT_EQ(0, divs_unlabeled_->Get());
}

// Debug-mode check of the image statistics: a <div role='content'> holding
// two bare images and one image wrapped in a link.  The debug comment is
// expected to count images inside and outside <a> separately, and the div is
// expected to be labeled as a header.
TEST_F(MobilizeLabelFilterTest, ImgInsideAndOutsideA) {
  EnableDebug();
  // Expected output; the test input is derived from it with Unlabel().
  const char kOutputHtml[] =
      "<div role='content' id=\"PageSpeed-0\" data-mobile-role=\"header\">"
      " <img src='a.png'>"
      " <img src='b.jpg'>"
      " <a href='http://theworld.com/'><img src='world.gif'></a></div>"
      "<!--id: PageSpeed-0,"
      " role: header,"
      " ElementTagDepth: 1,"
      " ContainedTagDepth: 3,"       // <a><img></a>
      " ContainedTagRelativeDepth: 2,"
      " ContainedTagCount: 5,"       // Includes <div> itself.
      " ContainedTagPercent: 100.00,"
      " ContainedAImgTag: 1,"
      " ContainedAImgLocalPercent: 33.33,"
      " ContainedNonAImgTag: 2,"
      " content: 1,"
      " a count: 1,"
      " a percent: 100.00,"
      " div count: 1,"
      " div percent: 100.00,"
      " img count: 3,"
      " img percent: 100.00-->\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=['PageSpeed-0'];\n"
      "</script>";
  // The case id becomes part of the test URL; use one that names this test
  // rather than sharing "Small count nav" with SmallCountNav.
  ExpectTwoRuns("Img_inside_and_outside_a",
                Unlabel(kOutputHtml), kOutputHtml);
  // In debug mode ExpectTwoRuns resets the statistics between runs, so these
  // are the counts from a single run.
  EXPECT_EQ(1, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(0, navigational_roles_->Get());
  EXPECT_EQ(1, header_roles_->Get());
  EXPECT_EQ(0, content_roles_->Get());
  EXPECT_EQ(0, marginal_roles_->Get());
  EXPECT_EQ(0, ambiguous_role_labels_->Get());
  EXPECT_EQ(0, divs_unlabeled_->Get());
}

// An id containing non-ASCII (UTF-8 encoded) bytes is copied into the
// generated id list unchanged.
TEST_F(MobilizeLabelFilterTest, DontCrashWithUnicodeId) {
  const char kExpectedHtml[] =
      "<header id='g\xc5\x82\xc3\xb3wna'>Header</header>\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=['g\xc5\x82\xc3\xb3wna'];\n"
      "</script>";
  const GoogleString input_html = Unlabel(kExpectedHtml);
  ExpectTwoRuns("Unicode id", input_html, kExpectedHtml);
}

// An element whose id attribute is the empty string yields an empty string in
// the generated id list.
TEST_F(MobilizeLabelFilterTest, DontCrashWithEmptyId) {
  const char kExpectedHtml[] =
      "<header id=''>Header</header>\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=[''];\n"
      "</script>";
  const GoogleString input_html = Unlabel(kExpectedHtml);
  ExpectTwoRuns("Empty id", input_html, kExpectedHtml);
}

// An id attribute with no value at all is treated like an empty id in the
// generated id list.
TEST_F(MobilizeLabelFilterTest, DontCrashWithBlankId) {
  const char kExpectedHtml[] =
      "<header id>Header</header>\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=[''];\n"
      "</script>";
  const GoogleString input_html = Unlabel(kExpectedHtml);
  ExpectTwoRuns("Blank id", input_html, kExpectedHtml);
}

// Single quotes and backslashes inside an id are backslash-escaped when the
// id is written into the generated JavaScript string literal.
TEST_F(MobilizeLabelFilterTest, InternalQuotesAndSpacesInId) {
  const char kExpectedHtml[] =
      "<header id=\"'Quotes'\\slashes\">Header</header>\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=['\\'Quotes\\'\\\\slashes'];\n"
      "</script>";
  const GoogleString input_html = Unlabel(kExpectedHtml);
  ExpectTwoRuns("Quotes\\slashes in id", input_html, kExpectedHtml);
}

// An id containing "</script>" has its slash escaped in the generated script,
// so the id cannot terminate the enclosing <script> element.
TEST_F(MobilizeLabelFilterTest, CloseScriptInId) {
  const char kExpectedHtml[] =
      "<header id='</script>'>Header</header>\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=['<\\/script>'];\n"
      "</script>";
  const GoogleString input_html = Unlabel(kExpectedHtml);
  ExpectTwoRuns("Close script in id", input_html, kExpectedHtml);
}

// Feeds the document to the parser in two pieces with a Flush() in between,
// while the outer <div> is still open, and checks the combined output.
TEST_F(MobilizeLabelFilterTest, DontCrashWithFlush) {
  // Everything up to the flush.  Ids inserted here cannot be removed again
  // later, even if they turn out to be unused.
  const char kBeforeFlush[] =
      "<html><head></head><body>\n"
      "<div id=\"PageSpeed-1\">\n"
      "<div role='nav' id=\"PageSpeed-1-0\"><a href='http://theworld.com/'>\n"
      "Hello, World\n"
      "</a></div>";
  // The remainder of the document, including the generated script.
  const char kAfterFlush[] =
      "</div>\n"
      "<script type=\"text/javascript\">"
      "pagespeedNavigationalIds=['PageSpeed-1'];\n"
      "</script>"
      "</body></html>";

  SetupWriter();
  RewriteDriver* driver = rewrite_driver();
  driver->StartParse(kTestDomain);
  driver->ParseText(Unlabel(kBeforeFlush));
  driver->Flush();
  driver->ParseText(Unlabel(kAfterFlush));
  driver->FinishParse();

  GoogleString whole_document = StrCat(kBeforeFlush, kAfterFlush);
  EXPECT_STREQ(whole_document, output_buffer_);
}

// Same two-chunk flush scenario as DontCrashWithFlush, with debug mode on.
// The debug comments for both the outer <div> (PageSpeed-1) and the inner one
// (PageSpeed-1-0) are expected in the second chunk, after the outer </div>.
TEST_F(MobilizeLabelFilterTest, DontCrashWithFlushAndDebug) {
  EnableDebug();
  // We can't insert helpful comments because the tags aren't rewritable
  // anymore.  Note that this is true even for the spanning <div>, where we
  // arguably ought to be able to insert *after* the closing tag as it's still
  // in the flush window.
  // Expected output up to the flush.
  const char kBody1[] =
      "<html><head></head><body>\n"
      "<div id=\"PageSpeed-1\">\n"
      "<div role='nav' id=\"PageSpeed-1-0\"><a href='http://theworld.com/'>\n"
      "Hello, World\n"
      "</a></div>";
  // Expected output after the flush.
  const char kBody2[] =
      "</div>\n"
      "<!--id: PageSpeed-1,"
      " role: navigational,"
      " ElementTagDepth: 1,"
      " ContainedTagDepth: 3,"
      " ContainedTagRelativeDepth: 2,"
      " ContainedTagCount: 3,"
      " ContainedTagPercent: 100.00,"
      " ContainedContentBytes: 12,"
      " ContainedContentPercent: 100.00,"
      " ContainedNonBlankBytes: 11,"
      " ContainedNonBlankPercent: 100.00,"
      " ContainedAContentBytes: 12,"
      " ContainedAContentLocalPercent: 100.00,"
      " a count: 1,"
      " a percent: 100.00,"
      " div count: 2,"
      " div percent: 100.00-->"
      "<!--id: PageSpeed-1-0,"
      " ElementTagDepth: 2,"
      " PreviousTagCount: 1,"
      " PreviousTagPercent: 33.33,"
      " ContainedTagDepth: 3,"
      " ContainedTagRelativeDepth: 1,"
      " ContainedTagCount: 2,"
      " ContainedTagPercent: 66.67,"
      " ContainedContentBytes: 12,"
      " ContainedContentPercent: 100.00,"
      " ContainedNonBlankBytes: 11,"
      " ContainedNonBlankPercent: 100.00,"
      " ContainedAContentBytes: 12,"
      " ContainedAContentLocalPercent: 100.00,"
      " nav: 1,"
      " a count: 1,"
      " a percent: 100.00,"
      " div count: 1,"
      " div percent: 50.00,"
      " parent role is navigational-->"
      "<script type=\"text/javascript\">"
      "pagespeedNavigationalIds=['PageSpeed-1'];\n"
      "</script>"
      "</body></html>";
  SetupWriter();
  rewrite_driver()->StartParse(kTestDomain);
  // Feed the unlabeled versions of the two chunks, flushing in between.
  rewrite_driver()->ParseText(Unlabel(kBody1));
  rewrite_driver()->Flush();
  rewrite_driver()->ParseText(Unlabel(kBody2));
  rewrite_driver()->FinishParse();
  GoogleString expected = StrCat(kBody1, kBody2);
  EXPECT_STREQ(expected, output_buffer_);
}

// A navigational container ('a') may have a child ('c') that is itself listed
// as marginal; both ids are expected in the generated script.
TEST_F(MobilizeLabelFilterTest, DontCrashWithMarginalChildOfNav) {
  const char kExpectedHtml[] =
      "<div id='a'>\n"
      "  <ul id='b'>\n"
      "    <li><a href='/'>Drive</a></li>\n"
      "  </ul>\n"
      "  <ul id='c'>\n"
      "    <li><a href='R8'>R8</a></li>\n"
      "  </ul>\n"
      "</div>\n"
      "<script type=\"text/javascript\">"
      "pagespeedNavigationalIds=['a'];\n"
      "pagespeedMarginalIds=['c'];\n"
      "</script>";
  const GoogleString input_html = Unlabel(kExpectedHtml);
  ExpectTwoRuns("DontCrashWithMarginalChild", input_html, kExpectedHtml);
}

// Marginal content is labeled as such, and the label is propagated up the DOM
// only as far as the outermost ancestor that is not otherwise labeled.
TEST_F(MobilizeLabelFilterTest, MarginalPropagation) {
  // The content <div> consists of three short paragraphs followed by many
  // copies of one long filler paragraph.
  const int kNumFillerParagraphs = 56;
  const char kFillerParagraph[] =
      "  <p>Are we still here? This is really quite a lot of content.</p>\n";

  // Header <div>, then the start of the content <div>.
  GoogleString expected_html =
      "<div>\n"
      " <div data-mobile-role='header' id=\"PageSpeed-0-0\">header</div>\n"
      " <div id=\"PageSpeed-0-1\">\n"
      "  <p>Content</p>\n"
      "  <p>More content</p>\n"
      "  <p>Still more content</p>\n";
  for (int i = 0; i < kNumFillerParagraphs; ++i) {
    expected_html += kFillerParagraph;
  }
  // End of the content <div>, the marginal <div>, and the generated script.
  expected_html +=
      " </div>\n"
      " <div id=\"PageSpeed-0-2\">\n"
      "  A Marginal Title\n"
      "  <div role='footer'><a>footer</a></div>\n"
      "  <div role='junk'><a>junk</a></div>\n"
      "  <div><a>more junk</a></div>\n"
      " </div>\n"
      "</div>"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=['PageSpeed-0-0'];\n"
      "pagespeedContentIds=['PageSpeed-0-1'];\n"
      "pagespeedMarginalIds=['PageSpeed-0-2'];\n"
      "</script>";

  const GoogleString input_html = Unlabel(expected_html);
  ExpectTwoRuns("Marginal propagation", input_html, expected_html);
  EXPECT_EQ(2, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(0, navigational_roles_->Get());
  EXPECT_EQ(1, header_roles_->Get());
  EXPECT_EQ(1, content_roles_->Get());
  EXPECT_EQ(1, marginal_roles_->Get());
  EXPECT_EQ(0, ambiguous_role_labels_->Get());
  EXPECT_EQ(4, divs_unlabeled_->Get());
}

// A parent whose children all carry the same label inherits that label; a
// parent whose children carry conflicting labels stays unlabeled.
TEST_F(MobilizeLabelFilterTest, ParentPropagation) {
  options()->set_log_mobilization_samples(true);
  const char kExpectedHtml[] =
      // Children are one header and one nav: the labels conflict, so the
      // enclosing <div> gets no label (and no id).
      "<div>\n"
      " <header id=\"PageSpeed-0-0\"></header>\n"
      " <nav id=\"PageSpeed-0-1\"></nav>\n"
      "</div>\n"
      // Both children are navigational, so this <div> is navigational.
      "<div id=\"PageSpeed-1\">\n"
      // The only child of the inner <div> is a nav, so it is nav as well.
      " <div>\n"
      "  <nav></nav>\n"
      " </div>\n"
      " <nav></nav>\n"
      "</div>\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=['PageSpeed-0-0'];\n"
      "pagespeedNavigationalIds=['PageSpeed-0-1','PageSpeed-1'];\n"
      "</script>";
  const GoogleString input_html = Unlabel(kExpectedHtml);
  ExpectTwoRuns("Parent propagation", input_html, kExpectedHtml);
}

// Debug-mode test of a small navigational region: a container <div> holding a
// lone link plus a role='nav' <div> that wraps a three-item list of links.
// Only the outer container (PageSpeed-1) is expected to be listed as
// navigational; a debug comment is expected after the <ul>, the inner <div>
// and the container.
TEST_F(MobilizeLabelFilterTest, SmallCountNav) {
  EnableDebug();
  // Expected output; the test input is derived from it with Unlabel().
  const char kOutputHtml[] =
      "<head></head><body>\n"
      "<div class='container' id=\"PageSpeed-1\""
      " data-mobile-role=\"navigational\">\n"
      " <a href='a'>a</a>\n"
      " <div class='menu' id='hdr' role='nav'>\n"
      "  <ul id=\"PageSpeed-hdr-0\">\n"
      "   <li><a href='n1'>nav 1</a></li>\n"
      "   <li><a href='n2'>nav 2</a></li>\n"
      "   <li><a href='n3'>nav 3</a></li>\n"
      "  </ul>"
      "<!--id: PageSpeed-hdr-0,"
      " ElementTagDepth: 3,"
      " PreviousTagCount: 3,"
      " PreviousTagPercent: 30.00,"
      " PreviousContentBytes: 1,"
      " PreviousContentPercent: 6.25,"
      " PreviousNonBlankBytes: 1,"
      " PreviousNonBlankPercent: 7.69,"
      " ContainedTagDepth: 5,"
      " ContainedTagRelativeDepth: 2,"
      " ContainedTagCount: 7,"
      " ContainedTagPercent: 70.00,"
      " ContainedContentBytes: 15,"
      " ContainedContentPercent: 93.75,"
      " ContainedNonBlankBytes: 12,"
      " ContainedNonBlankPercent: 92.31,"
      " ContainedAContentBytes: 15,"
      " ContainedAContentLocalPercent: 100.00,"
      " a count: 3,"
      " a percent: 75.00,"
      " li count: 3,"
      " li percent: 100.00,"
      " ul count: 1,"
      " ul percent: 100.00-->\n"
      " </div>"
      "<!--id: hdr,"
      " ElementTagDepth: 2,"
      " PreviousTagCount: 2,"
      " PreviousTagPercent: 20.00,"
      " PreviousContentBytes: 1,"
      " PreviousContentPercent: 6.25,"
      " PreviousNonBlankBytes: 1,"
      " PreviousNonBlankPercent: 7.69,"
      " ContainedTagDepth: 5,"
      " ContainedTagRelativeDepth: 3,"
      " ContainedTagCount: 8,"
      " ContainedTagPercent: 80.00,"
      " ContainedContentBytes: 15,"
      " ContainedContentPercent: 93.75,"
      " ContainedNonBlankBytes: 12,"
      " ContainedNonBlankPercent: 92.31,"
      " ContainedAContentBytes: 15,"
      " ContainedAContentLocalPercent: 100.00,"
      " hdr: 1,"
      " menu: 1,"
      " nav: 1,"
      " a count: 3,"
      " a percent: 75.00,"
      " div count: 1,"
      " div percent: 50.00,"
      " li count: 3,"
      " li percent: 100.00,"
      " ul count: 1,"
      " ul percent: 100.00-->\n"
      "</div>"
      "<!--id: PageSpeed-1,"
      " role: navigational,"
      " ElementTagDepth: 1,"
      " ContainedTagDepth: 5,"
      " ContainedTagRelativeDepth: 4,"
      " ContainedTagCount: 10,"
      " ContainedTagPercent: 100.00,"
      " ContainedContentBytes: 16,"
      " ContainedContentPercent: 100.00,"
      " ContainedNonBlankBytes: 13,"
      " ContainedNonBlankPercent: 100.00,"
      " ContainedAContentBytes: 16,"
      " ContainedAContentLocalPercent: 100.00,"
      " a count: 4,"
      " a percent: 100.00,"
      " div count: 2,"
      " div percent: 100.00,"
      " li count: 3,"
      " li percent: 100.00,"
      " ul count: 1,"
      " ul percent: 100.00-->\n"
      "<script type=\"text/javascript\">"
      "pagespeedNavigationalIds=['PageSpeed-1'];\n"
      "</script></body>";
  ExpectTwoRuns("Small count nav",
                Unlabel(kOutputHtml), kOutputHtml);
  // In debug mode ExpectTwoRuns resets the statistics between runs, so these
  // are the counts from a single run.
  EXPECT_EQ(1, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(1, navigational_roles_->Get());
  EXPECT_EQ(0, header_roles_->Get());
  EXPECT_EQ(0, content_roles_->Get());
  EXPECT_EQ(0, marginal_roles_->Get());
  EXPECT_EQ(0, ambiguous_role_labels_->Get());
  EXPECT_EQ(2, divs_unlabeled_->Get());
}

// Table-based variant of SmallCountNav: the navigation links live in the
// cells of a one-row <table role='nav'> rather than in a list.  Only the
// outer container (PageSpeed-1) is expected to be listed as navigational; the
// debug comments for the <tr> and the <table> are expected to end with
// "parent role is navigational".
TEST_F(MobilizeLabelFilterTest, SmallCountTabularNav) {
  EnableDebug();
  // Expected output; the test input is derived from it with Unlabel().
  const char kOutputHtml[] =
      "<head></head><body>\n"
      "<div class='container' id=\"PageSpeed-1\""
      " data-mobile-role=\"navigational\">\n"
      " <a href='a'>a</a>\n"
      " <table class='menu' id='hdr' role='nav'>\n"
      "  <tr id=\"PageSpeed-hdr-0\">\n"
      "   <td><a href='n1'>nav 1</a></td>\n"
      "   <td><a href='n2'>nav 2</a></td>\n"
      "   <td><a href='n3'>nav 3</a></td>\n"
      "  </tr>"
      "<!--id: PageSpeed-hdr-0,"
      " ElementTagDepth: 3,"
      " PreviousTagCount: 3,"
      " PreviousTagPercent: 30.00,"
      " PreviousContentBytes: 1,"
      " PreviousContentPercent: 6.25,"
      " PreviousNonBlankBytes: 1,"
      " PreviousNonBlankPercent: 7.69,"
      " ContainedTagDepth: 5,"
      " ContainedTagRelativeDepth: 2,"
      " ContainedTagCount: 7,"
      " ContainedTagPercent: 70.00,"
      " ContainedContentBytes: 15,"
      " ContainedContentPercent: 93.75,"
      " ContainedNonBlankBytes: 12,"
      " ContainedNonBlankPercent: 92.31,"
      " ContainedAContentBytes: 15,"
      " ContainedAContentLocalPercent: 100.00,"
      " a count: 3,"
      " a percent: 75.00,"
      " td count: 3,"
      " td percent: 100.00,"
      " tr count: 1,"
      " tr percent: 100.00,"
      " parent role is navigational-->\n"
      " </table>"
      "<!--id: hdr,"
      " ElementTagDepth: 2,"
      " PreviousTagCount: 2,"
      " PreviousTagPercent: 20.00,"
      " PreviousContentBytes: 1,"
      " PreviousContentPercent: 6.25,"
      " PreviousNonBlankBytes: 1,"
      " PreviousNonBlankPercent: 7.69,"
      " ContainedTagDepth: 5,"
      " ContainedTagRelativeDepth: 3,"
      " ContainedTagCount: 8,"
      " ContainedTagPercent: 80.00,"
      " ContainedContentBytes: 15,"
      " ContainedContentPercent: 93.75,"
      " ContainedNonBlankBytes: 12,"
      " ContainedNonBlankPercent: 92.31,"
      " ContainedAContentBytes: 15,"
      " ContainedAContentLocalPercent: 100.00,"
      " hdr: 1,"
      " menu: 1,"
      " nav: 1,"
      " a count: 3,"
      " a percent: 75.00,"
      " table count: 1,"
      " table percent: 100.00,"
      " td count: 3,"
      " td percent: 100.00,"
      " tr count: 1,"
      " tr percent: 100.00,"
      " parent role is navigational-->\n"
      "</div>"
      "<!--id: PageSpeed-1,"
      " role: navigational,"
      " ElementTagDepth: 1,"
      " ContainedTagDepth: 5,"
      " ContainedTagRelativeDepth: 4,"
      " ContainedTagCount: 10,"
      " ContainedTagPercent: 100.00,"
      " ContainedContentBytes: 16,"
      " ContainedContentPercent: 100.00,"
      " ContainedNonBlankBytes: 13,"
      " ContainedNonBlankPercent: 100.00,"
      " ContainedAContentBytes: 16,"
      " ContainedAContentLocalPercent: 100.00,"
      " a count: 4,"
      " a percent: 100.00,"
      " div count: 1,"
      " div percent: 100.00,"
      " table count: 1,"
      " table percent: 100.00,"
      " td count: 3,"
      " td percent: 100.00,"
      " tr count: 1,"
      " tr percent: 100.00-->\n"
      "<script type=\"text/javascript\">"
      "pagespeedNavigationalIds=['PageSpeed-1'];\n"
      "</script></body>";
  ExpectTwoRuns("Small tabular nav",
                Unlabel(kOutputHtml), kOutputHtml);
  // In debug mode ExpectTwoRuns resets the statistics between runs, so these
  // are the counts from a single run.
  EXPECT_EQ(1, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(1, navigational_roles_->Get());
  EXPECT_EQ(0, header_roles_->Get());
  EXPECT_EQ(0, content_roles_->Get());
  EXPECT_EQ(0, marginal_roles_->Get());
  EXPECT_EQ(0, ambiguous_role_labels_->Get());
  EXPECT_EQ(2, divs_unlabeled_->Get());
}

TEST_F(MobilizeLabelFilterTest, NoLabelInsideA) {
  // Baseline: with no enclosing <a>, the expected output labels both the
  // header div and the container that holds the nav menu.
  const char kLabeledWithoutOuterA[] =
      "<head></head><body>\n"
      "  <div role='header' id=\"PageSpeed-1\">\n"
      "    <h1><img src='logo.gif'>Header!</h1></div>\n"
      " <div class='container' id=\"PageSpeed-2\">\n"
      "  <a href='a'>a</a>\n"
      "  <div class='menu' id='hdr' role='nav'>\n"
      "   <ul>\n"
      "    <li><a href='n1'>nav 1</a></li>\n"
      "    <li><a href='n2'>nav 2</a></li>\n"
      "    <li><a href='n3'>nav 3</a></li>\n"
      "   </ul>\n"
      "  </div>\n"
      " </div>\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=['PageSpeed-1'];\n"
      "pagespeedNavigationalIds=['PageSpeed-2'];\n"
      "</script></body>";
  const GoogleString input_without_outer_a = Unlabel(kLabeledWithoutOuterA);
  ExpectTwoRuns("Label not inside <a>",
                input_without_outer_a, kLabeledWithoutOuterA);
  EXPECT_EQ(2, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(1, navigational_roles_->Get());
  EXPECT_EQ(1, header_roles_->Get());
  EXPECT_EQ(0, content_roles_->Get());
  EXPECT_EQ(0, marginal_roles_->Get());
  EXPECT_EQ(0, ambiguous_role_labels_->Get());
  EXPECT_EQ(2, divs_unlabeled_->Get());

  // Same content, but each top-level div is now wrapped in an <a>.  The header
  // must no longer be identified.  The nav container is still identified: it
  // holds nested links of its own, which signals that the enclosing <a> was a
  // markup error.
  const char kLabeledInsideOuterA[] =
      "<head></head><body>\n"
      "<a href=top.html>\n"  // Wraps the header div.
      "  <div role='header'>\n"  // Unlabeled, so no id is assigned.
      "    <h1><img src='logo.gif'>Header!</h1></div></a>\n"
      "<a href=menu.html>\n"  // Wraps the nav container as well.
      " <div class='container' id=\"PageSpeed-2-0\">\n"
      "  <a href='a'>a</a>\n"  // Plenty of inner <a>s, so it is labeled anyway.
      "  <div class='menu' id='hdr' role='nav'>\n"
      "   <ul>\n"
      "    <li><a href='n1'>nav 1</a></li>\n"
      "    <li><a href='n2'>nav 2</a></li>\n"
      "    <li><a href='n3'>nav 3</a></li>\n"
      "   </ul>\n"
      "  </div>\n"
      " </div>\n"
      "</a>\n"
      "<script type=\"text/javascript\">"  // Header id list is absent.
      "pagespeedNavigationalIds=['PageSpeed-2-0'];\n"
      "</script></body>";
  // Clear the counters accumulated by the baseline run before measuring again.
  ResetStats();
  const GoogleString input_inside_outer_a = Unlabel(kLabeledInsideOuterA);
  ExpectTwoRuns("No label inside <a>",
                input_inside_outer_a, kLabeledInsideOuterA);
  EXPECT_EQ(2, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(1, navigational_roles_->Get());
  EXPECT_EQ(0, header_roles_->Get());
  EXPECT_EQ(0, content_roles_->Get());
  EXPECT_EQ(0, marginal_roles_->Get());
  EXPECT_EQ(0, ambiguous_role_labels_->Get());
  EXPECT_EQ(3, divs_unlabeled_->Get());
}

TEST_F(MobilizeLabelFilterTest, ConfiguredInclusionAndExclusion) {
  // The document below is built from HTML5 tags that would ordinarily be
  // classified one way; the mob_nav_classes option is then used to override
  // the classification of those tags via their class / id names.
  options()->set_mob_nav_classes("+ok,-no,yes");
  const char kExpectedHtml[] =
      "<head></head><body>\n"
      " <header class='maybe ok yup' id=\"PageSpeed-1\">\n"
      "  <ul><li><a href='n1'>Actually navigational</a></ul>\n"
      " </header>\n"
      " <nav class='maybe ok yup' id='no'>\n"
      "  <nav id='no-a'>Nested forced nav</nav>\n"
      "  <header class='yes' id='no-b'>Overridden</header>\n"
      "  <ul><li><a href='n2'>Inherited non-navigational</a></ul>\n"
      " </nav>\n"
      " <nav class='yes no ok' id=\"PageSpeed-3\">\n"
      "  <ul><li><a href='n1'>Not navigational</a></ul>\n"
      " </nav>\n"
      " <em class='no' id='yes'>\n"
      "  Navigational\n"
      " </em>\n"
      " <span class='no' id=\"PageSpeed-5\">\n"
      "  Non-navigational\n"
      " </span>\n"
      " <script type=\"text/javascript\">"
      "pagespeedNavigationalIds=['PageSpeed-1','no-a','no-b','yes'];\n"
      "pagespeedMarginalIds=['no','PageSpeed-3','PageSpeed-5'];\n"
      "</script>";
  // The input is the expected output passed through Unlabel().
  const GoogleString unlabeled_input = Unlabel(kExpectedHtml);
  ExpectTwoRuns("Configured inclusion and exclusion",
                unlabeled_input, kExpectedHtml);
}

TEST_F(MobilizeLabelFilterTest, NavInsideHeader) {
  // Sites frequently place a logo together with navigation links inside one
  // header region.  The navigation portion should be flagged separately from
  // the header that encloses it.
  EnableDebug();
  const char kExpectedHtml[] =
      "<head></head><body>\n"
      " <header id=\"PageSpeed-1\" data-mobile-role=\"header\">\n"
      "  <img src='logo.gif'>\n"
      "  <ul id='nav_menu' data-mobile-role=\"navigational\">\n"
      "   <li><a href='about.html'>About us</a>\n"
      "   <li><a href='contact.html'>Contact</a>\n"
      "   <li><a href='faq.html'>FAQ</a>\n"
      "  </ul>"
      "<!--id: nav_menu,"
      " role: navigational,"
      " ElementTagDepth: 2,"
      " PreviousTagCount: 2,"
      " PreviousTagPercent: 22.22,"
      " ContainedTagDepth: 4,"
      " ContainedTagRelativeDepth: 2,"
      " ContainedTagCount: 7,"
      " ContainedTagPercent: 77.78,"
      " ContainedContentBytes: 18,"
      " ContainedContentPercent: 100.00,"
      " ContainedNonBlankBytes: 17,"
      " ContainedNonBlankPercent: 100.00,"
      " ContainedAContentBytes: 18,"
      " ContainedAContentLocalPercent: 100.00,"
      " menu: 1,"
      " nav: 1,"
      " a count: 3,"
      " a percent: 100.00,"
      " li count: 3,"
      " li percent: 100.00,"
      " ul count: 1,"
      " ul percent: 100.00,"
      " parent role is header-->\n"
      " </header>"
      "<!--id: PageSpeed-1,"
      " role: header,"
      " ElementTagDepth: 1,"
      " ContainedTagDepth: 4,"
      " ContainedTagRelativeDepth: 3,"
      " ContainedTagCount: 9,"
      " ContainedTagPercent: 100.00,"
      " ContainedContentBytes: 18,"
      " ContainedContentPercent: 100.00,"
      " ContainedNonBlankBytes: 17,"
      " ContainedNonBlankPercent: 100.00,"
      " ContainedAContentBytes: 18,"
      " ContainedAContentLocalPercent: 100.00,"
      " ContainedNonAImgTag: 1,"
      " a count: 3,"
      " a percent: 100.00,"
      " div count: 1,"
      " div percent: 100.00,"
      " img count: 1,"
      " img percent: 100.00,"
      " li count: 3,"
      " li percent: 100.00,"
      " ul count: 1,"
      " ul percent: 100.00-->\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=['PageSpeed-1'];\n"
      "pagespeedNavigationalIds=['nav_menu'];\n"
      "</script></body>";
  // The input is the expected output passed through Unlabel().
  const GoogleString unlabeled_input = Unlabel(kExpectedHtml);
  ExpectTwoRuns("Nav inside header", unlabeled_input, kExpectedHtml);
  // Exactly one header role and one navigational role; nothing else.
  EXPECT_EQ(1, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(1, navigational_roles_->Get());
  EXPECT_EQ(1, header_roles_->Get());
  EXPECT_EQ(0, content_roles_->Get());
  EXPECT_EQ(0, marginal_roles_->Get());
  EXPECT_EQ(0, ambiguous_role_labels_->Get());
  EXPECT_EQ(0, divs_unlabeled_->Get());
}

TEST_F(MobilizeLabelFilterTest, Html5TagsInBody) {
  EnableDebug();
  const char kCaseId[] = "html5_tags_in_body";
  // For readability, this is the labeled document without the debug comments.
  // Passing it through Unlabel() yields the input that is actually parsed.
  const char kLabeledHtml[] =
      "<head></head><body>\n"
      "<nav data-mobile-role=\"navigational\">Labeled\n"
      "  <menu>unlabeled</menu>\n"
      "</nav>\n"
      "<menu data-mobile-role=\"navigational\">\n"
      "  Labeled</menu>\n"
      "<header data-mobile-role=\"header\">\n"
      "  <h1>Labeled</h1></header>\n"
      "<div id='body' data-mobile-role=\"content\">\n"
      "  <main>labeled\n"
      "    <article>\n"
      "      <section>unlabeled</section>\n"
      "    </article>\n"
      "  </main>\n"
      "  <article data-mobile-role=\"content\">also labeled</article>\n"
      "  <section data-mobile-role=\"content\">this too\n"
      "    <aside data-mobile-role=\"marginal\">\n"
      "      and this, it differs.</aside>\n"
      "  </section>\n"
      "</div>\n"
      "<aside data-mobile-role=\"marginal\">Labeled</aside>\n"
      "<footer data-mobile-role=\"marginal\">labeled\n"
      "  <menu data-mobile-role=\"navigational\">\n"
      "    navvy</menu>\n"
      "</footer>\n"
      "</body>";
  // In the instrumented output, the HTML5 tags used for training / instant
  // classification (nav, menu, header, main, article, aside, footer) are
  // tallied under "div count" rather than under their own tag names.
  const char kExpectedHtml[] =
      "<head></head><body>\n"
      "<nav id=\"PageSpeed-1\" data-mobile-role=\"navigational\">Labeled\n"
      "  <menu id=\"PageSpeed-1-0\">unlabeled</menu>"
      "<!--id: PageSpeed-1-0,"
      " ElementTagDepth: 2,"
      " PreviousTagCount: 1,"
      " ContainedTagDepth: 2,"
      " ContainedTagRelativeDepth: 0,"
      " ContainedTagCount: 1,"
      " ContainedContentBytes: 9,"
      " ContainedNonBlankBytes: 9,"
      " ContainedNonAContentBytes: 9,"
      " div count: 1,"
      " parent role is navigational-->\n"
      "</nav>"
      "<!--id: PageSpeed-1,"
      " role: navigational,"
      " ElementTagDepth: 1,"
      " ContainedTagDepth: 2,"
      " ContainedTagRelativeDepth: 1,"
      " ContainedTagCount: 2,"
      " ContainedContentBytes: 16,"
      " ContainedNonBlankBytes: 16,"
      " ContainedNonAContentBytes: 16,"
      " div count: 2-->\n"
      "<menu id=\"PageSpeed-2\" data-mobile-role=\"navigational\">\n"
      "  Labeled</menu>"
      "<!--id: PageSpeed-2,"
      " role: navigational,"
      " ElementTagDepth: 1,"
      " PreviousTagCount: 2,"
      " ContainedTagDepth: 1,"
      " ContainedTagRelativeDepth: 0,"
      " ContainedTagCount: 1,"
      " ContainedContentBytes: 7,"
      " ContainedNonBlankBytes: 7,"
      " ContainedNonAContentBytes: 7,"
      " div count: 1-->\n"
      "<header id=\"PageSpeed-3\" data-mobile-role=\"header\">\n"
      "  <h1>Labeled</h1></header>"
      "<!--id: PageSpeed-3,"
      " role: header,"
      " ElementTagDepth: 1,"
      " PreviousTagCount: 3,"
      " ContainedTagDepth: 2,"
      " ContainedTagRelativeDepth: 1,"
      " ContainedTagCount: 2,"
      " ContainedContentBytes: 7,"
      " ContainedNonBlankBytes: 7,"
      " ContainedNonAContentBytes: 7,"
      " div count: 1,"
      " h1 count: 1-->\n"
      "<div id='body' data-mobile-role=\"content\">\n"
      "  <main id=\"PageSpeed-body-0\">labeled\n"
      "    <article id=\"PageSpeed-body-0-0\">\n"
      "      <section id=\"PageSpeed-body-0-0-0\">unlabeled</section>"
      "<!--id: PageSpeed-body-0-0-0,"
      " ElementTagDepth: 4,"
      " PreviousTagCount: 8,"
      " ContainedTagDepth: 4,"
      " ContainedTagRelativeDepth: 0,"
      " ContainedTagCount: 1,"
      " ContainedContentBytes: 9,"
      " ContainedNonBlankBytes: 9,"
      " ContainedNonAContentBytes: 9,"
      " section count: 1,"
      " parent role is content-->\n"
      "    </article>"
      "<!--id: PageSpeed-body-0-0,"
      " ElementTagDepth: 3,"
      " PreviousTagCount: 7,"
      " ContainedTagDepth: 4,"
      " ContainedTagRelativeDepth: 1,"
      " ContainedTagCount: 2,"
      " ContainedContentBytes: 9,"
      " ContainedNonBlankBytes: 9,"
      " ContainedNonAContentBytes: 9,"
      " div count: 1,"
      " section count: 1,"
      " parent role is content-->\n"
      "  </main>"
      "<!--id: PageSpeed-body-0,"
      " ElementTagDepth: 2,"
      " PreviousTagCount: 6,"
      " ContainedTagDepth: 4,"
      " ContainedTagRelativeDepth: 2,"
      " ContainedTagCount: 3,"
      " ContainedContentBytes: 16,"
      " ContainedNonBlankBytes: 16,"
      " ContainedNonAContentBytes: 16,"
      " div count: 2,"
      " section count: 1,"
      " parent role is content-->\n"
      "  <article id=\"PageSpeed-body-1\">also labeled</article>"
      "<!--id: PageSpeed-body-1,"
      " ElementTagDepth: 2,"
      " PreviousTagCount: 9,"
      " ContainedTagDepth: 2,"
      " ContainedTagRelativeDepth: 0,"
      " ContainedTagCount: 1,"
      " ContainedContentBytes: 12,"
      " ContainedNonBlankBytes: 11,"
      " ContainedNonAContentBytes: 12,"
      " div count: 1,"
      " parent role is content-->\n"
      "  <section id=\"PageSpeed-body-2\">this too\n"
      "    <aside id=\"PageSpeed-body-2-0\" data-mobile-role=\"marginal\">\n"
      "      and this, it differs.</aside>"
      "<!--id: PageSpeed-body-2-0,"
      " role: marginal,"
      " ElementTagDepth: 3,"
      " PreviousTagCount: 11,"
      " ContainedTagDepth: 3,"
      " ContainedTagRelativeDepth: 0,"
      " ContainedTagCount: 1,"
      " ContainedContentBytes: 21,"
      " ContainedNonBlankBytes: 18,"
      " ContainedNonAContentBytes: 21,"
      " div count: 1,"
      " parent role is content-->\n"
      "  </section>"
      "<!--id: PageSpeed-body-2,"
      " ElementTagDepth: 2,"
      " PreviousTagCount: 10,"
      " ContainedTagDepth: 3,"
      " ContainedTagRelativeDepth: 1,"
      " ContainedTagCount: 2,"
      " ContainedContentBytes: 29,"
      " ContainedNonBlankBytes: 25,"
      " ContainedNonAContentBytes: 29,"
      " div count: 1,"
      " section count: 1,"
      " parent role is content-->\n"
      "</div>"
      "<!--id: body,"
      " role: content,"
      " ElementTagDepth: 1,"
      " PreviousTagCount: 5,"
      " ContainedTagDepth: 4,"
      " ContainedTagRelativeDepth: 3,"
      " ContainedTagCount: 7,"
      " ContainedContentBytes: 57,"
      " ContainedNonBlankBytes: 52,"
      " ContainedNonAContentBytes: 57,"
      " body: 1,"
      " div count: 5,"
      " section count: 2-->\n"
      "<aside id=\"PageSpeed-5\" data-mobile-role=\"marginal\">Labeled</aside>"
      "<!--id: PageSpeed-5,"
      " role: marginal,"
      " ElementTagDepth: 1,"
      " PreviousTagCount: 12,"
      " ContainedTagDepth: 1,"
      " ContainedTagRelativeDepth: 0,"
      " ContainedTagCount: 1,"
      " ContainedContentBytes: 7,"
      " ContainedNonBlankBytes: 7,"
      " ContainedNonAContentBytes: 7,"
      " div count: 1-->\n"
      "<footer id=\"PageSpeed-6\" data-mobile-role=\"marginal\">labeled\n"
      "  <menu id=\"PageSpeed-6-0\" data-mobile-role=\"navigational\">\n"
      "    navvy</menu>"
      "<!--id: PageSpeed-6-0,"
      " role: navigational,"
      " ElementTagDepth: 2,"
      " PreviousTagCount: 14,"
      " ContainedTagDepth: 2,"
      " ContainedTagRelativeDepth: 0,"
      " ContainedTagCount: 1,"
      " ContainedContentBytes: 5,"
      " ContainedNonBlankBytes: 5,"
      " ContainedNonAContentBytes: 5,"
      " div count: 1-->\n"
      "</footer>"
      "<!--id: PageSpeed-6,"
      " role: marginal,"
      " ElementTagDepth: 1,"
      " PreviousTagCount: 13,"
      " ContainedTagDepth: 2,"
      " ContainedTagRelativeDepth: 1,"
      " ContainedTagCount: 2,"
      " ContainedContentBytes: 12,"
      " ContainedNonBlankBytes: 12,"
      " ContainedNonAContentBytes: 12,"
      " div count: 2-->\n"
      "<script type=\"text/javascript\">"
      "pagespeedHeaderIds=['PageSpeed-3'];\n"
      "pagespeedNavigationalIds="
      "['PageSpeed-1','PageSpeed-2','PageSpeed-6-0'];\n"
      "pagespeedContentIds=['body'];\n"
      "pagespeedMarginalIds="
      "['PageSpeed-body-2-0','PageSpeed-5','PageSpeed-6'];\n"
      "</script></body>";
  // Single parse of the unlabeled input, compared against the expected output
  // wrapped the same way the test harness wraps parsed documents.
  Parse(kCaseId, Unlabel(kLabeledHtml));
  const GoogleString expected_output =
      StrCat(doctype_string_, AddHtmlBody(kExpectedHtml));
  RemoveRedundantDataFromOutputBuffer();
  EXPECT_STREQ(expected_output, output_buffer_) << kCaseId;
  EXPECT_EQ(1, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(3, navigational_roles_->Get());
  EXPECT_EQ(1, header_roles_->Get());
  EXPECT_EQ(1, content_roles_->Get());
  EXPECT_EQ(3, marginal_roles_->Get());
}

TEST_F(MobilizeLabelFilterTest, LargeUnlabeled) {
  // Run with mob_always off and a mobile (Android Chrome) user agent.
  options()->set_mob_always(false);
  SetCurrentUserAgent(UserAgentMatcherTestBase::kAndroidChrome21UserAgent);
  StdioFileSystem file_system;

  // Load the original test page and strip its labels to form the input.
  const GoogleString source_path =
      StrCat(GTestSrcDir(), kTestDataDir, kOriginal);
  GoogleString source_html;
  ASSERT_TRUE(file_system.ReadFile(
      source_path.c_str(), &source_html, message_handler()));
  const GoogleString input_html = Unlabel(source_html);

  // Load the gold labeling and compare the full classification against it.
  // The gold labeling is not required to agree with the labels that were
  // present in the original page.
  const GoogleString gold_path =
      StrCat(GTestSrcDir(), kTestDataDir, kOriginalLabeled);
  GoogleString gold_html;
  ASSERT_TRUE(file_system.ReadFile(
      gold_path.c_str(), &gold_html, message_handler()));
  ExpectTwoRuns("unlabeled_mobile", input_html, gold_html);

  EXPECT_EQ(2, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(2, navigational_roles_->Get());
  EXPECT_EQ(2, header_roles_->Get());
  EXPECT_EQ(3, content_roles_->Get());
  EXPECT_EQ(1, marginal_roles_->Get());
  EXPECT_EQ(1, ambiguous_role_labels_->Get());
  EXPECT_EQ(12, divs_unlabeled_->Get());
}

TEST_F(MobilizeLabelFilterTest, LargeUnlabeledDesktop) {
  // Run with mob_always off and a desktop (Chrome 37) user agent.
  options()->set_mob_always(false);
  SetCurrentUserAgent(UserAgentMatcherTestBase::kChrome37UserAgent);
  StdioFileSystem file_system;

  // Load the original test page and strip its labels to form the input.
  const GoogleString source_path =
      StrCat(GTestSrcDir(), kTestDataDir, kOriginal);
  GoogleString source_html;
  ASSERT_TRUE(file_system.ReadFile(
      source_path.c_str(), &source_html, message_handler()));
  const GoogleString input_html = Unlabel(source_html);

  SetupPCache(StrCat(kTestDomain, "unlabeled_desktop.html"));

  // First pass: on desktop the labeling is computed (hence the non-zero role
  // stats) but the page itself is left unmodified.
  ValidateNoChanges("unlabeled_desktop", input_html);
  EXPECT_EQ(0, pages_labeled_->Get());
  EXPECT_EQ(1, pages_role_added_->Get());
  EXPECT_EQ(2, navigational_roles_->Get());
  EXPECT_EQ(2, header_roles_->Get());
  EXPECT_EQ(3, content_roles_->Get());
  EXPECT_EQ(1, marginal_roles_->Get());
  EXPECT_EQ(1, ambiguous_role_labels_->Get());
  EXPECT_EQ(12, divs_unlabeled_->Get());

  // Second pass: after the first pass, desktop requests should trigger no
  // further labeling computation, so every counter stays at zero.
  ResetStats();
  ValidateNoChanges("unlabeled_desktop", input_html);
  EXPECT_EQ(0, pages_labeled_->Get());
  EXPECT_EQ(0, pages_role_added_->Get());
  EXPECT_EQ(0, navigational_roles_->Get());
  EXPECT_EQ(0, header_roles_->Get());
  EXPECT_EQ(0, content_roles_->Get());
  EXPECT_EQ(0, marginal_roles_->Get());
  EXPECT_EQ(0, ambiguous_role_labels_->Get());
  EXPECT_EQ(0, divs_unlabeled_->Get());
}

}  // namespace

}  // namespace net_instaweb
