blob: cf02c3c2029619d7532056995600f45b6269d2c3 [file] [log] [blame]
/*
* Copyright 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmaessen@google.com (Jan-Willem Maessen)
#include "net/instaweb/rewriter/public/mobilize_menu_filter.h"
#include <algorithm>
#include "base/logging.h"
#include "net/instaweb/rewriter/mobilize_labeling.pb.h"
#include "net/instaweb/rewriter/mobilize_menu.pb.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "pagespeed/kernel/base/statistics.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/html/html_node.h"
namespace net_instaweb {
namespace {
const int kPreferredMaxEntrySize = 60;
} // namespace
const char MobilizeMenuFilter::kMenusComputed[] =
"mobilization_menus_computed";
// TODO(jmaessen): Store into pcache if we're not in a nested context.
MobilizeMenuFilter::MobilizeMenuFilter(
RewriteDriver* rewrite_driver, const MobilizeLabelFilter* label_filter)
: MobilizeFilterBase(rewrite_driver),
label_filter_(label_filter),
outer_nav_element_(NULL),
menu_item_trailing_whitespace_(false),
menu_item_initial_segment_length_(0),
is_next_menu_item_new_(true),
cleanup_menu_(true) {
Statistics* stats = rewrite_driver->statistics();
num_menus_computed_ = stats->GetVariable(kMenusComputed);
}
MobilizeMenuFilter::~MobilizeMenuFilter() {
DCHECK(outer_nav_element_ == NULL);
DCHECK(menu_item_text_.empty());
}
void MobilizeMenuFilter::InitStats(Statistics* statistics) {
statistics->AddVariable(kMenusComputed);
}
void MobilizeMenuFilter::StartDocumentImpl() {
menu_.reset(new MobilizeMenu);
// Initialize navigational_ids_ from the labeling.
navigational_ids_.clear();
const MobilizeLabeling* labeling = label_filter_->labeling();
if (labeling != NULL) {
for (int i = 0, n = labeling->navigational_ids_size(); i < n; ++i) {
navigational_ids_.insert(labeling->navigational_ids(i));
}
}
}
void MobilizeMenuFilter::EndDocumentImpl() {
if (cleanup_menu_) {
CleanupMenu(menu_.get());
}
num_menus_computed_->Add(1);
DCHECK(outer_nav_element_ == NULL);
DCHECK(menu_item_text_.empty());
DCHECK(menu_stack_.empty());
outer_nav_element_ = NULL;
menu_item_text_.clear();
menu_item_trailing_whitespace_ = false;
menu_item_initial_segment_length_ = 0;
is_next_menu_item_new_ = true;
menu_stack_.clear();
}
void MobilizeMenuFilter::StartNonSkipElement(
MobileRole::Level role_attribute, HtmlElement* element) {
if (outer_nav_element_ == NULL) {
const HtmlElement::Attribute* id = element->FindAttribute(HtmlName::kId);
if (id == NULL ||
navigational_ids_.count(id->escaped_value()) == 0) {
CHECK_NE(MobileRole::kNavigational, role_attribute);
return;
}
outer_nav_element_ = element;
StartTopMenu();
}
switch (element->keyword()) {
case HtmlName::kUl:
StartDeepMenu();
break;
case HtmlName::kLi:
is_next_menu_item_new_ = true;
StartMenuItem(NULL);
break;
case HtmlName::kA: {
StartMenuItem(element->EscapedAttributeValue(HtmlName::kHref));
break;
}
default:
break;
}
}
void MobilizeMenuFilter::EndNonSkipElement(HtmlElement* element) {
if (outer_nav_element_ == NULL) {
return;
}
switch (element->keyword()) {
case HtmlName::kLi:
is_next_menu_item_new_ = true;
EndMenuItem();
break;
case HtmlName::kA:
EndMenuItem();
break;
case HtmlName::kUl:
EndDeepMenu();
break;
default:
break;
}
if (element == outer_nav_element_) {
outer_nav_element_ = NULL;
EndTopMenu();
}
}
void MobilizeMenuFilter::Characters(HtmlCharactersNode* characters) {
if (outer_nav_element_ == NULL || AreInSkip()) {
return;
}
StringPiece contents(characters->contents());
// Insert a space before trimmed contents if:
// 1) There is already menu_item_text_.
// 2) That text ended with whitespace or this text starts with whitespace.
// 3) If this is only whitespace, just set the flag and move on.
if (TrimLeadingWhitespace(&contents) && !menu_item_text_.empty()) {
menu_item_trailing_whitespace_ = true;
}
if (contents.empty()) {
return;
}
StringPiece lead(menu_item_trailing_whitespace_ ? " " : "");
if (menu_item_trailing_whitespace_ &&
menu_item_initial_segment_length_ == 0 &&
menu_item_text_.size() > 3) {
menu_item_initial_segment_length_ = menu_item_text_.size();
}
menu_item_trailing_whitespace_ = TrimTrailingWhitespace(&contents);
if (menu_item_text_.size() > kPreferredMaxEntrySize &&
menu_item_initial_segment_length_ > 0) {
// Drop the characters and any subsequent content to keep titles short.
return;
}
StrAppend(&menu_item_text_, lead, contents);
}
void MobilizeMenuFilter::SetEntryName(MobilizeMenuItem* entry) {
if (!menu_item_text_.empty()) {
if (menu_item_text_.size() > kPreferredMaxEntrySize &&
menu_item_initial_segment_length_ > 0) {
// If we got here, the menu text is awfully long. There's an initial
// portion that was in a distinguished DOM element and is shorter
// (menu_item_initial_segment_length_). Use that.
driver()->InfoHere("Dropping long menu text %s...",
menu_item_text_.c_str() +
menu_item_initial_segment_length_);
menu_item_text_.resize(menu_item_initial_segment_length_);
}
std::swap(*entry->mutable_name(), menu_item_text_);
menu_item_text_.clear();
}
menu_item_trailing_whitespace_ = false;
menu_item_initial_segment_length_ = 0;
}
void MobilizeMenuFilter::StartTopMenu() {
DCHECK(menu_item_text_.empty());
DCHECK(!menu_item_trailing_whitespace_);
DCHECK_EQ(0, menu_item_initial_segment_length_);
DCHECK(menu_stack_.empty());
menu_stack_.push_back(menu_.get());
}
void MobilizeMenuFilter::StartDeepMenu() {
CHECK(!menu_stack_.empty());
MobilizeMenuItem* entry = EnsureMenuItem();
if (!menu_item_text_.empty() &&
(entry->has_name() || entry->has_url())) {
entry = menu_stack_.back()->add_entries();
}
SetEntryName(entry);
menu_stack_.push_back(entry->mutable_submenu());
}
// Clear and discard the collected text in menu_item_text_, complaining if there
// actually was any.
void MobilizeMenuFilter::ClearMenuText() {
if (!menu_item_text_.empty()) {
driver()->InfoHere("Discarding unrooted nav text: %s",
menu_item_text_.c_str());
}
menu_item_text_.clear();
menu_item_trailing_whitespace_ = false;
menu_item_initial_segment_length_ = 0;
}
// Don't call this except from End[Top,Deep]Menu
void MobilizeMenuFilter::EndMenuCommon() {
CHECK(!menu_stack_.empty());
menu_stack_.pop_back();
ClearMenuText();
is_next_menu_item_new_ = true;
}
void MobilizeMenuFilter::EndTopMenu() {
EndMenuCommon();
CHECK(menu_stack_.empty());
}
void MobilizeMenuFilter::EndDeepMenu() {
EndMenuCommon();
CHECK(!menu_stack_.empty());
}
MobilizeMenuItem* MobilizeMenuFilter::EnsureMenuItem() {
MobilizeMenu* current_menu = menu_stack_.back();
int sz = current_menu->entries_size();
MobilizeMenuItem* entry;
if (is_next_menu_item_new_ || sz == 0) {
entry = current_menu->add_entries();
} else {
entry = current_menu->mutable_entries(sz - 1);
if (entry->has_submenu()) {
entry = current_menu->add_entries();
}
}
is_next_menu_item_new_ = false;
return entry;
}
void MobilizeMenuFilter::StartMenuItem(const char* href_or_null) {
CHECK(!menu_stack_.empty());
ClearMenuText();
MobilizeMenuItem* entry = EnsureMenuItem();
if (entry->has_url() || entry->has_submenu() || entry->has_name()) {
entry = menu_stack_.back()->add_entries();
}
if (href_or_null != NULL && *href_or_null != '\0') {
entry->set_url(href_or_null);
}
}
void MobilizeMenuFilter::EndMenuItem() {
CHECK(!menu_stack_.empty());
MobilizeMenu* current_menu = menu_stack_.back();
CHECK_LT(0, current_menu->entries_size());
MobilizeMenuItem* entry =
current_menu->mutable_entries(current_menu->entries_size() - 1);
if (entry->has_name() && !menu_item_text_.empty()) {
ClearMenuText();
} else {
SetEntryName(entry);
}
}
namespace {
// Choose the best level for a url occurring at menu levels a and b.
// * 0 is used for absent values. Choose the other in that case.
// * Level 2 is preferred (one level nested).
// * Otherwise prefer the minimum level.
// Only value a can be absent.
int BestLevel(int a, int b) {
if (a == 0) {
return b;
} else if (a == 2 || b == 2) {
return 2;
} else {
return std::min(a, b);
}
}
} // namespace
// Clean up the constructed menu by removing duplicate elements, empty submenus,
// etc. We try to keep a url as close to level 2 as possible (inside a single
// nested menu). If it's deeper, we favor a shallower occurrence. If it's
// shallower, we favor the nested one.
void MobilizeMenuFilter::CleanupMenu(MobilizeMenu* menu) {
if (menu->entries_size() == 0) {
return;
}
UrlLevelMap url_level;
MobilizeMenu swept_menu;
SweepMenu(*menu, &swept_menu);
DCHECK(IsMenuOk(swept_menu));
CollectMenuUrls(1, swept_menu, &url_level);
ClearDuplicateEntries(1, &swept_menu, &url_level);
menu->Clear();
SweepMenu(swept_menu, menu);
DCHECK(IsMenuOk(*menu));
}
// Sweep valid entries from menu into new_menu, throwing out garbage and
// flattening useless nesting.
void MobilizeMenuFilter::SweepNestedMenu(
const MobilizeMenu& menu, MobilizeMenu* new_menu) {
int n = menu.entries_size();
for (int i = 0; i < n; ++i) {
const MobilizeMenuItem& item = menu.entries(i);
if (item.has_name()) {
if (item.has_submenu()) {
MobilizeMenu new_submenu;
SweepNestedMenu(item.submenu(), &new_submenu);
if (new_submenu.entries_size() > 0) {
if (item.has_url()) {
// This is a common case on non-mobile friendly web sites: A link
// that on hover opens a menu, but on click opens a page with more
// general options. There are various ways to deal with this case.
// * Right now, we add a new item to the end of the submenu
// that has the same name and link as the parent menu.
// * We could add an item that had some special marker instead.
// * We could add the item to the beginning rather than the end,
// but we'd want to handle de-duplication specially as the menu
// sometimes already contains the link and we'd like to prefer
// the one that was there.
// * We could drop the link. The JS menu extraction does this.
// * We could just flatten the entire menu (we did this originally).
// This causes long, scrolly top menus. There's a serious
// usability question here: what's easier, a long top menu or a
// lot of hierarchy?
MobilizeMenuItem* nested_link = new_submenu.add_entries();
nested_link->set_name(item.name());
nested_link->set_url(item.url());
}
MobilizeMenuItem* new_item = new_menu->add_entries();
if (new_submenu.entries_size() == 1) {
const MobilizeMenuItem& single_entry = new_submenu.entries(0);
if (single_entry.has_name()) {
LOG(INFO) << "Flattening away 1-entry submenu "
<< single_entry.name();
}
// Pull the data out of the single submenu entry. Warning:
// assignment here is O(n) if the nested entry is itself a submenu.
// We could work around this if it came up often and we expected
// large menus, but we don't expect either.
*new_item = single_entry;
} else {
new_item->set_name(item.name());
new_item->mutable_submenu()->mutable_entries()->Swap(
new_submenu.mutable_entries());
}
continue;
}
DCHECK_EQ(0, new_submenu.entries_size());
// Fall through in case empty submenu had url attached.
}
if (!item.has_url()) {
LOG(INFO) << "Dropping item " << item.name() << " without link.";
continue;
}
MobilizeMenuItem* new_item = new_menu->add_entries();
new_item->set_url(item.url());
new_item->set_name(item.name());
} else {
if (item.has_url()) {
LOG(INFO) << "Dropping link " << item.url()
<< " without name (image only?)";
}
if (item.has_submenu()) {
// submenu without title. Flatten into new_menu.
SweepNestedMenu(item.submenu(), new_menu);
}
}
}
}
// Sweep top-level menu, flattening a singleton outer submenu.
void MobilizeMenuFilter::SweepMenu(
const MobilizeMenu& menu, MobilizeMenu* new_menu) {
SweepNestedMenu(menu, new_menu);
if (new_menu->entries_size() == 1 && new_menu->entries(0).has_submenu()) {
// We move the nested menu into temp, swapping empty entries into the place
// where the nested menu used to be.
MobilizeMenu temp;
temp.mutable_entries()->Swap(
new_menu->mutable_entries(0)->mutable_submenu()->mutable_entries());
// Now we replace new_menu's entries with the nested entries.
new_menu->mutable_entries()->Swap(temp.mutable_entries());
// At this point temp contains the original top-level new_menu entry, and
// will be deallocated on block exit.
}
}
// Find canonical occurrences of menu urls.
void MobilizeMenuFilter::CollectMenuUrls(
int level, const MobilizeMenu& menu, UrlLevelMap* url_level) {
int n = menu.entries_size();
for (int i = 0; i < n; ++i) {
const MobilizeMenuItem& item = menu.entries(i);
DCHECK(item.has_name());
if (item.has_submenu()) {
DCHECK(!item.has_url());
CollectMenuUrls(level + 1, item.submenu(), url_level);
}
if (item.has_url()) {
DCHECK(!item.has_submenu());
int& preferred_level = (*url_level)[item.url()];
preferred_level = BestLevel(preferred_level, level);
}
}
}
// Take duplicate url entries and clear them from *menu, based on the
// data previously collected in *url_level by CollectNestedMenu.
void MobilizeMenuFilter::ClearDuplicateEntries(
int level, MobilizeMenu* menu, UrlLevelMap* url_level) {
int n = menu->entries_size();
for (int i = 0; i < n; ++i) {
MobilizeMenuItem* item = menu->mutable_entries(i);
if (item->has_submenu()) {
ClearDuplicateEntries(level + 1, item->mutable_submenu(), url_level);
} else if (item->has_url()) {
int& preferred_level = (*url_level)[item->url()];
if (level == preferred_level) {
// First occurrence at preferred_level. Clear preferred_level so
// subsequent occurrences at the same level have their menu entries
// cleared.
preferred_level = 0;
} else {
// Duplicated. Clear it.
LOG(INFO) << "Dropping duplicate entry " << item->name()
<< " for " << item->url() << " at level " << level;
item->clear_url();
item->clear_name();
}
}
}
}
// Rules for a well-formed menu:
// * Every entry has a name.
// * Every entry has either a submenu or a url, not both.
// * Every submenu has at least two entries.
// These conditions are enforced by SweepNestedMenu.
// For debug purposes. Usage: DCHECK(IsMenuOK(menu))
bool MobilizeMenuFilter::IsMenuOk(const MobilizeMenu& menu) {
bool ok = true;
int n = menu.entries_size();
for (int i = 0; i < n; ++i) {
const MobilizeMenuItem& item = menu.entries(i);
if (!item.has_name()) {
ok = false;
LOG(ERROR) << "Menu item without name.";
}
if (item.has_submenu()) {
const MobilizeMenu& submenu = item.submenu();
if (item.has_url()) {
ok = false;
LOG(ERROR) << "Submenu " << item.name() << " with url " << item.url();
}
if (submenu.entries_size() <= 1) {
ok = false;
LOG(ERROR) << "Submenu " << item.name() << " has <= 1 entry.";
}
ok = IsMenuOk(submenu) && ok;
} else if (!item.has_url()) {
ok = false;
LOG(ERROR) << "Item " << item.name() << " without link.";
}
}
return ok;
}
} // namespace net_instaweb