| /* |
| * Copyright 2015 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Author: jmaessen@google.com (Jan-Willem Maessen) |
| |
| #include "net/instaweb/rewriter/public/mobilize_menu_filter.h" |
| |
| #include <algorithm> |
| |
| #include "base/logging.h" |
| #include "net/instaweb/rewriter/mobilize_labeling.pb.h" |
| #include "net/instaweb/rewriter/mobilize_menu.pb.h" |
| #include "net/instaweb/rewriter/public/rewrite_driver.h" |
| #include "pagespeed/kernel/base/statistics.h" |
| #include "pagespeed/kernel/base/string.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/html/html_element.h" |
| #include "pagespeed/kernel/html/html_name.h" |
| #include "pagespeed/kernel/html/html_node.h" |
| |
| namespace net_instaweb { |
| |
| namespace { |
| |
| const int kPreferredMaxEntrySize = 60; |
| |
| } // namespace |
| |
| const char MobilizeMenuFilter::kMenusComputed[] = |
| "mobilization_menus_computed"; |
| |
| // TODO(jmaessen): Store into pcache if we're not in a nested context. |
| MobilizeMenuFilter::MobilizeMenuFilter( |
| RewriteDriver* rewrite_driver, const MobilizeLabelFilter* label_filter) |
| : MobilizeFilterBase(rewrite_driver), |
| label_filter_(label_filter), |
| outer_nav_element_(NULL), |
| menu_item_trailing_whitespace_(false), |
| menu_item_initial_segment_length_(0), |
| is_next_menu_item_new_(true), |
| cleanup_menu_(true) { |
| Statistics* stats = rewrite_driver->statistics(); |
| num_menus_computed_ = stats->GetVariable(kMenusComputed); |
| } |
| |
| MobilizeMenuFilter::~MobilizeMenuFilter() { |
| DCHECK(outer_nav_element_ == NULL); |
| DCHECK(menu_item_text_.empty()); |
| } |
| |
| void MobilizeMenuFilter::InitStats(Statistics* statistics) { |
| statistics->AddVariable(kMenusComputed); |
| } |
| |
| void MobilizeMenuFilter::StartDocumentImpl() { |
| menu_.reset(new MobilizeMenu); |
| // Initialize navigational_ids_ from the labeling. |
| navigational_ids_.clear(); |
| const MobilizeLabeling* labeling = label_filter_->labeling(); |
| if (labeling != NULL) { |
| for (int i = 0, n = labeling->navigational_ids_size(); i < n; ++i) { |
| navigational_ids_.insert(labeling->navigational_ids(i)); |
| } |
| } |
| } |
| |
| void MobilizeMenuFilter::EndDocumentImpl() { |
| if (cleanup_menu_) { |
| CleanupMenu(menu_.get()); |
| } |
| num_menus_computed_->Add(1); |
| DCHECK(outer_nav_element_ == NULL); |
| DCHECK(menu_item_text_.empty()); |
| DCHECK(menu_stack_.empty()); |
| outer_nav_element_ = NULL; |
| menu_item_text_.clear(); |
| menu_item_trailing_whitespace_ = false; |
| menu_item_initial_segment_length_ = 0; |
| is_next_menu_item_new_ = true; |
| menu_stack_.clear(); |
| } |
| |
| void MobilizeMenuFilter::StartNonSkipElement( |
| MobileRole::Level role_attribute, HtmlElement* element) { |
| if (outer_nav_element_ == NULL) { |
| const HtmlElement::Attribute* id = element->FindAttribute(HtmlName::kId); |
| if (id == NULL || |
| navigational_ids_.count(id->escaped_value()) == 0) { |
| CHECK_NE(MobileRole::kNavigational, role_attribute); |
| return; |
| } |
| outer_nav_element_ = element; |
| StartTopMenu(); |
| } |
| switch (element->keyword()) { |
| case HtmlName::kUl: |
| StartDeepMenu(); |
| break; |
| case HtmlName::kLi: |
| is_next_menu_item_new_ = true; |
| StartMenuItem(NULL); |
| break; |
| case HtmlName::kA: { |
| StartMenuItem(element->EscapedAttributeValue(HtmlName::kHref)); |
| break; |
| } |
| default: |
| break; |
| } |
| } |
| |
| void MobilizeMenuFilter::EndNonSkipElement(HtmlElement* element) { |
| if (outer_nav_element_ == NULL) { |
| return; |
| } |
| switch (element->keyword()) { |
| case HtmlName::kLi: |
| is_next_menu_item_new_ = true; |
| EndMenuItem(); |
| break; |
| case HtmlName::kA: |
| EndMenuItem(); |
| break; |
| case HtmlName::kUl: |
| EndDeepMenu(); |
| break; |
| default: |
| break; |
| } |
| if (element == outer_nav_element_) { |
| outer_nav_element_ = NULL; |
| EndTopMenu(); |
| } |
| } |
| |
| void MobilizeMenuFilter::Characters(HtmlCharactersNode* characters) { |
| if (outer_nav_element_ == NULL || AreInSkip()) { |
| return; |
| } |
| StringPiece contents(characters->contents()); |
| |
| // Insert a space before trimmed contents if: |
| // 1) There is already menu_item_text_. |
| // 2) That text ended with whitespace or this text starts with whitespace. |
| // 3) If this is only whitespace, just set the flag and move on. |
| if (TrimLeadingWhitespace(&contents) && !menu_item_text_.empty()) { |
| menu_item_trailing_whitespace_ = true; |
| } |
| if (contents.empty()) { |
| return; |
| } |
| StringPiece lead(menu_item_trailing_whitespace_ ? " " : ""); |
| if (menu_item_trailing_whitespace_ && |
| menu_item_initial_segment_length_ == 0 && |
| menu_item_text_.size() > 3) { |
| menu_item_initial_segment_length_ = menu_item_text_.size(); |
| } |
| menu_item_trailing_whitespace_ = TrimTrailingWhitespace(&contents); |
| if (menu_item_text_.size() > kPreferredMaxEntrySize && |
| menu_item_initial_segment_length_ > 0) { |
| // Drop the characters and any subsequent content to keep titles short. |
| return; |
| } |
| StrAppend(&menu_item_text_, lead, contents); |
| } |
| |
| void MobilizeMenuFilter::SetEntryName(MobilizeMenuItem* entry) { |
| if (!menu_item_text_.empty()) { |
| if (menu_item_text_.size() > kPreferredMaxEntrySize && |
| menu_item_initial_segment_length_ > 0) { |
| // If we got here, the menu text is awfully long. There's an initial |
| // portion that was in a distinguished DOM element and is shorter |
| // (menu_item_initial_segment_length_). Use that. |
| driver()->InfoHere("Dropping long menu text %s...", |
| menu_item_text_.c_str() + |
| menu_item_initial_segment_length_); |
| menu_item_text_.resize(menu_item_initial_segment_length_); |
| } |
| std::swap(*entry->mutable_name(), menu_item_text_); |
| menu_item_text_.clear(); |
| } |
| menu_item_trailing_whitespace_ = false; |
| menu_item_initial_segment_length_ = 0; |
| } |
| |
| void MobilizeMenuFilter::StartTopMenu() { |
| DCHECK(menu_item_text_.empty()); |
| DCHECK(!menu_item_trailing_whitespace_); |
| DCHECK_EQ(0, menu_item_initial_segment_length_); |
| DCHECK(menu_stack_.empty()); |
| menu_stack_.push_back(menu_.get()); |
| } |
| |
| void MobilizeMenuFilter::StartDeepMenu() { |
| CHECK(!menu_stack_.empty()); |
| MobilizeMenuItem* entry = EnsureMenuItem(); |
| if (!menu_item_text_.empty() && |
| (entry->has_name() || entry->has_url())) { |
| entry = menu_stack_.back()->add_entries(); |
| } |
| SetEntryName(entry); |
| menu_stack_.push_back(entry->mutable_submenu()); |
| } |
| |
| // Clear and discard the collected text in menu_item_text_, complaining if there |
| // actually was any. |
| void MobilizeMenuFilter::ClearMenuText() { |
| if (!menu_item_text_.empty()) { |
| driver()->InfoHere("Discarding unrooted nav text: %s", |
| menu_item_text_.c_str()); |
| } |
| menu_item_text_.clear(); |
| menu_item_trailing_whitespace_ = false; |
| menu_item_initial_segment_length_ = 0; |
| } |
| |
| // Don't call this except from End[Top,Deep]Menu |
| void MobilizeMenuFilter::EndMenuCommon() { |
| CHECK(!menu_stack_.empty()); |
| menu_stack_.pop_back(); |
| ClearMenuText(); |
| is_next_menu_item_new_ = true; |
| } |
| |
| void MobilizeMenuFilter::EndTopMenu() { |
| EndMenuCommon(); |
| CHECK(menu_stack_.empty()); |
| } |
| |
| void MobilizeMenuFilter::EndDeepMenu() { |
| EndMenuCommon(); |
| CHECK(!menu_stack_.empty()); |
| } |
| |
| MobilizeMenuItem* MobilizeMenuFilter::EnsureMenuItem() { |
| MobilizeMenu* current_menu = menu_stack_.back(); |
| int sz = current_menu->entries_size(); |
| MobilizeMenuItem* entry; |
| if (is_next_menu_item_new_ || sz == 0) { |
| entry = current_menu->add_entries(); |
| } else { |
| entry = current_menu->mutable_entries(sz - 1); |
| if (entry->has_submenu()) { |
| entry = current_menu->add_entries(); |
| } |
| } |
| is_next_menu_item_new_ = false; |
| return entry; |
| } |
| |
| void MobilizeMenuFilter::StartMenuItem(const char* href_or_null) { |
| CHECK(!menu_stack_.empty()); |
| ClearMenuText(); |
| MobilizeMenuItem* entry = EnsureMenuItem(); |
| if (entry->has_url() || entry->has_submenu() || entry->has_name()) { |
| entry = menu_stack_.back()->add_entries(); |
| } |
| if (href_or_null != NULL && *href_or_null != '\0') { |
| entry->set_url(href_or_null); |
| } |
| } |
| |
| void MobilizeMenuFilter::EndMenuItem() { |
| CHECK(!menu_stack_.empty()); |
| MobilizeMenu* current_menu = menu_stack_.back(); |
| CHECK_LT(0, current_menu->entries_size()); |
| MobilizeMenuItem* entry = |
| current_menu->mutable_entries(current_menu->entries_size() - 1); |
| if (entry->has_name() && !menu_item_text_.empty()) { |
| ClearMenuText(); |
| } else { |
| SetEntryName(entry); |
| } |
| } |
| |
| namespace { |
| |
| // Choose the best level for a url occurring at menu levels a and b. |
| // * 0 is used for absent values. Choose the other in that case. |
| // * Level 2 is preferred (one level nested). |
| // * Otherwise prefer the minimum level. |
| // Only value a can be absent. |
| int BestLevel(int a, int b) { |
| if (a == 0) { |
| return b; |
| } else if (a == 2 || b == 2) { |
| return 2; |
| } else { |
| return std::min(a, b); |
| } |
| } |
| |
| } // namespace |
| |
| // Clean up the constructed menu by removing duplicate elements, empty submenus, |
| // etc. We try to keep a url as close to level 2 as possible (inside a single |
| // nested menu). If it's deeper, we favor a shallower occurrence. If it's |
| // shallower, we favor the nested one. |
| void MobilizeMenuFilter::CleanupMenu(MobilizeMenu* menu) { |
| if (menu->entries_size() == 0) { |
| return; |
| } |
| UrlLevelMap url_level; |
| MobilizeMenu swept_menu; |
| SweepMenu(*menu, &swept_menu); |
| DCHECK(IsMenuOk(swept_menu)); |
| CollectMenuUrls(1, swept_menu, &url_level); |
| ClearDuplicateEntries(1, &swept_menu, &url_level); |
| menu->Clear(); |
| SweepMenu(swept_menu, menu); |
| DCHECK(IsMenuOk(*menu)); |
| } |
| |
| // Sweep valid entries from menu into new_menu, throwing out garbage and |
| // flattening useless nesting. |
| void MobilizeMenuFilter::SweepNestedMenu( |
| const MobilizeMenu& menu, MobilizeMenu* new_menu) { |
| int n = menu.entries_size(); |
| for (int i = 0; i < n; ++i) { |
| const MobilizeMenuItem& item = menu.entries(i); |
| if (item.has_name()) { |
| if (item.has_submenu()) { |
| MobilizeMenu new_submenu; |
| SweepNestedMenu(item.submenu(), &new_submenu); |
| if (new_submenu.entries_size() > 0) { |
| if (item.has_url()) { |
| // This is a common case on non-mobile friendly web sites: A link |
| // that on hover opens a menu, but on click opens a page with more |
| // general options. There are various ways to deal with this case. |
| // * Right now, we add a new item to the end of the submenu |
| // that has the same name and link as the parent menu. |
| // * We could add an item that had some special marker instead. |
| // * We could add the item to the beginning rather than the end, |
| // but we'd want to handle de-duplication specially as the menu |
| // sometimes already contains the link and we'd like to prefer |
| // the one that was there. |
| // * We could drop the link. The JS menu extraction does this. |
| // * We could just flatten the entire menu (we did this originally). |
| // This causes long, scrolly top menus. There's a serious |
| // usability question here: what's easier, a long top menu or a |
| // lot of hierarchy? |
| MobilizeMenuItem* nested_link = new_submenu.add_entries(); |
| nested_link->set_name(item.name()); |
| nested_link->set_url(item.url()); |
| } |
| MobilizeMenuItem* new_item = new_menu->add_entries(); |
| if (new_submenu.entries_size() == 1) { |
| const MobilizeMenuItem& single_entry = new_submenu.entries(0); |
| if (single_entry.has_name()) { |
| LOG(INFO) << "Flattening away 1-entry submenu " |
| << single_entry.name(); |
| } |
| // Pull the data out of the single submenu entry. Warning: |
| // assignment here is O(n) if the nested entry is itself a submenu. |
| // We could work around this if it came up often and we expected |
| // large menus, but we don't expect either. |
| *new_item = single_entry; |
| } else { |
| new_item->set_name(item.name()); |
| new_item->mutable_submenu()->mutable_entries()->Swap( |
| new_submenu.mutable_entries()); |
| } |
| continue; |
| } |
| DCHECK_EQ(0, new_submenu.entries_size()); |
| // Fall through in case empty submenu had url attached. |
| } |
| if (!item.has_url()) { |
| LOG(INFO) << "Dropping item " << item.name() << " without link."; |
| continue; |
| } |
| MobilizeMenuItem* new_item = new_menu->add_entries(); |
| new_item->set_url(item.url()); |
| new_item->set_name(item.name()); |
| } else { |
| if (item.has_url()) { |
| LOG(INFO) << "Dropping link " << item.url() |
| << " without name (image only?)"; |
| } |
| if (item.has_submenu()) { |
| // submenu without title. Flatten into new_menu. |
| SweepNestedMenu(item.submenu(), new_menu); |
| } |
| } |
| } |
| } |
| |
| // Sweep top-level menu, flattening a singleton outer submenu. |
| void MobilizeMenuFilter::SweepMenu( |
| const MobilizeMenu& menu, MobilizeMenu* new_menu) { |
| SweepNestedMenu(menu, new_menu); |
| if (new_menu->entries_size() == 1 && new_menu->entries(0).has_submenu()) { |
| // We move the nested menu into temp, swapping empty entries into the place |
| // where the nested menu used to be. |
| MobilizeMenu temp; |
| temp.mutable_entries()->Swap( |
| new_menu->mutable_entries(0)->mutable_submenu()->mutable_entries()); |
| // Now we replace new_menu's entries with the nested entries. |
| new_menu->mutable_entries()->Swap(temp.mutable_entries()); |
| // At this point temp contains the original top-level new_menu entry, and |
| // will be deallocated on block exit. |
| } |
| } |
| |
| // Find canonical occurrences of menu urls. |
| void MobilizeMenuFilter::CollectMenuUrls( |
| int level, const MobilizeMenu& menu, UrlLevelMap* url_level) { |
| int n = menu.entries_size(); |
| for (int i = 0; i < n; ++i) { |
| const MobilizeMenuItem& item = menu.entries(i); |
| DCHECK(item.has_name()); |
| if (item.has_submenu()) { |
| DCHECK(!item.has_url()); |
| CollectMenuUrls(level + 1, item.submenu(), url_level); |
| } |
| if (item.has_url()) { |
| DCHECK(!item.has_submenu()); |
| int& preferred_level = (*url_level)[item.url()]; |
| preferred_level = BestLevel(preferred_level, level); |
| } |
| } |
| } |
| |
| // Take duplicate url entries and clear them from *menu, based on the |
| // data previously collected in *url_level by CollectNestedMenu. |
| void MobilizeMenuFilter::ClearDuplicateEntries( |
| int level, MobilizeMenu* menu, UrlLevelMap* url_level) { |
| int n = menu->entries_size(); |
| for (int i = 0; i < n; ++i) { |
| MobilizeMenuItem* item = menu->mutable_entries(i); |
| if (item->has_submenu()) { |
| ClearDuplicateEntries(level + 1, item->mutable_submenu(), url_level); |
| } else if (item->has_url()) { |
| int& preferred_level = (*url_level)[item->url()]; |
| if (level == preferred_level) { |
| // First occurrence at preferred_level. Clear preferred_level so |
| // subsequent occurrences at the same level have their menu entries |
| // cleared. |
| preferred_level = 0; |
| } else { |
| // Duplicated. Clear it. |
| LOG(INFO) << "Dropping duplicate entry " << item->name() |
| << " for " << item->url() << " at level " << level; |
| item->clear_url(); |
| item->clear_name(); |
| } |
| } |
| } |
| } |
| |
| // Rules for a well-formed menu: |
| // * Every entry has a name. |
| // * Every entry has either a submenu or a url, not both. |
| // * Every submenu has at least two entries. |
| // These conditions are enforced by SweepNestedMenu. |
| // For debug purposes. Usage: DCHECK(IsMenuOK(menu)) |
| bool MobilizeMenuFilter::IsMenuOk(const MobilizeMenu& menu) { |
| bool ok = true; |
| int n = menu.entries_size(); |
| for (int i = 0; i < n; ++i) { |
| const MobilizeMenuItem& item = menu.entries(i); |
| if (!item.has_name()) { |
| ok = false; |
| LOG(ERROR) << "Menu item without name."; |
| } |
| if (item.has_submenu()) { |
| const MobilizeMenu& submenu = item.submenu(); |
| if (item.has_url()) { |
| ok = false; |
| LOG(ERROR) << "Submenu " << item.name() << " with url " << item.url(); |
| } |
| if (submenu.entries_size() <= 1) { |
| ok = false; |
| LOG(ERROR) << "Submenu " << item.name() << " has <= 1 entry."; |
| } |
| ok = IsMenuOk(submenu) && ok; |
| } else if (!item.has_url()) { |
| ok = false; |
| LOG(ERROR) << "Item " << item.name() << " without link."; |
| } |
| } |
| return ok; |
| } |
| |
| } // namespace net_instaweb |