blob: 9a3ddf21b16a137944ec9e5aca0714e5be98743f [file] [log] [blame]
/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: mdsteele@google.com (Matthew D. Steele)
#include "pagespeed/kernel/html/elide_attributes_filter.h"
#include <cstddef>
#include <map>
#include <utility>
#include "pagespeed/kernel/html/doctype.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/html/html_parse.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/string_util.h"
namespace net_instaweb {
namespace {
// An attribute can be simplified if it is a "boolean attribute".
// See http://www.w3.org/TR/html5/common-microsyntaxes.html#boolean-attribute
// For example, <option selected="selected"> can become <option selected>.
struct TagAttr {
HtmlName::Keyword tag_name;
HtmlName::Keyword attr_name;
};
const TagAttr kBooleanAttrs[] = {
// http://www.w3.org/TR/html4/struct/objects.html#h-13.6.1
{HtmlName::kArea, HtmlName::kNohref},
// http://www.w3.org/TR/html5/video.html#media-elements
{HtmlName::kAudio, HtmlName::kAutoplay},
{HtmlName::kAudio, HtmlName::kControls},
{HtmlName::kAudio, HtmlName::kLoop},
{HtmlName::kAudio, HtmlName::kMuted},
// http://www.w3.org/TR/html5/the-button-element.html#the-button-element
{HtmlName::kButton, HtmlName::kAutofocus},
{HtmlName::kButton, HtmlName::kDisabled},
// http://www.w3.org/TR/html5/interactive-elements.html#the-command
{HtmlName::kCommand, HtmlName::kChecked},
{HtmlName::kCommand, HtmlName::kDisabled},
// http://www.w3.org/TR/html5/interactive-elements.html#the-details-element
{HtmlName::kDetails, HtmlName::kOpen},
// http://www.w3.org/TR/html5/association-of-controls-and-forms.html#
// attributes-for-form-submission
{HtmlName::kForm, HtmlName::kNovalidate},
// http://www.w3.org/TR/html4/present/frames.html#h-16.2.2
{HtmlName::kFrame, HtmlName::kNoresize},
// http://www.w3.org/TR/html5/the-button-element.html#the-keygen-element
{HtmlName::kKeygen, HtmlName::kAutofocus},
{HtmlName::kKeygen, HtmlName::kDisabled},
// http://www.w3.org/TR/html5/the-iframe-element.html#the-iframe-element
{HtmlName::kIframe, HtmlName::kSeamless},
// http://www.w3.org/TR/html5/embedded-content-1.html#the-img-element
{HtmlName::kImg, HtmlName::kIsmap},
// http://www.w3.org/TR/html5/the-input-element.html#the-input-element
{HtmlName::kInput, HtmlName::kAutofocus},
{HtmlName::kInput, HtmlName::kChecked},
{HtmlName::kInput, HtmlName::kDefaultchecked},
{HtmlName::kInput, HtmlName::kDisabled},
{HtmlName::kInput, HtmlName::kFormnovalidate},
{HtmlName::kInput, HtmlName::kIndeterminate},
{HtmlName::kInput, HtmlName::kMultiple},
{HtmlName::kInput, HtmlName::kReadonly},
{HtmlName::kInput, HtmlName::kRequired},
// http://www.w3.org/TR/html4/struct/objects.html#h-13.3
{HtmlName::kObject, HtmlName::kDeclare},
// http://www.w3.org/TR/html5/grouping-content.html#the-ol-element
{HtmlName::kOl, HtmlName::kReversed},
// http://www.w3.org/TR/html5/the-button-element.html#the-optgroup-element
{HtmlName::kOptgroup, HtmlName::kDisabled},
// http://www.w3.org/TR/html5/the-button-element.html#the-option-element
{HtmlName::kOption, HtmlName::kDefaultselected},
{HtmlName::kOption, HtmlName::kDisabled},
{HtmlName::kOption, HtmlName::kSelected},
// http://www.w3.org/TR/html5/scripting-1.html#script
{HtmlName::kScript, HtmlName::kAsync},
{HtmlName::kScript, HtmlName::kDefer},
// http://www.w3.org/TR/html5/the-button-element.html#the-select-element
{HtmlName::kSelect, HtmlName::kAutofocus},
{HtmlName::kSelect, HtmlName::kDisabled},
{HtmlName::kSelect, HtmlName::kMultiple},
{HtmlName::kSelect, HtmlName::kRequired},
// http://www.w3.org/TR/html5/semantics.html#the-style-element
{HtmlName::kStyle, HtmlName::kScoped},
// http://www.w3.org/TR/html5/the-button-element.html#the-textarea-element
{HtmlName::kTextarea, HtmlName::kAutofocus},
{HtmlName::kTextarea, HtmlName::kDisabled},
{HtmlName::kTextarea, HtmlName::kReadonly},
{HtmlName::kTextarea, HtmlName::kRequired},
// http://www.w3.org/TR/html5/video.html#media-elements
{HtmlName::kVideo, HtmlName::kAutoplay},
{HtmlName::kVideo, HtmlName::kControls},
{HtmlName::kVideo, HtmlName::kLoop},
{HtmlName::kVideo, HtmlName::kMuted},
};
// An attribute can be removed from a tag if its name and value is in
// kDefaultList.
//
// Note: It is important that this list not include attributes that can be
// inherited. Otherwise something like this could fail:
// <div attr="non_default_value">
// <div attr="default_value"> <!-- must not be elided -->
// </div>
// </div>
struct TagAttrValue {
HtmlName::Keyword tag_name;
HtmlName::Keyword attr_name;
const char* attr_value;
bool requires_version_5; // Default value only exists in (X)HTML 5.
};
// References for HTML 4 and HTML 5 are included below, with extra notes for
// entries that apply differently to HTML 4 and HTML 5 (i.e. those with 4th
// argument true). If you are so inclined, you are encouraged to carefully
// verify the references and make changes to any errors in this data.
const TagAttrValue kDefaultList[] = {
// 4: http://www.w3.org/TR/html4/struct/links.html#h-12.2
// 5: Note that the <a> tag's shape attribute is deprecated in HTML5.
// http://www.w3.org/TR/html5/obsolete.html#non-conforming-features
{HtmlName::kA, HtmlName::kShape, "rect", false},
// 4: http://www.w3.org/TR/html4/struct/objects.html#h-13.6.1
// 5: http://www.w3.org/TR/html5/the-map-element.html#the-area-element
{HtmlName::kArea, HtmlName::kShape, "rect", false},
// 4: http://www.w3.org/TR/html4/interact/forms.html#h-17.5
// 5: http://www.w3.org/TR/html5/the-button-element.html#the-button-element
// IE does not support this default.
// {HtmlName::kButton, HtmlName::kType, "submit", false},
// 4: The <command> tag does not exist in HTML 4.
// 5: http://www.w3.org/TR/html5/interactive-elements.html#the-command
{HtmlName::kCommand, HtmlName::kType, "command", true},
// 4: The <form> tag's autocomplete attribute does not exist in HTML 4.
// 5: http://www.w3.org/TR/html5/forms.html#the-form-element
{HtmlName::kForm, HtmlName::kAutocomplete, "on", true},
// 4: http://www.w3.org/TR/html4/interact/forms.html#h-17.3
// 5: http://www.w3.org/TR/html5/association-of-controls-and-forms.html#
// attributes-for-form-submission
{HtmlName::kForm, HtmlName::kEnctype, "application/x-www-form-urlencoded",
false}, // NOLINT
{HtmlName::kForm, HtmlName::kMethod, "get", false},
// 4: http://www.w3.org/TR/html4/present/frames.html#h-16.2.2
// 5: Note that the <frame> tag is deprecated in HTML5.
// http://www.w3.org/TR/html5/obsolete.html#non-conforming-features
{HtmlName::kFrame, HtmlName::kFrameborder, "1", false},
{HtmlName::kFrame, HtmlName::kScrolling, "auto", false},
// 4: http://www.w3.org/TR/html4/present/frames.html#h-16.5
// 5: Note that these attributes are deprecated in HTML5.
// http://www.w3.org/TR/html5/obsolete.html#non-conforming-features
{HtmlName::kIframe, HtmlName::kFrameborder, "1", false},
{HtmlName::kIframe, HtmlName::kScrolling, "auto", false},
// 4: The <keygen> tag does not exist in HTML 4.
// 5: http://www.w3.org/TR/html5/the-button-element.html#the-keygen-element
{HtmlName::kKeygen, HtmlName::kKeytype, "rsa", true},
// 4: The <menu> tag seems to mean something different in HTML 4.
// 5: http://www.w3.org/TR/html5/interactive-elements.html#menus
{HtmlName::kMenu, HtmlName::kType, "list", true},
// 4: http://www.w3.org/TR/html4/struct/objects.html#h-13.3.2
// 5: Note that the <param> tag's valuetype attribute is deprecated in HTML5.
// http://www.w3.org/TR/html5/obsolete.html#non-conforming-features
{HtmlName::kParam, HtmlName::kValuetype, "data", false},
// 4: These attributes have no default values in HTML 4.
// http://www.w3.org/TR/html4/interact/scripts.html#h-18.2.1
// 5: http://www.w3.org/TR/html5/scripting-1.html
{HtmlName::kScript, HtmlName::kLanguage, "javascript", true},
{HtmlName::kScript, HtmlName::kType, "text/javascript", true},
// 4: The <source> tag does not exist in HTML 4.
// 5: http://www.w3.org/TR/html5/video.html#the-source-element
{HtmlName::kSource, HtmlName::kMedia, "all", true},
// 4: This attribute has no default value in HTML 4.
// http://www.w3.org/TR/html4/present/styles.html#h-14.2.3
// 5: http://www.w3.org/TR/html5/semantics.html#the-style-element
{HtmlName::kStyle, HtmlName::kType, "text/css", true},
// 4: This attributes has a _different_ default value in HTML 4!
// http://www.w3.org/TR/html4/present/styles.html#h-14.2.3
// 5: http://www.w3.org/TR/html5/semantics.html#the-style-element
{HtmlName::kStyle, HtmlName::kMedia, "all", true},
// 4: The <textarea> tag's wrap attribute does not exist in HTML 4.
// 5: http://www.w3.org/TR/html5/the-button-element.html#the-textarea-element
{HtmlName::kTextarea, HtmlName::kWrap, "soft", true},
// 4: http://www.w3.org/TR/html4/struct/tables.html
// 5: http://www.w3.org/TR/html5/tabular-data.html#table-model
{HtmlName::kCol, HtmlName::kSpan, "1", false},
{HtmlName::kColgroup, HtmlName::kSpan, "1", false},
{HtmlName::kTd, HtmlName::kColspan, "1", false},
{HtmlName::kTd, HtmlName::kRowspan, "1", false},
{HtmlName::kTh, HtmlName::kColspan, "1", false},
{HtmlName::kTh, HtmlName::kRowspan, "1", false},
};
} // namespace
ElideAttributesFilter::ElideAttributesFilter(HtmlParse* html_parse)
: html_parse_(html_parse) {
// Populate one_value_attrs_map_
for (size_t i = 0; i < arraysize(kBooleanAttrs); ++i) {
const TagAttr& entry = kBooleanAttrs[i];
KeywordSet& keywords = one_value_attrs_map_[entry.tag_name];
keywords.insert(entry.attr_name);
}
// Populate default_value_map_
for (size_t i = 0; i < arraysize(kDefaultList); ++i) {
const TagAttrValue& entry = kDefaultList[i];
AttrValue& value = default_value_map_[entry.tag_name][entry.attr_name];
value.attr_value = entry.attr_value;
value.requires_version_5 = entry.requires_version_5;
}
}
ElideAttributesFilter::~ElideAttributesFilter() {}
void ElideAttributesFilter::StartElement(HtmlElement* element) {
const DocType& doctype = html_parse_->doctype();
// TODO(jmarantz): switch to using mimetype. To do that we need to have
// access to the RewriteDriver* to get the response-headers, and so this
// is not compatible with PageSpeed Insights that uses this filter for
// HTML minification.
if (!doctype.IsXhtml()) {
// Check for boolean attributes.
KeywordSetMap::const_iterator iter =
one_value_attrs_map_.find(element->keyword());
if (iter != one_value_attrs_map_.end()) {
const KeywordSet& oneValueAttrs = iter->second;
HtmlElement::AttributeList* attrs = element->mutable_attributes();
for (HtmlElement::AttributeIterator i(attrs->begin());
i != attrs->end(); ++i) {
HtmlElement::Attribute& attribute = *i;
if (attribute.escaped_value() != NULL &&
oneValueAttrs.count(attribute.keyword()) > 0) {
attribute.SetEscapedValue(NULL);
}
}
}
}
// Check for attributes with default values.
ValueMapMap::const_iterator iter1 = default_value_map_.find(
element->keyword());
if (iter1 != default_value_map_.end()) {
const ValueMap& default_values = iter1->second;
HtmlElement::AttributeList* attrs = element->mutable_attributes();
HtmlElement::AttributeIterator i(attrs->begin());
while (i != attrs->end()) {
HtmlElement::Attribute& attribute = *i;
bool remove = false;
const char* attr_value = attribute.DecodedValueOrNull();
if (attr_value != NULL) {
ValueMap::const_iterator iter2 = default_values.find(
attribute.keyword());
if (iter2 != default_values.end()) {
const AttrValue& value = iter2->second;
if ((!value.requires_version_5 || doctype.IsVersion5()) &&
StringCaseEqual(attr_value, value.attr_value)) {
remove = true;
}
}
}
if (remove) {
attrs->Erase(&i);
} else {
++i;
}
}
}
}
} // namespace net_instaweb