// blob: 21282df2c6208d6b585b6d3827ad1ea8f1e8feab [file] [log] [blame]
/*
* Copyright 2012 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: gagansingh@google.com (Gagan Singh)
#include "net/instaweb/rewriter/public/blink_util.h"
#include <cstddef>
#include <utility>
#include <vector>
#include "base/logging.h"
#include "net/instaweb/http/public/async_fetch.h"
#include "net/instaweb/http/public/log_record.h"
#include "net/instaweb/http/public/logging_proto_impl.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "pagespeed/kernel/base/abstract_mutex.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/timer.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/http/request_headers.h"
#include "pagespeed/kernel/http/user_agent_matcher.h"
namespace net_instaweb {
namespace BlinkUtil {
namespace {
// Returns true when every token in spec_vector has a match in value_vector.
// Tokens are matched with StringCaseCompare (presumably a case-insensitive
// comparison -- confirm against its declaration). An empty spec_vector
// trivially yields true.
bool IsAllIncludedIn(const StringPieceVector& spec_vector,
                     const StringPieceVector& value_vector) {
  const int num_spec_tokens = spec_vector.size();
  const int num_value_tokens = value_vector.size();
  for (int spec_idx = 0; spec_idx < num_spec_tokens; ++spec_idx) {
    // Linear search for the current spec token among the value tokens.
    int value_idx = 0;
    while (value_idx < num_value_tokens &&
           StringCaseCompare(value_vector[value_idx],
                             spec_vector[spec_idx]) != 0) {
      ++value_idx;
    }
    if (value_idx == num_value_tokens) {
      // Ran off the end: this spec token is missing from value_vector.
      return false;
    }
  }
  // Every spec token was located.
  return true;
}
// Decides whether the requesting user agent may enter the blink flow and
// records the user-agent classification in the fetch's log record.
//
// Only the two whitelist request types are admitted; every other type is
// rejected. Note that kDoesNotSupportBlinkForMobile is logged with the same
// value as kBlinkBlackListForDesktop (CACHE_HTML_DESKTOP_BLACKLIST).
// The options parameter is not consulted by this function.
bool IsUserAgentAllowedForBlink(AsyncFetch* async_fetch,
                                const RewriteOptions* options,
                                const char* user_agent,
                                UserAgentMatcher* user_agent_matcher) {
  // Classify the request before the log record mutex is acquired.
  const UserAgentMatcher::BlinkRequestType blink_type =
      user_agent_matcher->GetBlinkRequestType(
          user_agent, async_fetch->request_headers());

  // The logging info is only touched while the log record mutex is held.
  ScopedMutex lock(async_fetch->log_record()->mutex());
  CacheHtmlLoggingInfo* html_log =
      async_fetch->log_record()->logging_info()->
          mutable_cache_html_logging_info();

  bool allowed = false;
  switch (blink_type) {
    case UserAgentMatcher::kBlinkWhiteListForDesktop:
      html_log->set_cache_html_user_agent(
          CacheHtmlLoggingInfo::CACHE_HTML_DESKTOP_WHITELIST);
      allowed = true;
      break;
    case UserAgentMatcher::kBlinkWhiteListForMobile:
      html_log->set_cache_html_user_agent(
          CacheHtmlLoggingInfo::CACHE_HTML_MOBILE);
      allowed = true;
      break;
    case UserAgentMatcher::kDoesNotSupportBlink:
      html_log->set_cache_html_user_agent(
          CacheHtmlLoggingInfo::NOT_SUPPORT_CACHE_HTML);
      break;
    // Both of these request types share one logged value.
    case UserAgentMatcher::kBlinkBlackListForDesktop:
    case UserAgentMatcher::kDoesNotSupportBlinkForMobile:
      html_log->set_cache_html_user_agent(
          CacheHtmlLoggingInfo::CACHE_HTML_DESKTOP_BLACKLIST);
      break;
    case UserAgentMatcher::kNullOrEmpty:
      html_log->set_cache_html_user_agent(
          CacheHtmlLoggingInfo::NULL_OR_EMPTY);
      break;
  }
  // A request type not listed above leaves the log untouched and is rejected.
  return allowed;
}
// Returns true while the blink blacklist is in force, i.e. while now_ms has
// not yet passed blink_blacklist_end_timestamp_ms (the end timestamp itself
// still counts as blacklisted). When blacklisted, the request flow in the log
// record is set to CACHE_HTML_BLACKLISTED under the log record's mutex.
bool IsBlinkBlacklistActive(int64 now_ms,
                            int64 blink_blacklist_end_timestamp_ms,
                            AbstractLogRecord* log_record) {
  if (now_ms > blink_blacklist_end_timestamp_ms) {
    // Blacklist window is over; nothing to log.
    return false;
  }
  ScopedMutex lock(log_record->mutex());
  CacheHtmlLoggingInfo* html_log =
      log_record->logging_info()->mutable_cache_html_logging_info();
  html_log->set_cache_html_request_flow(
      CacheHtmlLoggingInfo::CACHE_HTML_BLACKLISTED);
  return true;
}
} // namespace
// TODO(rahulbansal): Add tests for this.
// Returns true when the request qualifies for the blink flow. All of the
// following must hold, checked in this order (later checks are skipped once
// one fails, which matters because some of them write to the log record):
//   1. options is non-NULL and rewriting is enabled;
//   2. the request method is GET;
//   3. the given filter is enabled in options;
//   4. the URL is allowed by options;
//   5. the user agent is allowed for blink;
//   6. no blink blacklist is currently active;
//   7. the URL scheme is "http".
bool IsBlinkRequest(const GoogleUrl& url,
                    AsyncFetch* async_fetch,
                    const RewriteOptions* options,
                    const char* user_agent,
                    const ServerContext* server_context,
                    RewriteOptions::Filter filter) {
  if (options == NULL || !options->enabled()) {
    return false;
  }
  if (async_fetch->request_headers()->method() != RequestHeaders::kGet) {
    return false;
  }
  if (!options->Enabled(filter)) {
    return false;
  }
  // Is url allowed? (i.e., it is not in black-list.)
  // TODO(sriharis): We also make this check in regular proxy flow
  // (ProxyFetch). Should we combine these?
  if (!options->IsAllowed(url.Spec())) {
    return false;
  }
  if (!IsUserAgentAllowedForBlink(async_fetch, options, user_agent,
                                  server_context->user_agent_matcher())) {
    return false;
  }
  // Ensure there is no blink blacklist for this domain.
  if (IsBlinkBlacklistActive(server_context->timer()->NowMs(),
                             options->blink_blacklist_end_timestamp_ms(),
                             async_fetch->log_record())) {
    return false;
  }
  // Finally, only plain HTTP requests qualify.
  return url.SchemeIs("http");
}
// Returns true when json has no member other than kContiguous; a value with
// no members at all therefore counts as empty.
bool IsJsonEmpty(const Json::Value& json) {
  typedef std::vector<std::string>::const_iterator NameIterator;
  const std::vector<std::string>& member_names = json.getMemberNames();
  for (NameIterator name = member_names.begin(); name != member_names.end();
       ++name) {
    if (*name != kContiguous) {
      // Found a member that carries real content.
      return false;
    }
  }
  return true;
}
// Clears *json when every one of its indexed entries satisfies IsJsonEmpty.
// If at least one entry is non-empty, *json is left untouched. A value of
// size zero is cleared as well.
void ClearArrayIfAllEmpty(Json::Value* json) {
  // Advance past the leading run of empty entries.
  Json::ArrayIndex index = 0;
  while (index < json->size() && IsJsonEmpty((*json)[index])) {
    ++index;
  }
  // Reaching the end means no entry had content.
  if (index == json->size()) {
    json->clear();
  }
}
// Escapes *str in place:
//   - "</script>" becomes "<\/script>";
//   - the UTF-8 encoding of U+2028 (bytes E2 80 A8) becomes the six
//     characters \u2028;
//   - the UTF-8 encoding of U+2029 (bytes E2 80 A9) becomes the six
//     characters \u2029.
// All other bytes are left unchanged.
void EscapeString(GoogleString* str) {
  // Escape </script> to <\/script>.
  GlobalReplaceSubstring("</script>", "<\\/script>", str);
  // TODO(sriharis): Check whether we need to do any other escaping.

  // Fast path: both sequences we escape start with the bytes E2 80. When
  // that prefix is absent there is nothing to rewrite, so skip building a
  // copy of the whole string.
  if (str->find("\xe2\x80") == GoogleString::npos) {
    return;
  }

  const size_t length = str->length();
  GoogleString escaped;
  // The output is never shorter than the input (3 bytes become 6), so
  // reserving the input length avoids most reallocations.
  escaped.reserve(length);
  bool replaced = false;
  for (size_t i = 0; i < length; ++i) {
    const char c = (*str)[i];
    // A full three-byte sequence must fit within the string.
    if (c == '\xe2' && i + 2 < length && (*str)[i + 1] == '\x80') {
      const char third = (*str)[i + 2];
      if (third == '\xa8' || third == '\xa9') {
        escaped.append(third == '\xa8' ? "\\u2028" : "\\u2029");
        i += 2;  // Skip the two continuation bytes just consumed.
        replaced = true;
        continue;
      }
    }
    escaped.push_back(c);
  }
  // The E2 80 prefix may have belonged to some other character, in which
  // case *str is already correct and is left as is.
  if (replaced) {
    str->swap(escaped);
  }
}
// Removes one trailing line ending from *s: either "\r\n" or a lone "\n".
// Returns true if something was removed, false if *s does not end in '\n'
// (including when it is empty). A trailing '\r' without '\n' is not removed.
bool StripTrailingNewline(GoogleString* s) {
  const size_t len = s->size();
  if (len == 0 || (*s)[len - 1] != '\n') {
    return false;
  }
  // Drop a preceding carriage return together with the newline.
  const bool has_carriage_return = len > 1 && (*s)[len - 2] == '\r';
  s->resize(len - (has_carriage_return ? 2 : 1));
  return true;
}
void PopulateAttributeToNonCacheableValuesMap(
const RewriteOptions* rewrite_options, const GoogleUrl& url,
AttributesToNonCacheableValuesMap* attribute_non_cacheable_values_map,
std::vector<int>* panel_number_num_instances) {
GoogleString non_cacheable_elements_str =
rewrite_options->non_cacheables_for_cache_partial_html();
StringPiece non_cacheable_elements(non_cacheable_elements_str);
// TODO(rahulbansal): Add more error checking.
StringPieceVector non_cacheable_values;
SplitStringPieceToVector(non_cacheable_elements,
",", &non_cacheable_values, true);
for (size_t i = 0; i < non_cacheable_values.size(); ++i) {
StringPieceVector non_cacheable_values_pair;
SplitStringPieceToVector(non_cacheable_values[i], "=",
&non_cacheable_values_pair, true);
if (non_cacheable_values_pair.size() != 2) {
LOG(WARNING) << "Incorrect non cacheable element value "
<< non_cacheable_values[i];
return;
}
StringPiece attribute_name = non_cacheable_values_pair[0];
StringPiece attribute_value = non_cacheable_values_pair[1];
TrimWhitespace(&attribute_name);
TrimQuote(&attribute_value);
attribute_non_cacheable_values_map->insert(make_pair(
attribute_name.as_string(),
make_pair(attribute_value.as_string(), i)));
panel_number_num_instances->push_back(0);
}
}
int GetPanelNumberForNonCacheableElement(
const AttributesToNonCacheableValuesMap& attribute_non_cacheable_values_map,
const HtmlElement* element) {
const HtmlElement::AttributeList& attrs = element->attributes();
for (HtmlElement::AttributeConstIterator i(attrs.begin());
i != attrs.end(); ++i) {
const HtmlElement::Attribute& attribute = *i;
StringPiece value = attribute.DecodedValueOrNull();
if (value.empty()) {
continue;
}
// Get all items in the map with matching attribute name.
// TODO(sriharis): We need case insensitive compare here.
typedef AttributesToNonCacheableValuesMap::const_iterator Iterator;
std::pair<Iterator, Iterator> ret =
attribute_non_cacheable_values_map.equal_range(
attribute.name_str().as_string());
if (attribute.name().keyword() == HtmlName::kClass) {
// Split class attribute value on whitespace.
StringPieceVector value_vector;
SplitStringPieceToVector(value, " \r\n\t", &value_vector, true);
for (Iterator it = ret.first; it != ret.second; ++it) {
StringPieceVector spec_vector;
SplitStringPieceToVector(it->second.first, " \t", &spec_vector, true);
// If spec_vector is a subset of value_vector return the index
// (it->second.second).
if (IsAllIncludedIn(spec_vector, value_vector)) {
return it->second.second;
}
}
} else {
for (Iterator it = ret.first; it != ret.second; ++it) {
if (value == it->second.first) {
// Returning the index.
return it->second.second;
}
}
}
}
return -1;
}
// Builds the panel identifier "<kPanelId>-<panel_number>.<instance_number>".
GoogleString GetPanelId(int panel_number, int instance_number) {
  const GoogleString panel_str = IntegerToString(panel_number);
  const GoogleString instance_str = IntegerToString(instance_number);
  return StrCat(BlinkUtil::kPanelId, "-", panel_str, ".", instance_str);
}
} // namespace BlinkUtil
} // namespace net_instaweb