// blob: a71c95e3c8653b9a3e352d5e3c25ce294a1145eb [file] [log] [blame]
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Author: morlovich@google.com (Maksim Orlovich)
#include "pagespeed/kernel/http/user_agent_normalizer.h"
#include "base/logging.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/util/re2.h"
namespace net_instaweb {
// Out-of-line destructor definition; the body performs no explicit cleanup.
UserAgentNormalizer::~UserAgentNormalizer() {
}
// Runs |ua_in| through every normalizer in |ua_normalizers|, in vector order,
// feeding the output of each one into the next. Returns the final string;
// with an empty vector this is simply a copy of |ua_in|.
GoogleString UserAgentNormalizer::NormalizeWithAll(
    const std::vector<const UserAgentNormalizer*>& ua_normalizers,
    const GoogleString& ua_in) {
  typedef std::vector<const UserAgentNormalizer*>::const_iterator
      NormalizerIter;

  GoogleString normalized(ua_in);
  for (NormalizerIter it = ua_normalizers.begin(),
                      last = ua_normalizers.end();
       it != last; ++it) {
    normalized = (*it)->Normalize(normalized);
  }
  return normalized;
}
// Samples:
// Dalvik/1.4.0 (Linux; U; Android 2.3.7; M5 Build/GRK39F)
// Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03L) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.58 Safari/537.31
// Mozilla/5.0 (Linux; Android 4.2.2; Nexus 4 Build/JDQ39) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.58 Mobile Safari/537.31
// Some of Samsung's phones also seem to throw in Version/1.0 before Chrome/
// Mozilla/5.0 (Linux; U; Android 4.1.2; ar-ae; GT-I9300 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30
// Builds the three patterns used by Normalize(). Each pattern is written one
// logical piece per line; the parenthesized capture groups are the parts of
// the user-agent string that Normalize() keeps, everything else is dropped.
// A pattern that fails to compile trips a CHECK carrying the RE2 error text.
AndroidUserAgentNormalizer::AndroidUserAgentNormalizer()
    : dalvik_ua_(
          // Group 1: everything up to and including the Android version.
          "(Dalvik/[\\d\\.]+ \\(Linux; U; Android [^\\s;]+)"
          // Remainder of the parenthesized section (not captured).
          "[\\s;][^)]+\\)"),
      chrome_android_ua_(
          // Group 1: prefix through the Android version.
          "(Mozilla/5.0 \\(Linux; Android [\\d\\.]+; )"
          // Rest of the parenthesized section (not captured).
          "[^)]+"
          // Group 2: closing paren and the WebKit portion.
          "(\\) AppleWebKit/[\\d\\.]+ \\(KHTML, like Gecko\\) )"
          // Optional Version/ token (not captured).
          "(?:Version/[\\d\\.]+ )?"
          // Group 3: Chrome and Safari tokens.
          "(Chrome/[\\d\\.]+(?: Mobile)?[ ]+Safari/[\\d\\.]+)"),
      android_browser_ua_(
          // Group 1: prefix through the Android version ("U;" is optional).
          "(Mozilla/5.0 \\(Linux;(?: U;)? Android [\\d\\.]+; )"
          // Rest of the parenthesized section (not captured).
          "[^)]+"
          // Group 2: closing paren, WebKit, Version and Safari tokens.
          "(\\) AppleWebKit/[\\d\\.\\+]+ \\(KHTML, like Gecko\\) "
          "Version/[\\d\\.]+(?: Mobile)? Safari/[\\d\\.]+)") {
  // Verify each pattern compiled, in declaration order of the checks.
  CHECK(dalvik_ua_.ok()) << dalvik_ua_.error();
  CHECK(chrome_android_ua_.ok()) << chrome_android_ua_.error();
  CHECK(android_browser_ua_.ok()) << android_browser_ua_.error();
}
// Out-of-line destructor definition; the body performs no explicit cleanup.
AndroidUserAgentNormalizer::~AndroidUserAgentNormalizer() {}
// Normalizes an Android user-agent string by discarding the portions that
// the patterns do not capture. The Dalvik, Chrome-on-Android and Android
// browser patterns are tried in that order; the first one that matches all
// of |in| determines the result, which is rebuilt from its capture groups.
// Input matching none of the patterns is returned unchanged.
GoogleString AndroidUserAgentNormalizer::Normalize(
    const GoogleString& in) const {
  // Dalvik: keep the captured prefix and re-close the parenthesis.
  Re2StringPiece dalvik_prefix;
  if (RE2::FullMatch(in, dalvik_ua_, &dalvik_prefix)) {
    return StrCat(Re2ToStringPiece(dalvik_prefix), ")");
  }

  // Chrome on Android: stitch the three captured groups back together.
  Re2StringPiece chrome_prefix, chrome_webkit, chrome_tail;
  if (RE2::FullMatch(in, chrome_android_ua_, &chrome_prefix, &chrome_webkit,
                     &chrome_tail)) {
    return StrCat(Re2ToStringPiece(chrome_prefix),
                  Re2ToStringPiece(chrome_webkit),
                  Re2ToStringPiece(chrome_tail));
  }

  // Android browser: stitch the two captured groups back together.
  Re2StringPiece browser_prefix, browser_tail;
  if (RE2::FullMatch(in, android_browser_ua_, &browser_prefix,
                     &browser_tail)) {
    return StrCat(Re2ToStringPiece(browser_prefix),
                  Re2ToStringPiece(browser_tail));
  }

  // Not a recognized Android UA; leave it alone.
  return in;
}
// Samples:
// "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; SE 2.X MetaSr 1.0)", 69838, 0.75695
// "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0; Touch)", 48308, 0.777381
// Builds the pattern used by Normalize(). A pattern that fails to compile
// trips a CHECK carrying the RE2 error text.
IEUserAgentNormalizer::IEUserAgentNormalizer()
    : ie_ua_(
          // Group 1: prefix through the MSIE version number.
          "(Mozilla/\\d.0 \\(compatible; MSIE [\\d\\.]+)"
          // Group 2: the rest of the parenthesized section.
          "([^)]+)"
          // Closing parenthesis (not captured).
          "\\)") {
  CHECK(ie_ua_.ok()) << ie_ua_.error();
}
// Out-of-line destructor definition; the body performs no explicit cleanup.
IEUserAgentNormalizer::~IEUserAgentNormalizer() {}
// Normalizes an Internet Explorer user-agent string.
//
// If all of |in| matches ie_ua_, the result consists of the captured
// "Mozilla/N.0 (compatible; MSIE <version>" prefix, followed by those
// "; "-separated fragments of the remainder that begin with one of the
// renderer/platform tokens tested below (kept in their original order, each
// re-joined with "; "), followed by ")". Any other input is returned
// unchanged.
GoogleString IEUserAgentNormalizer::Normalize(const GoogleString& in) const {
  Re2StringPiece match, match2;
  if (RE2::FullMatch(in, ie_ua_, &match, &match2)) {
    // IE UA strings enumerate things like installed .NET versions which
    // blow up their variety. We keep only parts that talk about the
    // renderer or platform.
    GoogleString out;
    match.CopyToString(&out);
    StringPieceVector fragments;
    SplitStringUsingSubstr(Re2ToStringPiece(match2), "; ", &fragments);
    for (int i = 0, n = fragments.size(); i < n; ++i) {
      StringPiece fragment = fragments[i];
      // Note: "WOW64" shows up as a complete fragment on its own (e.g.
      // "...; Windows NT 6.2; WOW64; Trident/6.0; ..."), so it must be
      // tested without a trailing space; a "WOW64 " prefix does not match
      // that fragment and the token would be dropped.
      if (HasPrefixString(fragment, "Trident") ||
          HasPrefixString(fragment, "Windows ") ||
          HasPrefixString(fragment, "WOW64") ||
          HasPrefixString(fragment, "chromeframe") ||
          HasPrefixString(fragment, "IEMobile") ||
          HasPrefixString(fragment, "Media Center PC")) {
        StrAppend(&out, "; ", fragment);
      }
    }
    // Re-close the parenthesized section that the pattern consumed.
    StrAppend(&out, ")");
    return out;
  }
  // Not an IE-style UA; leave it alone.
  return in;
}
} // namespace net_instaweb