| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| #include "pagespeed/kernel/http/user_agent_normalizer.h" |
| |
| #include "base/logging.h" |
| ////#include "strings/stringpiece_utils.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/util/re2.h" |
| |
| namespace net_instaweb { |
| |
| UserAgentNormalizer::~UserAgentNormalizer() {} |
| |
| GoogleString UserAgentNormalizer::NormalizeWithAll( |
| const std::vector<const UserAgentNormalizer*>& ua_normalizers, |
| const GoogleString& ua_in) { |
| GoogleString ua = ua_in; |
| for (int i = 0, n = ua_normalizers.size(); i < n; ++i) { |
| ua = ua_normalizers[i]->Normalize(ua); |
| } |
| return ua; |
| } |
| |
// Samples:
//   Dalvik/1.4.0 (Linux; U; Android 2.3.7; M5 Build/GRK39F)
//
//   Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03L)
//   AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.58 Safari/537.31
//
//   Mozilla/5.0 (Linux; Android 4.2.2; Nexus 4 Build/JDQ39)
//   AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.58
//   Mobile Safari/537.31
//
// Some of Samsung's phones also seem to throw in Version/1.0 before Chrome/.
//
//   Mozilla/5.0 (Linux; U; Android 4.1.2; ar-ae; GT-I9300 Build/JZO54K)
//   AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30
| |
| AndroidUserAgentNormalizer::AndroidUserAgentNormalizer() |
| : dalvik_ua_( |
| "(Dalvik/[\\d\\.]+ \\(Linux; U; Android " |
| "[^\\s;]+)[\\s;][^)]+\\)"), |
| chrome_android_ua_( |
| "(Mozilla/5.0 \\(Linux; Android [\\d\\.]+; )[^)]+(\\) " |
| "AppleWebKit/[\\d\\.]+ \\(KHTML, like Gecko\\) )" |
| "(?:Version/[\\d\\.]+ )?" |
| "(Chrome/[\\d\\.]+(?: Mobile)?[ ]+Safari/[\\d\\.]+)"), |
| android_browser_ua_( |
| "(Mozilla/5.0 \\(Linux;(?: U;)? Android [\\d\\.]+; )[^)]+(\\) " |
| "AppleWebKit/[\\d\\.\\+]+ \\(KHTML, like Gecko\\) " |
| "Version/[\\d\\.]+(?: Mobile)? Safari/[\\d\\.]+)") { |
| CHECK(dalvik_ua_.ok()) << dalvik_ua_.error(); |
| CHECK(chrome_android_ua_.ok()) << chrome_android_ua_.error(); |
| CHECK(android_browser_ua_.ok()) << android_browser_ua_.error(); |
| } |
| |
| AndroidUserAgentNormalizer::~AndroidUserAgentNormalizer() {} |
| |
| GoogleString AndroidUserAgentNormalizer::Normalize( |
| const GoogleString& in) const { |
| Re2StringPiece match, match2, match3; |
| if (RE2::FullMatch(in, dalvik_ua_, &match)) { |
| return StrCat(Re2ToStringPiece(match), ")"); |
| } |
| if (RE2::FullMatch(in, chrome_android_ua_, &match, &match2, &match3)) { |
| return StrCat(Re2ToStringPiece(match), Re2ToStringPiece(match2), |
| Re2ToStringPiece(match3)); |
| } |
| if (RE2::FullMatch(in, android_browser_ua_, &match, &match2)) { |
| return StrCat(Re2ToStringPiece(match), Re2ToStringPiece(match2)); |
| } |
| return in; |
| } |
| |
// Samples:
//   "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1;
//   SE 2.X MetaSr 1.0)", 69838, 0.75695
//
//   "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0;
//   Touch)", 48308, 0.777381
| |
| IEUserAgentNormalizer::IEUserAgentNormalizer() |
| : ie_ua_( |
| "(Mozilla/\\d.0 \\(compatible; MSIE [\\d\\.]+)" |
| "([^)]+)\\)") { |
| CHECK(ie_ua_.ok()) << ie_ua_.error(); |
| } |
| |
| IEUserAgentNormalizer::~IEUserAgentNormalizer() {} |
| |
| GoogleString IEUserAgentNormalizer::Normalize(const GoogleString& in) const { |
| Re2StringPiece match, match2; |
| if (RE2::FullMatch(in, ie_ua_, &match, &match2)) { |
| // IE UA strings enumerate things like installed .NET versions which |
| // blow up their variety. We keep only parts that talk about the |
| // renderer or platform |
| GoogleString out; |
| match.CopyToString(&out); |
| StringPieceVector fragments; |
| SplitStringUsingSubstr(Re2ToStringPiece(match2), "; ", &fragments); |
| for (int i = 0, n = fragments.size(); i < n; ++i) { |
| StringPiece fragment = fragments[i]; |
| if (strings::StartsWith(fragment, "Trident") || |
| strings::StartsWith(fragment, "Windows ") || |
| strings::StartsWith(fragment, "WOW64 ") || |
| strings::StartsWith(fragment, "chromeframe") || |
| strings::StartsWith(fragment, "IEMobile") || |
| strings::StartsWith(fragment, "Media Center PC")) { |
| StrAppend(&out, "; ", fragment); |
| } |
| } |
| StrAppend(&out, ")"); |
| return out; |
| } |
| return in; |
| } |
| |
| } // namespace net_instaweb |