| /* |
| * Copyright 2010 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Author: jmarantz@google.com (Joshua Marantz) |
| |
| #include "net/instaweb/rewriter/public/resource_namer.h" |
| |
| #include <cctype> |
| #include <memory> |
| #include <vector> |
| |
| #include "base/logging.h" |
| #include "pagespeed/kernel/base/hasher.h" |
| #include "pagespeed/kernel/base/string.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/http/content_type.h" |
| #include "pagespeed/kernel/util/url_escaper.h" |
| |
| namespace net_instaweb { |
| |
namespace {

// Every rewritten resource is named using the layout
//
//   ORIGINAL_NAME.pagespeed[.EXPT].ID.HASH.EXT
//
// The literal "pagespeed" segment is referred to as the system ID.  It was
// chosen because:
//   1. Abbreviations such as ps, mps (mod page speed) or psa (page speed
//      automatic) are not well known, and early reports from users indicate
//      confusion over the gibberish names in our resources.
//   2. "pagespeed" now names the whole family of products, not just the
//      Firebug plug-in.  Page Speed Automatic is the proper name for the
//      rewriting technology, but it is longer, and "pagespeed" already solves
//      the "WTF is this garbage in my URL" problem.
//   3. "mod_pagespeed" is slightly longer, and the "mod_" prefix becomes less
//      relevant if/when this technology is ported to other servers.
//
// EXPT is an optional single character giving the index of an ExperimentSpec:
// the first ExperimentSpec is 'a', the next is 'b', and so on.  Users who are
// not in any experiment do not get this section.
//
// If you change this, or the structure of the encoded string, you will also
// need to change:
//
//   automatic/system_test.sh
//   system/system_test.sh
//   apache/system_test.sh
//
// plus a few constants in _test.cc files.

// Marker segment that identifies a rewritten resource name.
const char kSystemId[] = "pagespeed";

// Separator between the segments of an encoded name, in string and char form.
// The char form is derived from the string form so the two cannot drift.
const char kSeparatorString[] = ".";
const char kSeparatorChar = kSeparatorString[0];

}  // namespace
| |
| const int ResourceNamer::kOverhead = 4 + STATIC_STRLEN(kSystemId); |
| |
| bool ResourceNamer::DecodeIgnoreHashAndSignature(StringPiece encoded_string) { |
| // Decode only takes into consideration signatures if the provided signature |
| // length is greater than 0. Providing -1 for signature_length will cause the |
| // hash_length to be ignored. Hash and signature outputs from this function |
| // must not be used. |
| return Decode(encoded_string, -1, -1); |
| } |
| |
| bool ResourceNamer::Decode(const StringPiece& encoded_string, int hash_length, |
| int signature_length) { |
| // Expected syntax: |
| // name.pagespeed[.experiment|.options].id.hash[signature].ext |
| // Note that 'name' and 'options' may have arbitrary numbers of dots, so |
| // we parse by anchoring at the 'pagespeed', beginning, and end of the |
| // StringPiece vector. |
| |
| StringPieceVector segments; |
| SplitStringPieceToVector(encoded_string, kSeparatorString, &segments, false); |
| int system_id_index = -1; |
| int n = segments.size(); |
| for (int i = 0; i < n; ++i) { |
| if (segments[i] == kSystemId) { |
| system_id_index = i; |
| break; |
| } |
| } |
| |
| experiment_.clear(); |
| options_.clear(); |
| |
| // We expect at least one segment before the system-ID: the name. We expect |
| // at least 3 segments after it: the id, hash, and extension. Extra segments |
| // preceding the system-ID are part of the name. Extra segments after the |
| // system-ID are the options or experiments. Options always are more than |
| // one character, experiments always have 1 character. |
| // If the url is to be signed, the signature is one or more characters, and |
| // the signature is placed between the hash and the extension. |
| if ((system_id_index >= 1) && // at least 1 segment before the system ID. |
| (n - system_id_index >= 4)) { // at least 3 segments after the system ID. |
| name_.clear(); |
| AppendJoinIterator(&name_, |
| segments.begin(), segments.begin() + system_id_index, |
| kSeparatorString); |
| // Looking from the right, we should see ext, hash[signature], id |
| // If the hash/signature segment is not of the exact length specified, we |
| // take the entire segment as the hash and set the signature to an empty |
| // string. |
| bool is_signed = |
| (signature_length > 0) && |
| (segments[n - 2].size() == |
| static_cast<unsigned int>(hash_length + signature_length)); |
| segments[--n].CopyToString(&ext_); |
| if (is_signed) { |
| segments[--n].substr(0, hash_length).CopyToString(&hash_); |
| segments[n].substr(hash_length).CopyToString(&signature_); |
| } else { |
| segments[--n].CopyToString(&hash_); |
| } |
| segments[--n].CopyToString(&id_); |
| |
| // Now between system_id_index and n, we have the experiment or options. |
| // Re-join them (general case includes dots for the options. |
| int experiment_or_options_start = system_id_index + 1; |
| if (experiment_or_options_start < n) { |
| GoogleString experiment_or_options; |
| AppendJoinIterator( |
| &experiment_or_options, |
| segments.begin() + experiment_or_options_start, |
| segments.begin() + n, |
| kSeparatorString); |
| if (experiment_or_options.size() == 1) { |
| if ((experiment_or_options[0] >= 'a') && |
| (experiment_or_options[0] <= 'z')) { |
| experiment_or_options.swap(experiment_); |
| } else { |
| return false; // invalid experiment |
| } |
| } else if (experiment_or_options.empty() || |
| !UrlEscaper::DecodeFromUrlSegment(experiment_or_options, |
| &options_)) { |
| return false; |
| } |
| } |
| return true; |
| } |
| return LegacyDecode(encoded_string); |
| } |
| |
| // TODO(jmarantz): validate that the 'id' is one of the filters that |
| // were implemented as of Nov 2010. Also validate that the hash |
| // code is a 32-char hex number. |
| bool ResourceNamer::LegacyDecode(const StringPiece& encoded_string) { |
| bool ret = false; |
| // First check that this URL has a known extension type |
| if (NameExtensionToContentType(encoded_string) != NULL) { |
| StringPieceVector names; |
| SplitStringPieceToVector(encoded_string, kSeparatorString, &names, true); |
| if (names.size() == 4) { |
| names[1].CopyToString(&hash_); |
| |
| // The legacy hash codes were all either 1-character (for tests) or |
| // 32 characters, all in hex. There is no point in being backwards |
| // compatible with tests, however, and it can occasionally cause us to |
| // log spam (issue 688), so we only accept the production one. |
| if (hash_.size() != 32) { |
| return false; |
| } |
| for (int i = 0, n = hash_.size(); i < n; ++i) { |
| char ch = hash_[i]; |
| if (!isdigit(ch)) { |
| ch = UpperChar(ch); |
| if ((ch < 'A') || (ch > 'F')) { |
| return false; |
| } |
| } |
| } |
| |
| names[0].CopyToString(&id_); |
| names[2].CopyToString(&name_); |
| names[3].CopyToString(&ext_); |
| ret = true; |
| } |
| } |
| return ret; |
| } |
| |
| // This is used for legacy compatibility as we transition to the grand new |
| // world. |
| GoogleString ResourceNamer::InternalEncode() const { |
| StringPieceVector parts; |
| GoogleString encoded_options; |
| parts.push_back(name_); |
| parts.push_back(kSystemId); |
| DCHECK(!(has_experiment() && has_options())); |
| if (has_experiment()) { |
| parts.push_back(experiment_); |
| } else if (has_options()) { |
| UrlEscaper::EncodeToUrlSegment(options_, &encoded_options); |
| parts.push_back(encoded_options); |
| } |
| parts.push_back(id_); |
| GoogleString hash_signature = StrCat(hash_, signature_); |
| parts.push_back(hash_signature); |
| parts.push_back(ext_); |
| return JoinCollection(parts, kSeparatorString); |
| } |
| |
| // The current encoding assumes there are no dots in any of the components. |
| // This restriction may be relaxed in the future, but check it aggressively |
| // for now. |
| GoogleString ResourceNamer::Encode() const { |
| DCHECK(StringPiece::npos == id_.find(kSeparatorChar)); |
| // It is OK for options_ to have separator characters because we |
| // use the base UrlSegmentEncoder implementation, so we don't need |
| // to run DCHECK(StringPiece::npos == options_.find(kSeparatorChar)); |
| DCHECK(!hash_.empty()); |
| DCHECK(StringPiece::npos == hash_.find(kSeparatorChar)); |
| DCHECK(StringPiece::npos == ext_.find(kSeparatorChar)); |
| DCHECK(StringPiece::npos == experiment_.find(kSeparatorChar)); |
| DCHECK(StringPiece::npos == signature_.find(kSeparatorChar)); |
| DCHECK(!has_experiment() || experiment_.length()); |
| DCHECK(!(has_experiment() && has_options())); |
| return InternalEncode(); |
| } |
| |
| GoogleString ResourceNamer::EncodeIdName() const { |
| CHECK(id_.find(kSeparatorChar) == StringPiece::npos); |
| return StrCat(id_, kSeparatorString, name_); |
| } |
| |
| void ResourceNamer::CopyFrom(const ResourceNamer& other) { |
| other.id().CopyToString(&id_); |
| other.name().CopyToString(&name_); |
| other.options().CopyToString(&options_); |
| other.hash().CopyToString(&hash_); |
| other.ext().CopyToString(&ext_); |
| other.signature().CopyToString(&signature_); |
| other.experiment().CopyToString(&experiment_); |
| } |
| |
| int ResourceNamer::EventualSize(const Hasher& hasher, |
| int signature_length) const { |
| int eventual_size = name_.size() + id_.size() + ext_.size() + kOverhead + |
| hasher.HashSizeInChars() + signature_length; |
| if (has_experiment()) { |
| // Experiment is one character, plus one for the separator. |
| eventual_size += 2; |
| } else if (has_options()) { |
| GoogleString encoded_options; |
| UrlEscaper::EncodeToUrlSegment(options_, &encoded_options); |
| eventual_size += 1 + encoded_options.size(); // add one for the separator. |
| } |
| return eventual_size; |
| } |
| |
| } // namespace net_instaweb |