src/pagespeed/kernel/util/url_escaper_test.cc - incubator-pagespeed-debian - Git at Google

 /*
  * Copyright 2010 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 // Author: jmarantz@google.com (Joshua Marantz)

 #include <cctype>
 #include <cstddef>
 #include "pagespeed/kernel/util/url_escaper.h"
 #include "pagespeed/kernel/base/gtest.h"
 #include "pagespeed/kernel/base/string.h"
 #include "pagespeed/kernel/base/string_util.h"

 namespace {

 // We pass through a few special characters unchanged, and we
 // accept those characters, plus ',', as acceptable in the encoded
 // URLs.
 static const char kAcceptableSpecialChars[] = ",._+-=";
 static const char kPassThruChars[]          =  "._+-=";

 }  // namespace

 namespace net_instaweb {

 class UrlEscaperTest : public testing::Test {
  protected:
   void CheckEncoding(const StringPiece& url) {
     GoogleString encoded, decoded;
     UrlEscaper::EncodeToUrlSegment(url, &encoded);

     // Make sure there are only alphanumerics and _+-=%
     for (size_t i = 0; i < encoded.size(); ++i) {
       char c = encoded[i];
       EXPECT_TRUE(isalnum(c) || (strchr(kAcceptableSpecialChars, c) != NULL));
     }

     EXPECT_TRUE(UrlEscaper::DecodeFromUrlSegment(encoded, &decoded));
     EXPECT_EQ(url, decoded) << "\n encoded was " << encoded;
   }

   // Some basic text should be completely unchanged upon encode/decode.
   void CheckUnchanged(const StringPiece& url) {
     GoogleString encoded, decoded;
     UrlEscaper::EncodeToUrlSegment(url, &encoded);
     EXPECT_EQ(url, encoded);
     EXPECT_TRUE(UrlEscaper::DecodeFromUrlSegment(encoded, &decoded));
     EXPECT_EQ(url, decoded);
   }

   GoogleString Decode(const StringPiece& encoding) {
     GoogleString decoded;
     EXPECT_TRUE(UrlEscaper::DecodeFromUrlSegment(encoding, &decoded));
     return decoded;
   }

   GoogleString Encode(const StringPiece& url) {
     GoogleString encoded;
     UrlEscaper::EncodeToUrlSegment(url, &encoded);
     return encoded;
   }
 };

 TEST_F(UrlEscaperTest, TestUrls) {
   CheckEncoding("http://www.google.com");
   // Test encoding of % and lack of leading http:// (beware of double encoding):
   CheckEncoding("//web.mit.edu/foo.cgi?bar%baz");
   CheckEncoding("http://x.com/images/hacks.js.pagespeed.jm.GSLMcHP-fl.js");
   CheckEncoding("http://www.foo.bar/z1234/b_c.d?e=f&g=h");
   CheckEncoding("http://china.com/\u591a\u5e74\u7ecf\u5178\u5361\u7247\u673a");
   CheckEncoding("http://中国 汪 世 孟");
   CheckEncoding("/static/f.1.js?v=120");
   CheckEncoding("!@#$%^&*()_+=-[]{}?><,./");
 }

 TEST_F(UrlEscaperTest, TestUnchanged) {
   CheckUnchanged("abcdefghijklmnopqrstuvwxyz");
   CheckUnchanged("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
   CheckUnchanged("0123456789");
   CheckUnchanged("=+-_");
   CheckUnchanged(kPassThruChars);
 }

 TEST_F(UrlEscaperTest, LegacyDecode) {
   EXPECT_EQ("a.css", Decode("a,s"));
   EXPECT_EQ("b.jpg", Decode("b,j"));
   EXPECT_EQ("c.png", Decode("c,p"));
   EXPECT_EQ("d.gif", Decode("d,g"));
   EXPECT_EQ("e.jpeg", Decode("e,k"));
   EXPECT_EQ("f.js", Decode("f,l"));
   EXPECT_EQ("g.anything", Decode("g,oanything"));
   EXPECT_EQ("http://www.myhost.com", Decode(",h,wmyhost,c"));
 }

 TEST_F(UrlEscaperTest, PercentDecoding) {
   // Test the corner case where browser percent-encoded parts of our url.
   EXPECT_EQ("a.css", Decode("%61%2E%63%73%73"));  // Just %-encode whole url
   EXPECT_EQ("a.js+b.js", Decode("a.js%20b.js"));  // '+' re-encoded as %20 (' ')
   EXPECT_EQ("a%20b", Decode("a%2CP20b"));  // %-encoding of ,
   // TODO(jmaessen): The following never seems to happen in practice
   //  (encoding of character following , in comma encoding)
   // EXPECT_EQ("a/b", Decode("a,%2Fb"));  // %-encoding of character after ,
 }

 TEST_F(UrlEscaperTest, TestEncoding) {
   // Special case encoding a common sequence that would be long and
   // ugly to escape char-by-char.  We used to encode more than this
   // (e.g. .com -> ,c) but now that we can allow '.' in encoded names,
   // we favor legibility over compactness and have dropped the encoding
   // of ".com" and others.  However http:// requires three characters
   // to be decoded so we'll encode it in one piece.
   EXPECT_EQ(",h", Encode("http://"));

   // These common characters get special-case encodings.
   EXPECT_EQ(",u", Encode("^"));
   EXPECT_EQ(",P", Encode("%"));
   EXPECT_EQ(",_", Encode("/"));
   EXPECT_EQ(",-", Encode("\\"));
   EXPECT_EQ(",,", Encode(","));
   EXPECT_EQ(",q", Encode("?"));
   EXPECT_EQ(",a", Encode("&"));
   EXPECT_EQ(",M", Encode(".pagespeed."));
   EXPECT_EQ(",hx.com,_images,_hacks.js,Mjm.GSLMcHP-fl.js",
             Encode("http://x.com/images/hacks.js.pagespeed.jm.GSLMcHP-fl.js"));

   // Other characters are simply hexified.
   EXPECT_EQ(",3A", Encode(":"));
 }

 }  // namespace net_instaweb
	/*
	* Copyright 2010 Google Inc.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	// Author: jmarantz@google.com (Joshua Marantz)

	#include <cctype>
	#include <cstddef>
	#include "pagespeed/kernel/util/url_escaper.h"
	#include "pagespeed/kernel/base/gtest.h"
	#include "pagespeed/kernel/base/string.h"
	#include "pagespeed/kernel/base/string_util.h"

	namespace {

	// We pass through a few special characters unchanged, and we
	// accept those characters, plus ',', as acceptable in the encoded
	// URLs.
	static const char kAcceptableSpecialChars[] = ",._+-=";
	static const char kPassThruChars[] = "._+-=";

	} // namespace

	namespace net_instaweb {

	class UrlEscaperTest : public testing::Test {
	protected:
	void CheckEncoding(const StringPiece& url) {
	GoogleString encoded, decoded;
	UrlEscaper::EncodeToUrlSegment(url, &encoded);

	// Make sure there are only alphanumerics and _+-=%
	for (size_t i = 0; i < encoded.size(); ++i) {
	char c = encoded[i];
	EXPECT_TRUE(isalnum(c) \|\| (strchr(kAcceptableSpecialChars, c) != NULL));
	}

	EXPECT_TRUE(UrlEscaper::DecodeFromUrlSegment(encoded, &decoded));
	EXPECT_EQ(url, decoded) << "\n encoded was " << encoded;
	}

	// Some basic text should be completely unchanged upon encode/decode.
	void CheckUnchanged(const StringPiece& url) {
	GoogleString encoded, decoded;
	UrlEscaper::EncodeToUrlSegment(url, &encoded);
	EXPECT_EQ(url, encoded);
	EXPECT_TRUE(UrlEscaper::DecodeFromUrlSegment(encoded, &decoded));
	EXPECT_EQ(url, decoded);
	}

	GoogleString Decode(const StringPiece& encoding) {
	GoogleString decoded;
	EXPECT_TRUE(UrlEscaper::DecodeFromUrlSegment(encoding, &decoded));
	return decoded;
	}

	GoogleString Encode(const StringPiece& url) {
	GoogleString encoded;
	UrlEscaper::EncodeToUrlSegment(url, &encoded);
	return encoded;
	}
	};

	TEST_F(UrlEscaperTest, TestUrls) {
	CheckEncoding("http://www.google.com");
	// Test encoding of % and lack of leading http:// (beware of double encoding):
	CheckEncoding("//web.mit.edu/foo.cgi?bar%baz");
	CheckEncoding("http://x.com/images/hacks.js.pagespeed.jm.GSLMcHP-fl.js");
	CheckEncoding("http://www.foo.bar/z1234/b_c.d?e=f&g=h");
	CheckEncoding("http://china.com/\u591a\u5e74\u7ecf\u5178\u5361\u7247\u673a");
	CheckEncoding("http://中国汪世孟");
	CheckEncoding("/static/f.1.js?v=120");
	CheckEncoding("!@#$%^&*()_+=-[]{}?><,./");
	}

	TEST_F(UrlEscaperTest, TestUnchanged) {
	CheckUnchanged("abcdefghijklmnopqrstuvwxyz");
	CheckUnchanged("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
	CheckUnchanged("0123456789");
	CheckUnchanged("=+-_");
	CheckUnchanged(kPassThruChars);
	}

	TEST_F(UrlEscaperTest, LegacyDecode) {
	EXPECT_EQ("a.css", Decode("a,s"));
	EXPECT_EQ("b.jpg", Decode("b,j"));
	EXPECT_EQ("c.png", Decode("c,p"));
	EXPECT_EQ("d.gif", Decode("d,g"));
	EXPECT_EQ("e.jpeg", Decode("e,k"));
	EXPECT_EQ("f.js", Decode("f,l"));
	EXPECT_EQ("g.anything", Decode("g,oanything"));
	EXPECT_EQ("http://www.myhost.com", Decode(",h,wmyhost,c"));
	}

	TEST_F(UrlEscaperTest, PercentDecoding) {
	// Test the corner case where browser percent-encoded parts of our url.
	EXPECT_EQ("a.css", Decode("%61%2E%63%73%73")); // Just %-encode whole url
	EXPECT_EQ("a.js+b.js", Decode("a.js%20b.js")); // '+' re-encoded as %20 (' ')
	EXPECT_EQ("a%20b", Decode("a%2CP20b")); // %-encoding of ,
	// TODO(jmaessen): The following never seems to happen in practice
	// (encoding of character following , in comma encoding)
	// EXPECT_EQ("a/b", Decode("a,%2Fb")); // %-encoding of character after ,
	}

	TEST_F(UrlEscaperTest, TestEncoding) {
	// Special case encoding a common sequence that would be long and
	// ugly to escape char-by-char. We used to encode more than this
	// (e.g. .com -> ,c) but now that we can allow '.' in encoded names,
	// we favor legibility over compactness and have dropped the encoding
	// of ".com" and others. However http:// requires three characters
	// to be decoded so we'll encode it in one piece.
	EXPECT_EQ(",h", Encode("http://"));

	// These common characters get special-case encodings.
	EXPECT_EQ(",u", Encode("^"));
	EXPECT_EQ(",P", Encode("%"));
	EXPECT_EQ(",_", Encode("/"));
	EXPECT_EQ(",-", Encode("\\"));
	EXPECT_EQ(",,", Encode(","));
	EXPECT_EQ(",q", Encode("?"));
	EXPECT_EQ(",a", Encode("&"));
	EXPECT_EQ(",M", Encode(".pagespeed."));
	EXPECT_EQ(",hx.com,_images,_hacks.js,Mjm.GSLMcHP-fl.js",
	Encode("http://x.com/images/hacks.js.pagespeed.jm.GSLMcHP-fl.js"));

	// Other characters are simply hexified.
	EXPECT_EQ(",3A", Encode(":"));
	}

	} // namespace net_instaweb