blob: 44525c616ecd79c92c24099a20547c7af4a7514f [file] [log] [blame]
/**
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
#ifndef NET_INSTAWEB_UTIL_PUBLIC_URL_ESCAPER_H_
#define NET_INSTAWEB_UTIL_PUBLIC_URL_ESCAPER_H_
#include "net/instaweb/util/public/url_segment_encoder.h"
#include <string>
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
/*
common url format
http://www.foo.bar/z1234/b_c.d?e=f&g=h
this suggests we should have short encodings for a-zA-Z0-9.:/?&
We would like the above URL to be reasonably legible if possible.
However it's also nice if it's short.
One annoyance is that we are using '.' to delimit the 4 fields of an
instaweb-generated URL. That can probably be changed to use ^, which
is a legal URL character but is rarely used in URLs. This would enable us to
leave . alone. But that's probably a moderately painful change
involving a fair amount of regolding.
In the meantime we can replace . with ^ in this encoder so that they
don't change size. So the transform table is:
a-zA-Z0-9_=+-&? unchanged
% %%
/ %_
\ %-
http:// %h
.com %c
.css %s
.edu %e
.gif %g
.html %t
.jpeg %k
.jpg %j
.js %l
.net %n
.png %p
www. %w
. ^
^ %^
everything else %XX where XX are hex digits using capital letters
The intent of this class is to help encode arbitrary URLs (really, any
stream of 8-bit characters, but optimized for URLs) so that it can be
used in one 'segment' of a new URL. This means we will not output
. or / but will instead escape those.
*/
// UrlSegmentEncoder implementation that escapes an arbitrary character
// stream (optimized for URLs) so that it can be placed in a single segment
// of a new URL.  The escaping scheme is the transform table described in
// the comment above; the encoded form is intended not to contain '.' or '/'.
class UrlEscaper : public UrlSegmentEncoder {
 public:
  virtual ~UrlEscaper();

  // Escapes 'in' and delivers the encoded text through 'url_segment'.
  // NOTE(review): whether 'url_segment' is overwritten or appended to is
  // determined by the implementation, which is not visible here -- confirm.
  virtual void EncodeToUrlSegment(const StringPiece& in,
                                  std::string* url_segment);

  // Reverses EncodeToUrlSegment: interprets 'url_segment' as escaped text and
  // delivers the decoded text through 'out'.
  // NOTE(review): the bool result presumably reports whether 'url_segment'
  // was a well-formed encoding -- confirm against the implementation.
  virtual bool DecodeFromUrlSegment(const StringPiece& url_segment,
                                    std::string* out);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_UTIL_PUBLIC_URL_ESCAPER_H_