// blob: 6d2f871987f053946ab4588982d185062d9520fb [file] [log] [blame]
// Copyright 2013 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* @fileoverview The SafeUrl type and its builders.
*
* TODO(user): Link to document stating type contract.
*/
goog.provide('goog.html.SafeUrl');
goog.require('goog.asserts');
goog.require('goog.fs.url');
goog.require('goog.i18n.bidi.Dir');
goog.require('goog.i18n.bidi.DirectionalString');
goog.require('goog.string.Const');
goog.require('goog.string.TypedString');
/**
 * A string-like wrapper whose value is safe to use in URL context in DOM APIs
 * and HTML documents.
 *
 * Type contract: the wrapped string will not cause untrusted script execution
 * when a browser evaluates it as a hyperlink URL. A SafeUrl can therefore be
 * assigned to URL-valued DOM properties, or interpolated into an HTML template
 * in URL context (e.g., inside a href attribute), without introducing a
 * Cross-Site-Scripting vulnerability.
 *
 * What the contract does NOT cover:
 * - HTML escaping. As documented in {@code goog.html.SafeUrl.unwrap},
 *   instances are not guaranteed to be safe to interpolate into HTML without
 *   appropriate escaping.
 * - The resource the URL refers to. In particular, SafeUrls are <b>not</b>
 *   safe to use where the referred-to resource is interpreted as trusted
 *   code, e.g., as the src of a script tag.
 *
 * Instances must be obtained from the factory methods
 * ({@code goog.html.SafeUrl.fromConstant}, {@code goog.html.SafeUrl.fromBlob},
 * {@code goog.html.SafeUrl.sanitize}) and not by invoking the constructor.
 * The constructor intentionally takes no parameters and the type is
 * immutable, so invoking it directly only yields a default instance wrapping
 * the empty string.
 *
 * @see goog.html.SafeUrl#fromConstant
 * @see goog.html.SafeUrl#fromBlob
 * @see goog.html.SafeUrl#sanitize
 * @constructor
 * @final
 * @struct
 * @implements {goog.i18n.bidi.DirectionalString}
 * @implements {goog.string.TypedString}
 */
goog.html.SafeUrl = function() {
  /**
   * The URL string wrapped by this instance; starts out empty. The name is
   * deliberately ugly so that (non-compiled) code reaching into the field
   * directly stands out.
   * @private {string}
   */
  this.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = '';

  /**
   * Reference to the shared type-marker object, compared by identity in
   * {@code goog.html.SafeUrl.unwrap} as an additional run-time type check.
   * @see goog.html.SafeUrl#unwrap
   * @const
   * @private
   */
  this.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ =
      goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_;
};
/**
 * The innocuous URL that goog.html.SafeUrl.sanitize wraps when it is passed
 * an unsafe URL. goog.html.SafeUrl.fromBlob and the private normalization
 * helper fall back to the same value.
 *
 * about:invalid is registered in
 * http://www.w3.org/TR/css3-values/#about-invalid.
 * http://tools.ietf.org/html/rfc6694#section-2.2.1 permits about URLs to
 * contain a fragment, which is not to be considered when determining if an
 * about URL is well-known; the '#zClosurez' fragment therefore does not
 * change which document the URL refers to.
 *
 * Using about:invalid seems preferable to using a fixed data URL, since
 * browsers might choose to not report CSP violations on it, as legitimate
 * CSS function calls to attr() can result in this URL being produced. It is
 * also a standard URL which matches exactly the semantics we need:
 * "The about:invalid URI references a non-existent document with a generic
 * error condition. It can be used when a URI is necessary, but the default
 * value shouldn't be resolveable as any type of document".
 *
 * @const {string}
 */
goog.html.SafeUrl.INNOCUOUS_STRING = 'about:invalid#zClosurez';
/**
 * Marker flag for the {@code goog.string.TypedString} interface that this
 * type declares via {@code @implements}. goog.html.SafeUrl.sanitize reads
 * this flag on its argument before calling {@code getTypedStringValue}.
 * @override
 * @const
 */
goog.html.SafeUrl.prototype.implementsGoogStringTypedString = true;
/**
 * Returns this SafeUrl's value as a string. No run-time type check is
 * performed.
 *
 * IMPORTANT: In code where it is security relevant that an object's type is
 * indeed {@code SafeUrl}, call {@code goog.html.SafeUrl.unwrap} instead of
 * this method. If in doubt, assume that it is security relevant. In
 * particular, goog.html functions which return a goog.html type do not
 * guarantee that the returned instance is of the right type. For example:
 *
 * <pre>
 * var fakeSafeHtml = new String('fake');
 * fakeSafeHtml.__proto__ = goog.html.SafeHtml.prototype;
 * var newSafeHtml = goog.html.SafeHtml.htmlEscape(fakeSafeHtml);
 * // newSafeHtml is just an alias for fakeSafeHtml, it's passed through by
 * // goog.html.SafeHtml.htmlEscape() as fakeSafeHtml instanceof
 * // goog.html.SafeHtml.
 * </pre>
 *
 * IMPORTANT: The SafeUrl type contract only covers how browsers behave when
 * interpreting the value as a URL. The value MUST still be appropriately
 * escaped before it is embedded in an HTML document, and the required
 * escaping is context-sensitive (e.g. embedding a URL in a style property
 * within a style attribute needs different escaping than embedding it in a
 * href attribute).
 *
 * @see goog.html.SafeUrl#unwrap
 * @override
 */
goog.html.SafeUrl.prototype.getTypedStringValue = function() {
  var wrappedValue = this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
  return wrappedValue;
};
/**
 * Marker flag for the {@code goog.i18n.bidi.DirectionalString} interface that
 * this type declares via {@code @implements}.
 * @override
 * @const
 */
goog.html.SafeUrl.prototype.implementsGoogI18nBidiDirectionalString = true;
/**
 * Returns this URL's directionality. The result does not depend on the
 * wrapped value: it is always {@code LTR}.
 * @override
 */
goog.html.SafeUrl.prototype.getDirection = function() {
  return goog.i18n.bidi.Dir.LTR;
};
if (goog.DEBUG) {
  /**
   * Returns a debug representation of this value, of the form
   * {@code SafeUrl{<wrapped value>}}. Only installed when goog.DEBUG is set.
   *
   * This is not the wrapped string itself; to obtain the actual string value
   * wrapped in a SafeUrl, use {@code goog.html.SafeUrl.unwrap}.
   *
   * @see goog.html.SafeUrl#unwrap
   * @override
   */
  goog.html.SafeUrl.prototype.toString = function() {
    var wrappedValue = this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
    return 'SafeUrl{' + wrappedValue + '}';
  };
}
/**
 * Verifies at run time that the given object really is a SafeUrl and returns
 * the string it wraps.
 *
 * IMPORTANT: The SafeUrl type contract only covers how browsers behave when
 * interpreting the value as a URL. The value MUST still be appropriately
 * escaped before it is embedded in an HTML document, and the required
 * escaping is context-sensitive (e.g. embedding a URL in a style property
 * within a style attribute needs different escaping than embedding it in a
 * href attribute).
 *
 * The returned value does not necessarily equal the string the SafeUrl was
 * built from, since goog.html.SafeUrl.sanitize percent-encodes many
 * characters.
 *
 * @param {!goog.html.SafeUrl} safeUrl The object to extract from.
 * @return {string} The SafeUrl object's contained string, unless the run-time
 *     type check fails. In that case, {@code unwrap} returns the innocuous
 *     string 'type_error:SafeUrl', or, if assertions are enabled, throws
 *     {@code goog.asserts.AssertionError}.
 */
goog.html.SafeUrl.unwrap = function(safeUrl) {
  // Extra run-time type checking, as a safety net against security bugs in
  // application code that disables compile-time type checks. All three of the
  // following must hold:
  // 1. The object is an instance of the expected type.
  // 2. The object is not an instance of a subclass.
  // 3. The object carries the type marker of the expected type. "Faking" an
  //    object requires a reference to that marker, whose names are chosen to
  //    stand out in code reviews.
  var isGenuineSafeUrl = safeUrl instanceof goog.html.SafeUrl &&
      safeUrl.constructor === goog.html.SafeUrl &&
      safeUrl.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ ===
          goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_;

  if (!isGenuineSafeUrl) {
    goog.asserts.fail('expected object of type SafeUrl, got \'' +
        safeUrl + '\'');
    // Only reached when assertions are disabled.
    return 'type_error:SafeUrl';
  }

  return safeUrl.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
};
/**
 * Builds a SafeUrl from a compile-time constant string.
 *
 * Compile-time constant strings are inherently program-controlled and hence
 * trusted, so the unwrapped constant is wrapped as-is, without validation or
 * normalization.
 *
 * @param {!goog.string.Const} url A compile-time-constant string from which to
 *     create a SafeUrl.
 * @return {!goog.html.SafeUrl} A SafeUrl object initialized to {@code url}.
 */
goog.html.SafeUrl.fromConstant = function(url) {
  var constantValue = goog.string.Const.unwrap(url);
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      constantValue);
};
/**
 * A pattern that matches Blob types that can have SafeUrls created from
 * URL.createObjectURL(blob). Only matches image types, currently.
 *
 * The match is anchored at both ends and case-insensitive: the whole MIME
 * type string must be "image/" followed by one of bmp, gif, jpeg, jpg, png,
 * tiff or webp. Note that the constant is defined on the goog.html namespace
 * rather than on goog.html.SafeUrl.
 * @const
 * @private
 */
goog.html.SAFE_BLOB_TYPE_PATTERN_ =
/^image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)$/i;
/**
 * Builds a SafeUrl wrapping a blob URL for the given {@code blob}. The blob
 * URL is obtained from {@code goog.fs.url.createObjectUrl}, and only when the
 * blob's MIME type matches the pattern of known safe image types. For any
 * other MIME type no blob URL is created and the SafeUrl wraps
 * {@link #INNOCUOUS_STRING} instead.
 * @see http://www.w3.org/TR/FileAPI/#url
 * @param {!Blob} blob The blob to create a URL for; its {@code type} property
 *     is what gets checked.
 * @return {!goog.html.SafeUrl} The blob URL, or an innocuous string wrapped
 *     as a SafeUrl.
 */
goog.html.SafeUrl.fromBlob = function(blob) {
  var url;
  if (goog.html.SAFE_BLOB_TYPE_PATTERN_.test(blob.type)) {
    url = goog.fs.url.createObjectUrl(blob);
  } else {
    url = goog.html.SafeUrl.INNOCUOUS_STRING;
  }
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(url);
};
/**
 * A pattern that recognizes a commonly useful subset of URLs that satisfy
 * the SafeUrl contract.
 *
 * This regular expression matches a subset of URLs that will not cause script
 * execution if used in URL context within a HTML document. Specifically, it
 * matches (comment from here on and regex copied from Soy's
 * EscapingConventions) a string that starts with either:
 * (1) a protocol in a whitelist (http, https, mailto or ftp), or
 * (2) no protocol. A protocol must be followed by a colon. The below
 *     allows that by allowing colons only after one of the characters [/?#].
 *     A colon after a hash (#) must be in the fragment.
 *     Otherwise, a colon after a (?) must be in a query.
 *     Otherwise, a colon after a single solidus (/) must be in a path.
 *     Otherwise, a colon after a double solidus (//) must be in the authority
 *     (before port).
 *
 * The pattern also disallows &, which introduces HTML entities, before the
 * first of the characters in [/?#]. This disallows HTML entities used in the
 * protocol name, which should never happen, e.g. "h&#116;tp" for "http".
 * It also disallows HTML entities in the first path part of a relative path,
 * e.g. "foo&lt;bar/baz". Our existing escaping functions should not produce
 * that. More importantly, it disallows masking of a colon,
 * e.g. "javascript&#58;...".
 *
 * The match is case-insensitive and anchored only at the start of the string.
 *
 * @private
 * @const {!RegExp}
 */
goog.html.SAFE_URL_PATTERN_ =
/^(?:(?:https?|mailto|ftp):|[^&:/?#]*(?:[/?#]|$))/i;
/**
 * Creates a SafeUrl object from {@code url}. If {@code url} is already a
 * goog.html.SafeUrl it is returned unchanged. Otherwise its string value
 * (taken from {@code getTypedStringValue} for other TypedString
 * implementations, or from {@code String(url)}) is validated against a
 * pattern of commonly used safe URLs and then normalized.
 *
 * {@code url} may be a URL with the http, https, mailto or ftp scheme,
 * or a relative URL (i.e., a URL without a scheme; specifically, a
 * scheme-relative, absolute-path-relative, or path-relative URL).
 *
 * During normalization {@code url} is converted to UTF-8 and non-whitelisted
 * characters are percent-encoded. Whitelisted characters are '%' and, from
 * RFC 3986, unreserved characters and reserved characters, with the exception
 * of '\'', '(' and ')'. This ensures that the SafeUrl contains only
 * ASCII-printable characters and reduces the chance of security bugs were it
 * to be interpolated into a specific context without the necessary escaping.
 *
 * If {@code url} fails validation or does not UTF-16 decode correctly
 * (JavaScript strings are UTF-16 encoded), this function returns a SafeUrl
 * object containing an innocuous string, goog.html.SafeUrl.INNOCUOUS_STRING.
 *
 * @see http://url.spec.whatwg.org/#concept-relative-url
 * @param {string|!goog.string.TypedString} url The URL to validate.
 * @return {!goog.html.SafeUrl} The validated URL, wrapped as a SafeUrl.
 */
goog.html.SafeUrl.sanitize = function(url) {
  // Already carries the SafeUrl contract; pass it through untouched.
  if (url instanceof goog.html.SafeUrl) {
    return url;
  }

  var rawUrl = url.implementsGoogStringTypedString ?
      url.getTypedStringValue() : String(url);

  // Validate first, then normalize; anything that fails validation is
  // replaced wholesale by the innocuous URL.
  var sanitizedUrl = goog.html.SAFE_URL_PATTERN_.test(rawUrl) ?
      goog.html.SafeUrl.normalize_(rawUrl) :
      goog.html.SafeUrl.INNOCUOUS_STRING;

  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      sanitizedUrl);
};
/**
 * Normalizes {@code url} to the percent-encoded form of its UTF-8 encoding,
 * using a whitelist of characters that are left unencoded. The string is
 * first passed through {@code encodeURI}; that result is then adjusted using
 * the NORMALIZE_MATCHER_ pattern and the NORMALIZE_REPLACER_MAP_ lookup table.
 * @param {string} url The URL to normalize.
 * @return {string} The normalized URL, or
 *     goog.html.SafeUrl.INNOCUOUS_STRING if {@code encodeURI} throws.
 * @private
 */
goog.html.SafeUrl.normalize_ = function(url) {
  var encoded;
  try {
    encoded = encodeURI(url);
  } catch (e) {
    // Happens if url contains invalid surrogate sequences.
    return goog.html.SafeUrl.INNOCUOUS_STRING;
  }

  var replacements = goog.html.SafeUrl.NORMALIZE_REPLACER_MAP_;
  return encoded.replace(
      goog.html.SafeUrl.NORMALIZE_MATCHER_,
      function(token) {
        return replacements[token];
      });
};
/**
 * Matches (globally) the characters and sequences which need to be replaced
 * in the string generated by encodeURI. Each match is looked up in
 * NORMALIZE_REPLACER_MAP_ by the private normalization helper. Specifically:
 *
 * - '\'', '(' and ')' are not encoded by encodeURI. They are part of the
 *   reserved characters group in RFC 3986 but only appear in the obsolete
 *   mark production in Appendix D.2 of RFC 3986, so they can be encoded
 *   without changing semantics.
 * - '[' and ']' are encoded by encodeURI (as '%5B' and '%5D'), despite being
 *   reserved characters which can be used to represent IPv6 addresses. So
 *   they need to be decoded.
 * - '%' is encoded by encodeURI (as '%25'). However, encoding '%' characters
 *   that are already part of a valid percent-encoded sequence changes the
 *   semantics of a URL, and hence we need to preserve them. Note that this
 *   may allow non-encoded '%' characters to remain in the URL (i.e.,
 *   occurrences of '%' that are not part of a valid percent-encoded sequence,
 *   for example, 'ab%xy').
 *
 * @const {!RegExp}
 * @private
 */
goog.html.SafeUrl.NORMALIZE_MATCHER_ = /[()']|%5B|%5D|%25/g;
/**
 * Map of replacements to be done in the string generated by encodeURI. Keys
 * are exactly the strings NORMALIZE_MATCHER_ can match: '\'', '(' and ')' map
 * to their percent-encoded forms, while '%5B', '%5D' and '%25' map back to
 * the literal '[', ']' and '%'.
 * @const {!Object<string, string>}
 * @private
 */
goog.html.SafeUrl.NORMALIZE_REPLACER_MAP_ = {
'\'': '%27',
'(': '%28',
')': '%29',
'%5B': '[',
'%5D': ']',
'%25': '%'
};
/**
 * Type marker for the SafeUrl type, used to implement additional run-time
 * type checking. It is an empty object used purely for its identity: the
 * constructor stores a reference to it on every instance, and
 * goog.html.SafeUrl.unwrap compares that reference with {@code ===}.
 * @const
 * @private
 */
goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ = {};
/**
 * Package-internal utility method to create SafeUrl instances. It builds a
 * default instance and stores {@code url} in it unchanged: no validation or
 * normalization happens here, so callers are responsible for passing a value
 * that satisfies the SafeUrl contract.
 *
 * @param {string} url The string to initialize the SafeUrl object with.
 * @return {!goog.html.SafeUrl} The initialized SafeUrl object.
 * @package
 */
goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse = function(
    url) {
  var instance = new goog.html.SafeUrl();
  instance.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = url;
  return instance;
};