blob: ce701085855970b650b704b9224021144e63d0d9 [file] [log] [blame]
'use strict';
// Pick the `URL` constructor used for all parsing below: the global `URL` when
// the runtime defines one, otherwise the `URL` export of the core `url` module.
// TODO: Use the `URL` global when targeting Node.js 10
const URLParser = typeof URL === 'undefined' ? require('url').URL : URL;
/**
 * Tell whether `name` is matched by at least one entry of `filters`.
 *
 * A `RegExp` entry matches when its `test(name)` is truthy; any other entry
 * matches only when it is strictly equal to `name`.
 *
 * @param {*} name - Value to look for (a query key or a path component here).
 * @param {Array} filters - Entries to compare against.
 * @returns {boolean} `true` as soon as one entry matches, `false` otherwise.
 */
const testParameter = (name, filters) => {
	const matchesName = filter => {
		if (filter instanceof RegExp) {
			return filter.test(name);
		}

		return filter === name;
	};

	return filters.some(matchesName);
};
module.exports = (urlString, opts) => {
opts = Object.assign({
defaultProtocol: 'http:',
normalizeProtocol: true,
forceHttp: false,
forceHttps: false,
stripHash: true,
stripWWW: true,
removeQueryParameters: [/^utm_\w+/i],
removeTrailingSlash: true,
removeDirectoryIndex: false,
sortQueryParameters: true
}, opts);
// Backwards compatibility
if (Reflect.has(opts, 'normalizeHttps')) {
opts.forceHttp = opts.normalizeHttps;
}
if (Reflect.has(opts, 'normalizeHttp')) {
opts.forceHttps = opts.normalizeHttp;
}
if (Reflect.has(opts, 'stripFragment')) {
opts.stripHash = opts.stripFragment;
}
urlString = urlString.trim();
const hasRelativeProtocol = urlString.startsWith('//');
const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString);
// Prepend protocol
if (!isRelativeUrl) {
urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, opts.defaultProtocol);
}
const urlObj = new URLParser(urlString);
if (opts.forceHttp && opts.forceHttps) {
throw new Error('The `forceHttp` and `forceHttps` options cannot be used together');
}
if (opts.forceHttp && urlObj.protocol === 'https:') {
urlObj.protocol = 'http:';
}
if (opts.forceHttps && urlObj.protocol === 'http:') {
urlObj.protocol = 'https:';
}
// Remove hash
if (opts.stripHash) {
urlObj.hash = '';
}
// Remove duplicate slashes if not preceded by a protocol
if (urlObj.pathname) {
// TODO: Use the following instead when targeting Node.js 10
// `urlObj.pathname = urlObj.pathname.replace(/(?<!https?:)\/{2,}/g, '/');`
urlObj.pathname = urlObj.pathname.replace(/((?![https?:]).)\/{2,}/g, (_, p1) => {
if (/^(?!\/)/g.test(p1)) {
return `${p1}/`;
}
return '/';
});
}
// Decode URI octets
if (urlObj.pathname) {
urlObj.pathname = decodeURI(urlObj.pathname);
}
// Remove directory index
if (opts.removeDirectoryIndex === true) {
opts.removeDirectoryIndex = [/^index\.[a-z]+$/];
}
if (Array.isArray(opts.removeDirectoryIndex) && opts.removeDirectoryIndex.length > 0) {
let pathComponents = urlObj.pathname.split('/');
const lastComponent = pathComponents[pathComponents.length - 1];
if (testParameter(lastComponent, opts.removeDirectoryIndex)) {
pathComponents = pathComponents.slice(0, pathComponents.length - 1);
urlObj.pathname = pathComponents.slice(1).join('/') + '/';
}
}
if (urlObj.hostname) {
// Remove trailing dot
urlObj.hostname = urlObj.hostname.replace(/\.$/, '');
// Remove `www.`
// eslint-disable-next-line no-useless-escape
if (opts.stripWWW && /^www\.([a-z\-\d]{2,63})\.([a-z\.]{2,5})$/.test(urlObj.hostname)) {
// Each label should be max 63 at length (min: 2).
// The extension should be max 5 at length (min: 2).
// Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
}
}
// Remove query unwanted parameters
if (Array.isArray(opts.removeQueryParameters)) {
for (const key of [...urlObj.searchParams.keys()]) {
if (testParameter(key, opts.removeQueryParameters)) {
urlObj.searchParams.delete(key);
}
}
}
// Sort query parameters
if (opts.sortQueryParameters) {
urlObj.searchParams.sort();
}
// Take advantage of many of the Node `url` normalizations
urlString = urlObj.toString();
// Remove ending `/`
if (opts.removeTrailingSlash || urlObj.pathname === '/') {
urlString = urlString.replace(/\/$/, '');
}
// Restore relative protocol, if applicable
if (hasRelativeProtocol && !opts.normalizeProtocol) {
urlString = urlString.replace(/^http:\/\//, '//');
}
return urlString;
};