blob: f4aa181467e0882f2289fb2f639beaa22497e810 [file] [log] [blame]
'use strict';
var url = require('url');
var punycode = require('punycode');
var queryString = require('query-string');
var prependHttp = require('prepend-http');
var sortKeys = require('sort-keys');
var objectAssign = require('object-assign');
var DEFAULT_PORTS = {
'http:': 80,
'https:': 443,
'ftp:': 21
};
// protocols that always contain a `//`` bit
var slashedProtocol = {
'http': true,
'https': true,
'ftp': true,
'gopher': true,
'file': true,
'http:': true,
'https:': true,
'ftp:': true,
'gopher:': true,
'file:': true
};
function testParameter(name, filters) {
return filters.some(function (filter) {
return filter instanceof RegExp ? filter.test(name) : filter === name;
});
}
module.exports = function (str, opts) {
opts = objectAssign({
normalizeProtocol: true,
normalizeHttps: false,
stripFragment: true,
stripWWW: true,
removeQueryParameters: [/^utm_\w+/i],
removeTrailingSlash: true,
removeDirectoryIndex: false
}, opts);
if (typeof str !== 'string') {
throw new TypeError('Expected a string');
}
var hasRelativeProtocol = str.indexOf('//') === 0;
// prepend protocol
str = prependHttp(str.trim()).replace(/^\/\//, 'http://');
var urlObj = url.parse(str);
if (opts.normalizeHttps && urlObj.protocol === 'https:') {
urlObj.protocol = 'http:';
}
if (!urlObj.hostname && !urlObj.pathname) {
throw new Error('Invalid URL');
}
// prevent these from being used by `url.format`
delete urlObj.host;
delete urlObj.query;
// remove fragment
if (opts.stripFragment) {
delete urlObj.hash;
}
// remove default port
var port = DEFAULT_PORTS[urlObj.protocol];
if (Number(urlObj.port) === port) {
delete urlObj.port;
}
// remove duplicate slashes
if (urlObj.pathname) {
urlObj.pathname = urlObj.pathname.replace(/\/{2,}/g, '/');
}
// decode URI octets
if (urlObj.pathname) {
urlObj.pathname = decodeURI(urlObj.pathname);
}
// remove directory index
if (opts.removeDirectoryIndex === true) {
opts.removeDirectoryIndex = [/^index\.[a-z]+$/];
}
if (Array.isArray(opts.removeDirectoryIndex) && opts.removeDirectoryIndex.length) {
var pathComponents = urlObj.pathname.split('/');
var lastComponent = pathComponents[pathComponents.length - 1];
if (testParameter(lastComponent, opts.removeDirectoryIndex)) {
pathComponents = pathComponents.slice(0, pathComponents.length - 1);
urlObj.pathname = pathComponents.slice(1).join('/') + '/';
}
}
// resolve relative paths, but only for slashed protocols
if (slashedProtocol[urlObj.protocol]) {
var domain = urlObj.protocol + '//' + urlObj.hostname;
var relative = url.resolve(domain, urlObj.pathname);
urlObj.pathname = relative.replace(domain, '');
}
if (urlObj.hostname) {
// IDN to Unicode
urlObj.hostname = punycode.toUnicode(urlObj.hostname).toLowerCase();
// remove trailing dot
urlObj.hostname = urlObj.hostname.replace(/\.$/, '');
// remove `www.`
if (opts.stripWWW) {
urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
}
}
// remove URL with empty query string
if (urlObj.search === '?') {
delete urlObj.search;
}
var queryParameters = queryString.parse(urlObj.search);
// remove query unwanted parameters
if (Array.isArray(opts.removeQueryParameters)) {
for (var key in queryParameters) {
if (testParameter(key, opts.removeQueryParameters)) {
delete queryParameters[key];
}
}
}
// sort query parameters
urlObj.search = queryString.stringify(sortKeys(queryParameters));
// decode query parameters
urlObj.search = decodeURIComponent(urlObj.search);
// take advantage of many of the Node `url` normalizations
str = url.format(urlObj);
// remove ending `/`
if (opts.removeTrailingSlash || urlObj.pathname === '/') {
str = str.replace(/\/$/, '');
}
// restore relative protocol, if applicable
if (hasRelativeProtocol && !opts.normalizeProtocol) {
str = str.replace(/^http:\/\//, '//');
}
return str;
};