blob: 7b79dd3b8f7fb087e0ed149ff30687cbebbf5081 [file] [log] [blame]
# = Public Suffix
#
# Domain name parser based on the Public Suffix List.
#
# Copyright (c) 2009-2017 Simone Carletti <weppos@weppos.net>
require_relative "public_suffix/domain"
require_relative "public_suffix/version"
require_relative "public_suffix/errors"
require_relative "public_suffix/rule"
require_relative "public_suffix/list"
# PublicSuffix is a Ruby domain name parser based on the Public Suffix List.
#
# The [Public Suffix List](https://publicsuffix.org) is a cross-vendor initiative
# to provide an accurate list of domain name suffixes.
#
# The Public Suffix List is an initiative of the Mozilla Project,
# but is maintained as a community resource. It is available for use in any software,
# but was originally created to meet the needs of browser manufacturers.
module PublicSuffix
DOT = ".".freeze
BANG = "!".freeze
STAR = "*".freeze
# Parses +name+ and returns the {PublicSuffix::Domain} instance.
#
# @example Parse a valid domain
# PublicSuffix.parse("google.com")
# # => #<PublicSuffix::Domain:0x007fec2e51e588 @sld="google", @tld="com", @trd=nil>
#
# @example Parse a valid subdomain
# PublicSuffix.parse("www.google.com")
# # => #<PublicSuffix::Domain:0x007fec276d4cf8 @sld="google", @tld="com", @trd="www">
#
# @example Parse a fully qualified domain
# PublicSuffix.parse("google.com.")
# # => #<PublicSuffix::Domain:0x007fec257caf38 @sld="google", @tld="com", @trd=nil>
#
# @example Parse a fully qualified domain (subdomain)
# PublicSuffix.parse("www.google.com.")
# # => #<PublicSuffix::Domain:0x007fec27b6bca8 @sld="google", @tld="com", @trd="www">
#
# @example Parse an invalid (unlisted) domain
# PublicSuffix.parse("x.yz")
# # => #<PublicSuffix::Domain:0x007fec2f49bec0 @sld="x", @tld="yz", @trd=nil>
#
# @example Parse an invalid (unlisted) domain with strict checking (without applying the default * rule)
# PublicSuffix.parse("x.yz", default_rule: nil)
# # => PublicSuffix::DomainInvalid: `x.yz` is not a valid domain
#
# @example Parse an URL (not supported, only domains)
# PublicSuffix.parse("http://www.google.com")
# # => PublicSuffix::DomainInvalid: http://www.google.com is not expected to contain a scheme
#
#
# @param [String, #to_s] name The domain name or fully qualified domain name to parse.
# @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
# @param [Boolean] ignore_private
# @return [PublicSuffix::Domain]
#
# @raise [PublicSuffix::DomainInvalid]
# If domain is not a valid domain.
# @raise [PublicSuffix::DomainNotAllowed]
# If a rule for +domain+ is found, but the rule doesn't allow +domain+.
def self.parse(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
what = normalize(name)
raise what if what.is_a?(DomainInvalid)
rule = list.find(what, default: default_rule, ignore_private: ignore_private)
# rubocop:disable Style/IfUnlessModifier
if rule.nil?
raise DomainInvalid, "`#{what}` is not a valid domain"
end
if rule.decompose(what).last.nil?
raise DomainNotAllowed, "`#{what}` is not allowed according to Registry policy"
end
# rubocop:enable Style/IfUnlessModifier
decompose(what, rule)
end
# Checks whether +domain+ is assigned and allowed, without actually parsing it.
#
# This method doesn't care whether domain is a domain or subdomain.
# The validation is performed using the default {PublicSuffix::List}.
#
# @example Validate a valid domain
# PublicSuffix.valid?("example.com")
# # => true
#
# @example Validate a valid subdomain
# PublicSuffix.valid?("www.example.com")
# # => true
#
# @example Validate a not-listed domain
# PublicSuffix.valid?("example.tldnotlisted")
# # => true
#
# @example Validate a not-listed domain with strict checking (without applying the default * rule)
# PublicSuffix.valid?("example.tldnotlisted")
# # => true
# PublicSuffix.valid?("example.tldnotlisted", default_rule: nil)
# # => false
#
# @example Validate a fully qualified domain
# PublicSuffix.valid?("google.com.")
# # => true
# PublicSuffix.valid?("www.google.com.")
# # => true
#
# @example Check an URL (which is not a valid domain)
# PublicSuffix.valid?("http://www.example.com")
# # => false
#
#
# @param [String, #to_s] name The domain name or fully qualified domain name to validate.
# @param [Boolean] ignore_private
# @return [Boolean]
def self.valid?(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
what = normalize(name)
return false if what.is_a?(DomainInvalid)
rule = list.find(what, default: default_rule, ignore_private: ignore_private)
!rule.nil? && !rule.decompose(what).last.nil?
end
# Attempt to parse the name and returns the domain, if valid.
#
# This method doesn't raise. Instead, it returns nil if the domain is not valid for whatever reason.
#
# @param [String, #to_s] name The domain name or fully qualified domain name to parse.
# @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
# @param [Boolean] ignore_private
# @return [String]
def self.domain(name, **options)
parse(name, **options).domain
rescue PublicSuffix::Error
nil
end
# private
def self.decompose(name, rule)
left, right = rule.decompose(name)
parts = left.split(DOT)
# If we have 0 parts left, there is just a tld and no domain or subdomain
# If we have 1 part left, there is just a tld, domain and not subdomain
# If we have 2 parts left, the last part is the domain, the other parts (combined) are the subdomain
tld = right
sld = parts.empty? ? nil : parts.pop
trd = parts.empty? ? nil : parts.join(DOT)
Domain.new(tld, sld, trd)
end
# Pretend we know how to deal with user input.
def self.normalize(name)
name = name.to_s.dup
name.strip!
name.chomp!(DOT)
name.downcase!
return DomainInvalid.new("Name is blank") if name.empty?
return DomainInvalid.new("Name starts with a dot") if name.start_with?(DOT)
return DomainInvalid.new("%s is not expected to contain a scheme" % name) if name.include?("://")
name
end
end