# frozen_string_literal: true

# = Public Suffix
#
# Domain name parser based on the Public Suffix List.
#
# Copyright (c) 2009-2019 Simone Carletti <weppos@weppos.net>

require_relative "public_suffix/domain"
|
|
require_relative "public_suffix/version"
|
|
require_relative "public_suffix/errors"
|
|
require_relative "public_suffix/rule"
|
|
require_relative "public_suffix/list"
|
|
|
|
# PublicSuffix is a Ruby domain name parser based on the Public Suffix List.
#
# The [Public Suffix List](https://publicsuffix.org) is a cross-vendor initiative
# to provide an accurate list of domain name suffixes.
#
# The Public Suffix List is an initiative of the Mozilla Project,
# but is maintained as a community resource. It is available for use in any software,
# but was originally created to meet the needs of browser manufacturers.
module PublicSuffix

  DOT = "."
  BANG = "!"
  STAR = "*"

  # Parses +name+ and returns the {PublicSuffix::Domain} instance.
  #
  # @example Parse a valid domain
  #   PublicSuffix.parse("google.com")
  #   # => #<PublicSuffix::Domain:0x007fec2e51e588 @sld="google", @tld="com", @trd=nil>
  #
  # @example Parse a valid subdomain
  #   PublicSuffix.parse("www.google.com")
  #   # => #<PublicSuffix::Domain:0x007fec276d4cf8 @sld="google", @tld="com", @trd="www">
  #
  # @example Parse a fully qualified domain
  #   PublicSuffix.parse("google.com.")
  #   # => #<PublicSuffix::Domain:0x007fec257caf38 @sld="google", @tld="com", @trd=nil>
  #
  # @example Parse a fully qualified domain (subdomain)
  #   PublicSuffix.parse("www.google.com.")
  #   # => #<PublicSuffix::Domain:0x007fec27b6bca8 @sld="google", @tld="com", @trd="www">
  #
  # @example Parse an invalid (unlisted) domain
  #   PublicSuffix.parse("x.yz")
  #   # => #<PublicSuffix::Domain:0x007fec2f49bec0 @sld="x", @tld="yz", @trd=nil>
  #
  # @example Parse an invalid (unlisted) domain with strict checking (without applying the default * rule)
  #   PublicSuffix.parse("x.yz", default_rule: nil)
  #   # => PublicSuffix::DomainInvalid: `x.yz` is not a valid domain
  #
  # @example Parse an URL (not supported, only domains)
  #   PublicSuffix.parse("http://www.google.com")
  #   # => PublicSuffix::DomainInvalid: http://www.google.com is not expected to contain a scheme
  #
  #
  # @param [String, #to_s] name The domain name or fully qualified domain name to parse.
  # @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
  # @param [Object, nil] default_rule The fallback handed to +list.find+ as +default:+;
  #   defaults to +list.default_rule+. Pass +nil+ for strict checking (see the examples above).
  # @param [Boolean] ignore_private Forwarded unchanged to +list.find+.
  # @return [PublicSuffix::Domain]
  #
  # @raise [PublicSuffix::DomainInvalid]
  #   If +name+ is not a valid domain.
  # @raise [PublicSuffix::DomainNotAllowed]
  #   If a rule for +name+ is found, but the rule doesn't allow +name+.
  def self.parse(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
    what = normalize(name)
    # normalize reports a failure by returning (not raising) a DomainInvalid instance.
    raise what if what.is_a?(DomainInvalid)

    rule = list.find(what, default: default_rule, ignore_private: ignore_private)

    # rubocop:disable Style/IfUnlessModifier
    if rule.nil?
      raise DomainInvalid, "`#{what}` is not a valid domain"
    end
    if rule.decompose(what).last.nil?
      raise DomainNotAllowed, "`#{what}` is not allowed according to Registry policy"
    end

    # rubocop:enable Style/IfUnlessModifier

    decompose(what, rule)
  end

  # Checks whether +name+ is assigned and allowed, without actually parsing it.
  #
  # This method doesn't care whether +name+ is a domain or subdomain.
  # Unless a +list+ is given, the validation is performed using the default {PublicSuffix::List}.
  #
  # @example Validate a valid domain
  #   PublicSuffix.valid?("example.com")
  #   # => true
  #
  # @example Validate a valid subdomain
  #   PublicSuffix.valid?("www.example.com")
  #   # => true
  #
  # @example Validate a not-listed domain
  #   PublicSuffix.valid?("example.tldnotlisted")
  #   # => true
  #
  # @example Validate a not-listed domain with strict checking (without applying the default * rule)
  #   PublicSuffix.valid?("example.tldnotlisted")
  #   # => true
  #   PublicSuffix.valid?("example.tldnotlisted", default_rule: nil)
  #   # => false
  #
  # @example Validate a fully qualified domain
  #   PublicSuffix.valid?("google.com.")
  #   # => true
  #   PublicSuffix.valid?("www.google.com.")
  #   # => true
  #
  # @example Check an URL (which is not a valid domain)
  #   PublicSuffix.valid?("http://www.example.com")
  #   # => false
  #
  #
  # @param [String, #to_s] name The domain name or fully qualified domain name to validate.
  # @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
  # @param [Object, nil] default_rule The fallback handed to +list.find+ as +default:+;
  #   defaults to +list.default_rule+. Pass +nil+ for strict checking (see the examples above).
  # @param [Boolean] ignore_private Forwarded unchanged to +list.find+.
  # @return [Boolean]
  def self.valid?(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
    what = normalize(name)
    return false if what.is_a?(DomainInvalid)

    rule = list.find(what, default: default_rule, ignore_private: ignore_private)

    # Same two conditions that make .parse raise: no rule found, or the rule
    # decomposes the name without a right-hand part.
    !rule.nil? && !rule.decompose(what).last.nil?
  end

  # Attempts to parse the name and returns the domain, if valid.
  #
  # This method doesn't raise. Instead, it returns nil if the domain is not valid for whatever reason.
  #
  # @param [String, #to_s] name The domain name or fully qualified domain name to parse.
  # @param [Hash] options Keyword options forwarded unchanged to {PublicSuffix.parse}
  #   (+list+, +default_rule+, +ignore_private+).
  # @return [String, nil] The value of +#domain+ on the parsed result,
  #   or +nil+ when parsing raises a {PublicSuffix::Error}.
  def self.domain(name, **options)
    parse(name, **options).domain
  rescue PublicSuffix::Error
    nil
  end

  # private
  #
  # NOTE: the helpers below are private by convention only. They are defined
  # with `def self.` and no visibility modifier, so they remain callable as
  # PublicSuffix.decompose / PublicSuffix.normalize.

  # Splits +name+ according to +rule+ and wraps the pieces in a {PublicSuffix::Domain}.
  #
  # +rule.decompose(name)+ is expected to return a +[left, right]+ pair where
  # +left+ is a String; {PublicSuffix.parse} verifies that the pair's last
  # element is not nil before calling this method.
  #
  # @param [String] name The normalized name.
  # @param [#decompose] rule The rule used to split the name.
  # @return [PublicSuffix::Domain]
  def self.decompose(name, rule)
    left, right = rule.decompose(name)

    parts = left.split(DOT)
    # If we have 0 parts left, there is just a tld and no domain or subdomain.
    # If we have 1 part left, there is a tld and a domain, but no subdomain.
    # If we have 2 or more parts left, the last part is the domain and
    # the other parts (combined) are the subdomain.
    tld = right
    sld = parts.empty? ? nil : parts.pop
    trd = parts.empty? ? nil : parts.join(DOT)

    Domain.new(tld, sld, trd)
  end

  # Pretend we know how to deal with user input.
  #
  # Works on a copy of +name.to_s+: surrounding whitespace is stripped, one
  # trailing dot is removed and the result is lowercased.
  #
  # @param [String, #to_s] name The raw user input.
  # @return [String] The normalized name.
  # @return [PublicSuffix::DomainInvalid] An error instance (returned, not raised)
  #   when the normalized name is blank, starts with a dot or contains "://".
  def self.normalize(name)
    name = name.to_s.dup
    name.strip!
    name.chomp!(DOT)
    name.downcase!

    return DomainInvalid.new("Name is blank") if name.empty?
    return DomainInvalid.new("Name starts with a dot") if name.start_with?(DOT)
    return DomainInvalid.new(format("%s is not expected to contain a scheme", name)) if name.include?("://")

    name
  end

end
|