LibURL: Correct logic for domains not matched by PSL in public_suffix

For the abstract operation (AO) defined in the URL specification, when
the domain does not match any rule in the PSL, we should return the
TLD (the host's last label). This fixes a crash in a number of WPT
tests that use the Document.domain setter when the tests are served
locally by WPT.

We should be doing similar logic in registrable_domain, but that
unfortunately runs into some other issues, so just leave a FIXME
for now.
This commit is contained in:
Shannon Booth 2025-06-28 22:19:33 +12:00 committed by Tim Ledbetter
parent a2b523eeb8
commit b49b1b35e4
2 changed files with 60 additions and 7 deletions

View File

@ -192,13 +192,19 @@ Optional<String> Host::public_suffix() const
auto trailing_dot = host_string.ends_with('.') ? "."sv : ""sv;
// 3. Let publicSuffix be the public suffix determined by running the Public Suffix List algorithm with host as domain. [PSL]
// NOTE: The spec algorithm for the public suffix returns "*" by default, but get_public_suffix() returns an empty Optional.
// Remove the `value_or()` if and when we update it.
auto public_suffix = PublicSuffixData::the()->get_public_suffix(host_string).value_or("*"_string);
// FIXME: Unify this logic with registrable domain.
auto public_suffix = PublicSuffixData::the()->get_public_suffix(host_string);
if (!public_suffix.has_value()) {
auto last_dot = host_string.bytes_as_string_view().find_last('.');
if (last_dot.has_value())
public_suffix = MUST(host_string.substring_from_byte_offset(last_dot.value() + 1));
else
public_suffix = host_string;
}
// 4. Assert: publicSuffix is an ASCII string that does not end with ".".
VERIFY(public_suffix.is_ascii());
VERIFY(!public_suffix.ends_with('.'));
VERIFY(public_suffix->is_ascii());
VERIFY(!public_suffix->ends_with('.'));
// 5. Return publicSuffix and trailingDot concatenated.
return MUST(String::formatted("{}{}", public_suffix, trailing_dot));
@ -219,8 +225,7 @@ Optional<String> Host::registrable_domain() const
auto trailing_dot = host_string.ends_with('.') ? "."sv : ""sv;
// 3. Let registrableDomain be the registrable domain determined by running the Public Suffix List algorithm with host as domain. [PSL]
// NOTE: The spec algorithm for the public suffix returns "*" by default, but get_public_suffix() returns an empty Optional.
// Remove the `value_or()` if and when we update it.
// FIXME: This is not correct, we should be doing the same as public_suffix() above.
auto registrable_domain = get_registrable_domain(host_string).value_or("*"_string);
// 4. Assert: registrableDomain is an ASCII string that does not end with ".".

View File

@ -641,3 +641,51 @@ TEST_CASE(get_registrable_domain)
EXPECT_EQ(*domain, "ladybird.github.io"sv);
}
}
// Checks the value returned by public_suffix() for a range of parsed hosts.
TEST_CASE(public_suffix)
{
    // Parses `input` as a host and returns that host's public suffix.
    auto public_suffix_of = [](StringView input) {
        auto host = URL::Parser::parse_host(input);
        return host->public_suffix();
    };

    // Hosts ending in "com", with zero, one and two leading labels.
    EXPECT_EQ(public_suffix_of("com"sv), "com"sv);
    EXPECT_EQ(public_suffix_of("example.com"sv), "com"sv);
    EXPECT_EQ(public_suffix_of("www.example.com"sv), "com"sv);

    // Upper-case input is expected to yield a lower-case suffix.
    EXPECT_EQ(public_suffix_of("EXAMPLE.COM"sv), "com"sv);

    // A trailing dot on the input is expected to be kept on the result.
    EXPECT_EQ(public_suffix_of("www.example.com."sv), "com."sv);

    // A multi-label suffix, both on its own and with a leading label.
    EXPECT_EQ(public_suffix_of("github.io"sv), "github.io"sv);
    EXPECT_EQ(public_suffix_of("whatwg.github.io"sv), "github.io"sv);

    // Non-ASCII input; the suffix is expected in its "xn--" ASCII form.
    EXPECT_EQ(public_suffix_of("إختبار"sv), "xn--kgbechtv"sv);
    EXPECT_EQ(public_suffix_of("example.إختبار"sv), "xn--kgbechtv"sv);
    EXPECT_EQ(public_suffix_of("sub.example.إختبار"sv), "xn--kgbechtv"sv);

    // A bracketed IPv6 literal is expected to have no public suffix at all.
    EXPECT_EQ(public_suffix_of("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]"sv), OptionalNone {});
}