mirror of
https://github.com/zebrajr/ladybird.git
synced 2025-12-06 00:19:53 +01:00
AK+LibJS+LibWeb: Recognize that our UTF-16 string is actually WTF-16
For the web, we allow a wobbly UTF-16 encoding (i.e. lonely surrogates are permitted). Only in a few exceptional cases do we strictly require valid UTF-16. As such, our `validate(AllowLonelySurrogates::Yes)` calls will always succeed. It's a wasted effort to ever make such a check. This patch eliminates such invocations. The validation methods will now only check for strict UTF-16, and are only invoked when needed.
This commit is contained in:
parent
36c7302178
commit
8472e469f4
|
|
@ -323,12 +323,6 @@ Utf16String StringBuilder::to_utf16_string()
|
|||
return Utf16String::from_string_builder({}, *this);
|
||||
}
|
||||
|
||||
Utf16String StringBuilder::to_utf16_string_without_validation()
|
||||
{
|
||||
VERIFY(m_mode == Mode::UTF16);
|
||||
return Utf16String::from_string_builder_without_validation({}, *this);
|
||||
}
|
||||
|
||||
u8* StringBuilder::data()
|
||||
{
|
||||
return m_buffer.data() + string_builder_prefix_size(m_mode);
|
||||
|
|
|
|||
|
|
@ -85,7 +85,6 @@ public:
|
|||
ErrorOr<FlyString> to_fly_string() const;
|
||||
|
||||
Utf16String to_utf16_string();
|
||||
Utf16String to_utf16_string_without_validation();
|
||||
|
||||
[[nodiscard]] ErrorOr<ByteBuffer> to_byte_buffer() const;
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ Optional<Utf16FlyString> Utf16FlyString::create_fly_string_from_cache(ViewType c
|
|||
return Utf16String::from_utf8_without_validation(string);
|
||||
} else {
|
||||
if (string.length_in_code_units() <= Detail::MAX_SHORT_STRING_BYTE_COUNT && string.is_ascii())
|
||||
return Utf16String::from_utf16_without_validation(string);
|
||||
return Utf16String::from_utf16(string);
|
||||
}
|
||||
|
||||
if (auto it = all_utf16_fly_strings().find(string.hash(), [&](auto const& entry) { return *entry == string; }); it != all_utf16_fly_strings().end())
|
||||
|
|
@ -71,13 +71,6 @@ Utf16FlyString Utf16FlyString::from_utf16(Utf16View const& string)
|
|||
return Utf16String::from_utf16(string);
|
||||
}
|
||||
|
||||
Utf16FlyString Utf16FlyString::from_utf16_without_validation(Utf16View const& string)
|
||||
{
|
||||
if (auto result = create_fly_string_from_cache(string); result.has_value())
|
||||
return result.release_value();
|
||||
return Utf16String::from_utf16_without_validation(string);
|
||||
}
|
||||
|
||||
Utf16FlyString::Utf16FlyString(Utf16String const& string)
|
||||
{
|
||||
if (string.has_short_ascii_storage()) {
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ public:
|
|||
static Utf16FlyString from_utf8_but_should_be_ported_to_utf16(StringView string) { return from_utf8_without_validation(string); }
|
||||
|
||||
static Utf16FlyString from_utf16(Utf16View const&);
|
||||
static Utf16FlyString from_utf16_without_validation(Utf16View const&);
|
||||
|
||||
template<typename T>
|
||||
requires(IsOneOf<RemoveCVReference<T>, Utf16String, Utf16FlyString>)
|
||||
|
|
@ -193,8 +192,5 @@ inline constexpr bool IsHashCompatible<Utf16FlyString, Utf16String> = true;
|
|||
|
||||
[[nodiscard]] ALWAYS_INLINE AK::Utf16FlyString operator""_utf16_fly_string(char16_t const* string, size_t length)
|
||||
{
|
||||
AK::Utf16View view { string, length };
|
||||
|
||||
ASSERT(view.validate());
|
||||
return AK::Utf16FlyString::from_utf16_without_validation(view);
|
||||
return AK::Utf16FlyString::from_utf16({ string, length });
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ Utf16String Utf16String::from_utf8_with_replacement_character(StringView utf8_st
|
|||
builder.append_code_point(code_point);
|
||||
}
|
||||
|
||||
return builder.to_utf16_string_without_validation();
|
||||
return builder.to_utf16_string();
|
||||
}
|
||||
|
||||
Utf16String Utf16String::from_utf8_without_validation(StringView utf8_string)
|
||||
|
|
@ -51,7 +51,7 @@ Utf16String Utf16String::from_utf8_without_validation(StringView utf8_string)
|
|||
return Utf16String { Detail::Utf16StringData::from_utf8(utf8_string, Detail::Utf16StringData::AllowASCIIStorage::Yes) };
|
||||
}
|
||||
|
||||
Utf16String Utf16String::from_utf16_without_validation(Utf16View const& utf16_string)
|
||||
Utf16String Utf16String::from_utf16(Utf16View const& utf16_string)
|
||||
{
|
||||
if (utf16_string.length_in_code_units() <= Detail::MAX_SHORT_STRING_BYTE_COUNT && utf16_string.is_ascii()) {
|
||||
Utf16String string;
|
||||
|
|
@ -86,7 +86,7 @@ Utf16String Utf16String::from_utf32(Utf32View const& utf32_string)
|
|||
return Utf16String { Detail::Utf16StringData::from_utf32(utf32_string) };
|
||||
}
|
||||
|
||||
Utf16String Utf16String::from_string_builder_without_validation(StringBuilder& builder)
|
||||
Utf16String Utf16String::from_string_builder(Badge<StringBuilder>, StringBuilder& builder)
|
||||
{
|
||||
auto view = builder.utf16_string_view();
|
||||
|
||||
|
|
@ -147,14 +147,14 @@ Utf16String Utf16String::repeated(u32 code_point, size_t count)
|
|||
|
||||
Utf16String Utf16String::to_well_formed() const
|
||||
{
|
||||
if (utf16_view().validate(AllowLonelySurrogates::No))
|
||||
if (utf16_view().validate())
|
||||
return *this;
|
||||
return Utf16String { Detail::Utf16StringData::to_well_formed(*this) };
|
||||
}
|
||||
|
||||
String Utf16String::to_well_formed_utf8() const
|
||||
{
|
||||
if (utf16_view().validate(AllowLonelySurrogates::No))
|
||||
if (utf16_view().validate())
|
||||
return to_utf8(AllowLonelySurrogates::No);
|
||||
return to_well_formed().to_utf8(AllowLonelySurrogates::No);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,34 +63,15 @@ public:
|
|||
return from_utf8_without_validation(utf8_string);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static Utf16String from_utf16(Utf16View const& utf16_string)
|
||||
{
|
||||
VERIFY(utf16_string.validate());
|
||||
return from_utf16_without_validation(utf16_string);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static ErrorOr<Utf16String> try_from_utf16(Utf16View const& utf16_string)
|
||||
{
|
||||
if (!utf16_string.validate())
|
||||
return Error::from_string_literal("Input was not valid UTF-16");
|
||||
return from_utf16_without_validation(utf16_string);
|
||||
}
|
||||
|
||||
static Utf16String from_utf8_without_validation(StringView);
|
||||
static Utf16String from_utf16_without_validation(Utf16View const&);
|
||||
static Utf16String from_utf32(Utf32View const&);
|
||||
|
||||
static Utf16String from_utf16(Utf16View const& utf16_string);
|
||||
|
||||
template<typename T>
|
||||
requires(IsOneOf<RemoveCVReference<T>, Utf16String, Utf16FlyString>)
|
||||
static Utf16String from_utf16(T&&) = delete;
|
||||
|
||||
template<typename T>
|
||||
requires(IsOneOf<RemoveCVReference<T>, Utf16String, Utf16FlyString>)
|
||||
static ErrorOr<Utf16String> try_from_utf16(T&&) = delete;
|
||||
|
||||
template<typename T>
|
||||
requires(IsOneOf<RemoveCVReference<T>, Utf16String, Utf16FlyString>)
|
||||
static Utf16String from_utf16_without_validation(T&&) = delete;
|
||||
static Utf16String from_utf32(Utf32View const&);
|
||||
|
||||
ALWAYS_INLINE static Utf16String from_code_point(u32 code_point)
|
||||
{
|
||||
|
|
@ -101,7 +82,7 @@ public:
|
|||
code_units[length_in_code_units++] = code_unit;
|
||||
});
|
||||
|
||||
return from_utf16_without_validation({ code_units.data(), length_in_code_units });
|
||||
return from_utf16({ code_units.data(), length_in_code_units });
|
||||
}
|
||||
|
||||
template<typename... Parameters>
|
||||
|
|
@ -132,19 +113,6 @@ public:
|
|||
|
||||
static Utf16String repeated(u32 code_point, size_t count);
|
||||
|
||||
ALWAYS_INLINE static Utf16String from_string_builder(Badge<StringBuilder>, StringBuilder& builder)
|
||||
{
|
||||
VERIFY(builder.utf16_string_view().validate());
|
||||
return from_string_builder_without_validation(builder);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static Utf16String from_string_builder_without_validation(Badge<StringBuilder>, StringBuilder& builder)
|
||||
{
|
||||
return from_string_builder_without_validation(builder);
|
||||
}
|
||||
|
||||
static ErrorOr<Utf16String> from_ipc_stream(Stream&, size_t length_in_code_units, bool is_ascii);
|
||||
|
||||
Utf16String to_well_formed() const;
|
||||
String to_well_formed_utf8() const;
|
||||
|
||||
|
|
@ -223,7 +191,7 @@ public:
|
|||
if (!needs_trimming)
|
||||
return *this;
|
||||
|
||||
return Utf16String::from_utf16_without_validation(utf16_view().trim(code_units, mode));
|
||||
return Utf16String::from_utf16(utf16_view().trim(code_units, mode));
|
||||
}
|
||||
|
||||
ALWAYS_INLINE Utf16String trim_ascii_whitespace(TrimMode mode = TrimMode::Both) const
|
||||
|
|
@ -233,13 +201,14 @@ public:
|
|||
|
||||
ALWAYS_INLINE Utf16String escape_html_entities() const { return utf16_view().escape_html_entities(); }
|
||||
|
||||
static Utf16String from_string_builder(Badge<StringBuilder>, StringBuilder& builder);
|
||||
static ErrorOr<Utf16String> from_ipc_stream(Stream&, size_t length_in_code_units, bool is_ascii);
|
||||
|
||||
private:
|
||||
ALWAYS_INLINE explicit Utf16String(NonnullRefPtr<Detail::Utf16StringData const> value)
|
||||
: Utf16StringBase(move(value))
|
||||
{
|
||||
}
|
||||
|
||||
static Utf16String from_string_builder_without_validation(StringBuilder&);
|
||||
};
|
||||
|
||||
template<>
|
||||
|
|
@ -264,8 +233,5 @@ struct Traits<Utf16String> : public DefaultTraits<Utf16String> {
|
|||
|
||||
[[nodiscard]] ALWAYS_INLINE AK::Utf16String operator""_utf16(char16_t const* string, size_t length)
|
||||
{
|
||||
AK::Utf16View view { string, length };
|
||||
|
||||
ASSERT(view.validate());
|
||||
return AK::Utf16String::from_utf16_without_validation(view);
|
||||
return AK::Utf16String::from_utf16({ string, length });
|
||||
}
|
||||
|
|
|
|||
|
|
@ -177,9 +177,6 @@ ErrorOr<NonnullRefPtr<Utf16StringData>> Utf16StringData::from_ipc_stream(Stream&
|
|||
|
||||
Bytes bytes { reinterpret_cast<u8*>(string->m_utf16_data), length_in_code_units * sizeof(char16_t) };
|
||||
TRY(stream.read_until_filled(bytes));
|
||||
|
||||
if (!string->utf16_view().validate())
|
||||
return Error::from_string_literal("Stream contains invalid UTF-16 data");
|
||||
}
|
||||
|
||||
return string.release_nonnull();
|
||||
|
|
|
|||
|
|
@ -33,12 +33,11 @@ ErrorOr<String> Utf16View::to_utf8(AllowLonelySurrogates allow_lonely_surrogates
|
|||
if (has_ascii_storage())
|
||||
return String::from_utf8_without_validation(bytes());
|
||||
|
||||
if (!validate(allow_lonely_surrogates))
|
||||
return Error::from_string_literal("Input was not valid UTF-16");
|
||||
|
||||
if (allow_lonely_surrogates == AllowLonelySurrogates::No) {
|
||||
String result;
|
||||
if (!validate())
|
||||
return Error::from_string_literal("Input was not valid UTF-16");
|
||||
|
||||
String result;
|
||||
auto utf8_length = simdutf::utf8_length_from_utf16(m_string.utf16, length_in_code_units());
|
||||
|
||||
TRY(result.replace_with_new_string(Badge<Utf16View> {}, utf8_length, [&](Bytes buffer) -> ErrorOr<void> {
|
||||
|
|
@ -157,30 +156,24 @@ bool Utf16View::is_ascii() const
|
|||
return all_of(utf16_span(), AK::is_ascii);
|
||||
}
|
||||
|
||||
bool Utf16View::validate(size_t& valid_code_units, AllowLonelySurrogates allow_lonely_surrogates) const
|
||||
bool Utf16View::validate() const
|
||||
{
|
||||
if (has_ascii_storage())
|
||||
return true;
|
||||
return simdutf::validate_utf16(m_string.utf16, length_in_code_units());
|
||||
}
|
||||
|
||||
bool Utf16View::validate(size_t& valid_code_units) const
|
||||
{
|
||||
if (has_ascii_storage()) {
|
||||
valid_code_units = length_in_code_units();
|
||||
return true;
|
||||
}
|
||||
|
||||
auto view = *this;
|
||||
valid_code_units = 0;
|
||||
auto result = simdutf::validate_utf16_with_errors(m_string.utf16, length_in_code_units());
|
||||
valid_code_units = result.count;
|
||||
|
||||
while (!view.is_empty()) {
|
||||
auto result = simdutf::validate_utf16_with_errors(view.m_string.utf16, view.length_in_code_units());
|
||||
valid_code_units += result.count;
|
||||
|
||||
if (result.error == simdutf::SUCCESS)
|
||||
return true;
|
||||
if (allow_lonely_surrogates == AllowLonelySurrogates::No || result.error != simdutf::SURROGATE)
|
||||
return false;
|
||||
|
||||
view = view.substring_view(result.count + 1);
|
||||
++valid_code_units;
|
||||
}
|
||||
|
||||
return true;
|
||||
return result.error == simdutf::SUCCESS;
|
||||
}
|
||||
|
||||
size_t Utf16View::code_unit_offset_of(size_t code_point_offset) const
|
||||
|
|
@ -298,7 +291,7 @@ size_t Utf16View::calculate_length_in_code_points() const
|
|||
ASSERT(!has_ascii_storage());
|
||||
|
||||
// simdutf's code point length method assumes valid UTF-16, whereas we allow lonely surrogates.
|
||||
if (validate(AllowLonelySurrogates::No)) [[likely]]
|
||||
if (validate()) [[likely]]
|
||||
return simdutf::count_utf16(m_string.utf16, length_in_code_units());
|
||||
|
||||
size_t code_points = 0;
|
||||
|
|
|
|||
|
|
@ -345,13 +345,9 @@ public:
|
|||
return all_of(utf16_span(), AK::is_ascii_space);
|
||||
}
|
||||
|
||||
[[nodiscard]] ALWAYS_INLINE bool validate(AllowLonelySurrogates allow_lonely_surrogates = AllowLonelySurrogates::Yes) const
|
||||
{
|
||||
size_t valid_code_units = 0;
|
||||
return validate(valid_code_units, allow_lonely_surrogates);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool validate(size_t& valid_code_units, AllowLonelySurrogates = AllowLonelySurrogates::Yes) const;
|
||||
// Note that these do not allow lonely surrogates. The string may be assumed to always be valid WTF-16.
|
||||
[[nodiscard]] bool validate() const;
|
||||
[[nodiscard]] bool validate(size_t& valid_code_units) const;
|
||||
|
||||
[[nodiscard]] constexpr size_t length_in_code_units() const { return m_length_in_code_units & ~(1uz << Detail::UTF16_FLAG); }
|
||||
|
||||
|
|
|
|||
|
|
@ -254,7 +254,7 @@ void RopeString::resolve(EncodingPreference preference) const
|
|||
builder.append(current->utf16_string_view());
|
||||
}
|
||||
|
||||
m_utf16_string = builder.to_utf16_string_without_validation();
|
||||
m_utf16_string = builder.to_utf16_string();
|
||||
m_is_rope = false;
|
||||
m_lhs = nullptr;
|
||||
m_rhs = nullptr;
|
||||
|
|
|
|||
|
|
@ -26,20 +26,20 @@ public:
|
|||
Optional<Utf16String> const& last_match() const
|
||||
{
|
||||
if (!m_last_match_string.has_value())
|
||||
m_last_match_string = Utf16String::from_utf16_without_validation(m_last_match);
|
||||
m_last_match_string = Utf16String::from_utf16(m_last_match);
|
||||
return m_last_match_string;
|
||||
}
|
||||
Optional<Utf16String> const& last_paren() const { return m_last_paren; }
|
||||
Optional<Utf16String> const& left_context() const
|
||||
{
|
||||
if (!m_left_context_string.has_value())
|
||||
m_left_context_string = Utf16String::from_utf16_without_validation(m_left_context);
|
||||
m_left_context_string = Utf16String::from_utf16(m_left_context);
|
||||
return m_left_context_string;
|
||||
}
|
||||
Optional<Utf16String> const& right_context() const
|
||||
{
|
||||
if (!m_right_context_string.has_value())
|
||||
m_right_context_string = Utf16String::from_utf16_without_validation(m_right_context);
|
||||
m_right_context_string = Utf16String::from_utf16(m_right_context);
|
||||
return m_right_context_string;
|
||||
}
|
||||
Optional<Utf16String> const& $1() const { return m_$1; }
|
||||
|
|
|
|||
|
|
@ -320,7 +320,7 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
|
|||
// 2. Set captureEnd to ! GetStringIndex(S, Input, captureEnd).
|
||||
// iv. Let capture be the Match { [[StartIndex]]: captureStart, [[EndIndex]]: captureEnd }.
|
||||
// v. Let capturedValue be ! GetMatchString(S, capture).
|
||||
auto capture_as_utf16_string = Utf16String::from_utf16_without_validation(capture.view.u16_view());
|
||||
auto capture_as_utf16_string = Utf16String::from_utf16(capture.view.u16_view());
|
||||
captured_value = PrimitiveString::create(vm, capture_as_utf16_string);
|
||||
// vi. Append capture to indices.
|
||||
indices.append(Match::create(capture));
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ Optional<size_t> string_index_of(Utf16View const& string, Utf16View const& searc
|
|||
static bool is_string_well_formed_unicode(Utf16View string)
|
||||
{
|
||||
// OPTIMIZATION: simdutf can do this much faster.
|
||||
return string.validate(AllowLonelySurrogates::No);
|
||||
return string.validate();
|
||||
}
|
||||
|
||||
// 11.1.4 CodePointAt ( string, position ), https://tc39.es/ecma262/#sec-codepointat
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ Utf16String icu_string_to_utf16_string(icu::UnicodeString const& string)
|
|||
|
||||
Utf16String icu_string_to_utf16_string(UChar const* string, i32 length)
|
||||
{
|
||||
return Utf16String::from_utf16_without_validation({ string, static_cast<size_t>(length) });
|
||||
return Utf16String::from_utf16({ string, static_cast<size_t>(length) });
|
||||
}
|
||||
|
||||
Utf16View icu_string_to_utf16_view(icu::UnicodeString const& string)
|
||||
|
|
|
|||
|
|
@ -54,10 +54,10 @@ WebIDL::ExceptionOr<Utf16String> CharacterData::substring_data(size_t offset, si
|
|||
// 3. If offset plus count is greater than length, return a string whose value is the code units from the offsetth code unit
|
||||
// to the end of node’s data, and then return.
|
||||
if (offset + count > length)
|
||||
return Utf16String::from_utf16_without_validation(m_data.substring_view(offset));
|
||||
return Utf16String::from_utf16(m_data.substring_view(offset));
|
||||
|
||||
// 4. Return a string whose value is the code units from the offsetth code unit to the offset+countth code unit in node’s data.
|
||||
return Utf16String::from_utf16_without_validation(m_data.substring_view(offset, count));
|
||||
return Utf16String::from_utf16(m_data.substring_view(offset, count));
|
||||
}
|
||||
|
||||
// https://dom.spec.whatwg.org/#concept-cd-replace
|
||||
|
|
|
|||
|
|
@ -174,7 +174,7 @@ Utf16String Node::descendant_text_content() const
|
|||
return TraversalDecision::Continue;
|
||||
});
|
||||
|
||||
return builder.to_utf16_string_without_validation();
|
||||
return builder.to_utf16_string();
|
||||
}
|
||||
|
||||
// https://dom.spec.whatwg.org/#dom-node-textcontent
|
||||
|
|
|
|||
|
|
@ -567,7 +567,7 @@ bool command_font_size_action(DOM::Document& document, Utf16String const& value)
|
|||
resulting_value = font_sizes[number - 1];
|
||||
|
||||
// 12. Set the selection's value to value.
|
||||
set_the_selections_value(document, CommandNames::fontSize, Utf16String::from_utf16_without_validation(resulting_value));
|
||||
set_the_selections_value(document, CommandNames::fontSize, Utf16String::from_utf16(resulting_value));
|
||||
|
||||
// 13. Return true.
|
||||
return true;
|
||||
|
|
@ -622,10 +622,9 @@ bool command_format_block_action(DOM::Document& document, Utf16String const& val
|
|||
{
|
||||
// 1. If value begins with a "<" character and ends with a ">" character, remove the first and last characters from
|
||||
// it.
|
||||
auto resulting_value = Utf16String::from_utf16_without_validation(
|
||||
value.starts_with('<') && value.ends_with('>')
|
||||
? value.substring_view(1, value.length_in_code_units() - 2)
|
||||
: value);
|
||||
auto resulting_value = value;
|
||||
if (value.starts_with('<') && value.ends_with('>'))
|
||||
resulting_value = Utf16String::from_utf16(value.substring_view(1, value.length_in_code_units() - 2));
|
||||
|
||||
// 2. Let value be converted to ASCII lowercase.
|
||||
resulting_value = resulting_value.to_ascii_lowercase();
|
||||
|
|
|
|||
|
|
@ -807,7 +807,7 @@ void FormAssociatedTextControlElement::handle_insert(Utf16String const& data)
|
|||
if (auto max_length = text_node->max_length(); max_length.has_value()) {
|
||||
auto remaining_length = *max_length - text_node->length_in_utf16_code_units();
|
||||
if (remaining_length < data.length_in_code_units())
|
||||
data_for_insertion = Utf16String::from_utf16_without_validation(data.substring_view(0, remaining_length));
|
||||
data_for_insertion = Utf16String::from_utf16(data.substring_view(0, remaining_length));
|
||||
}
|
||||
|
||||
auto selection_start = this->selection_start();
|
||||
|
|
|
|||
|
|
@ -284,7 +284,7 @@ GC::Ref<DOM::DocumentFragment> HTMLElement::rendered_text_fragment(Utf16View con
|
|||
// 2. If text is not the empty string, then append a new Text node whose data is text and node document is
|
||||
// document to fragment.
|
||||
if (!text.is_empty()) {
|
||||
MUST(fragment->append_child(document().create_text_node(Utf16String::from_utf16_without_validation(text))));
|
||||
MUST(fragment->append_child(document().create_text_node(Utf16String::from_utf16(text))));
|
||||
}
|
||||
|
||||
// 3. While position is not past the end of input, and the code point at position is either U+000A LF or U+000D CR:
|
||||
|
|
@ -453,7 +453,7 @@ Utf16String HTMLElement::get_the_text_steps()
|
|||
}
|
||||
|
||||
// 7. Return the concatenation of the string items in results.
|
||||
return builder.to_utf16_string_without_validation();
|
||||
return builder.to_utf16_string();
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/dom.html#dom-innertext
|
||||
|
|
|
|||
|
|
@ -237,82 +237,52 @@ TEST_CASE(validate_invalid_utf16)
|
|||
{
|
||||
// Lonely high surrogate.
|
||||
invalid = u"\xd800"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 1uz);
|
||||
|
||||
invalid = u"\xdbff"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 1uz);
|
||||
}
|
||||
{
|
||||
// Lonely low surrogate.
|
||||
invalid = u"\xdc00"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 1uz);
|
||||
|
||||
invalid = u"\xdfff"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 1uz);
|
||||
}
|
||||
{
|
||||
// High surrogate followed by non-surrogate.
|
||||
invalid = u"\xd800\x0000"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
|
||||
invalid = u"\xd800\xe000"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
}
|
||||
{
|
||||
// High surrogate followed by high surrogate.
|
||||
invalid = u"\xd800\xd800"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
|
||||
invalid = u"\xd800\xdbff"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
}
|
||||
{
|
||||
// Valid UTF-16 followed by invalid code units.
|
||||
invalid = u"\x0041\x0041\xd800"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 3uz);
|
||||
|
||||
invalid = u"\x0041\x0041\xd800"sv;
|
||||
EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No));
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes));
|
||||
EXPECT_EQ(valid_code_units, 3uz);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user