AK: Implement creating a UTF-16 string from a repeated code point

This commit is contained in:
Timothy Flynn 2025-07-25 09:24:19 -04:00 committed by Jelle Raaijmakers
parent a46e9b2adb
commit df77ae1920
5 changed files with 110 additions and 0 deletions

View File

@ -193,6 +193,28 @@ ErrorOr<void> StringBuilder::try_append_repeated(StringView string, size_t n)
return {};
}
// Appends `string` to this builder `n` times.
//
// An empty `string` is a no-op. Otherwise the total space needed for all
// repetitions is requested from will_append() up front, and `string` is then
// appended `n` times. Any error from will_append() or try_append() is
// propagated to the caller.
ErrorOr<void> StringBuilder::try_append_repeated(Utf16View const& string, size_t n)
{
    if (string.is_empty())
        return {};

    auto const code_unit_count = string.length_in_code_units();

    // Size requested for a single copy of `string`, depending on the builder's mode.
    size_t size_per_copy = 0;

    if (m_mode != Mode::UTF8) {
        // Non-UTF-8 mode: two units of storage are requested per code unit.
        size_per_copy = code_unit_count * 2;
    } else if (string.has_ascii_storage()) {
        // ASCII-backed view in UTF-8 mode: one unit of storage per code unit.
        size_per_copy = code_unit_count;
    } else {
        // UTF-16-backed view in UTF-8 mode: ask simdutf how long the UTF-8 form will be.
        size_per_copy = simdutf::utf8_length_from_utf16(string.utf16_span().data(), code_unit_count);
    }

    TRY(will_append(size_per_copy * n));

    for (size_t repetition = 0; repetition < n; ++repetition)
        TRY(try_append(string));

    return {};
}
void StringBuilder::append(StringView string)
{
MUST(try_append(string));
@ -228,6 +250,11 @@ void StringBuilder::append_repeated(StringView string, size_t n)
MUST(try_append_repeated(string, n));
}
// Appends `string` to this builder `n` times by forwarding to
// try_append_repeated(); the result is wrapped in MUST(), so a failure is not
// reported back to the caller (presumably it aborts -- confirm against MUST's definition).
void StringBuilder::append_repeated(Utf16View const& string, size_t n)
{
MUST(try_append_repeated(string, n));
}
ErrorOr<ByteBuffer> StringBuilder::to_byte_buffer() const
{
return ByteBuffer::copy(data(), length());

View File

@ -45,6 +45,7 @@ public:
ErrorOr<void> try_append(char const*, size_t);
ErrorOr<void> try_append_repeated(char, size_t);
ErrorOr<void> try_append_repeated(StringView, size_t);
// Appends the given UTF-16 view the given number of times; an empty view is a no-op.
// Errors from reserving space or from an individual append are propagated.
ErrorOr<void> try_append_repeated(Utf16View const&, size_t);
ErrorOr<void> try_append_escaped_for_json(StringView);
template<typename... Parameters>
@ -64,6 +65,7 @@ public:
void appendvf(char const*, va_list);
void append_repeated(char, size_t);
void append_repeated(StringView, size_t);
// Counterpart of try_append_repeated(Utf16View const&, size_t) that returns void; the result is wrapped in MUST().
void append_repeated(Utf16View const&, size_t);
void append_escaped_for_json(StringView);
void append_as_lowercase(char);

View File

@ -90,6 +90,30 @@ Utf16String Utf16String::from_string_builder_without_validation(StringBuilder& b
return Utf16String { Detail::Utf16StringData::from_string_builder(builder) };
}
// Builds a string made of `code_point` repeated `count` times.
//
// An ASCII code point whose repetition count does not exceed
// Detail::MAX_SHORT_STRING_BYTE_COUNT is written straight into short ASCII
// storage. Every other case encodes the code point as UTF-16 once and repeats
// the resulting code units through a UTF-16 StringBuilder.
Utf16String Utf16String::repeated(u32 code_point, size_t count)
{
    bool const fits_in_short_ascii_storage = count <= Detail::MAX_SHORT_STRING_BYTE_COUNT && AK::is_ascii(code_point);

    if (!fits_in_short_ascii_storage) {
        // The callback is invoked once per produced code unit; the buffer has room for two.
        Array<char16_t, 2> encoded;
        size_t encoded_length = 0;

        (void)UnicodeUtils::code_point_to_utf16(code_point, [&](auto code_unit) {
            encoded[encoded_length++] = code_unit;
        });

        StringBuilder repeated_builder(StringBuilder::Mode::UTF16);
        repeated_builder.append_repeated({ encoded.data(), encoded_length }, count);
        return repeated_builder.to_utf16_string();
    }

    // Short ASCII path: fill the inline storage with `count` copies of the byte.
    Utf16String result;
    result.m_value.short_ascii_string = Detail::ShortString::create_with_byte_count(count);

    Bytes storage { result.m_value.short_ascii_string.storage, count };
    storage.fill(static_cast<u8>(code_point));

    return result;
}
ErrorOr<void> Formatter<Utf16String>::format(FormatBuilder& builder, Utf16String const& utf16_string)
{
if (utf16_string.has_long_utf16_storage())

View File

@ -125,6 +125,8 @@ public:
return builder.to_utf16_string();
}
// Creates a string consisting of `code_point` repeated `count` times; a `count` of zero yields an empty string.
static Utf16String repeated(u32 code_point, size_t count);
ALWAYS_INLINE static Utf16String from_string_builder(Badge<StringBuilder>, StringBuilder& builder)
{
VERIFY(builder.utf16_string_view().validate());

View File

@ -356,6 +356,61 @@ TEST_CASE(formatted)
}
}
// Exercises Utf16String::repeated() with three kinds of code point -- ASCII ('a'),
// one that encodes to a single UTF-16 code unit (U+03C9), and one that encodes to
// two code units (U+10300) -- across several repetition counts.
TEST_CASE(repeated)
{
    // Zero repetitions: the result is empty regardless of the code point.
    {
        auto ascii = Utf16String::repeated('a', 0);
        EXPECT(ascii.is_empty());

        auto single_unit = Utf16String::repeated(0x03C9U, 0);
        EXPECT(single_unit.is_empty());

        auto surrogate_pair = Utf16String::repeated(0x10300, 0);
        EXPECT(surrogate_pair.is_empty());
    }

    // One repetition.
    {
        auto ascii = Utf16String::repeated('a', 1);
        EXPECT_EQ(ascii.length_in_code_units(), 1uz);
        EXPECT_EQ(ascii, u"a"sv);

        auto single_unit = Utf16String::repeated(0x03C9U, 1);
        EXPECT_EQ(single_unit.length_in_code_units(), 1uz);
        EXPECT_EQ(single_unit, u"ω"sv);

        auto surrogate_pair = Utf16String::repeated(0x10300, 1);
        EXPECT_EQ(surrogate_pair.length_in_code_units(), 2uz);
        EXPECT_EQ(surrogate_pair, u"𐌀"sv);
    }

    // Three repetitions.
    {
        auto ascii = Utf16String::repeated('a', 3);
        EXPECT_EQ(ascii.length_in_code_units(), 3uz);
        EXPECT_EQ(ascii, u"aaa"sv);

        auto single_unit = Utf16String::repeated(0x03C9U, 3);
        EXPECT_EQ(single_unit.length_in_code_units(), 3uz);
        EXPECT_EQ(single_unit, u"ωωω"sv);

        auto surrogate_pair = Utf16String::repeated(0x10300, 3);
        EXPECT_EQ(surrogate_pair.length_in_code_units(), 6uz);
        EXPECT_EQ(surrogate_pair, u"𐌀𐌀𐌀"sv);
    }

    // Ten repetitions (presumably beyond the short-string capacity for the ASCII
    // case -- confirm against Detail::MAX_SHORT_STRING_BYTE_COUNT).
    {
        auto ascii = Utf16String::repeated('a', 10);
        EXPECT_EQ(ascii.length_in_code_units(), 10uz);
        EXPECT_EQ(ascii, u"aaaaaaaaaa"sv);

        auto single_unit = Utf16String::repeated(0x03C9U, 10);
        EXPECT_EQ(single_unit.length_in_code_units(), 10uz);
        EXPECT_EQ(single_unit, u"ωωωωωωωωωω"sv);

        auto surrogate_pair = Utf16String::repeated(0x10300, 10);
        EXPECT_EQ(surrogate_pair.length_in_code_units(), 20uz);
        EXPECT_EQ(surrogate_pair, u"𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀"sv);
    }

    // A value that is not a valid code point is expected to crash.
    EXPECT_DEATH("Creating a string from an invalid code point", (void)Utf16String::repeated(0xffffffff, 1));
}
TEST_CASE(copy_operations)
{
auto test = [](Utf16String const& string1) {