LibRegex: Use code unit length in more places that apply

Finishes what 7f6b70fafb started.
Having one part use length and another use code unit length led to crashes;
the added test ensures we don't mess that up again.
This commit is contained in:
Ali Mohammad Pur 2025-07-24 20:24:55 +02:00 committed by Jelle Raaijmakers
parent 97c8fdd042
commit c7ad6cd508
2 changed files with 5 additions and 4 deletions

View File

@ -387,7 +387,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c
if (start_position < match.column) if (start_position < match.column)
return ExecutionResult::Continue; return ExecutionResult::Continue;
VERIFY(start_position + length <= input.view.length()); VERIFY(start_position + length <= input.view.length_in_code_units());
auto captured_text = input.view.substring_view(start_position, length); auto captured_text = input.view.substring_view(start_position, length);
@ -420,7 +420,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchIn
if (start_position < match.column) if (start_position < match.column)
return ExecutionResult::Continue; return ExecutionResult::Continue;
VERIFY(start_position + length <= input.view.length()); VERIFY(start_position + length <= input.view.length_in_code_units());
auto view = input.view.substring_view(start_position, length); auto view = input.view.substring_view(start_position, length);
@ -551,7 +551,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
break; break;
} }
case CharacterCompareType::CharClass: { case CharacterCompareType::CharClass: {
if (input.view.length() <= state.string_position_in_code_units) if (input.view.length_in_code_units() <= state.string_position_in_code_units)
return ExecutionResult::Failed_ExecuteLowPrioForks; return ExecutionResult::Failed_ExecuteLowPrioForks;
auto character_class = (CharClass)m_bytecode->at(offset++); auto character_class = (CharClass)m_bytecode->at(offset++);

View File

@ -828,7 +828,8 @@ TEST_CASE(ECMA262_unicode_match)
"\\ud83c[\\udffb-\\udfff](?=\\ud83c[\\udffb-\\udfff])|(?:[^\\ud800-\\udfff][\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]?|[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|(?:\\ud83c[\\udde6-\\uddff]){2}|[\\ud800-\\udbff][\\udc00-\\udfff]|[\\ud800-\\udfff])[\\ufe0e\\ufe0f]?(?:[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|\\ud83c[\\udffb-\\udfff])?(?:\\u200d(?:[^\\ud800-\\udfff]|(?:\\ud83c[\\udde6-\\uddff]){2}|[\\ud800-\\udbff][\\udc00-\\udfff])[\\ufe0e\\ufe0f]?(?:[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|\\ud83c[\\udffb-\\udfff])?)*"sv, "\\ud83c[\\udffb-\\udfff](?=\\ud83c[\\udffb-\\udfff])|(?:[^\\ud800-\\udfff][\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]?|[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|(?:\\ud83c[\\udde6-\\uddff]){2}|[\\ud800-\\udbff][\\udc00-\\udfff]|[\\ud800-\\udfff])[\\ufe0e\\ufe0f]?(?:[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|\\ud83c[\\udffb-\\udfff])?(?:\\u200d(?:[^\\ud800-\\udfff]|(?:\\ud83c[\\udde6-\\uddff]){2}|[\\ud800-\\udbff][\\udc00-\\udfff])[\\ufe0e\\ufe0f]?(?:[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|\\ud83c[\\udffb-\\udfff])?)*"sv,
"😀"sv, "😀"sv,
true, true,
} },
{ "(?<before>\\w*)\\s*(?<emoji>\\p{Emoji}+)\\s*(?<after>\\w*)"sv, "Hey 🎉 there! I love 🍕 pizza"sv, true, ECMAScriptFlags::Unicode },
}; };
for (auto& test : tests) { for (auto& test : tests) {