util,readline: NFC-normalize strings before getStringWidth

The assumption here is that decomposed characters render like their
composed character equivalents, and that working with the former
comes with a risk of over-estimating string widths given that
we compute them on a per-code-point basis. The regression test
added here (한글 vs 한글) is an example of that happening.

PR-URL: https://github.com/nodejs/node/pull/33052
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: Anto Aravinth <anto.aravinth.cse@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
This commit is contained in:
Anna Henningsen 2020-04-25 02:31:09 +02:00 committed by Ruben Bridgewater
parent 0f4d513873
commit c239cc650c
2 changed files with 19 additions and 5 deletions

View File

@ -1917,6 +1917,13 @@ function formatWithOptionsInternal(inspectOptions, ...args) {
return str; return str;
} }
function prepareStringForGetStringWidth(str, removeControlChars) {
str = str.normalize('NFC');
if (removeControlChars)
str = stripVTControlCharacters(str);
return str;
}
if (internalBinding('config').hasIntl) { if (internalBinding('config').hasIntl) {
const icu = internalBinding('icu'); const icu = internalBinding('icu');
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence) // icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
@ -1926,8 +1933,8 @@ if (internalBinding('config').hasIntl) {
// the receiving end supports. // the receiving end supports.
getStringWidth = function getStringWidth(str, removeControlChars = true) { getStringWidth = function getStringWidth(str, removeControlChars = true) {
let width = 0; let width = 0;
if (removeControlChars)
str = stripVTControlCharacters(str); str = prepareStringForGetStringWidth(str, removeControlChars);
for (let i = 0; i < str.length; i++) { for (let i = 0; i < str.length; i++) {
// Try to avoid calling into C++ by first handling the ASCII portion of // Try to avoid calling into C++ by first handling the ASCII portion of
// the string. If it is fully ASCII, we skip the C++ part. // the string. If it is fully ASCII, we skip the C++ part.
@ -1947,9 +1954,7 @@ if (internalBinding('config').hasIntl) {
getStringWidth = function getStringWidth(str, removeControlChars = true) { getStringWidth = function getStringWidth(str, removeControlChars = true) {
let width = 0; let width = 0;
if (removeControlChars) str = prepareStringForGetStringWidth(str, removeControlChars);
str = stripVTControlCharacters(str);
for (const char of str) { for (const char of str) {
const code = char.codePointAt(0); const code = char.codePointAt(0);
if (isFullWidthCodePoint(code)) { if (isFullWidthCodePoint(code)) {

View File

@ -87,3 +87,12 @@ for (let i = 0; i < 256; i++) {
assert.strictEqual(getStringWidth(char), 1); assert.strictEqual(getStringWidth(char), 1);
} }
} }
{
const a = '한글'.normalize('NFD'); // 한글
const b = '한글'.normalize('NFC'); // 한글
assert.strictEqual(a.length, 6);
assert.strictEqual(b.length, 2);
assert.strictEqual(getStringWidth(a), 4);
assert.strictEqual(getStringWidth(b), 4);
}