src: detect whether the string is one byte representation or not

References: nodejs#56090
PR-URL: https://github.com/nodejs/node/pull/56147
Fixes: https://github.com/nodejs/node/issues/56090
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
This commit is contained in:
theweipeng 2024-12-21 21:36:22 +08:00 committed by James M Snell
parent 9fd90d9df7
commit 6cb0690fcc
5 changed files with 120 additions and 0 deletions

View File

@ -1304,6 +1304,45 @@ setTimeout(() => {
}, 1000);
```
## `v8.isStringOneByteRepresentation(content)`
<!-- YAML
added: REPLACEME
-->
* `content` {string}
* Returns: {boolean}
V8 only supports `Latin-1/ISO-8859-1` and `UTF16` as the underlying representation of a string.
If the `content` uses `Latin-1/ISO-8859-1` as the underlying representation, this function will return true;
otherwise, it returns false.
A return value of false does not necessarily mean that the string contains characters outside `Latin-1/ISO-8859-1`:
V8 may sometimes store a string that only contains `Latin-1` characters using the `UTF16` representation.
```js
const { isStringOneByteRepresentation } = require('node:v8');
const Encoding = {
latin1: 1,
utf16le: 2,
};
const buffer = Buffer.alloc(100);
// Serializes `input` into `buffer` as: a 1-byte encoding tag at offset 0,
// a 4-byte little-endian payload length at offset 1, then the payload
// starting at offset 5.
function writeString(input) {
  // Choose the tag, length and encoding that match how V8 stores the string.
  const oneByte = isStringOneByteRepresentation(input);
  const tag = oneByte ? Encoding.latin1 : Encoding.utf16le;
  // The length written is one unit per character for latin1 and two for utf16le.
  const payloadLength = oneByte ? input.length : input.length * 2;
  const encodingName = oneByte ? 'latin1' : 'utf16le';

  buffer.writeUint8(tag);
  buffer.writeUint32LE(payloadLength, 1);
  buffer.write(input, 5, encodingName);
}
writeString('hello');
writeString('你好');
```
[HTML structured clone algorithm]: https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm
[Hook Callbacks]: #hook-callbacks
[V8]: https://developers.google.com/v8/

View File

@ -108,6 +108,7 @@ const binding = internalBinding('v8');
const {
cachedDataVersionTag,
setFlagsFromString: _setFlagsFromString,
isStringOneByteRepresentation: _isStringOneByteRepresentation,
updateHeapStatisticsBuffer,
updateHeapSpaceStatisticsBuffer,
updateHeapCodeStatisticsBuffer,
@ -159,6 +160,17 @@ function setFlagsFromString(flags) {
_setFlagsFromString(flags);
}
/**
 * Reports whether the given string uses a one-byte underlying
 * representation, as determined by the native `v8` binding.
 *
 * The argument is checked with `validateString()` before the binding is
 * called, so non-string values are rejected here rather than in native code.
 * @param {string} content The string to inspect.
 * @returns {boolean} The result reported by the native binding.
 */
function isStringOneByteRepresentation(content) {
  validateString(content, 'content');
  const usesOneByte = _isStringOneByteRepresentation(content);
  return usesOneByte;
}
/**
* Gets the current V8 heap statistics.
* @returns {{
@ -445,4 +457,5 @@ module.exports = {
startupSnapshot,
setHeapSnapshotNearHeapLimit,
GCProfiler,
isStringOneByteRepresentation,
};

View File

@ -12,6 +12,9 @@ namespace node {
using CFunctionCallbackWithOneByteString =
uint32_t (*)(v8::Local<v8::Value>, const v8::FastOneByteString&);
using CFunctionCallbackReturnBool = bool (*)(v8::Local<v8::Value> unused,
v8::Local<v8::Value> receiver);
using CFunctionCallback = void (*)(v8::Local<v8::Value> unused,
v8::Local<v8::Value> receiver);
using CFunctionCallbackReturnDouble =
@ -90,6 +93,7 @@ class ExternalReferenceRegistry {
#define ALLOWED_EXTERNAL_REFERENCE_TYPES(V) \
V(CFunctionCallback) \
V(CFunctionCallbackWithOneByteString) \
V(CFunctionCallbackReturnBool) \
V(CFunctionCallbackReturnDouble) \
V(CFunctionCallbackReturnInt32) \
V(CFunctionCallbackValueReturnDouble) \

View File

@ -32,6 +32,7 @@
namespace node {
namespace v8_utils {
using v8::Array;
using v8::CFunction;
using v8::Context;
using v8::FunctionCallbackInfo;
using v8::FunctionTemplate;
@ -238,6 +239,23 @@ void SetFlagsFromString(const FunctionCallbackInfo<Value>& args) {
V8::SetFlagsFromString(*flags, static_cast<size_t>(flags.length()));
}
static void IsStringOneByteRepresentation(
const FunctionCallbackInfo<Value>& args) {
CHECK_EQ(args.Length(), 1);
CHECK(args[0]->IsString());
bool is_one_byte = args[0].As<String>()->IsOneByte();
args.GetReturnValue().Set(is_one_byte);
}
// Fast API counterpart of IsStringOneByteRepresentation. `receiver` is not
// used; `target` is asserted to be a string and the result of
// String::IsOneByte() on it is returned directly.
static bool FastIsStringOneByteRepresentation(Local<Value> receiver,
                                              const Local<Value> target) {
  CHECK(target->IsString());
  const bool uses_one_byte = target.As<String>()->IsOneByte();
  return uses_one_byte;
}
// Fast API descriptor built from FastIsStringOneByteRepresentation. It is
// passed to SetFastMethodNoSideEffect() in Initialize(), and its type info is
// registered in RegisterExternalReferences().
CFunction fast_is_string_one_byte_representation_(
CFunction::Make(FastIsStringOneByteRepresentation));
static const char* GetGCTypeName(v8::GCType gc_type) {
switch (gc_type) {
case v8::GCType::kGCTypeScavenge:
@ -478,6 +496,13 @@ void Initialize(Local<Object> target,
// Export symbols used by v8.setFlagsFromString()
SetMethod(context, target, "setFlagsFromString", SetFlagsFromString);
// Export symbols used by v8.isStringOneByteRepresentation()
SetFastMethodNoSideEffect(context,
target,
"isStringOneByteRepresentation",
IsStringOneByteRepresentation,
&fast_is_string_one_byte_representation_);
// GCProfiler
Local<FunctionTemplate> t =
NewFunctionTemplate(env->isolate(), GCProfiler::New);
@ -497,6 +522,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
registry->Register(GCProfiler::New);
registry->Register(GCProfiler::Start);
registry->Register(GCProfiler::Stop);
registry->Register(IsStringOneByteRepresentation);
registry->Register(FastIsStringOneByteRepresentation);
registry->Register(fast_is_string_one_byte_representation_.GetTypeInfo());
}
} // namespace v8_utils

View File

@ -0,0 +1,36 @@
'use strict';
require('../common');
const assert = require('assert');
const { isStringOneByteRepresentation } = require('v8');
// Every non-string argument must be rejected by argument validation.
const invalidInputs = [
  undefined,
  null,
  false,
  5n,
  5,
  Symbol(),
  () => {},
  {},
];
for (const value of invalidInputs) {
  assert.throws(
    () => { isStringOneByteRepresentation(value); },
    /The "content" argument must be of type string/
  );
}

// A plain ASCII string is expected to report a one-byte representation.
{
  const oneByteInput = 'hello world!';
  // Call repeatedly so that the fast API path gets triggered.
  let remaining = 10_000;
  while (remaining > 0) {
    assert.strictEqual(isStringOneByteRepresentation(oneByteInput), true);
    remaining--;
  }
}

// A string with characters outside Latin-1 is expected to report false.
{
  const twoByteInput = '你好😀😃';
  // Call repeatedly so that the fast API path gets triggered.
  let remaining = 10_000;
  while (remaining > 0) {
    assert.strictEqual(isStringOneByteRepresentation(twoByteInput), false);
    remaining--;
  }
}