esm: populate separate cache for require(esm) in imported CJS

Otherwise if the ESM happens to be cached separately by the ESM loader
before it gets loaded with `require(esm)` from within an imported
CJS file (which uses a re-invented require() with a couple of quirks,
including a separate cache), it won't be able to load the esm properly
from the cache.

PR-URL: https://github.com/nodejs/node/pull/59679
Refs: https://github.com/nodejs/node/issues/59666
Refs: https://github.com/nodejs/node/issues/52697
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
Reviewed-By: Chengzhong Wu <legendecas@gmail.com>
This commit is contained in:
Joyee Cheung 2025-09-08 16:59:31 +02:00 committed by GitHub
parent ed68b26ca3
commit 916929863d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 238 additions and 77 deletions

View File

@ -69,7 +69,8 @@ const {
module_export_names_private_symbol,
module_circular_visited_private_symbol,
module_export_private_symbol,
module_parent_private_symbol,
module_first_parent_private_symbol,
module_last_parent_private_symbol,
},
isInsideNodeModules,
} = internalBinding('util');
@ -94,9 +95,13 @@ const kModuleCircularVisited = module_circular_visited_private_symbol;
*/
const kModuleExport = module_export_private_symbol;
/**
* {@link Module} parent module.
* {@link Module} The first parent module that loads a module with require().
*/
const kModuleParent = module_parent_private_symbol;
const kFirstModuleParent = module_first_parent_private_symbol;
/**
* {@link Module} The last parent module that loads a module with require().
*/
const kLastModuleParent = module_last_parent_private_symbol;
const kIsMainSymbol = Symbol('kIsMainSymbol');
const kIsCachedByESMLoader = Symbol('kIsCachedByESMLoader');
@ -117,6 +122,7 @@ module.exports = {
findLongestRegisteredExtension,
resolveForCJSWithHooks,
loadSourceForCJSWithHooks: loadSource,
populateCJSExportsFromESM,
wrapSafe,
wrapModuleLoad,
kIsMainSymbol,
@ -326,7 +332,8 @@ function Module(id = '', parent) {
this.id = id;
this.path = path.dirname(id);
setOwnProperty(this, 'exports', {});
this[kModuleParent] = parent;
this[kFirstModuleParent] = parent;
this[kLastModuleParent] = parent;
updateChildren(parent, this, false);
this.filename = null;
this.loaded = false;
@ -408,7 +415,7 @@ ObjectDefineProperty(BuiltinModule.prototype, 'isPreloading', isPreloadingDesc);
* @returns {object}
*/
function getModuleParent() {
return this[kModuleParent];
return this[kFirstModuleParent];
}
/**
@ -418,7 +425,7 @@ function getModuleParent() {
* @returns {void}
*/
function setModuleParent(value) {
this[kModuleParent] = value;
this[kFirstModuleParent] = value;
}
let debug = debuglog('module', (fn) => {
@ -997,7 +1004,7 @@ function getExportsForCircularRequire(module) {
const requiredESM = module[kRequiredModuleSymbol];
if (requiredESM && requiredESM.getStatus() !== kEvaluated) {
let message = `Cannot require() ES Module ${module.id} in a cycle.`;
const parent = module[kModuleParent];
const parent = module[kLastModuleParent];
if (parent) {
message += ` (from ${parent.filename})`;
}
@ -1278,6 +1285,8 @@ Module._load = function(request, parent, isMain) {
// load hooks for the module keyed by the (potentially customized) filename.
module[kURL] = url;
module[kFormat] = format;
} else {
module[kLastModuleParent] = parent;
}
if (parent !== undefined) {
@ -1397,7 +1406,8 @@ Module._resolveFilename = function(request, parent, isMain, options) {
const requireStack = [];
for (let cursor = parent;
cursor;
cursor = cursor[kModuleParent]) {
// TODO(joyeecheung): it makes more sense to use kLastModuleParent here.
cursor = cursor[kFirstModuleParent]) {
ArrayPrototypePush(requireStack, cursor.filename || cursor.id);
}
let message = `Cannot find module '${request}'`;
@ -1514,7 +1524,7 @@ function loadESMFromCJS(mod, filename, format, source) {
// ESM won't be accessible via process.mainModule.
setOwnProperty(process, 'mainModule', undefined);
} else {
const parent = mod[kModuleParent];
const parent = mod[kLastModuleParent];
requireModuleWarningMode ??= getOptionValue('--trace-require-module');
if (requireModuleWarningMode) {
@ -1564,54 +1574,66 @@ function loadESMFromCJS(mod, filename, format, source) {
wrap,
namespace,
} = cascadedLoader.importSyncForRequire(mod, filename, source, isMain, parent);
// Tooling in the ecosystem have been using the __esModule property to recognize
// transpiled ESM in consuming code. For example, a 'log' package written in ESM:
//
// export default function log(val) { console.log(val); }
//
// Can be transpiled as:
//
// exports.__esModule = true;
// exports.default = function log(val) { console.log(val); }
//
// The consuming code may be written like this in ESM:
//
// import log from 'log'
//
// Which gets transpiled to:
//
// const _mod = require('log');
// const log = _mod.__esModule ? _mod.default : _mod;
//
// So to allow transpiled consuming code to recognize require()'d real ESM
// as ESM and pick up the default exports, we add a __esModule property by
// building a source text module facade for any module that has a default
// export and add .__esModule = true to the exports. This maintains the
// enumerability of the re-exported names and the live binding of the exports,
// without incurring a non-trivial per-access overhead on the exports.
//
// The source of the facade is defined as a constant per-isolate property
// required_module_default_facade_source_string, which looks like this
//
// export * from 'original';
// export { default } from 'original';
// export const __esModule = true;
//
// And the 'original' module request is always resolved by
// createRequiredModuleFacade() to `wrap` which is a ModuleWrap wrapping
// over the original module.
// We don't do this to modules that are marked as CJS ESM or that
// don't have default exports to avoid the unnecessary overhead.
// If __esModule is already defined, we will also skip the extension
// to allow users to override it.
if (ObjectHasOwn(namespace, 'module.exports')) {
mod.exports = namespace['module.exports'];
} else if (!ObjectHasOwn(namespace, 'default') || ObjectHasOwn(namespace, '__esModule')) {
mod.exports = namespace;
} else {
mod.exports = createRequiredModuleFacade(wrap);
}
populateCJSExportsFromESM(mod, wrap, namespace);
}
}
/**
* Populate the exports of a CJS module entry from an ESM module's namespace object for
* require(esm).
* @param {Module} mod CJS module instance
* @param {ModuleWrap} wrap ESM ModuleWrap instance.
* @param {object} namespace The ESM namespace object.
*/
function populateCJSExportsFromESM(mod, wrap, namespace) {
// Tooling in the ecosystem have been using the __esModule property to recognize
// transpiled ESM in consuming code. For example, a 'log' package written in ESM:
//
// export default function log(val) { console.log(val); }
//
// Can be transpiled as:
//
// exports.__esModule = true;
// exports.default = function log(val) { console.log(val); }
//
// The consuming code may be written like this in ESM:
//
// import log from 'log'
//
// Which gets transpiled to:
//
// const _mod = require('log');
// const log = _mod.__esModule ? _mod.default : _mod;
//
// So to allow transpiled consuming code to recognize require()'d real ESM
// as ESM and pick up the default exports, we add a __esModule property by
// building a source text module facade for any module that has a default
// export and add .__esModule = true to the exports. This maintains the
// enumerability of the re-exported names and the live binding of the exports,
// without incurring a non-trivial per-access overhead on the exports.
//
// The source of the facade is defined as a constant per-isolate property
// required_module_default_facade_source_string, which looks like this
//
// export * from 'original';
// export { default } from 'original';
// export const __esModule = true;
//
// And the 'original' module request is always resolved by
// createRequiredModuleFacade() to `wrap` which is a ModuleWrap wrapping
// over the original module.
// We don't do this to modules that are marked as CJS ESM or that
// don't have default exports to avoid the unnecessary overhead.
// If __esModule is already defined, we will also skip the extension
// to allow users to override it.
if (ObjectHasOwn(namespace, 'module.exports')) {
mod.exports = namespace['module.exports'];
} else if (!ObjectHasOwn(namespace, 'default') || ObjectHasOwn(namespace, '__esModule')) {
mod.exports = namespace;
} else {
mod.exports = createRequiredModuleFacade(wrap);
}
}
@ -1804,7 +1826,7 @@ function reconstructErrorStack(err, parentPath, parentSource) {
*/
function getRequireESMError(mod, pkg, content, filename) {
// This is an error path because `require` of a `.js` file in a `"type": "module"` scope is not allowed.
const parent = mod[kModuleParent];
const parent = mod[kFirstModuleParent];
const parentPath = parent?.filename;
const packageJsonPath = pkg?.path;
const usesEsm = containsModuleSyntax(content, filename);

View File

@ -17,6 +17,7 @@ const {
const {
kIsExecuting,
kRequiredModuleSymbol,
Module: CJSModule,
} = require('internal/modules/cjs/loader');
const { imported_cjs_symbol } = internalBinding('symbols');
@ -91,13 +92,18 @@ function newLoadCache() {
return new LoadCache();
}
let _translators;
function lazyLoadTranslators() {
_translators ??= require('internal/modules/esm/translators');
return _translators;
}
/**
* Lazy-load translators to avoid potentially unnecessary work at startup (ex if ESM is not used).
* @returns {import('./translators.js').Translators}
*/
function getTranslators() {
const { translators } = require('internal/modules/esm/translators');
return translators;
return lazyLoadTranslators().translators;
}
/**
@ -506,7 +512,7 @@ class ModuleLoader {
const { source } = loadResult;
const isMain = (parentURL === undefined);
const wrap = this.#translate(url, finalFormat, source, isMain);
const wrap = this.#translate(url, finalFormat, source, parentURL);
assert(wrap instanceof ModuleWrap, `Translator used for require(${url}) should not be async`);
if (process.env.WATCH_REPORT_DEPENDENCIES && process.send) {
@ -542,10 +548,10 @@ class ModuleLoader {
* @param {string} format Format of the module to be translated. This is used to find
* matching translators.
* @param {ModuleSource} source Source of the module to be translated.
* @param {boolean} isMain Whether the module to be translated is the entry point.
* @param {string|undefined} parentURL URL of the parent module. Undefined if it's the entry point.
* @returns {ModuleWrap}
*/
#translate(url, format, source, isMain) {
#translate(url, format, source, parentURL) {
this.validateLoadResult(url, format);
const translator = getTranslators().get(format);
@ -553,7 +559,20 @@ class ModuleLoader {
throw new ERR_UNKNOWN_MODULE_FORMAT(format, url);
}
const result = FunctionPrototypeCall(translator, this, url, source, isMain);
// Populate the CJS cache with a facade for ESM in case subsequent require(esm) is
// looking it up from the cache. The parent module of the CJS cache entry would be the
// first CJS module that loads it with require(). This is an approximation, because
// ESM caches more and it does not get re-loaded and updated every time an `import` is
// encountered, unlike CJS require(), and we only use the parent entry to provide
// more information in error messages.
if (format === 'module') {
const parentFilename = urlToFilename(parentURL);
const parent = parentFilename ? CJSModule._cache[parentFilename] : undefined;
const cjsModule = lazyLoadTranslators().cjsEmplaceModuleCacheEntryForURL(url, parent);
debug('cjsEmplaceModuleCacheEntryForURL', url, parent, cjsModule);
}
const result = FunctionPrototypeCall(translator, this, url, source, parentURL === undefined);
assert(result instanceof ModuleWrap);
return result;
}
@ -563,10 +582,10 @@ class ModuleLoader {
* This is run synchronously, and the translator always return a ModuleWrap synchronously.
* @param {string} url URL of the module to be translated.
* @param {object} loadContext See {@link load}
* @param {boolean} isMain Whether the module to be translated is the entry point.
* @param {string|undefined} parentURL URL of the parent module. Undefined if it's the entry point.
* @returns {ModuleWrap}
*/
loadAndTranslateForRequireInImportedCJS(url, loadContext, isMain) {
loadAndTranslateForRequireInImportedCJS(url, loadContext, parentURL) {
const { format: formatFromLoad, source } = this.#loadSync(url, loadContext);
if (formatFromLoad === 'wasm') { // require(wasm) is not supported.
@ -587,7 +606,7 @@ class ModuleLoader {
finalFormat = 'require-commonjs-typescript';
}
const wrap = this.#translate(url, finalFormat, source, isMain);
const wrap = this.#translate(url, finalFormat, source, parentURL);
assert(wrap instanceof ModuleWrap, `Translator used for require(${url}) should not be async`);
return wrap;
}
@ -597,13 +616,13 @@ class ModuleLoader {
* This may be run asynchronously if there are asynchronous module loader hooks registered.
* @param {string} url URL of the module to be translated.
* @param {object} loadContext See {@link load}
* @param {boolean} isMain Whether the module to be translated is the entry point.
* @param {string|undefined} parentURL URL of the parent module. Undefined if it's the entry point.
* @returns {Promise<ModuleWrap>|ModuleWrap}
*/
loadAndTranslate(url, loadContext, isMain) {
loadAndTranslate(url, loadContext, parentURL) {
const maybePromise = this.load(url, loadContext);
const afterLoad = ({ format, source }) => {
return this.#translate(url, format, source, isMain);
return this.#translate(url, format, source, parentURL);
};
if (isPromise(maybePromise)) {
return maybePromise.then(afterLoad);
@ -630,9 +649,9 @@ class ModuleLoader {
const isMain = parentURL === undefined;
let moduleOrModulePromise;
if (isForRequireInImportedCJS) {
moduleOrModulePromise = this.loadAndTranslateForRequireInImportedCJS(url, context, isMain);
moduleOrModulePromise = this.loadAndTranslateForRequireInImportedCJS(url, context, parentURL);
} else {
moduleOrModulePromise = this.loadAndTranslate(url, context, isMain);
moduleOrModulePromise = this.loadAndTranslate(url, context, parentURL);
}
const inspectBrk = (

View File

@ -44,6 +44,7 @@ const {
findLongestRegisteredExtension,
resolveForCJSWithHooks,
loadSourceForCJSWithHooks,
populateCJSExportsFromESM,
} = require('internal/modules/cjs/loader');
const { fileURLToPath, pathToFileURL, URL } = require('internal/url');
let debug = require('internal/util/debuglog').debuglog('esm', (fn) => {
@ -148,9 +149,19 @@ function loadCJSModule(module, source, url, filename, isMain) {
}
specifier = `${pathToFileURL(path)}`;
}
// FIXME(node:59666) Currently, the ESM loader re-invents require() here for imported CJS and this
// requires a separate cache to be populated as well as introducing several quirks. This is not ideal.
const job = cascadedLoader.getModuleJobForRequireInImportedCJS(specifier, url, importAttributes);
job.runSync();
return cjsCache.get(job.url).exports;
const mod = cjsCache.get(job.url);
assert(mod, `Imported CJS module ${url} failed to load module ${job.url} using require()`);
if (!job.module.synthetic && !mod.loaded) {
debug('populateCJSExportsFromESM from require(esm) in imported CJS', url, mod, job.module);
populateCJSExportsFromESM(mod, job.module, job.module.getNamespace());
mod.loaded = true;
}
return mod.exports;
};
setOwnProperty(requireFn, 'resolve', function resolve(specifier) {
if (!StringPrototypeStartsWith(specifier, 'node:')) {
@ -171,6 +182,7 @@ function loadCJSModule(module, source, url, filename, isMain) {
// TODO: can we use a weak map instead?
const cjsCache = new SafeMap();
/**
* Creates a ModuleWrap object for a CommonJS module.
* @param {string} url - The URL of the module.
@ -329,16 +341,17 @@ translators.set('commonjs', function commonjsStrategy(url, source, isMain) {
/**
* Get or create an entry in the CJS module cache for the given filename.
* @param {string} filename CJS module filename
* @param {CJSModule} parent The parent CJS module
* @returns {CJSModule} the cached CJS module entry
*/
function cjsEmplaceModuleCacheEntry(filename, exportNames) {
function cjsEmplaceModuleCacheEntry(filename, parent) {
// TODO: Do we want to keep hitting the user mutable CJS loader here?
let cjsMod = CJSModule._cache[filename];
if (cjsMod) {
return cjsMod;
}
cjsMod = new CJSModule(filename);
cjsMod = new CJSModule(filename, parent);
cjsMod.filename = filename;
cjsMod.paths = CJSModule._nodeModulePaths(cjsMod.path);
cjsMod[kIsCachedByESMLoader] = true;
@ -347,6 +360,22 @@ function cjsEmplaceModuleCacheEntry(filename, exportNames) {
return cjsMod;
}
/**
* Emplace a CJS module cache entry for the given URL.
* @param {string} url The module URL
* @param {CJSModule} parent The parent CJS module
* @returns {CJSModule|undefined} the cached CJS module entry, undefined if url cannot be used to identify a CJS entry.
*/
exports.cjsEmplaceModuleCacheEntryForURL = function cjsEmplaceModuleCacheEntryForURL(url, parent) {
const filename = urlToFilename(url);
if (!filename) {
return;
}
const cjsModule = cjsEmplaceModuleCacheEntry(filename, parent);
cjsCache.set(url, cjsModule);
return cjsModule;
};
/**
* Pre-parses a CommonJS module's exports and re-exports.
* @param {string} filename - The filename of the module.

View File

@ -22,7 +22,7 @@ const { validateString } = require('internal/validators');
const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched.
const internalFS = require('internal/fs/utils');
const path = require('path');
const { pathToFileURL, fileURLToPath } = require('internal/url');
const { pathToFileURL, fileURLToPath, URL } = require('internal/url');
const assert = require('internal/assert');
const { getOptionValue } = require('internal/options');
@ -295,13 +295,32 @@ function normalizeReferrerURL(referrerName) {
}
/**
* Coerce a URL string to a filename. This is used by the ESM loader
* to map ESM URLs to entries in the CJS module cache on a best-effort basis.
* TODO(joyeecheung): this can be rather expensive, cache the result on the
* ModuleWrap wherever we can.
* @param {string|undefined} url URL to convert to filename
* @returns {string|undefined}
*/
function urlToFilename(url) {
if (url && StringPrototypeStartsWith(url, 'file://')) {
return fileURLToPath(url);
let urlObj;
try {
urlObj = new URL(url);
} catch {
// Not a proper URL, return as-is as the cache key.
return url;
}
try {
return fileURLToPath(urlObj);
} catch {
// This is generally only possible when the URL is provided by a custom loader.
// Just use the path and ignore whether it's absolute or not as there's no such
// requirement for CJS cache.
return urlObj.pathname;
}
}
// Not a file URL, return as-is.
return url;
}

View File

@ -30,7 +30,8 @@
V(module_export_names_private_symbol, "node:module_export_names") \
V(module_circular_visited_private_symbol, "node:module_circular_visited") \
V(module_export_private_symbol, "node:module_export") \
V(module_parent_private_symbol, "node:module_parent") \
V(module_first_parent_private_symbol, "node:module_first_parent") \
V(module_last_parent_private_symbol, "node:module_last_parent") \
V(napi_type_tag, "node:napi:type_tag") \
V(napi_wrapper, "node:napi:wrapper") \
V(untransferable_object_private_symbol, "node:untransferableObject") \
@ -409,6 +410,7 @@
V(stream_count_string, "streamCount") \
V(subject_string, "subject") \
V(subjectaltname_string, "subjectaltname") \
V(synthetic_string, "synthetic") \
V(syscall_string, "syscall") \
V(table_string, "table") \
V(target_string, "target") \

View File

@ -438,6 +438,13 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
}
}
if (that->Set(context,
realm->isolate_data()->synthetic_string(),
Boolean::New(isolate, synthetic))
.IsNothing()) {
return;
}
if (!that->Set(context, realm->isolate_data()->url_string(), url)
.FromMaybe(false)) {
return;

View File

@ -0,0 +1,36 @@
'use strict';
// This tests that the require(esm) works from an imported CJS module
// when the require-d ESM is cached separately.
require('../common');
const { spawnSyncAndAssert } = require('../common/child_process');
const fixtures = require('../common/fixtures');
spawnSyncAndAssert(
process.execPath,
[
'--experimental-require-module',
'--import',
fixtures.fileURL('es-modules', 'require-esm-in-cjs-cache', 'instrument-sync.js'),
fixtures.path('es-modules', 'require-esm-in-cjs-cache', 'app.cjs'),
],
{
trim: true,
stdout: / default: { hello: 'world' }/
}
);
spawnSyncAndAssert(
process.execPath,
[
'--experimental-require-module',
'--import',
fixtures.fileURL('es-modules', 'require-esm-in-cjs-cache', 'instrument.js'),
fixtures.path('es-modules', 'require-esm-in-cjs-cache', 'app.cjs'),
],
{
trim: true,
stdout: / default: { hello: 'world' }/
}
);

View File

@ -0,0 +1 @@
console.log(require('./test.js'));

View File

@ -0,0 +1,12 @@
import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
export async function load(url, context, nextLoad) {
const result = await nextLoad(url, context);
if (result.format === 'commonjs' && !result.source) {
result.source = readFileSync(fileURLToPath(url), 'utf8');
}
return result;
}

View File

@ -0,0 +1,7 @@
import * as mod from 'node:module';
mod.registerHooks({
load(url, context, nextLoad) {
return nextLoad(url, context);
},
});

View File

@ -0,0 +1,3 @@
import * as mod from 'node:module';
mod.register(new URL('hooks.js', import.meta.url).toString());

View File

@ -0,0 +1,3 @@
{
"type": "module"
}

View File

@ -0,0 +1 @@
export default { hello: 'world' };