From 8df452888d2254b6f00afaeacf9b5b483b5107c7 Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Wed, 21 Aug 2024 16:41:51 +0200 Subject: [PATCH] module: refactor ESM loader for adding future synchronous hooks This lays the foundation for supporting synchronous hooks proposed in https://github.com/nodejs/loaders/pull/198 for ESM. In addition this corrects and adds several JSDoc comments for internal functions of the ESM loader, as well as explaining how require() for import CJS work in the special resolve/load paths. This doesn't consolidate it with import in require(esm) yet due to caching differences, which is left as a TODO. --- lib/internal/modules/esm/loader.js | 318 ++++++++++++++++-------- lib/internal/modules/esm/module_job.js | 17 +- lib/internal/modules/esm/translators.js | 37 +-- 3 files changed, 227 insertions(+), 145 deletions(-) diff --git a/lib/internal/modules/esm/loader.js b/lib/internal/modules/esm/loader.js index 1fac67191b87df..3d239f2ea5ab8b 100644 --- a/lib/internal/modules/esm/loader.js +++ b/lib/internal/modules/esm/loader.js @@ -206,13 +206,11 @@ class ModuleLoader { } async eval(source, url, isEntryPoint = false) { - const evalInstance = (url) => { - return compileSourceTextModule(url, source, this); - }; const { ModuleJob } = require('internal/modules/esm/module_job'); + const modulePromise = compileSourceTextModule(url, source, this); const module = await onImport.tracePromise(async () => { const job = new ModuleJob( - this, url, undefined, evalInstance, false, false); + this, url, undefined, modulePromise, false, false); this.loadCache.set(url, undefined, job); const { module } = await job.run(isEntryPoint); return module; @@ -230,40 +228,49 @@ class ModuleLoader { } /** - * Get a (possibly still pending) module job from the cache, - * or create one and return its Promise. 
- * @param {string} specifier The string after `from` in an `import` statement, - * or the first parameter of an `import()` - * expression - * @param {string | undefined} parentURL The URL of the module importing this - * one, unless this is the Node.js entry - * point. - * @param {Record} importAttributes Validations for the - * module import. - * @returns {Promise} The (possibly pending) module job + * Get a (possibly not yet fully linked) module job from the cache, or create one and return its Promise. + * @param {string} specifier The module request of the module to be resolved. Typically, what's + * requested by `import ''` or `import('')`. + * @param {string} [parentURL] The URL of the module where the module request is initiated. + * It's undefined if it's from the root module. + * @param {ImportAttributes} importAttributes Attributes from the import statement or expression. + * @returns {Promise} importAttributes Validations for the - * module import. - * @param {string} [parentURL] The absolute URL of the module importing this - * one, unless this is the Node.js entry point - * @param {string} [format] The format hint possibly returned by the - * `resolve` hook - * @returns {Promise} The (possibly pending) module job + * Translate a loaded module source into a ModuleWrap. This is run synchronously, + * but the translator may return the ModuleWrap in a Promise. + * @param {string} url URL of the module to be translated. + * @param {string} format Format of the module to be translated. This is used to find + * matching translators. + * @param {ModuleSource} source Source of the module to be translated. + * @param {boolean} isMain Whether the module to be translated is the entry point. 
+ * @returns {ModuleWrap | Promise} */ - #createModuleJob(url, importAttributes, parentURL, format, sync) { - const callTranslator = ({ format: finalFormat, responseURL, source }, isMain) => { - const translator = getTranslators().get(finalFormat); + #translate(url, format, source, isMain) { + this.validateLoadResult(url, format); + const translator = getTranslators().get(format); - if (!translator) { - throw new ERR_UNKNOWN_MODULE_FORMAT(finalFormat, responseURL); + if (!translator) { + throw new ERR_UNKNOWN_MODULE_FORMAT(format, url); + } + + return FunctionPrototypeCall(translator, this, url, source, isMain); + } + + /** + * Load a module and translate it into a ModuleWrap for require() in imported CJS. + * This is run synchronously, and the translator always returns a ModuleWrap synchronously. + * @param {string} url URL of the module to be translated. + * @param {object} loadContext See {@link load} + * @param {boolean} isMain Whether the module to be translated is the entry point. + * @returns {ModuleWrap} + */ + loadAndTranslateForRequireInImportedCJS(url, loadContext, isMain) { + const { format: formatFromLoad, source } = this.#loadSync(url, loadContext); + + if (formatFromLoad === 'wasm') { // require(wasm) is not supported. + throw new ERR_UNKNOWN_MODULE_FORMAT(formatFromLoad, url); + } + + if (formatFromLoad === 'module' || formatFromLoad === 'module-typescript') { + if (!getOptionValue('--experimental-require-module')) { + throw new ERR_REQUIRE_ESM(url, true); } + } - return FunctionPrototypeCall(translator, this, responseURL, source, isMain); - }; - const context = { format, importAttributes }; + let finalFormat = formatFromLoad; + if (formatFromLoad === 'commonjs') { + finalFormat = 'require-commonjs'; + } + if (formatFromLoad === 'commonjs-typescript') { + finalFormat = 'require-commonjs-typescript'; + } - const moduleProvider = sync ? 
- (url, isMain) => callTranslator(this.loadSync(url, context), isMain) : - async (url, isMain) => callTranslator(await this.load(url, context), isMain); + const wrap = this.#translate(url, finalFormat, source, isMain); + assert(wrap instanceof ModuleWrap, `Translator used for require(${url}) should not be async`); + return wrap; + } + + /** + * Load a module and translate it into a ModuleWrap for ordinary imported ESM. + * This is run asynchronously. + * @param {string} url URL of the module to be translated. + * @param {object} loadContext See {@link load} + * @param {boolean} isMain Whether the module to be translated is the entry point. + * @returns {Promise} + */ + async loadAndTranslate(url, loadContext, isMain) { + const { format, source } = await this.load(url, loadContext); + return this.#translate(url, format, source, isMain); + } + + /** + * Load a module and translate it into a ModuleWrap, and create a ModuleJob from it. + * This runs synchronously. If isForRequireInImportedCJS is true, the module should be linked + * by the time this returns. Otherwise it may still have pending module requests. + * @param {string} url The URL that was resolved for this module. + * @param {ImportAttributes} importAttributes See {@link getModuleJobForImport} + * @param {string} [parentURL] See {@link getModuleJobForImport} + * @param {string} [format] The format hint possibly returned by the `resolve` hook + * @param {boolean} isForRequireInImportedCJS Whether this module job is created for require() + * in imported CJS. 
+ * @returns {ModuleJobBase} The (possibly pending) module job + */ + #createModuleJob(url, importAttributes, parentURL, format, isForRequireInImportedCJS) { + const context = { format, importAttributes }; const isMain = parentURL === undefined; + let moduleOrModulePromise; + if (isForRequireInImportedCJS) { + moduleOrModulePromise = this.loadAndTranslateForRequireInImportedCJS(url, context, isMain); + } else { + moduleOrModulePromise = this.loadAndTranslate(url, context, isMain); + } + const inspectBrk = ( isMain && getOptionValue('--inspect-brk') @@ -456,10 +515,10 @@ class ModuleLoader { this, url, importAttributes, - moduleProvider, + moduleOrModulePromise, isMain, inspectBrk, - sync, + isForRequireInImportedCJS, ); this.loadCache.set(url, importAttributes.type, job); @@ -478,7 +537,7 @@ class ModuleLoader { */ async import(specifier, parentURL, importAttributes, isEntryPoint = false) { return onImport.tracePromise(async () => { - const moduleJob = await this.getModuleJob(specifier, parentURL, importAttributes); + const moduleJob = await this.getModuleJobForImport(specifier, parentURL, importAttributes); const { module } = await moduleJob.run(isEntryPoint); return module.getNamespace(); }, { @@ -503,37 +562,70 @@ class ModuleLoader { } /** - * Resolve the location of the module. - * @param {string} originalSpecifier The specified URL path of the module to - * be resolved. - * @param {string} [parentURL] The URL path of the module's parent. - * @param {ImportAttributes} importAttributes Attributes from the import - * statement or expression. - * @returns {{ format: string, url: URL['href'] }} + * Resolve a module request to a URL identifying the location of the module. Handles customization hooks, + * if any. + * @param {string} specifier The module request of the module to be resolved. Typically, what's + * requested by `import ''` or `import('')`. + * @param {string} [parentURL] The URL of the module where the module request is initiated. 
+ * It's undefined if it's from the root module. + * @param {ImportAttributes} importAttributes Attributes from the import statement or expression. + * @returns {Promise<{format: string, url: string}>} */ - resolve(originalSpecifier, parentURL, importAttributes) { - if (this.#customizations) { - return this.#customizations.resolve(originalSpecifier, parentURL, importAttributes); + resolve(specifier, parentURL, importAttributes) { + if (this.#customizations) { // Only has module.register hooks. + return this.#customizations.resolve(specifier, parentURL, importAttributes); } - const requestKey = this.#resolveCache.serializeKey(originalSpecifier, importAttributes); + return this.#cachedDefaultResolve(specifier, parentURL, importAttributes); + } + + /** + * Either return a cached resolution, or perform the default resolution which is synchronous, and + * cache the result. + * @param {string} specifier See {@link resolve}. + * @param {string} [parentURL] See {@link resolve}. + * @param {ImportAttributes} importAttributes See {@link resolve}. + * @returns {{ format: string, url: string }} + */ + #cachedDefaultResolve(specifier, parentURL, importAttributes) { + const requestKey = this.#resolveCache.serializeKey(specifier, importAttributes); const cachedResult = this.#resolveCache.get(requestKey, parentURL); if (cachedResult != null) { return cachedResult; } - const result = this.defaultResolve(originalSpecifier, parentURL, importAttributes); + const result = this.defaultResolve(specifier, parentURL, importAttributes); this.#resolveCache.set(requestKey, parentURL, result); return result; } /** - * Just like `resolve` except synchronous. This is here specifically to support - * `import.meta.resolve` which must happen synchronously. + * This is the default resolve step for future synchronous hooks, which incorporates asynchronous hooks + * from module.register() which are run in a blocking fashion for it to be synchronous. 
+ * @param {string} specifier See {@link resolveSync}. + * @param {{ parentURL?: string, importAttributes: ImportAttributes}} context See {@link resolveSync}. + * @returns {{ format: string, url: string }} */ - resolveSync(originalSpecifier, parentURL, importAttributes) { + #resolveAndMaybeBlockOnLoaderThread(specifier, context) { if (this.#customizations) { - return this.#customizations.resolveSync(originalSpecifier, parentURL, importAttributes); + return this.#customizations.resolveSync(specifier, context.parentURL, context.importAttributes); } - return this.defaultResolve(originalSpecifier, parentURL, importAttributes); + return this.#cachedDefaultResolve(specifier, context.parentURL, context.importAttributes); + } + + /** + * Similar to {@link resolve}, but the results are always synchronously returned. If there are any + * asynchronous resolve hooks from module.register(), it will block until the results are returned + * from the loader thread for this to be synchronous. + * This is here to support `import.meta.resolve()`, `require()` in imported CJS, and + * future synchronous hooks. + * + * TODO(joyeecheung): consolidate the cache behavior and use this in require(esm). + * @param {string} specifier See {@link resolve}. + * @param {string} [parentURL] See {@link resolve}. + * @param {ImportAttributes} [importAttributes] See {@link resolve}. + * @returns {{ format: string, url: string }} + */ + resolveSync(specifier, parentURL, importAttributes = { __proto__: null }) { + return this.#resolveAndMaybeBlockOnLoaderThread(specifier, { parentURL, importAttributes }); } /** @@ -555,41 +647,49 @@ class ModuleLoader { } /** - * Provide source that is understood by one of Node's translators. - * @param {URL['href']} url The URL/path of the module to be loaded - * @param {object} [context] Metadata about the module + * Provide source that is understood by one of Node's translators. Handles customization hooks, + * if any. 
+ * @param {string} url The URL of the module to be loaded. + * @param {object} context Metadata about the module * @returns {Promise<{ format: ModuleFormat, source: ModuleSource }>} */ async load(url, context) { + if (this.#customizations) { + return this.#customizations.load(url, context); + } + defaultLoad ??= require('internal/modules/esm/load').defaultLoad; - const result = this.#customizations ? - await this.#customizations.load(url, context) : - await defaultLoad(url, context); - this.validateLoadResult(url, result?.format); - return result; + return defaultLoad(url, context); } - loadSync(url, context) { - defaultLoadSync ??= require('internal/modules/esm/load').defaultLoadSync; - - let result = this.#customizations ? - this.#customizations.loadSync(url, context) : - defaultLoadSync(url, context); - let format = result?.format; - if (format === 'module' || format === 'module-typescript') { - throw new ERR_REQUIRE_ESM(url, true); - } - if (format === 'commonjs') { - format = 'require-commonjs'; - result = { __proto__: result, format }; - } - if (format === 'commonjs-typescript') { - format = 'require-commonjs-typescript'; - result = { __proto__: result, format }; + /** + * This is the default load step for future synchronous hooks, which incorporates asynchronous hooks + * from module.register() which are run in a blocking fashion for it to be synchronous. + * @param {string} url See {@link load} + * @param {object} context See {@link load} + * @returns {{ format: ModuleFormat, source: ModuleSource }} + */ + #loadAndMaybeBlockOnLoaderThread(url, context) { + if (this.#customizations) { + return this.#customizations.loadSync(url, context); } + defaultLoadSync ??= require('internal/modules/esm/load').defaultLoadSync; + return defaultLoadSync(url, context); + } - this.validateLoadResult(url, format); - return result; + /** + * Similar to {@link load} but this is always run synchronously. 
If there are asynchronous hooks + * from module.register(), this blocks on the loader thread for it to return synchronously. + * + * This is here to support `require()` in imported CJS and future synchronous hooks. + * + * TODO(joyeecheung): consolidate the cache behavior and use this in require(esm). + * @param {string} url See {@link load} + * @param {object} [context] See {@link load} + * @returns {{ format: ModuleFormat, source: ModuleSource }} + */ + #loadSync(url, context) { + return this.#loadAndMaybeBlockOnLoaderThread(url, context); } validateLoadResult(url, format) { diff --git a/lib/internal/modules/esm/module_job.js b/lib/internal/modules/esm/module_job.js index 2f42909e0c6f82..c3eaf9a380a110 100644 --- a/lib/internal/modules/esm/module_job.js +++ b/lib/internal/modules/esm/module_job.js @@ -8,7 +8,6 @@ const { ObjectSetPrototypeOf, PromisePrototypeThen, PromiseResolve, - ReflectApply, RegExpPrototypeExec, RegExpPrototypeSymbolReplace, SafePromiseAllReturnArrayLike, @@ -72,19 +71,17 @@ class ModuleJob extends ModuleJobBase { #loader = null; // `loader` is the Loader instance used for loading dependencies. constructor(loader, url, importAttributes = { __proto__: null }, - moduleProvider, isMain, inspectBrk, sync = false) { - const modulePromise = ReflectApply(moduleProvider, loader, [url, isMain]); - super(url, importAttributes, modulePromise, isMain, inspectBrk); + moduleOrModulePromise, isMain, inspectBrk, sync = false) { + super(url, importAttributes, moduleOrModulePromise, isMain, inspectBrk); this.#loader = loader; + // Expose the promise to the ModuleWrap directly for linking below. // `this.module` is also filled in below. 
- this.modulePromise = modulePromise; - if (sync) { - this.module = this.modulePromise; + this.module = moduleOrModulePromise; this.modulePromise = PromiseResolve(this.module); } else { - this.modulePromise = PromiseResolve(this.modulePromise); + this.modulePromise = moduleOrModulePromise; } // Promise for the list of all dependencyJobs. @@ -123,7 +120,9 @@ class ModuleJob extends ModuleJobBase { for (let idx = 0; idx < moduleRequests.length; idx++) { const { specifier, attributes } = moduleRequests[idx]; - const dependencyJobPromise = this.#loader.getModuleJob( + // TODO(joyeecheung): the resolution and loading should be done in BFS + // order to maximize parallelism. + const dependencyJobPromise = this.#loader.getModuleJobForImport( specifier, this.url, attributes, ); const modulePromise = PromisePrototypeThen(dependencyJobPromise, (job) => { diff --git a/lib/internal/modules/esm/translators.js b/lib/internal/modules/esm/translators.js index b1e7b86095c37e..e7cd0be7a91785 100644 --- a/lib/internal/modules/esm/translators.js +++ b/lib/internal/modules/esm/translators.js @@ -68,28 +68,11 @@ function getSource(url) { /** @type {import('deps/cjs-module-lexer/lexer.js').parse} */ let cjsParse; /** - * Initializes the CommonJS module lexer parser. - * If WebAssembly is available, it uses the optimized version from the dist folder. - * Otherwise, it falls back to the JavaScript version from the lexer folder. + * Initializes the CommonJS module lexer parser using the JavaScript version. + * TODO(joyeecheung): Use `require('internal/deps/cjs-module-lexer/dist/lexer').initSync()` + * when cjs-module-lexer 1.4.0 is rolled in. 
*/ -async function initCJSParse() { - if (typeof WebAssembly === 'undefined') { - initCJSParseSync(); - } else { - const { parse, init } = - require('internal/deps/cjs-module-lexer/dist/lexer'); - try { - await init(); - cjsParse = parse; - } catch { - initCJSParseSync(); - } - } -} - function initCJSParseSync() { - // TODO(joyeecheung): implement a binding that directly compiles using - // v8::WasmModuleObject::Compile() synchronously. if (cjsParse === undefined) { cjsParse = require('internal/deps/cjs-module-lexer/lexer').parse; } @@ -159,7 +142,7 @@ function loadCJSModule(module, source, url, filename, isMain) { } specifier = `${pathToFileURL(path)}`; } - const job = cascadedLoader.getModuleJobSync(specifier, url, importAttributes); + const job = cascadedLoader.getModuleJobForRequireInImportedCJS(specifier, url, importAttributes); job.runSync(); return cjsCache.get(job.url).exports; }; @@ -252,6 +235,7 @@ translators.set('commonjs-sync', function requireCommonJS(url, source, isMain) { // Handle CommonJS modules referenced by `require` calls. // This translator function must be sync, as `require` is sync. translators.set('require-commonjs', (url, source, isMain) => { + initCJSParseSync(); assert(cjsParse); return createCJSModuleWrap(url, source); @@ -268,10 +252,9 @@ translators.set('require-commonjs-typescript', (url, source, isMain) => { // Handle CommonJS modules referenced by `import` statements or expressions, // or as the initial entry point when the ESM loader handles a CommonJS entry. -translators.set('commonjs', async function commonjsStrategy(url, source, - isMain) { +translators.set('commonjs', function commonjsStrategy(url, source, isMain) { if (!cjsParse) { - await initCJSParse(); + initCJSParseSync(); } // For backward-compatibility, it's possible to return a nullish value for @@ -289,7 +272,6 @@ translators.set('commonjs', async function commonjsStrategy(url, source, // Continue regardless of error. 
} return createCJSModuleWrap(url, source, isMain, cjsLoader); - }); /** @@ -450,8 +432,9 @@ translators.set('wasm', async function(url, source) { let compiled; try { - // TODO(joyeecheung): implement a binding that directly compiles using - // v8::WasmModuleObject::Compile() synchronously. + // TODO(joyeecheung): implement a translator that just uses + // compiled = new WebAssembly.Module(source) to compile it + // synchronously. compiled = await WebAssembly.compile(source); } catch (err) { err.message = errPath(url) + ': ' + err.message;