From 1feab5c05ff31980f965fea5898c382ed1551019 Mon Sep 17 00:00:00 2001
From: userquin
Date: Sat, 28 Oct 2023 15:50:48 +0200
Subject: [PATCH 1/4] feat(node): add duplicated ids and bad anchor links detection

---
 package.json                      |   1 +
 pnpm-lock.yaml                    |   7 ++
 src/node/build/build.ts           |   2 +
 src/node/build/checkAnchorRefs.ts | 113 ++++++++++++++++++++++++++++++
 4 files changed, 123 insertions(+)
 create mode 100644 src/node/build/checkAnchorRefs.ts

diff --git a/package.json b/package.json
index 2bda2e8af04e..f4b6d365fa08 100644
--- a/package.json
+++ b/package.json
@@ -192,6 +192,7 @@
     "sitemap": "^7.1.1",
     "supports-color": "^9.4.0",
     "typescript": "^5.2.2",
+    "ultrahtml": "^1.5.2",
     "vitest": "^0.34.6",
     "vue-tsc": "^1.8.19",
     "wait-on": "^7.0.1"
   }
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 0013817ca35f..603d65f51dcb 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -279,6 +279,9 @@ importers:
       typescript:
         specifier: ^5.2.2
         version: 5.2.2
+      ultrahtml:
+        specifier: ^1.5.2
+        version: 1.5.2
       vitest:
         specifier: ^0.34.6
         version: 0.34.6(supports-color@9.4.0)
@@ -4623,6 +4626,10 @@
     dev: true
     optional: true
 
+  /ultrahtml@1.5.2:
+    resolution: {integrity: sha512-qh4mBffhlkiXwDAOxvSGxhL0QEQsTbnP9BozOK3OYPEGvPvdWzvAUaXNtUSMdNsKDtuyjEbyVUPFZ52SSLhLqw==}
+    dev: true
+
   /unbox-primitive@1.0.2:
     resolution: {integrity: sha512-61pPlCD9h51VoreyJ0BReideM3MDKMKnh6+V9L08331ipq6Q8OFXZYiqP6n/tbHx4s5I9uRhcye6BrbkizkBDw==}
     dependencies:
diff --git a/src/node/build/build.ts b/src/node/build/build.ts
index b5c5097850b8..ecf7c81694d5 100644
--- a/src/node/build/build.ts
+++ b/src/node/build/build.ts
@@ -13,6 +13,7 @@ import { task } from '../utils/task'
 import { bundle } from './bundle'
 import { generateSitemap } from './generateSitemap'
 import { renderPage } from './render'
+import { checkIdsAndAnchorHrefs } from './checkAnchorRefs'
 
 export async function build(
   root?: string,
@@ -137,6 +138,7 @@ export async function build(
     if (!process.env.DEBUG) await rimraf(siteConfig.tempDir)
   }
 
+  await checkIdsAndAnchorHrefs(siteConfig)
   await generateSitemap(siteConfig)
 
   await siteConfig.buildEnd?.(siteConfig)
diff --git a/src/node/build/checkAnchorRefs.ts b/src/node/build/checkAnchorRefs.ts
new file mode 100644
index 000000000000..ad4ca487e1f7
--- /dev/null
+++ b/src/node/build/checkAnchorRefs.ts
@@ -0,0 +1,113 @@
+import type { SiteConfig } from '../config'
+import fg from 'fast-glob'
+import { task } from '../utils/task'
+import fs from 'fs-extra'
+import { parse, walkSync, ELEMENT_NODE } from 'ultrahtml'
+import { dirname, join, resolve } from 'path'
+
+export async function checkIdsAndAnchorHrefs(siteConfig: SiteConfig) {
+  await task('checking for duplicate ids and bad anchor hrefs', async () => {
+    for await (const error of collectErrors(siteConfig)) {
+      // TODO: use picocolors here
+      console.error(error)
+    }
+  })
+}
+
+/* exporting this function for testing purposes */
+export async function* collectErrors(siteConfig: SiteConfig) {
+  const outDir = siteConfig.outDir
+  const files = new Set(
+    siteConfig.pages.map((page) =>
+      `${siteConfig.rewrites.map[page] || page}`
+        .replace(/\\/g, '/')
+        .replace(/\.md$/, '.html')
+    )
+  )
+  // add public html files to the list: i.e. VP docs has public/pure.html
+  for await (const entry of fg.stream('*.html', {
+    cwd: outDir,
+    deep: 1
+  })) {
+    files.add(entry.toString().replace(/\\/g, '/'))
+  }
+  const checkHtmlExt = siteConfig.site.cleanUrls === false
+  const stream = fg.stream('**/*.html', {
+    cwd: siteConfig.outDir
+  })
+  for await (const entry of stream) {
+    const localLinks = new Set<string>()
+    const localIds = new Set<string>()
+    const localErrors: string[] = []
+    const content = parse(
+      await fs.promises.readFile(resolve(outDir, entry.toString()), 'utf8')
+    )
+    // collect id headings and href anchors
+    walkSync(content, (node) => {
+      if (node.type === ELEMENT_NODE) {
+        const id = node.attributes.id
+        if (id) {
+          if (localIds.has(id)) localErrors.push(`duplicate id="${id}"`)
+          else localIds.add(id)
+        }
+        if (node.name.toLowerCase() === 'a') {
+          const href = node.attributes.href
+          if (
+            !href ||
+            href.startsWith('http://') ||
+            href.startsWith('https://')
+          )
+            return
+          localLinks.add(href)
+        }
+      }
+    })
+    // check for local hrefs and external links
+    for (const href of localLinks) {
+      // 1) check for local heading ids
+      if (href[0] === '#') {
+        const id = href.slice(1)
+        if (!localIds.has(id))
+          localErrors.push(`missing local id for "${href}"`)
+
+        continue
+      }
+      // 2) check for external links
+      // Remove parameters and hash
+      let localLink = href.split(/[#?]/).shift()
+      if (!localLink) continue
+
+      // Append .html
+      if (checkHtmlExt) {
+        if (!localLink.endsWith('/')) {
+          localLink += 'index.html'
+        }
+        if (!localLink.endsWith('.html')) {
+          localErrors.push(`bad href link "${href}"`)
+          continue
+        }
+      } else {
+        if (localLink === '/') localLink = '/index.html'
+        if (!localLink.endsWith('.html')) localLink += '.html'
+      }
+      // Get absolute link
+      if (localLink.startsWith('.')) {
+        localLink =
+          '/' + join(dirname(entry.toString()), localLink).replace(/\\/g, '/')
+      }
+      if (!localLink.startsWith('/')) {
+        localErrors.push(`bad href link "${href}"`)
+        continue
+      }
+      localLink = localLink.slice(1)
+      if (!localLink) localLink = 'index.html'
+
+      // Check if target html page exists
+      if (!files.has(localLink)) {
+        localErrors.push(`bad href link "${href}" (missing file)`)
+      }
+    }
+    if (localErrors.length)
+      yield `\n${entry}\n${localErrors.map((e) => `\t${e}`).join('\n')}`
+  }
+}
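
Note: the checker introduced above leans entirely on ultrahtml's synchronous DOM-like API (parse, walkSync, ELEMENT_NODE). A minimal standalone sketch of that traversal pattern — not code from the patch, just an illustration of the API it relies on:

    import { ELEMENT_NODE, parse, walkSync } from 'ultrahtml'

    // parse() builds a node tree synchronously; walkSync() visits every node.
    // Element nodes expose `name` and an `attributes` record, which is all the
    // checker needs to collect ids and anchor hrefs.
    const doc = parse(
      '<h2 id="intro">Intro</h2><a href="#intro">ok</a><a href="#missing">broken</a>'
    )

    const ids = new Set<string>()
    const anchors: string[] = []
    walkSync(doc, (node) => {
      if (node.type !== ELEMENT_NODE) return
      if (node.attributes.id) ids.add(node.attributes.id)
      if (node.name.toLowerCase() === 'a' && node.attributes.href)
        anchors.push(node.attributes.href)
    })

    for (const href of anchors) {
      if (href[0] === '#' && !ids.has(href.slice(1)))
        console.error(`missing local id for "${href}"`) // fires for "#missing"
    }

The build-time check applies the same collect-then-verify pass to every emitted .html file, and additionally resolves page-relative hrefs against the rendered output before looking them up.
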
From bbb229cdc4d8257845bdf0d80a1156a34bda0fe9 Mon Sep 17 00:00:00 2001
From: userquin
Date: Sat, 28 Oct 2023 16:00:29 +0200
Subject: [PATCH 2/4] chore: rename module

---
 src/node/build/build.ts                                     | 2 +-
 .../build/{checkAnchorRefs.ts => checkIdsAndAnchorHrefs.ts} | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename src/node/build/{checkAnchorRefs.ts => checkIdsAndAnchorHrefs.ts} (97%)

diff --git a/src/node/build/build.ts b/src/node/build/build.ts
index ecf7c81694d5..83dc0ef9a525 100644
--- a/src/node/build/build.ts
+++ b/src/node/build/build.ts
@@ -13,7 +13,7 @@ import { task } from '../utils/task'
 import { bundle } from './bundle'
 import { generateSitemap } from './generateSitemap'
 import { renderPage } from './render'
-import { checkIdsAndAnchorHrefs } from './checkAnchorRefs'
+import { checkIdsAndAnchorHrefs } from './checkIdsAndAnchorHrefs'
 
 export async function build(
   root?: string,
diff --git a/src/node/build/checkAnchorRefs.ts b/src/node/build/checkIdsAndAnchorHrefs.ts
similarity index 97%
rename from src/node/build/checkAnchorRefs.ts
rename to src/node/build/checkIdsAndAnchorHrefs.ts
index ad4ca487e1f7..7fb756acd730 100644
--- a/src/node/build/checkAnchorRefs.ts
+++ b/src/node/build/checkIdsAndAnchorHrefs.ts
@@ -42,7 +42,7 @@ export async function* collectErrors(siteConfig: SiteConfig) {
     const content = parse(
       await fs.promises.readFile(resolve(outDir, entry.toString()), 'utf8')
     )
-    // collect id headings and href anchors
+    // collect ids and href anchors
     walkSync(content, (node) => {
       if (node.type === ELEMENT_NODE) {
         const id = node.attributes.id
@@ -64,7 +64,7 @@ export async function* collectErrors(siteConfig: SiteConfig) {
     })
     // check for local hrefs and external links
     for (const href of localLinks) {
-      // 1) check for local heading ids
+      // 1) check for local ids
       if (href[0] === '#') {
         const id = href.slice(1)
         if (!localIds.has(id))

From b4115a7a56be183914ad069a7b61400d11c4d263 Mon Sep 17 00:00:00 2001
From: userquin
Date: Sat, 28 Oct 2023 16:07:00 +0200
Subject: [PATCH 3/4] chore: refactor module

---
 src/node/build/checkIdsAndAnchorHrefs.ts | 48 ++++++++++++------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/src/node/build/checkIdsAndAnchorHrefs.ts b/src/node/build/checkIdsAndAnchorHrefs.ts
index 7fb756acd730..084e9d38107b 100644
--- a/src/node/build/checkIdsAndAnchorHrefs.ts
+++ b/src/node/build/checkIdsAndAnchorHrefs.ts
@@ -14,8 +14,8 @@ export async function checkIdsAndAnchorHrefs(siteConfig: SiteConfig) {
   })
 }
 
-/* exporting this function for testing purposes */
-export async function* collectErrors(siteConfig: SiteConfig) {
+// TODO: export this function for testing purposes?
+async function* collectErrors(siteConfig: SiteConfig) {
   const outDir = siteConfig.outDir
   const files = new Set(
     siteConfig.pages.map((page) =>
@@ -25,30 +25,30 @@ export async function* collectErrors(siteConfig: SiteConfig) {
     )
   )
   // add public html files to the list: i.e. VP docs has public/pure.html
-  for await (const entry of fg.stream('*.html', {
+  for await (const file of fg.stream('*.html', {
     cwd: outDir,
     deep: 1
   })) {
-    files.add(entry.toString().replace(/\\/g, '/'))
+    files.add(file.toString().replace(/\\/g, '/'))
   }
   const checkHtmlExt = siteConfig.site.cleanUrls === false
   const stream = fg.stream('**/*.html', {
     cwd: siteConfig.outDir
   })
-  for await (const entry of stream) {
-    const localLinks = new Set<string>()
-    const localIds = new Set<string>()
-    const localErrors: string[] = []
+  for await (const file of stream) {
+    const links = new Set<string>()
+    const ids = new Set<string>()
+    const errors: string[] = []
     const content = parse(
-      await fs.promises.readFile(resolve(outDir, entry.toString()), 'utf8')
+      await fs.promises.readFile(resolve(outDir, file.toString()), 'utf8')
     )
     // collect ids and href anchors
     walkSync(content, (node) => {
       if (node.type === ELEMENT_NODE) {
         const id = node.attributes.id
         if (id) {
-          if (localIds.has(id)) localErrors.push(`duplicate id="${id}"`)
-          else localIds.add(id)
+          if (ids.has(id)) errors.push(`duplicate id "${id}"`)
+          else ids.add(id)
         }
         if (node.name.toLowerCase() === 'a') {
           const href = node.attributes.href
@@ -58,17 +58,17 @@ export async function* collectErrors(siteConfig: SiteConfig) {
             href.startsWith('https://')
           )
             return
-          localLinks.add(href)
+
+          links.add(href)
         }
       }
     })
     // check for local hrefs and external links
-    for (const href of localLinks) {
+    for (const href of links) {
       // 1) check for local ids
       if (href[0] === '#') {
         const id = href.slice(1)
-        if (!localIds.has(id))
-          localErrors.push(`missing local id for "${href}"`)
+        if (!ids.has(id)) errors.push(`missing local id for "${href}"`)
 
         continue
       }
@@ -79,11 +79,11 @@ export async function* collectErrors(siteConfig: SiteConfig) {
 
       // Append .html
       if (checkHtmlExt) {
-        if (!localLink.endsWith('/')) {
+        if (localLink[localLink.length - 1] !== '/') {
           localLink += 'index.html'
         }
         if (!localLink.endsWith('.html')) {
-          localErrors.push(`bad href link "${href}"`)
+          errors.push(`bad href link "${href}"`)
           continue
         }
       } else {
@@ -91,12 +91,12 @@ export async function* collectErrors(siteConfig: SiteConfig) {
         if (!localLink.endsWith('.html')) localLink += '.html'
       }
       // Get absolute link
-      if (localLink.startsWith('.')) {
+      if (localLink[0] === '.') {
         localLink =
-          '/' + join(dirname(entry.toString()), localLink).replace(/\\/g, '/')
+          '/' + join(dirname(file.toString()), localLink).replace(/\\/g, '/')
       }
-      if (!localLink.startsWith('/')) {
-        localErrors.push(`bad href link "${href}"`)
+      if (localLink[0] !== '/') {
+        errors.push(`bad href link "${href}"`)
         continue
       }
       localLink = localLink.slice(1)
@@ -104,10 +104,10 @@ export async function* collectErrors(siteConfig: SiteConfig) {
 
       // Check if target html page exists
       if (!files.has(localLink)) {
-        localErrors.push(`bad href link "${href}" (missing file)`)
+        errors.push(`bad href link "${href}" (missing file)`)
       }
     }
-    if (localErrors.length)
-      yield `\n${entry}\n${localErrors.map((e) => `\t${e}`).join('\n')}`
+    if (errors.length)
+      yield `\n${file}\n${errors.map((e) => ` - ${e}`).join('\n')}`
   }
 }
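
Note: patch 3 leaves a TODO about whether collectErrors should stay exported for tests. If it were exported, a consuming test could be as small as the sketch below. The import paths, the fixture path and the stubbed SiteConfig are assumptions, not part of these patches; only the fields collectErrors actually reads (outDir, pages, rewrites.map, site.cleanUrls) are stubbed:

    import { expect, it } from 'vitest'
    // import paths are hypothetical; they depend on where such a test would live
    import { collectErrors } from '../src/node/build/checkIdsAndAnchorHrefs'
    import type { SiteConfig } from '../src/node/config'

    it('reports duplicate ids and bad anchor hrefs', async () => {
      // stub only what collectErrors reads; the dist folder is a prebuilt fixture
      const siteConfig = {
        outDir: '__fixtures__/broken-links/.vitepress/dist',
        pages: ['index.md', 'guide/index.md'],
        rewrites: { map: {} },
        site: { cleanUrls: false }
      } as unknown as SiteConfig

      const reports: string[] = []
      for await (const report of collectErrors(siteConfig)) reports.push(report)

      // each yielded report is "\n<file>\n" followed by one " - " line per problem
      expect(reports.join('')).toContain('duplicate id')
    })
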
From 5e3f0ba1c120e102c34889a55d91550d91821eec Mon Sep 17 00:00:00 2001
From: userquin
Date: Sat, 28 Oct 2023 16:14:21 +0200
Subject: [PATCH 4/4] chore: .

---
 src/node/build/checkIdsAndAnchorHrefs.ts | 56 ++++++++++++------------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/src/node/build/checkIdsAndAnchorHrefs.ts b/src/node/build/checkIdsAndAnchorHrefs.ts
index 084e9d38107b..03749aa33194 100644
--- a/src/node/build/checkIdsAndAnchorHrefs.ts
+++ b/src/node/build/checkIdsAndAnchorHrefs.ts
@@ -16,7 +16,7 @@ export async function checkIdsAndAnchorHrefs(siteConfig: SiteConfig) {
 
 // TODO: export this function for testing purposes?
 async function* collectErrors(siteConfig: SiteConfig) {
-  const outDir = siteConfig.outDir
+  const cwd = siteConfig.outDir
   const files = new Set(
     siteConfig.pages.map((page) =>
       `${siteConfig.rewrites.map[page] || page}`
@@ -26,50 +26,48 @@ async function* collectErrors(siteConfig: SiteConfig) {
     )
   )
   // add public html files to the list: i.e. VP docs has public/pure.html
   for await (const file of fg.stream('*.html', {
-    cwd: outDir,
+    cwd,
     deep: 1
   })) {
     files.add(file.toString().replace(/\\/g, '/'))
   }
   const checkHtmlExt = siteConfig.site.cleanUrls === false
-  const stream = fg.stream('**/*.html', {
-    cwd: siteConfig.outDir
-  })
-  for await (const file of stream) {
+  for await (const file of fg.stream('**/*.html', {
+    cwd
+  })) {
     const links = new Set<string>()
     const ids = new Set<string>()
     const errors: string[] = []
-    const content = parse(
-      await fs.promises.readFile(resolve(outDir, file.toString()), 'utf8')
-    )
     // collect ids and href anchors
-    walkSync(content, (node) => {
-      if (node.type === ELEMENT_NODE) {
-        const id = node.attributes.id
-        if (id) {
-          if (ids.has(id)) errors.push(`duplicate id "${id}"`)
-          else ids.add(id)
-        }
-        if (node.name.toLowerCase() === 'a') {
-          const href = node.attributes.href
-          if (
-            !href ||
-            href.startsWith('http://') ||
-            href.startsWith('https://')
-          )
-            return
-
-          links.add(href)
-        }
-      }
-    })
+    walkSync(
+      parse(await fs.promises.readFile(resolve(cwd, file.toString()), 'utf8')),
+      (node) => {
+        if (node.type === ELEMENT_NODE) {
+          const id = node.attributes.id
+          if (id) {
+            if (ids.has(id)) errors.push(`duplicate id "${id}"`)
+            else ids.add(id)
+          }
+          if (node.name.toLowerCase() === 'a') {
+            const href = node.attributes.href
+            if (
+              !href ||
+              href.startsWith('http://') ||
+              href.startsWith('https://')
+            )
+              return
+
+            links.add(href)
+          }
+        }
+      }
+    )
     // check for local hrefs and external links
     for (const href of links) {
       // 1) check for local ids
       if (href[0] === '#') {
-        const id = href.slice(1)
-        if (!ids.has(id)) errors.push(`missing local id for "${href}"`)
-
+        if (!ids.has(href.slice(1)))
+          errors.push(`missing local id for "${href}"`)
         continue
       }
       // 2) check for external links