/**
 * Extracts the text of the final `<lastmod>` element found in a sitemap
 * document.
 *
 * The sitemap is scanned as plain text (no XML parsing). When several
 * `<lastmod>` elements are present, the one appearing last in the document
 * wins; no date comparison is performed.
 *
 * @param sitemapText - Raw sitemap body.
 * @returns The trimmed content of the last `<lastmod>` element, or `null`
 *   when the document has no `<lastmod>` element or the last one is blank.
 */
function getLastModDate(sitemapText: string): string | null {
  // Anchor on the opening tag so the capture holds only the element's own
  // text. Without it, a lazy `(.*?)` starts at the beginning of the line and
  // swallows `<lastmod>` plus any preceding markup. `[\s\S]` lets the value
  // span line breaks in pretty-printed sitemaps.
  const lastModPattern = /<lastmod\b[^>]*>([\s\S]*?)<\/lastmod\s*>/g;

  let lastValue: string | null = null;
  for (const match of sitemapText.matchAll(lastModPattern)) {
    lastValue = match[1] ?? null;
  }

  if (lastValue === null) {
    return null;
  }

  const trimmed = lastValue.trim();
  return trimmed === '' ? null : trimmed;
}