Skip to content

Commit

Permalink
Merge pull request #434 from GSA/lc/1358-sitemap-lastmod
Browse files Browse the repository at this point in the history
1358 - Prototype sitemap.xml lastmod field
  • Loading branch information
luke-at-flexion authored Feb 14, 2025
2 parents 33263d1 + 376340a commit b63068e
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 0 deletions.
4 changes: 4 additions & 0 deletions entities/core-result.entity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,10 @@ export class CoreResult {
@Expose({ name: 'sitemap_xml_pdf_count' })
sitemapXmlPdfCount?: number;

// Text of a <lastmod> element taken from the scanned sitemap.xml, or null
// when the sitemap scan failed or produced no value. Stored as a nullable
// column. NOTE(review): marked @Exclude() rather than @Expose() like the
// neighbouring sitemap fields — presumably so it is kept out of serialized
// output while the feature is a prototype; confirm before exposing it.
@Column({ nullable: true })
@Exclude()
sitemapXmlLastMod?: string;

@Column({ nullable: true })
@Expose({ name: 'third_party_service_domains' })
@Transform(({ value }: { value: string }) => {
Expand Down
1 change: 1 addition & 0 deletions entities/scan-data.entity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ export type SitemapXmlScan = {
sitemapXmlFinalUrlMimeType: string;
sitemapXmlStatusCode: number;
sitemapXmlDetected: boolean;
sitemapXmlLastMod?: string;
};

export type NotFoundScan = {
Expand Down
1 change: 1 addition & 0 deletions libs/core-scanner/src/pages/sitemap-xml.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ describe('sitemap-xml scanner', () => {
sitemapXmlFinalUrlLive: true,
sitemapTargetUrlRedirects: true,
sitemapXmlFinalUrlMimeType: 'text/xml',
sitemapXmlLastMod: null,
sitemapXmlStatusCode: 200,
sitemapXmlDetected: true,
},
Expand Down
7 changes: 7 additions & 0 deletions libs/core-scanner/src/pages/sitemap-xml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,18 @@ const buildSitemapResult = async (
sitemapXmlFinalUrlFilesize: Buffer.byteLength(sitemapText, 'utf-8'),
sitemapXmlCount: await getUrlCount(sitemapPage),
sitemapXmlPdfCount: getPdfCount(sitemapText),
sitemapXmlLastMod: getLastModDate(sitemapText),
}
: {}),
};
};

/**
 * Extracts the value of the final `<lastmod>` element in a sitemap document.
 *
 * "Final" means last in document order; values are not parsed or compared as
 * dates, so this is not necessarily the most recent timestamp in the sitemap.
 *
 * @param sitemapText - Raw text of the sitemap.xml response.
 * @returns The trimmed text content of the last `<lastmod>` element, or
 *   `null` when the document contains no `<lastmod>` element.
 */
function getLastModDate(sitemapText: string): string | null {
  // `[\s\S]` instead of `.` so that pretty-printed elements whose value sits
  // on its own line (`<lastmod>\n  2025-02-14\n</lastmod>`) are still matched.
  const re = /<lastmod>([\s\S]*?)<\/lastmod>/g;

  // Walk the matches lazily and remember only the latest one; large sitemaps
  // can hold tens of thousands of entries and none but the last is needed.
  let lastMod: string | null = null;
  for (const match of sitemapText.matchAll(re)) {
    // Strip indentation/newlines surrounding the value inside the element.
    lastMod = match[1].trim();
  }
  return lastMod;
}

const getUrlCount = async (page: Page) => {
const urlCount = await page.evaluate(() => {
const urls = [...document.getElementsByTagName('url')];
Expand Down
2 changes: 2 additions & 0 deletions libs/database/src/core-results/core-result.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ export class CoreResultService {
sitemap.sitemapXmlFinalUrlMimeType;
coreResult.sitemapXmlStatusCode = sitemap.sitemapXmlStatusCode;
coreResult.sitemapXmlDetected = sitemap.sitemapXmlDetected;
coreResult.sitemapXmlLastMod = sitemap.sitemapXmlLastMod;
} else {
logger.error({
msg: pages.sitemapXml.error,
Expand All @@ -295,6 +296,7 @@ export class CoreResultService {
coreResult.sitemapXmlFinalUrlMimeType = null;
coreResult.sitemapXmlStatusCode = null;
coreResult.sitemapXmlDetected = null;
coreResult.sitemapXmlLastMod = null;
}
}

Expand Down

0 comments on commit b63068e

Please sign in to comment.