Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add fixed status to suggestions #634

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
35 changes: 9 additions & 26 deletions src/backlinks/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,40 +10,22 @@
* governing permissions and limitations under the License.
*/

import { composeAuditURL, getPrompt, tracingFetch as fetch } from '@adobe/spacecat-shared-utils';
import { composeAuditURL, getPrompt } from '@adobe/spacecat-shared-utils';
import AhrefsAPIClient from '@adobe/spacecat-shared-ahrefs-client';
import { AbortController, AbortError } from '@adobe/fetch';
import { FirefallClient } from '@adobe/spacecat-shared-gpt-client';
import { syncSuggestions } from '../utils/data-access.js';
import { AuditBuilder } from '../common/audit-builder.js';
import { getScrapedDataForSiteId, sleep } from '../support/utils.js';

const TIMEOUT = 3000;
import {
getScrapedDataForSiteId,
isFixedURL,
sleep,
fetchWithTimeout, TIMEOUT,
} from '../support/utils.js';

async function filterOutValidBacklinks(backlinks, log) {
const fetchWithTimeout = async (url, timeout) => {
const controller = new AbortController();
const { signal } = controller;
const id = setTimeout(() => controller.abort(), timeout);

try {
const response = await fetch(url, { signal });
clearTimeout(id);
return response;
} catch (error) {
if (error instanceof AbortError) {
log.warn(`Request to ${url} timed out after ${timeout}ms`);
return { ok: false, status: 408 };
}
} finally {
clearTimeout(id);
}
return null;
};

const isStillBrokenBacklink = async (backlink) => {
try {
const response = await fetchWithTimeout(backlink.url_to, TIMEOUT);
const response = await fetchWithTimeout(backlink.url_to, TIMEOUT, log);
if (!response.ok && response.status !== 404
&& response.status >= 400 && response.status < 500) {
log.warn(`Backlink ${backlink.url_to} returned status ${response.status}`);
Expand Down Expand Up @@ -272,6 +254,7 @@ export const convertToOpportunity = async (auditUrl, auditData, context) => {
opportunity,
newData: auditData.auditResult.brokenBacklinks,
buildKey,
isFixed: isFixedURL,
mapNewSuggestion: (backlink) => ({
opportunityId: opportunity.getId(),
type: 'REDIRECT_UPDATE',
Expand Down
1 change: 1 addition & 0 deletions src/cwv/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ export async function convertToOppty(auditUrl, auditData, context, site) {
opportunity,
newData: auditData.auditResult.cwv,
buildKey,
isFixed: () => false,
mapNewSuggestion: (entry) => ({
opportunityId: opportunity.getId(),
type: 'CODE_CHANGE',
Expand Down
3 changes: 2 additions & 1 deletion src/internal-links/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import RUMAPIClient from '@adobe/spacecat-shared-rum-api-client';
import { internalServerError } from '@adobe/spacecat-shared-http-utils';
import { getRUMDomainkey, getRUMUrl } from '../support/utils.js';
import { getRUMDomainkey, getRUMUrl, isFixedURL } from '../support/utils.js';
import { AuditBuilder } from '../common/audit-builder.js';
import { noopUrlResolver } from '../common/audit.js';
import { syncSuggestions } from '../utils/data-access.js';
Expand Down Expand Up @@ -151,6 +151,7 @@ export async function convertToOpportunity(auditUrl, auditData, context) {
opportunity,
newData: auditData?.auditResult?.brokenInternalLinks,
buildKey,
isFixed: isFixedURL,
mapNewSuggestion: (entry) => ({
opportunityId: opportunity.getId(),
type: 'CONTENT_UPDATE',
Expand Down
3 changes: 2 additions & 1 deletion src/sitemap/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import {
extractDomainAndProtocol,
getBaseUrlPagesFromSitemapContents,
getSitemapUrlsFromSitemapIndex,
getUrlWithoutPath,
getUrlWithoutPath, isFixedURL,
toggleWWW,
} from '../support/utils.js';
import { AuditBuilder } from '../common/audit-builder.js';
Expand Down Expand Up @@ -574,6 +574,7 @@ export async function convertToOpportunity(auditUrl, auditData, context) {
opportunity,
newData: auditData.suggestions,
buildKey,
isFixed: isFixedURL,
mapNewSuggestion: (issue) => ({
opportunityId: opportunity.getId(),
type: 'REDIRECT_UPDATE',
Expand Down
1 change: 1 addition & 0 deletions src/structured-data/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ export async function convertToOpportunity(auditUrl, auditData, context) {
opportunity,
newData: filteredAuditResult,
buildKey,
isFixed: () => true,
mapNewSuggestion: (data) => {
const errors = data.richResults.detectedIssues.flatMap((issue) => issue.items.flatMap((item) => item.issues.map((i) => `${i.issueMessage}`))).sort();
return {
Expand Down
33 changes: 32 additions & 1 deletion src/support/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ import URI from 'urijs';
import { JSDOM } from 'jsdom';
import { GetSecretValueCommand, SecretsManagerClient } from '@aws-sdk/client-secrets-manager';
import { ListObjectsV2Command } from '@aws-sdk/client-s3';
import { AbortController, AbortError } from '@adobe/fetch';
import { getObjectFromKey } from '../utils/s3-utils.js';

URI.preventInvalidHostname = true;

export const TIMEOUT = 3000;
// weekly pageview threshold to eliminate urls with lack of samples

export async function getRUMUrl(url) {
Expand Down Expand Up @@ -298,6 +299,36 @@ export const getScrapedDataForSiteId = async (site, context) => {
};
};

export const fetchWithTimeout = async (url, timeout, log = console) => {
const controller = new AbortController();
const { signal } = controller;
const id = setTimeout(() => controller.abort(), timeout);

try {
const response = await fetch(url, { signal });
clearTimeout(id);
return response;
} catch (error) {
if (error instanceof AbortError) {
log.warn(`Request to ${url} timed out after ${timeout}ms`);
return { ok: false, status: 408 };
}
} finally {
clearTimeout(id);
}
return null;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of returning null we could return { ok: false } to avoid exceptions

};

export const isFixedURL = async (backlink) => {
try {
const response = await fetchWithTimeout(backlink.url_to, TIMEOUT);
return response.ok;
// eslint-disable-next-line @typescript-eslint/no-unused-vars
} catch (e) {
return false;
}
};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might not work for other audits than backlinks right now.

Also we could introduce an expectedStatusCode, for broken backlinks it should be 301 and for sitemap the url is fixed when it is 200.

Suggested change
export const isFixedURL = async (backlink) => {
try {
const response = await fetchWithTimeout(backlink.url_to, TIMEOUT);
return response.ok;
// eslint-disable-next-line @typescript-eslint/no-unused-vars
} catch (e) {
return false;
}
};
export const isFixedURL = async (url, expectedStatusCode = 200) => {
try {
const response = await fetchWithTimeout(url, TIMEOUT);
return response.status === expectedStatusCode;
// eslint-disable-next-line @typescript-eslint/no-unused-vars
} catch (e) {
return false;
}
};


export async function sleep(ms) {
return new Promise((resolve) => {
setTimeout(resolve, ms);
Expand Down
12 changes: 10 additions & 2 deletions src/utils/data-access.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* governing permissions and limitations under the License.
*/
import { isObject } from '@adobe/spacecat-shared-utils';
import { Suggestion } from '@adobe/spacecat-shared-data-access';

/**
* Fetches site data based on the given base URL. If no site is found for the given
Expand Down Expand Up @@ -53,16 +54,23 @@ export async function syncSuggestions({
opportunity,
newData,
buildKey,
isFixed,
mapNewSuggestion,
log,
}) {
const newDataKeys = new Set(newData.map(buildKey));
const existingSuggestions = await opportunity.getSuggestions();
// Remove outdated suggestions
await Promise.all(
existingSuggestions
.filter((existing) => !newDataKeys.has(buildKey(existing.getData())))
.map((suggestion) => suggestion.remove()),
.map(async (suggestion) => {
const isSuggestionFixed = await isFixed(suggestion);
if (isSuggestionFixed) {
suggestion.setStatus(Suggestion.STATUSES.FiXED);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it would be better to use Suggestion.bulkUpdateStatus

return suggestion.save();
}
return suggestion.remove();
}),
);

// Update existing suggestions
Expand Down
23 changes: 20 additions & 3 deletions test/utils/data-access.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,26 @@ describe('data-access', () => {
};
});

it('should remove outdated suggestions and add new ones', async () => {
it('should remove unfix outdated suggestions keep fixed ones and add the new ones', async () => {
const suggestionsData = [{ key: '1' }, { key: '2' }];
const existingSuggestions = [
{
id: '1', data: suggestionsData[0], remove: sinon.stub(), getData: sinon.stub().returns(suggestionsData[0]),
id: '1',
data: suggestionsData[0],
remove: sinon.stub(),
getData: sinon.stub().returns(suggestionsData[0]),
setStatus: (status) => sinon.stub().returns({ ...suggestionsData[0], status }),
save: sinon.stub(),
},
{
id: '2', data: suggestionsData[1], remove: sinon.stub(), getData: sinon.stub().returns(suggestionsData[1]),
id: '2',
data: suggestionsData[1],
remove: sinon.stub(),
getData: sinon.stub().returns(suggestionsData[1]),
setStatus: (status) => sinon.stub().returns({ ...suggestionsData[0], status }),
save: sinon.stub(),
}];
const isFixed = (suggestion) => suggestion.id === '1';
const newData = [{ key: '3' }, { key: '4' }];

mockOpportunity.getSuggestions.resolves(existingSuggestions);
Expand All @@ -117,6 +128,7 @@ describe('data-access', () => {
opportunity: mockOpportunity,
newData,
buildKey,
isFixed,
mapNewSuggestion,
log: mockLogger,
});
Expand All @@ -137,6 +149,8 @@ describe('data-access', () => {
key: '4',
},
}]);
expect(existingSuggestions[0].save).to.have.been.called;
expect(existingSuggestions[1].remove).to.have.been.calledOnce;
expect(mockLogger.error).to.not.have.been.called;
});

Expand Down Expand Up @@ -166,6 +180,7 @@ describe('data-access', () => {
opportunity: mockOpportunity,
newData,
buildKey,
isFixed: () => false,
mapNewSuggestion,
log: mockLogger,
});
Expand Down Expand Up @@ -193,6 +208,7 @@ describe('data-access', () => {
await syncSuggestions({
opportunity: mockOpportunity,
newData,
isFixed: () => false,
buildKey,
mapNewSuggestion,
log: mockLogger,
Expand Down Expand Up @@ -222,6 +238,7 @@ describe('data-access', () => {
await expect(syncSuggestions({
opportunity: mockOpportunity,
newData,
isFixed: () => false,
buildKey,
mapNewSuggestion,
log: mockLogger,
Expand Down