Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add fixed status to suggestions #634

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
44 changes: 17 additions & 27 deletions src/backlinks/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,41 +10,30 @@
* governing permissions and limitations under the License.
*/

import { composeAuditURL, getPrompt, tracingFetch as fetch } from '@adobe/spacecat-shared-utils';
import { composeAuditURL, getPrompt } from '@adobe/spacecat-shared-utils';
import AhrefsAPIClient from '@adobe/spacecat-shared-ahrefs-client';
import { AbortController, AbortError } from '@adobe/fetch';
import { FirefallClient } from '@adobe/spacecat-shared-gpt-client';
import { syncSuggestions } from '../utils/data-access.js';
import { AuditBuilder } from '../common/audit-builder.js';
import { getScrapedDataForSiteId, sleep } from '../support/utils.js';
import {
getScrapedDataForSiteId,
sleep,
fetchWithTimeout,
} from '../support/utils.js';

const TIMEOUT = 3000;
export const isFixedSuggestion = async (suggestion) => {
try {
const response = await fetchWithTimeout(suggestion?.data?.url_to, console, { redirect: 'follow' });
return response.ok;
// eslint-disable-next-line @typescript-eslint/no-unused-vars
} catch (e) {
return false;
}
};

async function filterOutValidBacklinks(backlinks, log) {
const fetchWithTimeout = async (url, timeout) => {
const controller = new AbortController();
const { signal } = controller;
const id = setTimeout(() => controller.abort(), timeout);

try {
const response = await fetch(url, { signal });
clearTimeout(id);
return response;
} catch (error) {
if (error instanceof AbortError) {
log.warn(`Request to ${url} timed out after ${timeout}ms`);
return { ok: false, status: 408 };
} else {
log.warn(`Request to ${url} failed with error: ${error.message}`);
}
} finally {
clearTimeout(id);
}
return { ok: false, status: 500 };
};

const isStillBrokenBacklink = async (backlink) => {
const response = await fetchWithTimeout(backlink.url_to, TIMEOUT);
const response = await fetchWithTimeout(backlink.url_to);
if (!response.ok && response.status !== 404
&& response.status >= 400 && response.status < 500) {
log.warn(`Backlink ${backlink.url_to} returned status ${response.status}`);
Expand Down Expand Up @@ -269,6 +258,7 @@ export const convertToOpportunity = async (auditUrl, auditData, context) => {
opportunity,
newData: auditData.auditResult.brokenBacklinks,
buildKey,
isFixed: isFixedSuggestion,
mapNewSuggestion: (backlink) => ({
opportunityId: opportunity.getId(),
type: 'REDIRECT_UPDATE',
Expand Down
1 change: 1 addition & 0 deletions src/cwv/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ export async function convertToOppty(auditUrl, auditData, context, site) {
opportunity,
newData: auditData.auditResult.cwv,
buildKey,
isFixed: () => false,
mapNewSuggestion: (entry) => ({
opportunityId: opportunity.getId(),
type: 'CODE_CHANGE',
Expand Down
15 changes: 14 additions & 1 deletion src/internal-links/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,26 @@

import RUMAPIClient from '@adobe/spacecat-shared-rum-api-client';
import { internalServerError } from '@adobe/spacecat-shared-http-utils';
import { getRUMDomainkey, getRUMUrl } from '../support/utils.js';
import {
fetchWithTimeout, getRUMDomainkey, getRUMUrl,
} from '../support/utils.js';
import { AuditBuilder } from '../common/audit-builder.js';
import { noopUrlResolver } from '../common/audit.js';
import { syncSuggestions } from '../utils/data-access.js';

const INTERVAL = 30; // days
const AUDIT_TYPE = 'broken-internal-links';

export const isFixedSuggestion = async (suggestion) => {
try {
const response = await fetchWithTimeout(suggestion?.data?.url_to, console, { redirect: 'follow' });
return response.ok;
// eslint-disable-next-line @typescript-eslint/no-unused-vars
} catch (e) {
return false;
}
};

/**
* Classifies links into priority categories based on views
* High: top 25%, Medium: next 25%, Low: bottom 50%
Expand Down Expand Up @@ -151,6 +163,7 @@ export async function convertToOpportunity(auditUrl, auditData, context) {
opportunity,
newData: auditData?.auditResult?.brokenInternalLinks,
buildKey,
isFixed: isFixedSuggestion,
mapNewSuggestion: (entry) => ({
opportunityId: opportunity.getId(),
type: 'CONTENT_UPDATE',
Expand Down
13 changes: 12 additions & 1 deletion src/sitemap/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {
composeAuditURL, isArray, prependSchema, tracingFetch as fetch,
} from '@adobe/spacecat-shared-utils';
import {
extractDomainAndProtocol,
extractDomainAndProtocol, fetchWithTimeout,
getBaseUrlPagesFromSitemapContents,
getSitemapUrlsFromSitemapIndex,
getUrlWithoutPath,
Expand Down Expand Up @@ -42,6 +42,16 @@ const VALID_MIME_TYPES = Object.freeze([
'text/plain',
]);

export const isFixedSuggestion = async (suggestion) => {
try {
const response = await fetchWithTimeout(suggestion?.data?.pageUrl, console, { redirect: 'manual' });
return response.ok;
// eslint-disable-next-line @typescript-eslint/no-unused-vars
} catch (e) {
return false;
}
};

/**
* Fetches the content from a given URL.
*
Expand Down Expand Up @@ -574,6 +584,7 @@ export async function convertToOpportunity(auditUrl, auditData, context) {
opportunity,
newData: auditData.suggestions,
buildKey,
isFixed: isFixedSuggestion,
mapNewSuggestion: (issue) => ({
opportunityId: opportunity.getId(),
type: 'REDIRECT_UPDATE',
Expand Down
1 change: 1 addition & 0 deletions src/structured-data/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ export async function convertToOpportunity(auditUrl, auditData, context) {
opportunity,
newData: filteredAuditResult,
buildKey,
isFixed: () => true,
mapNewSuggestion: (data) => {
const errors = data.richResults.detectedIssues.flatMap((issue) => issue.items.flatMap((item) => item.issues.map((i) => `${i.issueMessage}`))).sort();
return {
Expand Down
23 changes: 22 additions & 1 deletion src/support/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ import URI from 'urijs';
import { JSDOM } from 'jsdom';
import { GetSecretValueCommand, SecretsManagerClient } from '@aws-sdk/client-secrets-manager';
import { ListObjectsV2Command } from '@aws-sdk/client-s3';
import { AbortController, AbortError } from '@adobe/fetch';
import { getObjectFromKey } from '../utils/s3-utils.js';

URI.preventInvalidHostname = true;

export const TIMEOUT = 3000;
// weekly pageview threshold to eliminate urls with lack of samples

export async function getRUMUrl(url) {
Expand Down Expand Up @@ -298,6 +299,26 @@ export const getScrapedDataForSiteId = async (site, context) => {
};
};

export const fetchWithTimeout = async (url, log = console, options = {}) => {
const controller = new AbortController();
const { signal } = controller;
const id = setTimeout(() => controller.abort(), TIMEOUT);

try {
const response = await fetch(url, { signal, ...options });
clearTimeout(id);
return response;
} catch (error) {
if (error instanceof AbortError) {
log.warn(`Request to ${url} timed out after ${TIMEOUT}ms`);
return { ok: false, status: 408 };
}
} finally {
clearTimeout(id);
}
return { ok: false };
};

export async function sleep(ms) {
return new Promise((resolve) => {
setTimeout(resolve, ms);
Expand Down
12 changes: 10 additions & 2 deletions src/utils/data-access.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* governing permissions and limitations under the License.
*/
import { isObject } from '@adobe/spacecat-shared-utils';
import { Suggestion } from '@adobe/spacecat-shared-data-access';

/**
* Fetches site data based on the given base URL. If no site is found for the given
Expand Down Expand Up @@ -53,16 +54,23 @@ export async function syncSuggestions({
opportunity,
newData,
buildKey,
isFixed,
mapNewSuggestion,
log,
}) {
const newDataKeys = new Set(newData.map(buildKey));
const existingSuggestions = await opportunity.getSuggestions();
// Remove outdated suggestions
await Promise.all(
existingSuggestions
.filter((existing) => !newDataKeys.has(buildKey(existing.getData())))
.map((suggestion) => suggestion.remove()),
.map(async (suggestion) => {
const isSuggestionFixed = await isFixed(suggestion);
if (isSuggestionFixed) {
suggestion.setStatus(Suggestion.STATUSES.FiXED);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it would be better to use Suggestion.bulkUpdateStatus

return suggestion.save();
}
return suggestion.remove();
}),
);

// Update existing suggestions
Expand Down
23 changes: 20 additions & 3 deletions test/utils/data-access.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,26 @@ describe('data-access', () => {
};
});

it('should remove outdated suggestions and add new ones', async () => {
it('should remove unfix outdated suggestions keep fixed ones and add the new ones', async () => {
const suggestionsData = [{ key: '1' }, { key: '2' }];
const existingSuggestions = [
{
id: '1', data: suggestionsData[0], remove: sinon.stub(), getData: sinon.stub().returns(suggestionsData[0]),
id: '1',
data: suggestionsData[0],
remove: sinon.stub(),
getData: sinon.stub().returns(suggestionsData[0]),
setStatus: (status) => sinon.stub().returns({ ...suggestionsData[0], status }),
save: sinon.stub(),
},
{
id: '2', data: suggestionsData[1], remove: sinon.stub(), getData: sinon.stub().returns(suggestionsData[1]),
id: '2',
data: suggestionsData[1],
remove: sinon.stub(),
getData: sinon.stub().returns(suggestionsData[1]),
setStatus: (status) => sinon.stub().returns({ ...suggestionsData[0], status }),
save: sinon.stub(),
}];
const isFixed = (suggestion) => suggestion.id === '1';
const newData = [{ key: '3' }, { key: '4' }];

mockOpportunity.getSuggestions.resolves(existingSuggestions);
Expand All @@ -117,6 +128,7 @@ describe('data-access', () => {
opportunity: mockOpportunity,
newData,
buildKey,
isFixed,
mapNewSuggestion,
log: mockLogger,
});
Expand All @@ -137,6 +149,8 @@ describe('data-access', () => {
key: '4',
},
}]);
expect(existingSuggestions[0].save).to.have.been.called;
expect(existingSuggestions[1].remove).to.have.been.calledOnce;
expect(mockLogger.error).to.not.have.been.called;
});

Expand Down Expand Up @@ -166,6 +180,7 @@ describe('data-access', () => {
opportunity: mockOpportunity,
newData,
buildKey,
isFixed: () => false,
mapNewSuggestion,
log: mockLogger,
});
Expand Down Expand Up @@ -193,6 +208,7 @@ describe('data-access', () => {
await syncSuggestions({
opportunity: mockOpportunity,
newData,
isFixed: () => false,
buildKey,
mapNewSuggestion,
log: mockLogger,
Expand Down Expand Up @@ -222,6 +238,7 @@ describe('data-access', () => {
await expect(syncSuggestions({
opportunity: mockOpportunity,
newData,
isFixed: () => false,
buildKey,
mapNewSuggestion,
log: mockLogger,
Expand Down
Loading