Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

facebook fix + configurable storage/cookie archiving #277

Merged
merged 5 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
"dependencies": {
"@fortawesome/fontawesome-free": "^5.13.0",
"@ipld/car": "^5.3.1",
"@webrecorder/awp-sw": "^0.5.0",
"@webrecorder/wabac": "^2.20.3",
"@webrecorder/awp-sw": "^0.5.2",
"@webrecorder/wabac": "^2.20.5",
"auto-js-ipfs": "^2.3.0",
"browsertrix-behaviors": "^0.6.4",
"btoa": "^1.2.1",
Expand All @@ -31,7 +31,7 @@
"tsconfig-paths-webpack-plugin": "^4.1.0",
"unused-filename": "^4.0.1",
"uuid": "^8.3.2",
"warcio": "^2.3.1"
"warcio": "^2.4.2"
},
"devDependencies": {
"@typescript-eslint/eslint-plugin": "^6.15.0",
Expand Down Expand Up @@ -64,7 +64,7 @@
"webpack-extension-reloader": "^1.1.4"
},
"resolutions": {
"@webrecorder/wabac": "^2.20.3"
"@webrecorder/wabac": "^2.20.5"
},
"files": [
"src/",
Expand Down
7 changes: 3 additions & 4 deletions src/localstorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@ export function setLocalOption(name, value) {
}

// ===========================================================================
// @ts-expect-error - TS7006 - Parameter 'name' implicitly has an 'any' type.
export function getLocalOption(name) {
export function getLocalOption(name: string) : Promise<string | null> {
// @ts-expect-error - TS2339 - Property 'chrome' does not exist on type 'Window & typeof globalThis'. | TS2339 - Property 'chrome' does not exist on type 'Window & typeof globalThis'.
if (self.chrome?.storage) {
return new Promise((resolve) => {
return new Promise<string>((resolve) => {
// @ts-expect-error - TS2339 - Property 'chrome' does not exist on type 'Window & typeof globalThis'.
self.chrome.storage.local.get(name, (res) => {
resolve(res[name]);
Expand All @@ -35,7 +34,7 @@ export function getLocalOption(name) {
return Promise.resolve(localStorage.getItem(name));
}

return Promise.reject();
return Promise.reject(null);
}

// ===========================================================================
Expand Down
122 changes: 98 additions & 24 deletions src/recorder.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import { RequestResponseInfo } from "./requestresponseinfo";

import { getCustomRewriter, rewriteDASH, rewriteHLS } from "@webrecorder/wabac";
import {
getCustomRewriter,
rewriteDASH,
rewriteHLS,
removeRangeAsQuery,
} from "@webrecorder/wabac";

import { Buffer } from "buffer";

Expand All @@ -15,6 +20,7 @@ import {
BEHAVIOR_PAUSED,
BEHAVIOR_DONE,
} from "./consts";
import { getLocalOption } from "./localstorage";

// @ts-expect-error - TS2554 - Expected 0 arguments, but got 1.
const encoder = new TextEncoder("utf-8");
Expand All @@ -34,9 +40,26 @@ function sleep(time) {
return new Promise((resolve) => setTimeout(() => resolve(), time));
}

// Describes one request queued by doAsyncFetch() and later performed by
// doBackgroundFetch().
type FetchEntry = {
// URL to fetch; validated with isValidUrl() before the entry is queued.
url: string;
// Optional request headers; when present they are used as the fetch headers
// after the "range" header is deleted.
headers?: Headers;
// NOTE(review): not read or written in the visible code — confirm intended use.
rangeReplaced?: boolean;
// Sessions passed to doAsyncFetch(); stored on the entry when it is queued.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
sessions?: any[];
// Recorder pageInfo captured when the entry is queued; later passed to
// toDBRecord() and commitResource() for the fetched response.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
pageInfo?: any;

// Set to true when removeRangeAsQuery() returned a replacement URL for this entry.
rangeRemoved?: boolean;
// When true, doAsyncFetch() runs removeRangeAsQuery() on `url` before queueing.
doRangeCheck?: boolean;
// Checked by attemptFetchRedirect() together with an "opaqueredirect" response type.
redirectOnly?: boolean;
};

// ===========================================================================
class Recorder {
recordStorage = true;
archiveStorage = false;
archiveCookies = false;

_fetchQueue: FetchEntry[] = [];

constructor() {
// @ts-expect-error - TS2339 - Property 'flatMode' does not exist on type 'Recorder'.
Expand Down Expand Up @@ -79,8 +102,7 @@ class Recorder {

// @ts-expect-error - TS2339 - Property '_fetchPending' does not exist on type 'Recorder'.
this._fetchPending = new Map();
// @ts-expect-error - TS2339 - Property '_fetchQueue' does not exist on type 'Recorder'.
this._fetchQueue = [];

// @ts-expect-error - TS2339 - Property '_fetchUrls' does not exist on type 'Recorder'.
this._fetchUrls = new Set();

Expand Down Expand Up @@ -128,6 +150,13 @@ class Recorder {
this.defaultFetchOpts = {
redirect: "manual",
};

this.initOpts();
}

/**
 * Loads the user-configurable archiving options from local storage.
 *
 * An option is enabled only when its stored value is exactly "1".
 *
 * This method is invoked from the constructor without being awaited, and
 * getLocalOption() rejects when no storage backend is available. The
 * rejection is therefore handled here instead of surfacing as an unhandled
 * promise rejection; on failure both options fall back to disabled, so
 * cookies and storage are never archived unless explicitly enabled.
 */
async initOpts(): Promise<void> {
  try {
    // The two lookups are independent, so read them concurrently.
    const [cookiesOpt, storageOpt] = await Promise.all([
      getLocalOption("archiveCookies"),
      getLocalOption("archiveStorage"),
    ]);

    this.archiveCookies = cookiesOpt === "1";
    this.archiveStorage = storageOpt === "1";
  } catch {
    // No storage available: fail closed.
    this.archiveCookies = false;
    this.archiveStorage = false;
  }
}

// @ts-expect-error - TS7006 - Parameter 'autorun' implicitly has an 'any' type.
Expand Down Expand Up @@ -860,7 +889,7 @@ class Recorder {
// Queues an async fetch of `url` via doAsyncFetch(), sending the current
// page URL (this.pageInfo.url) as the Referer header and flagging the
// request as redirectOnly.
// @ts-expect-error - TS7006 - Parameter 'url' implicitly has an 'any' type. | TS7006 - Parameter 'sessions' implicitly has an 'any' type.
handleWindowOpen(url, sessions) {
// @ts-expect-error - TS2339 - Property 'pageInfo' does not exist on type 'Recorder'.
const headers = { Referer: this.pageInfo.url };
// NOTE(review): this view appears to show both sides of the diff for the
// `headers` declaration. The Headers form below matches FetchEntry.headers
// and the later `opts.headers.delete("range")` call in the background fetch,
// which a plain object does not support — confirm only this line remains.
const headers = new Headers({ Referer: this.pageInfo.url });
this.doAsyncFetch({ url, headers, redirectOnly: true }, sessions);
}

Expand Down Expand Up @@ -1450,8 +1479,12 @@ class Recorder {
//this._fetchPending.set(requestId, pending);

try {
// @ts-expect-error - TS2339 - Property 'pageInfo' does not exist on type 'Recorder'.
const data = reqresp.toDBRecord(reqresp.payload, this.pageInfo);
const data = reqresp.toDBRecord(
reqresp.payload,
// @ts-expect-error - TS2339 - Property 'pageInfo' does not exist on type 'Recorder'.
this.pageInfo,
this.archiveCookies,
);

// top-level URL is a non-GET request
if (
Expand Down Expand Up @@ -1513,7 +1546,7 @@ class Recorder {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async getStorage(sessions: any) {
// check if recording storage is allowed
if (!this.recordStorage) {
if (!this.archiveStorage) {
return null;
}

Expand Down Expand Up @@ -1576,7 +1609,7 @@ class Recorder {

reqresp.fillResponseRedirect(params);
// @ts-expect-error - TS2339 - Property 'pageInfo' does not exist on type 'Recorder'.
data = reqresp.toDBRecord(null, this.pageInfo);
data = reqresp.toDBRecord(null, this.pageInfo, this.archiveCookies);
}

reqresp.fillRequest(params);
Expand Down Expand Up @@ -1629,14 +1662,14 @@ class Recorder {
for (const { value } of params.events) {
if (value.indexOf('"kLoad"') > 0) {
const { url } = JSON.parse(value);
this.doAsyncFetch({ url }, sessions);
this.doAsyncFetch({ url, doRangeCheck: true }, sessions);
break;
}
}
}

// @ts-expect-error - TS7006 - Parameter 'request' implicitly has an 'any' type. | TS7006 - Parameter 'resp' implicitly has an 'any' type.
async attemptFetchRedirect(request, resp) {
async attemptFetchRedirect(request: FetchEntry, resp) {
if (request.redirectOnly && resp.type === "opaqueredirect") {
const abort = new AbortController();
// @ts-expect-error - TS2345 - Argument of type '{ abort: AbortController; }' is not assignable to parameter of type 'RequestInit'.
Expand Down Expand Up @@ -1671,11 +1704,19 @@ class Recorder {
}

// @ts-expect-error - TS7006 - Parameter 'request' implicitly has an 'any' type. | TS7006 - Parameter 'sessions' implicitly has an 'any' type.
doAsyncFetch(request, sessions) {
doAsyncFetch(request: FetchEntry, sessions) {
if (!request || !this.isValidUrl(request.url)) {
return;
}

if (request.doRangeCheck) {
const url = removeRangeAsQuery(request.url);
if (url) {
request.url = url;
request.rangeRemoved = true;
}
}

// @ts-expect-error - TS2339 - Property '_fetchUrls' does not exist on type 'Recorder'.
if (this._fetchUrls.has(request.url)) {
console.log("Skipping, already fetching: " + request.url);
Expand All @@ -1686,15 +1727,13 @@ class Recorder {
request.pageInfo = this.pageInfo;
request.sessions = sessions;

// @ts-expect-error - TS2339 - Property '_fetchQueue' does not exist on type 'Recorder'.
this._fetchQueue.push(request);

this.doBackgroundFetch();
}

async doBackgroundFetch() {
if (
// @ts-expect-error - TS2339 - Property '_fetchQueue' does not exist on type 'Recorder'.
!this._fetchQueue.length ||
// @ts-expect-error - TS2339 - Property '_fetchPending' does not exist on type 'Recorder'.
this._fetchPending.size >= MAX_CONCURRENT_FETCH ||
Expand All @@ -1704,8 +1743,10 @@ class Recorder {
return;
}

// @ts-expect-error - TS2339 - Property '_fetchQueue' does not exist on type 'Recorder'.
const request = this._fetchQueue.shift();
if (!request) {
return;
}

// @ts-expect-error - TS2339 - Property '_fetchUrls' does not exist on type 'Recorder'.
if (this._fetchUrls.has(request.url)) {
Expand All @@ -1732,11 +1773,9 @@ class Recorder {
// @ts-expect-error - TS2339 - Property 'defaultFetchOpts' does not exist on type 'Recorder'.
const opts = { ...this.defaultFetchOpts };

if (request.getRequestHeadersDict) {
opts.headers = request.getRequestHeadersDict().headers;
opts.headers.delete("range");
} else if (request.headers) {
if (request.headers) {
opts.headers = request.headers;
opts.headers.delete("range");
}

let resp = await fetch(request.url, opts);
Expand Down Expand Up @@ -1779,8 +1818,12 @@ class Recorder {
// @ts-expect-error - TS2339 - Property 'payload' does not exist on type 'RequestResponseInfo'.
reqresp.payload = new Uint8Array(payload);

// @ts-expect-error - TS2339 - Property 'payload' does not exist on type 'RequestResponseInfo'.
const data = reqresp.toDBRecord(reqresp.payload, request.pageInfo);
const data = reqresp.toDBRecord(
// @ts-expect-error - TS2339 - Property 'payload' does not exist on type 'RequestResponseInfo'.
reqresp.payload,
request.pageInfo,
this.archiveCookies,
);

if (data) {
await this.commitResource(data, request.pageInfo);
Expand Down Expand Up @@ -1813,9 +1856,36 @@ class Recorder {
let payload;

if (reqresp.status === 206) {
sleep(500).then(() => this.doAsyncFetch(reqresp, sessions));
sleep(500).then(() =>
this.doAsyncFetch(
{
url: reqresp.url,
headers: reqresp.getRequestHeadersDict().headers,
},
sessions,
),
);
reqresp.payload = null;
return null;
} else {
const changedUrl = removeRangeAsQuery(reqresp.url);

if (changedUrl) {
reqresp.url = changedUrl;
this.removeReqResp(reqresp.requestId);
sleep(500).then(() =>
this.doAsyncFetch(
{
url: changedUrl,
headers: reqresp.getRequestHeadersDict().headers,
rangeRemoved: true,
},
sessions,
),
);
reqresp.payload = null;
return null;
}
}

if (!this.noResponseForStatus(reqresp.status)) {
Expand Down Expand Up @@ -1888,9 +1958,13 @@ class Recorder {
if (reqresp.payload) {
// @ts-expect-error - TS2571 - Object is of type 'unknown'.
console.log(`Committing Finished ${id} - ${reqresp.url}`);

// @ts-expect-error - TS2571 - Object is of type 'unknown'. | TS2571 - Object is of type 'unknown'.
const data = reqresp.toDBRecord(reqresp.payload, pageInfo);
const data = reqresp.toDBRecord(
// @ts-expect-error - TS2571 - Object is of type 'unknown'. | TS2571 - Object is of type 'unknown'.
reqresp.payload,
pageInfo,
this.archiveCookies,
);

if (data) {
// @ts-expect-error - TS2554 - Expected 2 arguments, but got 1.
Expand Down
12 changes: 8 additions & 4 deletions src/requestresponseinfo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ const encoder = new TextEncoder();

// ===========================================================================
class RequestResponseInfo {
extraOpts: Record<string, string>;

// @ts-expect-error - TS7006 - Parameter 'requestId' implicitly has an 'any' type.
constructor(requestId) {
// @ts-expect-error - TS2339 - Property '_created' does not exist on type 'RequestResponseInfo'.
Expand Down Expand Up @@ -70,7 +72,6 @@ class RequestResponseInfo {
// @ts-expect-error - TS2339 - Property 'resourceType' does not exist on type 'RequestResponseInfo'.
this.resourceType = null;

// @ts-expect-error - TS2339 - Property 'extraOpts' does not exist on type 'RequestResponseInfo'.
this.extraOpts = {};
}

Expand Down Expand Up @@ -212,7 +213,7 @@ class RequestResponseInfo {
}

// @ts-expect-error - TS7006 - Parameter 'payload' implicitly has an 'any' type. | TS7006 - Parameter 'pageInfo' implicitly has an 'any' type.
toDBRecord(payload, pageInfo) {
toDBRecord(payload, pageInfo, allowCookies) {
// don't save 304 (todo: turn into 'revisit' style entry?)
// extra check for 206, should already be skipped
if (
Expand Down Expand Up @@ -257,7 +258,11 @@ class RequestResponseInfo {
const cookie = reqHeaders.headers.get("cookie");

if (cookie) {
respHeaders.headersDict["x-wabac-preset-cookie"] = cookie;
if (allowCookies) {
respHeaders.headersDict["x-wabac-preset-cookie"] = cookie;
} else {
reqHeaders.headers.delete("cookie");
}
}

// @ts-expect-error - TS2339 - Property 'url' does not exist on type 'RequestResponseInfo'.
Expand Down Expand Up @@ -312,7 +317,6 @@ class RequestResponseInfo {
mime,
respHeaders: respHeaders.headersDict,
reqHeaders: reqHeaders.headersDict,
// @ts-expect-error - TS2339 - Property 'extraOpts' does not exist on type 'RequestResponseInfo'.
extraOpts: this.extraOpts,
};

Expand Down
Loading
Loading