Skip to content

Commit

Permalink
Merge pull request #116 from sillsdev/DownloadVideo
Browse files Browse the repository at this point in the history
fix: Download Notion-hosted videos so the links don't expire (#110)
  • Loading branch information
andrew-polk authored Dec 17, 2024
2 parents f1176c5 + ea4e865 commit b8e6e9c
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 58 deletions.
23 changes: 23 additions & 0 deletions src/assets.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import * as fs from "fs-extra";
import * as Path from "path";
import { verbose } from "./log";

// The kinds of assets handled by this module; each member's value is the
// lowercase name of the kind.
export enum AssetType {
  // A picture asset.
  Image = "image",
  // A video asset.
  Video = "video",
}

/**
 * Writes an asset (image, video, ...) to `path`, creating the parent
 * directory first when the file does not exist yet.
 *
 * The write itself is asynchronous and is NOT awaited: this function returns
 * before the data is guaranteed to be on disk.
 *
 * @param path - Destination file path of the asset.
 * @param buffer - The complete contents of the asset.
 */
export function writeAsset(path: string, buffer: Buffer): void {
  // Note: it's tempting to not spend time writing this out if we already have
  // it from a previous run. But we don't really know it's the same. A) it
  // could just have the same name, B) it could have been previously
  // unlocalized (and thus filled with a copy of the primary language asset)
  // while it is now localized.
  if (fs.pathExistsSync(path)) {
    verbose("Replacing asset " + path);
  } else {
    verbose("Adding asset " + path);
    fs.mkdirsSync(Path.dirname(path));
  }
  // Use end() rather than write(): it writes the buffer and then finishes the
  // stream so the underlying file descriptor gets closed. With a bare write()
  // the stream is never ended, and every asset leaves a descriptor open until
  // the process exits.
  fs.createWriteStream(path).end(buffer); // async but we're not waiting
}
33 changes: 10 additions & 23 deletions src/images.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import * as fs from "fs-extra";
import FileType, { FileTypeResult } from "file-type";
import axios from "axios";
import * as Path from "path";
import { makeImagePersistencePlan } from "./MakeImagePersistencePlan";
import { warning, logDebug, verbose, info } from "./log";
import { ListBlockChildrenResponseResult } from "notion-to-md/build/types";
Expand All @@ -10,6 +9,7 @@ import {
IDocuNotionContextPageInfo,
IPlugin,
} from "./plugins/pluginTypes";
import { writeAsset } from "./assets";

// We handle several things here:
// 1) copy images locally instead of leaving them in Notion
Expand Down Expand Up @@ -158,11 +158,13 @@ async function readPrimaryImage(imageSet: ImageSet) {
}

async function saveImage(imageSet: ImageSet): Promise<void> {
writeImageIfNew(imageSet.primaryFileOutputPath!, imageSet.primaryBuffer!);
const path = imageSet.primaryFileOutputPath!;
imageWasSeen(path);
writeAsset(path, imageSet.primaryBuffer!);

for (const localizedImage of imageSet.localizedUrls) {
let buffer = imageSet.primaryBuffer!;
// if we have a urls for the localized screenshot, download it
// if we have a url for the localized screenshot, download it
if (localizedImage?.url.length > 0) {
verbose(`Retrieving ${localizedImage.iso632Code} version...`);
const response = await fetch(localizedImage.url);
Expand All @@ -180,30 +182,15 @@ async function saveImage(imageSet: ImageSet): Promise<void> {
imageSet.pageInfo!.relativeFilePathToFolderContainingPage
}`;

writeImageIfNew(
(directory + "/" + imageSet.outputFileName!).replaceAll("//", "/"),
buffer
const newPath = (directory + "/" + imageSet.outputFileName!).replaceAll(
"//",
"/"
);
imageWasSeen(newPath);
writeAsset(newPath, buffer);
}
}

/**
 * Records that the image at `path` was seen and writes `buffer` to it,
 * creating the parent directory first when the file does not exist yet.
 *
 * The write itself is asynchronous and is NOT awaited: this function returns
 * before the data is guaranteed to be on disk.
 *
 * @param path - Destination file path of the image.
 * @param buffer - The complete contents of the image.
 */
function writeImageIfNew(path: string, buffer: Buffer) {
  imageWasSeen(path);

  // Note: it's tempting to not spend time writing this out if we already have
  // it from a previous run. But we don't really know it's the same. A) it
  // could just have the same name, B) it could have been previously
  // unlocalized (and thus filled with a copy of the primary language image)
  // while it is now localized.
  if (fs.pathExistsSync(path)) {
    verbose("Replacing image " + path);
  } else {
    verbose("Adding image " + path);
    fs.mkdirsSync(Path.dirname(path));
  }
  // Use end() rather than write(): it writes the buffer and then finishes the
  // stream so the underlying file descriptor gets closed. With a bare write()
  // the stream is never ended, and every image leaves a descriptor open until
  // the process exits.
  fs.createWriteStream(path).end(buffer); // async but we're not waiting
}

export function parseImageBlock(image: any): ImageSet {
if (!locales) throw Error("Did you call initImageHandling()?");
const imageSet: ImageSet = {
Expand Down
60 changes: 55 additions & 5 deletions src/plugins/VideoTransformer.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,28 @@
import * as fs from "fs-extra";
import { setLogLevel } from "../log";
import { NotionBlock } from "../types";
import { standardVideoTransformer } from "./VideoTransformer";
import { blocksToMarkdown } from "./pluginTestRun";
import { blocksToMarkdown, kTemporaryTestDirectory } from "./pluginTestRun";

// Before any test runs, make sure the temporary test directory exists and is
// empty. Failures are only logged, not rethrown.
beforeAll(async () => {
  try {
    const alreadyThere = await fs.pathExists(kTemporaryTestDirectory);
    if (!alreadyThere) {
      // Nothing left over from an earlier run: just create the directory.
      await fs.mkdirp(kTemporaryTestDirectory);
      return;
    }
    // Clear out whatever an earlier run left behind.
    await fs.emptyDir(kTemporaryTestDirectory);
  } catch (setupError) {
    console.error("Error in beforeAll:", setupError);
  }
});

// After the last test, delete the temporary test directory and everything in
// it. Failures are only logged, not rethrown.
afterAll(async () => {
  try {
    await fs.remove(kTemporaryTestDirectory);
  } catch (cleanupError) {
    console.error("Error in afterAll:", cleanupError);
  }
});

test("youtube embedded", async () => {
const config = { plugins: [standardVideoTransformer] };
Expand Down Expand Up @@ -89,6 +110,9 @@ test("video link, not embedded", async () => {
test("direct upload to to Notion (embedded)", async () => {
setLogLevel("verbose");
const config = { plugins: [standardVideoTransformer] };

const fileName1 = "first_video.mp4";
const fileName2 = "second_video.mp4";
const result = await blocksToMarkdown(config, [
{
object: "block",
Expand All @@ -103,13 +127,39 @@ test("direct upload to to Notion (embedded)", async () => {
caption: [],
type: "file",
file: {
url: "https://s3.us-west-2.amazonaws.com/secure.notion-static.com/f6bc4746-011e-2124-86ca-ed4337d70891/people_fre_motionAsset_p3.mp4?X-Blah-blah",
url: `https://s3.us-west-2.amazonaws.com/secure.notion-static.com/f6bc4746-011e-2124-86ca-ed4337d70891/${fileName1}?X-Blah-blah`,
},
},
} as unknown as NotionBlock,
{
object: "block",
id: "12f7db3b-4412-4be9-a3f7-6ac423fee94b",
parent: {
type: "page_id",
page_id: "edaffeb2-ece8-4d44-976f-351e6b5757bb",
},

type: "video",
video: {
caption: [],
type: "file",
file: {
url: `https://s3.us-west-2.amazonaws.com/secure.notion-static.com/f6bc4746-011e-2124-86ca-ed4337d70891/${fileName2}?X-Blah-blah`,
},
},
} as unknown as NotionBlock,
]);

expect(result).toContain(`import ReactPlayer from "react-player";`);
expect(result).toContain(
`<ReactPlayer controls url="https://s3.us-west-2.amazonaws.com/secure.notion-static.com/f6bc4746-011e-2124-86ca-ed4337d70891/people_fre_motionAsset_p3.mp4?X-Blah-blah" />`
);
expect(result).toContain(`import video1 from "./${fileName1}";`);
expect(result).toContain(`import video2 from "./${fileName2}";`);
expect(result).toContain(`<ReactPlayer controls url={video1} />`);
expect(result).toContain(`<ReactPlayer controls url={video2} />`);

// Wait half a second for the files to be written
await new Promise(resolve => setTimeout(resolve, 500));

// We should have actually created files in "tempTestFileDir/"
expect(await fs.pathExists("tempTestFileDir/" + fileName1)).toBe(true);
expect(await fs.pathExists("tempTestFileDir/" + fileName2)).toBe(true);
});
112 changes: 83 additions & 29 deletions src/plugins/VideoTransformer.ts
Original file line number Diff line number Diff line change
@@ -1,42 +1,96 @@
import * as Path from "path";
import { VideoBlockObjectResponse } from "@notionhq/client/build/src/api-endpoints";
import { ListBlockChildrenResponseResult } from "notion-to-md/build/types";
import { IDocuNotionContext, IPlugin } from "./pluginTypes";
import { warning } from "../log";
import { NotionBlock } from "../types";
import { writeAsset } from "../assets";

export const standardVideoTransformer: IPlugin = {
name: "video",
notionToMarkdownTransforms: [
{
type: "video",
getStringFromBlock: (
context: IDocuNotionContext,
block: NotionBlock
): string => {
const video = (block as VideoBlockObjectResponse).video;
let url = "";
switch (video.type) {
case "external":
url = video.external.url;
break;
case "file":
url = video.file.url;
break;
default:
// video.type can only be "external" or "file" as of the writing of this code, so typescript
// isn't happy trying to turn video.type into a string. But this default in our switch is
// just attempting some future-proofing. Thus the strange typing/stringifying below.
warning(
`[standardVideoTransformer] Found Notion "video" block with type ${JSON.stringify(
(video as any).type
)}. The best docu-notion can do for now is ignore it.`
);
return "";
break;
}

context.imports.push(`import ReactPlayer from "react-player";`);
return `<ReactPlayer controls url="${url}" />`;
},
getStringFromBlock: (context: IDocuNotionContext, block: NotionBlock) =>
markdownToMDVideoTransformer(block, context),
},
],
};

/**
 * Turns a Notion "video" block into a `<ReactPlayer>` element.
 *
 * - "external" videos keep their original url (emitted as a quoted string).
 * - "file" videos (hosted by Notion) are downloaded, and the element refers
 *   to the value returned by `downloadVideoAndConvertUrl`.
 * - Any other video type is reported with a warning and yields "".
 *
 * On success, the `react-player` import is appended to `context.imports`.
 *
 * @param block - The Notion block; treated as a video block.
 * @param context - The current context; its `imports` list is appended to.
 * @returns The `<ReactPlayer>` markup, or "" for an unsupported video type.
 */
async function markdownToMDVideoTransformer(
  block: ListBlockChildrenResponseResult,
  context: IDocuNotionContext
): Promise<string> {
  const { id: blockId, video } = block as VideoBlockObjectResponse;

  let urlAttribute: string;
  if (video.type === "external") {
    urlAttribute = `"${video.external.url}"`;
  } else if (video.type === "file") {
    // The url we get for a Notion-hosted asset expires after an hour, so we have to download it locally.
    urlAttribute = await downloadVideoAndConvertUrl(
      context,
      video.file.url,
      blockId
    );
  } else {
    // video.type can only be "external" or "file" as of the writing of this code, so typescript
    // isn't happy trying to turn video.type into a string. But this final branch is
    // just attempting some future-proofing. Thus the strange typing/stringifying below.
    warning(
      `[standardVideoTransformer] Found Notion "video" block with type ${JSON.stringify(
        (video as any).type
      )}. The best docu-notion can do for now is ignore it.`
    );
    return "";
  }

  context.imports.push(`import ReactPlayer from "react-player";`);
  return `<ReactPlayer controls url=${urlAttribute} />`;
}

// ENHANCE: One day, we may want to allow for options of where to place the files, how
// to name them, etc. Or we could at least follow the image options.
// But for now, I'm just trying to fix the bug that Notion-hosted videos don't work at all.
/**
 * Downloads a Notion-hosted video into the directory containing the page's
 * markdown and registers an import for it in `context.imports`.
 *
 * The file is named after the last path segment of the url (query string and
 * fragment ignored); if the url has no such segment, the block id is used.
 *
 * @param context - Supplies `pageInfo.directoryContainingMarkdown` (the
 *   download location) and `imports` (to which the video import is appended).
 * @param notionVideoUrl - The url of the video to download.
 * @param blockId - Id of the video block; the fallback file name.
 * @returns A JSX expression, e.g. `{video1}`, naming the imported video.
 * @throws Error if the server answers the download with a non-success status.
 */
async function downloadVideoAndConvertUrl(
  context: IDocuNotionContext,
  notionVideoUrl: string,
  blockId: string
): Promise<string> {
  // Get the file name from the url. Ignore query parameters and fragments.
  const urlWithoutQuery = notionVideoUrl.split("?")[0].split("#")[0];
  let newFileName = urlWithoutQuery.split("/").pop();

  if (!newFileName) {
    // If something went wrong, fall back to the block ID.
    // But at least try to get the extension from the url. Only accept it when
    // it looks like a plain extension: the text after the last "." may well
    // contain "/" (or be the entire url when there is no "." at all), which
    // would otherwise end up in the file path.
    const candidate = urlWithoutQuery.split(".").pop() || "";
    const extension = /^[A-Za-z0-9]+$/.test(candidate) ? "." + candidate : "";
    newFileName = blockId + extension;
  }

  const newPath = Path.posix.join(
    context.pageInfo.directoryContainingMarkdown,
    newFileName
  );

  const response = await fetch(notionVideoUrl);
  if (!response.ok) {
    // Don't save an error page as if it were the video. The query string is
    // left out of the message since it is not needed to identify the video.
    throw new Error(
      `Failed to download video ${urlWithoutQuery}: ${response.status} ${response.statusText}`
    );
  }
  const buffer = Buffer.from(await response.arrayBuffer());
  writeAsset(newPath, buffer);

  // Add an import statement for the video file.
  // Otherwise, the docusaurus build won't include the video file in the build.
  // Number the import after the video imports already registered.
  const countVideoImports = context.imports.filter(i =>
    /import video\d+/.test(i)
  ).length;
  const importName = `video${countVideoImports + 1}`;
  context.imports.push(`import ${importName} from "./${newFileName}";`);

  return `{${importName}}`;
}
4 changes: 3 additions & 1 deletion src/plugins/pluginTestRun.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import { NotionBlock } from "../types";
import { convertInternalUrl } from "./internalLinks";
import { numberChildrenIfNumberedList } from "../pull";

// Relative path of the scratch directory for plugin tests; blocksToMarkdown
// uses it as pageInfo.directoryContainingMarkdown.
export const kTemporaryTestDirectory = "tempTestFileDir";

export async function blocksToMarkdown(
config: IDocuNotionConfig,
blocks: NotionBlock[],
Expand Down Expand Up @@ -49,7 +51,7 @@ export async function blocksToMarkdown(

//TODO might be needed for some tests, e.g. the image transformer...
pageInfo: {
directoryContainingMarkdown: "not yet",
directoryContainingMarkdown: kTemporaryTestDirectory,
relativeFilePathToFolderContainingPage: "not yet",
slug: "not yet",
},
Expand Down

0 comments on commit b8e6e9c

Please sign in to comment.