Skip to content

Commit

Permalink
fix: process images that are nested inside of columns
Browse files Browse the repository at this point in the history
  • Loading branch information
hatton committed Aug 17, 2022
1 parent fe2d623 commit c3dc8ff
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 66 deletions.
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
"notion-download": "node dist/index.js",
"cmdhelp": "ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts",
"// test out with a private sample notion db": "",
"pull-test": "cross-var rm -rf ./docs/ && ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_TEST_ROOT_PAGE% --log-level debug",
"pull-test-tagged": "cross-var rm -rf ./docs/ && ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_TEST_ROOT_PAGE% --log-level debug --status-tag test",
"pull-test-outline": "cross-var rm -rf ./docs/ && ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_TEST_ROOT_PAGE% --log-level debug",
"// test with a semi-stable/public site:": "",
"pull-sample": "cross-var ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_SAMPLE_ROOT_PAGE% -m ./sample --locales en,es,fr,de --log-level verbose",
"pull-sample-with-paths": "cross-var ts-node --compiler-options \"{\\\"module\\\": \\\"commonjs\\\"}\" src/index.ts -n %DOCU_NOTION_INTEGRATION_TOKEN% -r %DOCU_NOTION_SAMPLE_ROOT_PAGE% -m ./sample --img-output-path ./sample_img"
Expand Down
4 changes: 3 additions & 1 deletion src/CustomTranformers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ export function setupCustomTransformers(
(block: ListBlockChildrenResponseResult) =>
notionColumnToMarkdown(notionToMarkdown, notionClient, block)
);

// Note: Pull.ts also adds an image transformer, but has to do that for each
// page so we don't do it here.
}

async function notionColumnListToMarkdown(
Expand All @@ -45,7 +48,6 @@ async function notionColumnListToMarkdown(

return `<div class='notion-row'>\n${columns.join("\n\n")}\n</div>`;
}

async function notionColumnToMarkdown(
notionToMarkdown: NotionToMarkdown,
notionClient: Client,
Expand Down
127 changes: 69 additions & 58 deletions src/images.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,14 @@ import FileType, { FileTypeResult } from "file-type";
import fetch from "node-fetch";
import * as Path from "path";
import { makeImagePersistencePlan } from "./MakeImagePersistencePlan";
import { logDebug, verbose, info } from "./log";
import { ListBlockChildrenResponse } from "@notionhq/client/build/src/api-endpoints";
import { warning, logDebug, verbose, info } from "./log";
import { ListBlockChildrenResponseResult } from "notion-to-md/build/types";

// We do several things here:
// 1) copy images locally instead of leaving them in Notion
// 2) change the links to point here
// 3) read the caption and if there are localized images, get those too
// 4) prepare for localized documents, which need a copy of every image

let existingImagesNotSeenYetInPull: string[] = [];
let imageOutputPath = ""; // default to putting in the same directory as the document referring to it.
Expand Down Expand Up @@ -59,22 +65,65 @@ export async function initImageHandling(
}
}

export async function outputImages(
blocks: (
| ListBlockChildrenResponse
| /* not avail in types: BlockObjectResponse so we use any*/ any
)[],
/**
 * Custom transformer for Notion "image" blocks, meant to be registered with
 * notion-to-md through `setCustomTransformer`.
 *
 * The image payload is first handed to `processImageBlock`, which rewrites its
 * url and caption in place; the markdown image tag is then assembled from the
 * updated values.
 *
 * @param block - block supplied by notion-to-md; only its `image` property is read
 * @param fullPathToDirectoryContainingMarkdown - forwarded unchanged to `processImageBlock`
 * @param relativePathToThisPage - forwarded unchanged to `processImageBlock`
 * @returns markdown of the form `![caption](url)`
 */
// eslint-disable-next-line @typescript-eslint/require-await
export async function markdownToMDImageTransformer(
  block: ListBlockChildrenResponseResult,
  fullPathToDirectoryContainingMarkdown: string,
  relativePathToThisPage: string
): Promise<string> {
  const { image } = block as any;

  // Must run before the url/caption are read below: it mutates `image`.
  await processImageBlock(
    image,
    fullPathToDirectoryContainingMarkdown,
    relativePathToThisPage
  );

  // The caption is a list of rich-text parts; the alt text is simply their
  // plain text glued together.
  const captionParts: string[] = image.caption.map(
    // eslint-disable-next-line @typescript-eslint/no-unsafe-return
    (part: any) => part.plain_text
  );
  const altText: string = captionParts.join("");

  // The url lives under `external` or `file`, depending on the image type.
  let href: string;
  if (image.type === "external") {
    href = image.external.url;
  } else {
    href = image.file.url;
  }

  return `![${altText}](${href})`;
}

async function processImageBlock(
imageBlock: any,
pathToParentDocument: string,
relativePathToThisPage: string
): Promise<void> {
for (const b of blocks) {
if ("image" in b) {
await processImageBlock(
b,
fullPathToDirectoryContainingMarkdown,
relativePathToThisPage
);
}
logDebug("processImageBlock", JSON.stringify(imageBlock));

// this is broken into all these steps to facilitate unit testing without IO
const imageSet = parseImageBlock(imageBlock);
imageSet.pathToParentDocument = pathToParentDocument;
imageSet.relativePathToParentDocument = relativePathToThisPage;

await readPrimaryImage(imageSet);
makeImagePersistencePlan(imageSet, imageOutputPath, imagePrefix);
await saveImage(imageSet);

// change the src to point to our copy of the image
if ("file" in imageBlock) {
imageBlock.file.url = imageSet.filePathToUseInMarkdown;
} else {
imageBlock.external.url = imageSet.filePathToUseInMarkdown;
}
// put back the simplified caption, stripped of the meta information
if (imageSet.caption) {
imageBlock.caption = [
{
type: "text",
text: { content: imageSet.caption, link: null },
plain_text: imageSet.caption,
},
];
} else {
imageBlock.caption = [];
}
}

Expand Down Expand Up @@ -127,20 +176,20 @@ function writeImageIfNew(path: string, buffer: Buffer) {
fs.createWriteStream(path).write(buffer); // async but we're not waiting
}

export function parseImageBlock(b: any): ImageSet {
export function parseImageBlock(image: any): ImageSet {
const imageSet: ImageSet = {
primaryUrl: "",
caption: "",
localizedUrls: locales.map(l => ({ iso632Code: l, url: "" })),
};

if ("file" in b.image) {
imageSet.primaryUrl = b.image.file.url; // image saved on notion (actually AWS)
if ("file" in image) {
imageSet.primaryUrl = image.file.url; // image saved on notion (actually AWS)
} else {
imageSet.primaryUrl = b.image.external.url; // image still pointing somewhere else. I've seen this happen when copying a Google Doc into Notion. Notion kept pointing at the Google Doc.
imageSet.primaryUrl = image.external.url; // image still pointing somewhere else. I've seen this happen when copying a Google Doc into Notion. Notion kept pointing at the Google Doc.
}

const mergedCaption: string = b.image.caption
const mergedCaption: string = image.caption
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
.map((c: any) => c.plain_text)
.join("");
Expand Down Expand Up @@ -169,44 +218,6 @@ export function parseImageBlock(b: any): ImageSet {
return imageSet;
}

// Download the image if we don't have it, give it a good name, and
// change the src to point to our copy of the image.
//
// Parameters:
//   b                      - the whole Notion block; the image payload is read
//                            from (and written back to) `b.image`
//   pathToParentDocument   - stored on the image set as `pathToParentDocument`
//   relativePathToThisPage - stored on the image set as
//                            `relativePathToParentDocument`
//
// Resolves with no value; the result is the in-place mutation of `b.image`
// (its url and its caption).
//
// NOTE(review): this listing also contains a `processImageBlock` variant that
// receives the image object itself instead of the enclosing block. This
// variant expects the enclosing block — confirm which one a caller is using.
async function processImageBlock(
b: any,
pathToParentDocument: string,
relativePathToThisPage: string
): Promise<void> {
// dump the incoming block, serialized as JSON, to the debug log
logDebug("processImageBlock", JSON.stringify(b));

// this is broken into all these steps to facilitate unit testing without IO
const imageSet = parseImageBlock(b);
imageSet.pathToParentDocument = pathToParentDocument;
imageSet.relativePathToParentDocument = relativePathToThisPage;

// The order matters: the url assignment below reads
// `imageSet.filePathToUseInMarkdown`, which is not set anywhere in this
// function — presumably one of these helpers fills it in (TODO confirm;
// the helpers are not shown here).
await readPrimaryImage(imageSet);
makeImagePersistencePlan(imageSet, imageOutputPath, imagePrefix);
await saveImage(imageSet);

// change the src to point to our copy of the image
// (the url lives under `file` or `external`, depending on the image kind)
if ("file" in b.image) {
b.image.file.url = imageSet.filePathToUseInMarkdown;
} else {
b.image.external.url = imageSet.filePathToUseInMarkdown;
}
// put back the simplified caption, stripped of the meta information
// (a single text item when there is a caption, otherwise an empty list)
if (imageSet.caption) {
b.image.caption = [
{
type: "text",
text: { content: imageSet.caption, link: null },
plain_text: imageSet.caption,
},
];
} else {
b.image.caption = [];
}
}

function imageWasSeen(path: string) {
existingImagesNotSeenYetInPull = existingImagesNotSeenYetInPull.filter(
p => p !== path
Expand Down
27 changes: 21 additions & 6 deletions src/pull.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,18 @@ import { NotionToMarkdown } from "notion-to-md";
import { HierarchicalNamedLayoutStrategy } from "./HierarchicalNamedLayoutStrategy";
import { LayoutStrategy } from "./LayoutStrategy";
import { initNotionClient, NotionPage, PageType } from "./NotionPage";
import { initImageHandling, cleanupOldImages, outputImages } from "./images";
import {
initImageHandling,
cleanupOldImages,
markdownToMDImageTransformer,
} from "./images";

import { tweakForDocusaurus } from "./DocusaurusTweaks";
import { setupCustomTransformers } from "./CustomTranformers";
import * as Path from "path";
import { error, info, logDebug, verbose, warning } from "./log";
import { convertInternalLinks } from "./links";
import { ListBlockChildrenResponseResult } from "notion-to-md/build/types";

export type Options = {
notionToken: string;
Expand Down Expand Up @@ -164,14 +169,21 @@ async function outputPage(page: NotionPage) {
const relativePathToFolderContainingPage = Path.dirname(
layoutStrategy.getLinkPathForPage(page)
);
await outputImages(
blocks,
directoryContainingMarkdown,
relativePathToFolderContainingPage
);
logDebug("pull", JSON.stringify(blocks));

currentSidebarPosition++;

// we have to set this one up for each page because we need to
// give it two extra parameters that are context for each page
notionToMarkdown.setCustomTransformer(
"image",
(block: ListBlockChildrenResponseResult) =>
markdownToMDImageTransformer(
block,
directoryContainingMarkdown,
relativePathToFolderContainingPage
)
);
const mdBlocks = await notionToMarkdown.blocksToMarkdown(blocks);

// if (page.nameOrTitle.startsWith("Embed")) {
Expand All @@ -187,8 +199,11 @@ async function outputPage(page: NotionPage) {
frontmatter += "---\n";

let markdown = notionToMarkdown.toMarkdownString(mdBlocks);

// Improve: maybe this could be another notion-to-md "custom transformer"
markdown = convertInternalLinks(markdown, pages, layoutStrategy);

// Improve: maybe this could be another notion-to-md "custom transformer"
const { body, imports } = tweakForDocusaurus(markdown);
const output = `${frontmatter}\n${imports}\n${body}`;

Expand Down

0 comments on commit c3dc8ff

Please sign in to comment.