-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added options for custom output and image paths.
- Loading branch information
1 parent
3318889
commit 399efd8
Showing
11 changed files
with
2,830 additions
and
415 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
{ | ||
"env": { | ||
"browser": true, | ||
"commonjs": true, | ||
"es2021": true | ||
}, | ||
"extends": "eslint:recommended", | ||
"parserOptions": { | ||
"ecmaVersion": 12 | ||
}, | ||
"rules": {} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,35 @@ | ||
#!/usr/bin/env node | ||
'use strict'; | ||
|
||
const program = require('commander'); | ||
const workflow = require('./lib/workflow'); | ||
const packageJson = require('./package.json'); | ||
|
||
// Register the package version on the root commander instance.
program.version(packageJson.version);

// `convertLocal` is the only active sub-command; its handler lives in
// ./lib/workflow and receives the optional input directory plus the flags.
const convertLocal = program.command('convertLocal [inputDirectory]');

convertLocal
  .description('Converts Medium exported html files to markdown from a local directory.')
  .option('-d, --drafts', 'Convert drafts too.')
  .option('-f, --frontMatter', 'Add front-matter.')
  .option('-i, --images', 'Download images in local directory.')
  .action(workflow.processAll);

// Convert from url has been removed.
// Medium posts seem to have updated (random) css classes and html attributes,
// and the reader is unable to extract the article content from the html body.

// program
//   .version(packageJson.version)
//   .command('convertUrl [url]')
//   .description('Converts Medium post to markdown from its url.')
//   .option('-o, --outputDir <>', 'Output directory path.')
//   .option('-f, --frontMatter', 'Add front-matter.')
//   .option('-i, --images', 'Download images in local directory.')
//   .action(workflow.processSingle);

// Hand the raw command line to commander so it can dispatch the action.
program.parse(process.argv);
"use strict"; | ||
|
||
const program = require("commander"); | ||
const workflow = require("./lib/workflow"); | ||
const packageJson = require("./package.json"); | ||
|
||
// Command-line wiring: registers the `convertLocal` sub-command and its flags,
// then lets commander dispatch to workflow.processAll.
program
  .version(packageJson.version)
  .command("convertLocal [inputDirectory]")
  .description(
    "Converts Medium exported html files to markdown from a local directory."
  )
  .option("-d, --drafts", "Convert drafts too.")
  .option("-f, --frontMatter", "Add front-matter.")
  .option("-i, --images", "Download images at default path.")
  // Short flags must be a single character: commander documents them that
  // way, and multi-character shorts such as `-op` are not reliably matched.
  // The long names are unchanged, so the handler still reads the same option
  // keys.
  .option("-o, --path <path>", "Custom path for saving markdown files.")
  .option("-p, --img-path <imgpath>", "Custom path for downloading images.")
  .action(workflow.processAll);

// Convert from url has been removed.
// Medium posts seem to have updated (random) css classes and html attributes,
// and the reader is unable to extract the article content from the html body.

// program
//   .version(packageJson.version)
//   .command('convertUrl [url]')
//   .description('Converts Medium post to markdown from its url.')
//   .option('-o, --outputDir <>', 'Output directory path.')
//   .option('-f, --frontMatter', 'Add front-matter.')
//   .option('-i, --images', 'Download images in local directory.')
//   .action(workflow.processSingle);

// eslint-disable-next-line no-undef
program.parse(process.argv);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,74 +1,78 @@ | ||
const TurndownService = require('turndown'); | ||
const path = require('path'); | ||
const url = require('url'); | ||
const utils = require('./utils'); | ||
|
||
// Turndown options that control the flavour of the generated Markdown.
const config = {
  headingStyle: "atx",
  hr: "---",
  bulletListMarker: "*",
  codeBlockStyle: "fenced",
  fence: "```",
  emDelimiter: "_",
  strongDelimiter: "**",
  linkStyle: "inlined",
  linkReferenceStyle: "full"
};

const td = new TurndownService(config);

// Module-level state read by the figure rule below:
//   images         - download jobs ({ src, name }) collected while converting
//   downloadImages - when true, image links are rewritten to local paths
let images = [];
let downloadImages = false;

// Parsing figure and figcaption for markdown.
td.addRule('figure', {
  filter: 'figure',
  /**
   * Builds the Markdown for a <figure> element.
   *
   * @param {string} content - Markdown already produced for the figure's children.
   * @returns {string} The image line (possibly rewritten to a local path and
   *   given alt text), a newline, then the caption line if there is one.
   */
  replacement: function (content) {
    // This is a hack based on string parsing;
    // ugly and error prone.
    // Need to find a better way to do this!

    const lines = content.split('\n');
    const imageStr = "![](https://cdn-images";
    let element = lines.find((line) => line.includes(imageStr));
    // NOTE(review): the fifth line is presumably where the figcaption text
    // lands in Turndown's output for Medium figures - confirm.
    const caption = lines[4];

    if (downloadImages === true && element) {
      // Strip the leading "![](" and the trailing ")" to get the bare URL.
      const imgSrc = element.substring(4, element.length - 1);

      // This check is important as Medium renders embeds (YouTube, etc.) also as figures.
      if (utils.isUrl(imgSrc)) {
        const imgFileName = getImageName(imgSrc);
        const localImgPath = path.join('img', imgFileName);
        element = "![](" + localImgPath + ")";
        images.push({
          src: imgSrc,
          name: imgFileName
        });
      }
    }

    // Use the caption as alt text: "![" + caption + "](...)". Only do so when
    // an image line was actually found; figures without one (e.g. embeds)
    // would otherwise throw on `undefined.slice`.
    if (element && caption) {
      element = [element.slice(0, 2), caption, element.slice(2)].join('');
    }

    return (element || '') + '\n' + (caption || '');
  }
});
|
||
/**
 * Converts an HTML string to Markdown.
 *
 * Resets the module-level image list and download flag before running
 * Turndown, so each call starts from a clean state.
 *
 * @param {string} htmlStr - HTML to convert.
 * @param {boolean} downloadImagesFlag - Stored in `downloadImages` for the
 *   figure rule to read during conversion.
 * @returns {{md: string, images: Array}} The Markdown text and the image list
 *   populated during this conversion.
 */
const convert = function (htmlStr, downloadImagesFlag) {
  images = [];
  downloadImages = downloadImagesFlag;

  const md = td.turndown(htmlStr);
  return { md, images };
};
|
||
/**
 * Derives a local file name from an image URL.
 *
 * The last path segment is taken, every character of its base name that is
 * not an ASCII letter or digit is replaced with "__", and ".jpg" is appended
 * when the segment has no extension. Query string and fragment are ignored.
 *
 * @param {string} imgSrc - Image URL.
 * @returns {string} Sanitised file name including extension.
 * @throws {TypeError} If imgSrc is not a non-empty string or cannot be parsed.
 */
const getImageName = function (imgSrc) {
  if (typeof imgSrc !== 'string' || imgSrc === '') {
    throw new TypeError('getImageName expects a non-empty URL string.');
  }

  // The WHATWG URL API replaces the deprecated url.parse(). The placeholder
  // base only matters for relative input, which then still yields a pathname.
  const { pathname } = new URL(imgSrc, 'http://localhost');

  // URL paths always use "/", so use the POSIX flavour on every platform.
  const parsed = path.posix.parse(path.posix.basename(pathname));
  const name = parsed.name.replace(/[^a-zA-Z0-9]/g, '__');
  const ext = parsed.ext ? parsed.ext : '.jpg'; // If no extension, add .jpg.
  return name + ext;
};
|
||
module.exports = convert; | ||
const TurndownService = require("turndown"); | ||
const path = require("path"); | ||
const url = require("url"); | ||
const utils = require("./utils"); | ||
|
||
// Turndown options that control the flavour of the generated Markdown.
const config = {
  headingStyle: "atx",
  hr: "---",
  bulletListMarker: "*",
  codeBlockStyle: "fenced",
  fence: "```",
  emDelimiter: "_",
  strongDelimiter: "**",
  linkStyle: "inlined",
  linkReferenceStyle: "full",
};

const td = new TurndownService(config);

// Module-level state read by the figure rule below:
//   images         - download jobs ({ src, name }) collected while converting
//   downloadImages - when true, image links are rewritten to local paths
//   imgPath        - directory prefix used for rewritten image links
let images = [];
let downloadImages = false;
let imgPath = "img";

// Parsing figure and figcaption for markdown.
td.addRule("figure", {
  filter: "figure",
  /**
   * Builds the Markdown for a <figure> element.
   *
   * @param {string} content - Markdown already produced for the figure's children.
   * @returns {string} The image line (possibly rewritten to a local path and
   *   given alt text), a newline, then the caption line if there is one.
   */
  replacement: function (content) {
    // This is a hack based on string parsing;
    // ugly and error prone.
    // Need to find a better way to do this!

    const lines = content.split("\n");
    const imageStr = "![](https://cdn-images";
    let element = lines.find((line) => line.includes(imageStr));
    // NOTE(review): the fifth line is presumably where the figcaption text
    // lands in Turndown's output for Medium figures - confirm.
    const caption = lines[4];

    if (downloadImages === true && element) {
      // Strip the leading "![](" and the trailing ")" to get the bare URL.
      const imgSrc = element.substring(4, element.length - 1);

      // This check is important as Medium renders embeds (YouTube, etc.) also as figures.
      if (utils.isUrl(imgSrc)) {
        const imgFileName = getImageName(imgSrc);
        const localImgPath = path.join(imgPath, imgFileName);
        element = "![](" + localImgPath + ")";
        images.push({
          src: imgSrc,
          name: imgFileName,
        });
      }
    }

    // Use the caption as alt text: "![" + caption + "](...)". Only do so when
    // an image line was actually found; figures without one (e.g. embeds)
    // would otherwise throw on `undefined.slice`.
    if (element && caption) {
      element = [element.slice(0, 2), caption, element.slice(2)].join("");
    }

    return (element || "") + "\n" + (caption || "");
  },
});
|
||
/**
 * Converts an HTML string to Markdown.
 *
 * Resets the module-level image list, download flag and image directory
 * before running Turndown, so each call starts from a clean state.
 *
 * @param {string} htmlStr - HTML to convert.
 * @param {boolean} downloadImagesFlag - Stored in `downloadImages` for the
 *   figure rule to read during conversion.
 * @param {string} [imagesPath] - Directory prefix for rewritten image links;
 *   falls back to "img" when omitted.
 * @returns {{md: string, images: Array}} The Markdown text and the image list
 *   populated during this conversion.
 */
const convert = function (htmlStr, downloadImagesFlag, imagesPath) {
  downloadImages = downloadImagesFlag;
  images = [];
  // Callers that still pass two arguments must keep the "img" default;
  // assigning undefined here would make path.join() throw in the figure rule.
  imgPath = imagesPath == null ? "img" : imagesPath;
  return { md: td.turndown(htmlStr), images };
};
|
||
/**
 * Derives a local file name from an image URL.
 *
 * The last path segment is taken, every character of its base name that is
 * not an ASCII letter or digit is replaced with "__", and ".jpg" is appended
 * when the segment has no extension. Query string and fragment are ignored.
 *
 * @param {string} imgSrc - Image URL.
 * @returns {string} Sanitised file name including extension.
 * @throws {TypeError} If imgSrc is not a non-empty string or cannot be parsed.
 */
const getImageName = function (imgSrc) {
  if (typeof imgSrc !== "string" || imgSrc === "") {
    throw new TypeError("getImageName expects a non-empty URL string.");
  }

  // The WHATWG URL API replaces the deprecated url.parse(). The placeholder
  // base only matters for relative input, which then still yields a pathname.
  const { pathname } = new URL(imgSrc, "http://localhost");

  // URL paths always use "/", so use the POSIX flavour on every platform.
  const parsed = path.posix.parse(path.posix.basename(pathname));
  const name = parsed.name.replace(/[^a-zA-Z0-9]/g, "__");
  const ext = parsed.ext ? parsed.ext : ".jpg"; // If no extension, add .jpg.
  return name + ext;
};
|
||
module.exports = convert; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,16 @@ | ||
const fs = require('fs'); | ||
const fetch = require('node-fetch'); | ||
const fs = require("fs"); | ||
const fetch = require("node-fetch"); | ||
|
||
/**
 * Downloads a remote file and writes it to disk.
 *
 * @param {string} url - Address of the file to fetch.
 * @param {string} localPath - Destination path on the local file system.
 * @returns {Promise<string>} Resolves with localPath once the write stream
 *   has closed, i.e. after the data has been written out.
 * @throws {Error} If the server answers with a non-2xx status, or if reading
 *   the response or writing the file fails.
 */
async function downloadImage(url, localPath) {
  // A single GET is enough; a separate HEAD probe whose result is never
  // inspected would only add a round trip.
  const response = await fetch(url);

  if (!response.ok) {
    // Do not save an error page under an image file name.
    throw new Error(
      `Failed to download ${url}: ${response.status} ${response.statusText}`
    );
  }

  return new Promise((resolve, reject) => {
    const file = fs.createWriteStream(localPath);

    response.body.on("error", (err) => {
      // pipe() does not forward source errors, so release the file handle here.
      file.destroy();
      reject(err);
    });
    file.on("error", reject);
    // Pass a callback: calling resolve() inline would settle the promise
    // immediately, before any data had been written.
    file.on("close", () => resolve(localPath));

    response.body.pipe(file);
  });
}
|
||
module.exports = downloadImage; | ||
module.exports = downloadImage; |
Oops, something went wrong.