Skip to content

Commit

Permalink
Added new CLI option
Browse files Browse the repository at this point in the history
  • Loading branch information
Balearica committed Dec 12, 2024
1 parent 3450e13 commit 0cb244b
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 1 deletion.
11 changes: 10 additions & 1 deletion cli/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ scribe.opt.saveDebugImages = debugMode;
* @param {Array<string>} [params.list]
* @param {boolean} [params.robustConfMode]
* @param {boolean} [params.printConf]
* @param {boolean} [params.hocr]
* @param {"eval" | "ebook" | "proof" | "invis"} [params.overlayMode]
* @param {number} [params.workerN]
*/
Expand Down Expand Up @@ -89,6 +90,11 @@ async function main(func, params) {

const outputPath = `${outputDir}/${path.basename(backgroundArg).replace(/\.\w{1,5}$/i, `${outputSuffix}.pdf`)}`;
await scribe.download('pdf', outputPath);

if (params.hocr) {
const outputPathHocr = `${outputDir}/${path.basename(backgroundArg).replace(/\.\w{1,5}$/i, '.hocr')}`;
await scribe.download('hocr', outputPathHocr);
}
}

if (debugMode) {
Expand Down Expand Up @@ -155,9 +161,12 @@ export const overlay = async (pdfFile, ocrFile, outputDir, options) => (main('ov
* @param {string} pdfFile - Path to PDF file.
* @param {Object} options
* @param {"eval" | "ebook" | "proof" | "invis"} [options.overlayMode]
* @param {boolean} [options.hocr]
* @param {number} [options.workers]
*/
export const recognize = async (pdfFile, options) => (main('recognize', { pdfFile, overlayMode: options?.overlayMode || 'invis', workerN: options?.workers }));
export const recognize = async (pdfFile, options) => {
  // Any falsy overlay mode (including an omitted `options`) falls back to 'invis'.
  const overlayMode = options?.overlayMode || 'invis';

  // Forward to the shared entry point; `workers` is passed on as `workerN`,
  // and `hocr` is passed through unchanged (undefined when not supplied).
  return main('recognize', {
    pdfFile,
    overlayMode,
    workerN: options?.workers,
    hocr: options?.hocr,
  });
};

/**
*
Expand Down
1 change: 1 addition & 0 deletions cli/scribe.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ program
.argument('<pdf_file>', 'Input PDF file.')
.description('Recognize text in PDF file using internal OCR engine.')
.option('-v, --vis', 'Print OCR text visibly over provided PDF file with colors coded by confidence.')
.option('-h, --hocr', 'Output .hocr intermediate data in addition to .pdf.')
.option('-w, --workers <number>', 'Number of workers to use. Default is up to 8.')
.action(recognizeCLI);

Expand Down

0 comments on commit 0cb244b

Please sign in to comment.