Skip to content

Commit

Permalink
Add base64 image support
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromewu committed Jul 2, 2019
1 parent c9444f3 commit 5dd382a
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 3 deletions.
2 changes: 2 additions & 0 deletions docs/image-format.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ On a browser, an image can be:
- an `img`, `video`, or `canvas` element
- a `File` object (from a file `<input>`)
- a path or URL to an accessible image
- a base64-encoded image string that matches the regexp `data:image\/([a-zA-Z]*);base64,([^"]*)`

In Node.js, an image can be
- a path to a local image
- a Buffer storing binary image
- a base64-encoded image string that matches the regexp `data:image\/([a-zA-Z]*);base64,([^"]*)`
1 change: 1 addition & 0 deletions src/browser/b64toU8Array.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
module.exports = s => new Uint8Array(atob(s).split('').map(c => c.charCodeAt(0)));
7 changes: 7 additions & 0 deletions src/browser/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
const check = require('check-types');
const resolveURL = require('resolve-url');
const axios = require('axios');
const b64toU8Array = require('./b64toU8Array');
const { defaultOptions } = require('../common/options');
const { version } = require('../../package.json');

Expand Down Expand Up @@ -38,6 +39,7 @@ const readFromBlobOrFile = (blob, res) => {
* @access private
* @param {string, object} image - image source, supported formats:
* string: URL string, can be relative path
* string: base64 image
* img HTMLElement: extract image source from src attribute
* video HTMLElement: extract image source from poster attribute
* canvas HTMLElement: extract image data by converting to Blob
Expand All @@ -46,6 +48,11 @@ const readFromBlobOrFile = (blob, res) => {
*/
const loadImage = (image) => {
if (check.string(image)) {
// Base64 Image
if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
return Promise.resolve(b64toU8Array(image.split(',')[1]));
}
// Image URL
return axios.get(resolveURL(image), {
responseType: 'arraybuffer',
})
Expand Down
3 changes: 2 additions & 1 deletion src/browser/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

const check = require('check-types');
const workerUtils = require('../common/workerUtils');
const b64toU8Array = require('./b64toU8Array');

/*
* register message handler
Expand Down Expand Up @@ -42,7 +43,7 @@ workerUtils.setAdapter({
}
return global.TesseractCore;
},
b64toU8Array: s => new Uint8Array(atob(s).split('').map(c => c.charCodeAt(0))),
b64toU8Array,
writeFile: (path, data, type) => {
postMessage({
jobId: 'Download',
Expand Down
1 change: 1 addition & 0 deletions src/node/b64toU8Array.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
module.exports = s => Buffer.from(s, 'base64');
6 changes: 6 additions & 0 deletions src/node/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const axios = require('axios');
const isURL = require('is-url');
const { fork } = require('child_process');
const path = require('path');
const b64toU8Array = require('./b64toU8Array');
const { defaultOptions } = require('../common/options');

const readFile = util.promisify(fs.readFile);
Expand All @@ -25,6 +26,7 @@ const readFile = util.promisify(fs.readFile);
* @access public
* @param {string} image - image source, supported formats:
* string: URL string or file path
* string: base64 image
* buffer: image buffer
* @returns {array} binary image in array format
*/
Expand All @@ -36,6 +38,10 @@ const loadImage = (image) => {
.then(resp => resp.data);
}

if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
return Promise.resolve(b64toU8Array(image.split(',')[1]));
}

if (Buffer.isBuffer(image)) {
return Promise.resolve(image);
}
Expand Down
3 changes: 2 additions & 1 deletion src/node/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

const check = require('check-types');
const workerUtils = require('../common/workerUtils');
const b64toU8Array = require('./b64toU8Array');

let TesseractCore = null;

Expand All @@ -33,7 +34,7 @@ workerUtils.setAdapter({
}
return TesseractCore;
},
b64toU8Array: s => Buffer.from(s, 'base64'),
b64toU8Array,
writeFile: (path, data) => {
const fs = require('fs');
fs.writeFile(path, data, (err) => {
Expand Down
22 changes: 21 additions & 1 deletion tests/recognize.test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
const { TesseractWorker } = Tesseract;

const isBrowser = typeof window !== 'undefined' && typeof window.document !== 'undefined';
const SIMPLE_PNG = '';
const SIMPLE_JPG = '';
const IMAGE_PATH = 'http://localhost:3000/tests/assets/images';
const SIMPLE_TEXT = 'Tesseract.js\n';
const COMSIC_TEXT = 'HellO World\nfrom beyond\nthe Cosmic Void\n';
Expand All @@ -23,6 +25,24 @@ const getWorker = options => (
);

describe('recognize()', () => {
// Verifies that recognize() accepts a base64 data-URI string directly,
// once for each encoded sample image.
describe('should recognize base64 image', () => {
  [
    { format: 'png', image: SIMPLE_PNG, ans: SIMPLE_TEXT },
    { format: 'jpg', image: SIMPLE_JPG, ans: SIMPLE_TEXT },
  ].forEach(({ format, image, ans }) => (
    it(`recognize ${format} in base64`, (done) => {
      const worker = getWorker();
      worker
        .recognize(image)
        .then(({ text }) => {
          // Capture an assertion failure instead of letting it escape the
          // callback: otherwise done() is never reached, the worker is left
          // running, and the test only fails after the 30 s timeout.
          let failure;
          try {
            expect(text).to.be(ans);
          } catch (err) {
            failure = err;
          }
          worker.terminate();
          done(failure);
        });
    }).timeout(30000)
  ));
});

describe('should recognize different langs', () => {
[
{ name: 'chinese.png', lang: 'chi_tra', ans: CHINESE_TEXT },
Expand All @@ -39,7 +59,7 @@ describe('recognize()', () => {
}).timeout(30000)
));
});

describe('should read bmp, jpg, png and pbm format images', () => {
FORMATS.forEach(format => (
it(`support ${format} format`, (done) => {
Expand Down

0 comments on commit 5dd382a

Please sign in to comment.