From 1f9cdc91d8b752afd812a2aaf92f6bce792a9cd4 Mon Sep 17 00:00:00 2001 From: Christopher Jeffrey Date: Sun, 4 Feb 2024 11:47:29 -0500 Subject: [PATCH 1/6] Add another sanity check for Buffer#write --- index.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/index.js b/index.js index cbe4c51..0945910 100644 --- a/index.js +++ b/index.js @@ -897,6 +897,10 @@ function ucs2Write (buf, string, offset, length) { } Buffer.prototype.write = function write (string, offset, length, encoding) { + if (typeof string !== 'string') { + throw new TypeError('"string" argument must be a string') + } + // Buffer#write(string) if (offset === undefined) { encoding = 'utf8' @@ -926,7 +930,7 @@ Buffer.prototype.write = function write (string, offset, length, encoding) { const remaining = this.length - offset if (length === undefined || length > remaining) length = remaining - if ((string.length > 0 && (length < 0 || offset < 0)) || offset > this.length) { + if (length < 0 || offset < 0 || offset > this.length) { throw new RangeError('Attempt to write outside buffer bounds') } From 14af73f428f11dd88fe2d05f816cf9dbff4ba493 Mon Sep 17 00:00:00 2001 From: Christopher Jeffrey Date: Sun, 4 Feb 2024 11:49:03 -0500 Subject: [PATCH 2/6] Simplify hexWrite --- index.js | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/index.js b/index.js index 0945910..67252e7 100644 --- a/index.js +++ b/index.js @@ -847,21 +847,10 @@ Buffer.prototype.lastIndexOf = function lastIndexOf (val, byteOffset, encoding) } function hexWrite (buf, string, offset, length) { - offset = Number(offset) || 0 - const remaining = buf.length - offset - if (!length) { - length = remaining - } else { - length = Number(length) - if (length > remaining) { - length = remaining - } - } - - const strLen = string.length + const bytes = string.length >>> 1 - if (length > (strLen >>> 1)) { - length = strLen >>> 1 + if (length > bytes) { + length = bytes } for (let i = 0; i < length; ++i) { From b5742b6eae2f3e65e0c5d95d0e6bcf6b279975e6 Mon Sep 17 00:00:00 2001 From: Christopher Jeffrey Date: Sun, 4 Feb 2024 11:50:44 -0500 Subject: [PATCH 3/6] Optimize asciiWrite --- index.js | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/index.js b/index.js index 67252e7..7479a66 100644 --- a/index.js +++ b/index.js @@ -874,7 +874,15 @@ function utf8Write (buf, string, offset, length) { } function asciiWrite (buf, string, offset, length) { - return blitBuffer(asciiToBytes(string), buf, offset, length) + if (length > string.length) { + length = string.length + } + + for (let i = 0; i < length; i++) { + buf[offset + i] = string.charCodeAt(i) + } + + return length } function base64Write (buf, string, offset, length) { @@ -2049,15 +2057,6 @@ function utf8ToBytes (string, units) { return bytes } -function asciiToBytes (str) { - const byteArray = [] - for (let i = 0; i < str.length; ++i) { - // Node's code seems to be doing this and not & 0x7F.. - byteArray.push(str.charCodeAt(i) & 0xFF) - } - return byteArray -} - function utf16leToBytes (str, units) { let c, hi, lo const byteArray = [] From 922b89e5952ba86cf43e6440c89603ae0dcecae6 Mon Sep 17 00:00:00 2001 From: Christopher Jeffrey Date: Sun, 4 Feb 2024 11:54:18 -0500 Subject: [PATCH 4/6] Optimize ucs2Write --- index.js | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/index.js b/index.js index 7479a66..ee5a223 100644 --- a/index.js +++ b/index.js @@ -890,7 +890,20 @@ function base64Write (buf, string, offset, length) { } function ucs2Write (buf, string, offset, length) { - return blitBuffer(utf16leToBytes(string, buf.length - offset), buf, offset, length) + length >>>= 1 + + if (length > string.length) { + length = string.length + } + + for (let i = 0; i < length; i++) { + const ch = string.charCodeAt(i) + + buf[offset + i * 2 + 0] = ch >> 0 + buf[offset + i * 2 + 1] = ch >> 8 + } + + return length * 2 } Buffer.prototype.write = function write (string, offset, length, encoding) { @@ -2057,22 +2070,6 @@ function utf8ToBytes (string, units) { return bytes } -function utf16leToBytes (str, units) { - let c, hi, lo - const byteArray = [] - for (let i = 0; i < str.length; ++i) { - if ((units -= 2) < 0) break - - c = str.charCodeAt(i) - hi = c >> 8 - lo = c % 256 - byteArray.push(lo) - byteArray.push(hi) - } - - return byteArray -} - function base64ToBytes (str) { return base64.toByteArray(base64clean(str)) } From 39d2fa96318b4e812beb346f18a3bdfaece75eb5 Mon Sep 17 00:00:00 2001 From: Christopher Jeffrey Date: Sun, 4 Feb 2024 12:04:51 -0500 Subject: [PATCH 5/6] Optimize utf8Write --- index.js | 228 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 145 insertions(+), 83 deletions(-) diff --git a/index.js b/index.js index ee5a223..4c20d06 100644 --- a/index.js +++ b/index.js @@ -480,7 +480,7 @@ function byteLength (string, encoding) { return len case 'utf8': case 'utf-8': - return utf8ToBytes(string).length + return utf8ByteLength(string) case 'ucs2': case 'ucs-2': case 'utf16le': @@ -492,7 +492,7 @@ function byteLength (string, encoding) { return base64ToBytes(string).length default: if (loweredCase) { - return mustMatch ? -1 : utf8ToBytes(string).length // assume utf8 + return mustMatch ? -1 : utf8ByteLength(string) // assume utf8 } encoding = ('' + encoding).toLowerCase() loweredCase = true @@ -870,7 +870,141 @@ function hexWrite (buf, string, offset, length) { } function utf8Write (buf, string, offset, length) { - return blitBuffer(utf8ToBytes(string, buf.length - offset), buf, offset, length) + let remaining = length + let leadSurrogate = 0 + let pos = offset + + for (let i = 0; i < string.length; i++) { + let codePoint = string.charCodeAt(i) + + // is surrogate component + if (codePoint > 0xd7ff && codePoint < 0xe000) { + // last char was a lead + if (!leadSurrogate) { + // no lead yet + if (codePoint > 0xdbff) { + // unexpected trail + if (remaining >= 3) pos = writeInvalid(buf, pos) + remaining -= 3 + continue + } else if (i + 1 === string.length) { + // unpaired lead + if (remaining >= 3) pos = writeInvalid(buf, pos) + remaining -= 3 + continue + } + + // valid lead + leadSurrogate = codePoint + + continue + } + + // 2 leads in a row + if (codePoint < 0xdc00) { + if (remaining >= 3) pos = writeInvalid(buf, pos) + remaining -= 3 + leadSurrogate = codePoint + continue + } + + // valid surrogate pair + codePoint -= 0xdc00 + codePoint |= (leadSurrogate - 0xd800) << 10 + codePoint += 0x10000 + } else if (leadSurrogate) { + // valid bmp char, but last char was a lead + if (remaining >= 3) pos = writeInvalid(buf, pos) + remaining -= 3 + } + + leadSurrogate = 0 + + // encode utf8 + if (codePoint < 0x80) { + if (remaining < 1) break + buf[pos++] = codePoint + remaining -= 1 + } else if (codePoint < 0x800) { + if (remaining < 2) break + buf[pos++] = (codePoint >> 6) | 0xc0 + buf[pos++] = (codePoint & 0x3f) | 0x80 + remaining -= 2 + } else if (codePoint < 0x10000) { + if (remaining < 3) break + buf[pos++] = (codePoint >> 12) | 0xe0 + buf[pos++] = ((codePoint >> 6) & 0x3f) | 0x80 + buf[pos++] = (codePoint & 0x3f) | 0x80 + remaining -= 3 + } else if (codePoint < 0x110000) { + if (remaining < 4) break + buf[pos++] = (codePoint >> 18) | 0xf0 + buf[pos++] = ((codePoint >> 12) & 0x3f) | 0x80 + buf[pos++] = ((codePoint >> 6) & 0x3f) | 0x80 + buf[pos++] = (codePoint & 0x3f) | 0x80 + remaining -= 4 + } else { + throw new Error('Invalid code point') + } + } + + return pos - offset +} + +function utf8ByteLength (string) { + let leadSurrogate = 0 + let size = 0 + + for (let i = 0; i < string.length; i++) { + let codePoint = string.charCodeAt(i) + + // is surrogate component + if (codePoint > 0xd7ff && codePoint < 0xe000) { + // last char was a lead + if (!leadSurrogate) { + // no lead yet + if (codePoint > 0xdbff) { + // unexpected trail + size += 3 + continue + } else if (i + 1 === string.length) { + // unpaired lead + size += 3 + continue + } + + // valid lead + leadSurrogate = codePoint + + continue + } + + // 2 leads in a row + if (codePoint < 0xdc00) { + size += 3 + leadSurrogate = codePoint + continue + } + + // valid surrogate pair + codePoint -= 0xdc00 + codePoint |= (leadSurrogate - 0xd800) << 10 + codePoint += 0x10000 + } else if (leadSurrogate) { + // valid bmp char, but last char was a lead + size += 3 + } + + leadSurrogate = 0 + + // encode utf8 + size += 1 + size += (codePoint >= 0x80) | 0 + size += (codePoint >= 0x800) | 0 + size += (codePoint >= 0x10000) | 0 + } + + return size } function asciiWrite (buf, string, offset, length) { @@ -1990,90 +2124,18 @@ function base64clean (str) { return str } -function utf8ToBytes (string, units) { - units = units || Infinity - let codePoint - const length = string.length - let leadSurrogate = null - const bytes = [] - - for (let i = 0; i < length; ++i) { - codePoint = string.charCodeAt(i) - - // is surrogate component - if (codePoint > 0xD7FF && codePoint < 0xE000) { - // last char was a lead - if (!leadSurrogate) { - // no lead yet - if (codePoint > 0xDBFF) { - // unexpected trail - if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD) - continue - } else if (i + 1 === length) { - // unpaired lead - if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD) - continue - } - - // valid lead - leadSurrogate = codePoint - - continue - } - - // 2 leads in a row - if (codePoint < 0xDC00) { - if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD) - leadSurrogate = codePoint - continue - } - - // valid surrogate pair - codePoint = (leadSurrogate - 0xD800 << 10 | codePoint - 0xDC00) + 0x10000 - } else if (leadSurrogate) { - // valid bmp char, but last char was a lead - if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD) - } - - leadSurrogate = null - - // encode utf8 - if (codePoint < 0x80) { - if ((units -= 1) < 0) break - bytes.push(codePoint) - } else if (codePoint < 0x800) { - if ((units -= 2) < 0) break - bytes.push( - codePoint >> 0x6 | 0xC0, - codePoint & 0x3F | 0x80 - ) - } else if (codePoint < 0x10000) { - if ((units -= 3) < 0) break - bytes.push( - codePoint >> 0xC | 0xE0, - codePoint >> 0x6 & 0x3F | 0x80, - codePoint & 0x3F | 0x80 - ) - } else if (codePoint < 0x110000) { - if ((units -= 4) < 0) break - bytes.push( - codePoint >> 0x12 | 0xF0, - codePoint >> 0xC & 0x3F | 0x80, - codePoint >> 0x6 & 0x3F | 0x80, - codePoint & 0x3F | 0x80 - ) - } else { - throw new Error('Invalid code point') - } - } - - return bytes -} - function base64ToBytes (str) { return base64.toByteArray(base64clean(str)) } +function writeInvalid (buf, pos) { + // U+FFFD (Replacement Character) + buf[pos++] = 0xef + buf[pos++] = 0xbf + buf[pos++] = 0xbd + return pos +} + function blitBuffer (src, dst, offset, length) { let i for (i = 0; i < length; ++i) { From 61582b4e825bc9fb1879f1b7d26c0a69356d7180 Mon Sep 17 00:00:00 2001 From: Christopher Jeffrey Date: Mon, 5 Feb 2024 07:11:05 -0500 Subject: [PATCH 6/6] Use atob/btoa for base64, remove base64-js dependency --- index.js | 146 +++++++++++++++++++++++++++++++++++++++---------- package.json | 1 - test/base64.js | 47 ++++++++++++++++ 3 files changed, 163 insertions(+), 31 deletions(-) diff --git a/index.js b/index.js index 4c20d06..11d5885 100644 --- a/index.js +++ b/index.js @@ -8,7 +8,6 @@ 'use strict' -const base64 = require('base64-js') const ieee754 = require('ieee754') const customInspectSymbol = (typeof Symbol === 'function' && typeof Symbol['for'] === 'function') // eslint-disable-line dot-notation @@ -38,10 +37,54 @@ exports.constants = { MAX_STRING_LENGTH: K_STRING_MAX_LENGTH } -exports.Blob = global.Blob -exports.File = global.File -exports.atob = global.atob -exports.btoa = global.btoa +exports.Blob = typeof Blob !== 'undefined' ? Blob : undefined +exports.File = typeof File !== 'undefined' ? File : undefined +exports.atob = typeof atob !== 'undefined' ? atob : undefined +exports.btoa = typeof btoa !== 'undefined' ? btoa : undefined + +/** + * The `atob` and `btoa` functions are unoptimized in node.js[1][2]. + * As a result of this, we call out to Buffer directly when running + * inside of node.js. Unfortunately, detecting node.js is tricky: + * + * We can't check `process.browser` because it will cause browserify + * to pull in the entire `process` module. + * + * Instead, we check for a global `Buffer` object with `asciiSlice` + * defined on the prototype. This undocumented method has been + * defined on the node.js Buffer prototype since the _very_ early + * days of node.js (as early as 0.4.0) and is still defined to this + * day (but is not defined on _our_ Buffer prototype). + * + * Because our `Buffer` constructor is hoisted, we can't check for + * `typeof Buffer === 'function'`. Instead, we need to access `global`. + * + * Unfortunately, we can't assume `global` exists as there may be a + * non-browserify bundler which supports CJS but not a full node.js + * environment which includes `global`. + * + * As an added bonus, this hack also accounts for nodes prior to + * v16.0.0 (when `atob` and `btoa` were first exposed globally). + * + * [1] https://github.com/feross/buffer/issues/339 + * [2] https://github.com/nodejs/node/pull/38433 + */ +let _atob = exports.atob +let _btoa = exports.btoa + +if (typeof global !== 'undefined' && global && global.Math === Math && + typeof global.Buffer === 'function' && global.Buffer.prototype && + typeof global.Buffer.prototype.asciiSlice === 'function') { + const NodeBuffer = global.Buffer + + _atob = function atob (str) { + return NodeBuffer.from(str, 'base64').toString('binary') + } + + _btoa = function btoa (str) { + return NodeBuffer.from(str, 'binary').toString('base64') + } +} /** * If `Buffer.TYPED_ARRAY_SUPPORT`: @@ -398,6 +441,7 @@ Buffer.isEncoding = function isEncoding (encoding) { case 'latin1': case 'binary': case 'base64': + case 'base64url': case 'ucs2': case 'ucs-2': case 'utf16le': @@ -489,7 +533,8 @@ function byteLength (string, encoding) { case 'hex': return len >>> 1 case 'base64': - return base64ToBytes(string).length + case 'base64url': + return base64ByteLength(string, len) default: if (loweredCase) { return mustMatch ? -1 : utf8ByteLength(string) // assume utf8 @@ -557,6 +602,9 @@ function slowToString (encoding, start, end) { case 'base64': return base64Slice(this, start, end) + case 'base64url': + return base64UrlSlice(this, start, end) + case 'ucs2': case 'ucs-2': case 'utf16le': @@ -1020,7 +1068,14 @@ function asciiWrite (buf, string, offset, length) { } function base64Write (buf, string, offset, length) { - return blitBuffer(base64ToBytes(string), buf, offset, length) + try { + // Parse optimistically as base64. + string = _atob(string) + } catch (e) { + // Fall back to full preprocessing. + string = _atob(base64clean(string)) + } + return asciiWrite(buf, string, offset, length) } function ucs2Write (buf, string, offset, length) { @@ -1096,6 +1151,7 @@ Buffer.prototype.write = function write (string, offset, length, encoding) { return asciiWrite(this, string, offset, length) case 'base64': + case 'base64url': // Warning: maxLength not taken into account in base64Write return base64Write(this, string, offset, length) @@ -1121,11 +1177,11 @@ Buffer.prototype.toJSON = function toJSON () { } function base64Slice (buf, start, end) { - if (start === 0 && end === buf.length) { - return base64.fromByteArray(buf) - } else { - return base64.fromByteArray(buf.slice(start, end)) - } + return _btoa(latin1Slice(buf, start, end)) +} + +function base64UrlSlice (buf, start, end) { + return base64convert(base64Slice(buf, start, end)) } function utf8Slice (buf, start, end) { @@ -2109,23 +2165,62 @@ function boundsError (value, length, type) { // ================ const INVALID_BASE64_RE = /[^+/0-9A-Za-z-_]/g +const BASE64URL_62 = /-/g +const BASE64URL_63 = /_/g function base64clean (str) { - // Node takes equal signs as end of the Base64 encoding - str = str.split('=')[0] - // Node strips out invalid characters like \n and \t from the string, base64-js does not - str = str.trim().replace(INVALID_BASE64_RE, '') + // Node takes equal signs as end of the encoding + const index = str.indexOf('=') + + if (index >= 0) { + str = str.slice(0, index) + } + + // Node strips out invalid characters, atob does not + str = str.replace(INVALID_BASE64_RE, '') + // Node converts strings with length < 2 to '' if (str.length < 2) return '' - // Node allows for non-padded base64 strings (missing trailing ===), base64-js does not - while (str.length % 4 !== 0) { - str = str + '=' + + // Node handles base64-url, atob does not + str = str.replace(BASE64URL_62, '+') + str = str.replace(BASE64URL_63, '/') + + // Node allows for non-padded strings, atob _may_ not + while (str.length & 3) { + str += '=' + } + + return str +} + +const BASE64_62 = /\+/g +const BASE64_63 = /\//g + +function base64convert (str) { + // Convert base64 to base64-url. + let len = str.length + + if (len > 0 && str[len - 1] === '=') len-- + if (len > 0 && str[len - 1] === '=') len-- + + if (len !== str.length) { + str = str.slice(0, len) } + + str = str.replace(BASE64_62, '-') + str = str.replace(BASE64_63, '_') + return str } -function base64ToBytes (str) { - return base64.toByteArray(base64clean(str)) +function base64ByteLength (str, bytes) { + // Handle padding + if (bytes > 0 && str.charCodeAt(bytes - 1) === 0x3d) bytes-- + if (bytes > 1 && str.charCodeAt(bytes - 1) === 0x3d) bytes-- + + // Base64 ratio: 3/4 + return (bytes * 3) >>> 2 } function writeInvalid (buf, pos) { @@ -2136,15 +2231,6 @@ function writeInvalid (buf, pos) { return pos } -function blitBuffer (src, dst, offset, length) { - let i - for (i = 0; i < length; ++i) { - if ((i + offset >= dst.length) || (i >= src.length)) break - dst[i + offset] = src[i] - } - return i -} - // ArrayBuffer or Uint8Array objects from other contexts (i.e. iframes) do not pass // the `instanceof` check but they should be treated as of that type. // See: https://github.com/feross/buffer/issues/166 diff --git a/package.json b/package.json index 5ee6b8d..93b695e 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,6 @@ "James Halliday " ], "dependencies": { - "base64-js": "^1.3.1", "ieee754": "^1.2.1" }, "devDependencies": { diff --git a/test/base64.js b/test/base64.js index 977225b..ff3f89c 100644 --- a/test/base64.js +++ b/test/base64.js @@ -53,3 +53,50 @@ test('base64: high byte', function (t) { ) t.end() }) + +test('base64: rfc test vectors', function (t) { + // https://tools.ietf.org/html/rfc4648#section-10 + const vectors = [ + ['', ''], + ['66', 'Zg=='], + ['666f', 'Zm8='], + ['666f6f', 'Zm9v'], + ['666f6f62', 'Zm9vYg=='], + ['666f6f6261', 'Zm9vYmE='], + ['666f6f626172', 'Zm9vYmFy'], + ['53e9363b2962fcaf', 'U+k2Oyli/K8='] + ] + + for (const [base16, base64] of vectors) { + const buf16 = B.from(base16, 'hex') + const buf64 = B.from(base64, 'base64') + + t.equal(buf16.toString('base64'), base64) + t.equal(buf64.toString('hex'), base16) + } + + t.end() +}) + +test('base64url: rfc test vectors', function (t) { + const vectors = [ + ['', ''], + ['66', 'Zg'], + ['666f', 'Zm8'], + ['666f6f', 'Zm9v'], + ['666f6f62', 'Zm9vYg'], + ['666f6f6261', 'Zm9vYmE'], + ['666f6f626172', 'Zm9vYmFy'], + ['53e9363b2962fcaf', 'U-k2Oyli_K8'] + ] + + for (const [base16, base64] of vectors) { + const buf16 = B.from(base16, 'hex') + const buf64 = B.from(base64, 'base64url') + + t.equal(buf16.toString('base64url'), base64) + t.equal(buf64.toString('hex'), base16) + } + + t.end() +})