From e58781feb7c9a956b66f1109fc639bdb86326293 Mon Sep 17 00:00:00 2001 From: twoeths Date: Sat, 9 Nov 2024 10:14:17 +0800 Subject: [PATCH] feat: implement merkleizeBlockArray (#421) * feat: implement merkleizeBlockArray * chore: add comments * chore: more docs for doMerkleizeBlockArray() --- .../src/hasher/as-sha256.ts | 15 +- .../src/hasher/hashtree.ts | 9 +- .../src/hasher/index.ts | 19 ++- .../src/hasher/noble.ts | 18 +- .../src/hasher/types.ts | 19 ++- .../persistent-merkle-tree/src/hasher/util.ts | 157 +++++++++++++++--- .../test/unit/hasher.test.ts | 61 ++++++- 7 files changed, 261 insertions(+), 37 deletions(-) diff --git a/packages/persistent-merkle-tree/src/hasher/as-sha256.ts b/packages/persistent-merkle-tree/src/hasher/as-sha256.ts index 943f9b49..3328827d 100644 --- a/packages/persistent-merkle-tree/src/hasher/as-sha256.ts +++ b/packages/persistent-merkle-tree/src/hasher/as-sha256.ts @@ -8,14 +8,23 @@ import { import type {Hasher} from "./types"; import {Node} from "../node"; import type {HashComputationLevel} from "../hashComputation"; -import {doDigestNLevel, doMerkleizeInto} from "./util"; +import {BLOCK_SIZE, doDigestNLevel, doMerkleizeBlockArray, doMerkleizeBlocksBytes} from "./util"; + +/** + * hashInto() function of as-sha256 loop through every 256 bytes + * This is the same to hashInto() function of as-sha256 https://github.com/ChainSafe/ssz/blob/cf3e1f038c8bf7cba1bb27c38540e50b0391d0e6/packages/as-sha256/src/index.ts#L270 + */ +const buffer = new Uint8Array(4 * BLOCK_SIZE); export const hasher: Hasher = { name: "as-sha256", digest64: digest2Bytes32, digest64HashObjects: digest64HashObjectsInto, - merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void { - return doMerkleizeInto(data, padFor, output, offset, hashInto); + merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void { + return doMerkleizeBlocksBytes(blocksBytes, padFor, output, offset, hashInto); + }, + 
merkleizeBlockArray(blocks, blockLimit, padFor, output, offset) { + return doMerkleizeBlockArray(blocks, blockLimit, padFor, output, offset, hashInto, buffer); }, digestNLevel(data: Uint8Array, nLevel: number): Uint8Array { return doDigestNLevel(data, nLevel, hashInto); diff --git a/packages/persistent-merkle-tree/src/hasher/hashtree.ts b/packages/persistent-merkle-tree/src/hasher/hashtree.ts index f578d50c..9d04eb07 100644 --- a/packages/persistent-merkle-tree/src/hasher/hashtree.ts +++ b/packages/persistent-merkle-tree/src/hasher/hashtree.ts @@ -3,7 +3,7 @@ import {Hasher, HashObject} from "./types"; import {Node} from "../node"; import type {HashComputationLevel} from "../hashComputation"; import {byteArrayIntoHashObject} from "@chainsafe/as-sha256/lib/hashObject"; -import {doDigestNLevel, doMerkleizeInto} from "./util"; +import {doDigestNLevel, doMerkleizeBlockArray, doMerkleizeBlocksBytes} from "./util"; /** * Best SIMD implementation is in 512 bits = 64 bytes @@ -40,8 +40,11 @@ export const hasher: Hasher = { hashInto(hash64Input, hash64Output); byteArrayIntoHashObject(hash64Output, 0, parent); }, - merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void { - return doMerkleizeInto(data, padFor, output, offset, hashInto); + merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void { + return doMerkleizeBlocksBytes(blocksBytes, padFor, output, offset, hashInto); + }, + merkleizeBlockArray(blocks, blockLimit, padFor, output, offset) { + return doMerkleizeBlockArray(blocks, blockLimit, padFor, output, offset, hashInto, uint8Input); }, digestNLevel(data: Uint8Array, nLevel: number): Uint8Array { return doDigestNLevel(data, nLevel, hashInto); diff --git a/packages/persistent-merkle-tree/src/hasher/index.ts b/packages/persistent-merkle-tree/src/hasher/index.ts index 75442232..414ee703 100644 --- a/packages/persistent-merkle-tree/src/hasher/index.ts +++ 
/**
 * Merkleize SHA256 blocks packed back-to-back in a single Uint8Array by delegating
 * to the module's currently selected `hasher`.
 *
 * @param blocksBytes the packed input; the hasher contract states it is mutated by the call
 * @param padFor the max chunk count, used by the hasher to derive how many layers to hash
 * @param output destination array that receives the result
 * @param offset position in `output` at which the result is written
 */
export function merkleizeBlocksBytes(
  blocksBytes: Uint8Array,
  padFor: number,
  output: Uint8Array,
  offset: number
): void {
  hasher.merkleizeBlocksBytes(blocksBytes, padFor, output, offset);
}

/**
 * Merkleize an array of SHA256 blocks (one Uint8Array per block) by delegating
 * to the module's currently selected `hasher`.
 *
 * @param blocks the input blocks; the hasher contract states they are mutated by the call
 * @param blockLimit number of leading blocks to hash, expected to be <= blocks.length
 * @param padFor the max chunk count, used by the hasher to derive how many layers to hash
 * @param output destination array that receives the result
 * @param offset position in `output` at which the result is written
 */
export function merkleizeBlockArray(
  blocks: Uint8Array[],
  blockLimit: number,
  padFor: number,
  output: Uint8Array,
  offset: number
): void {
  hasher.merkleizeBlockArray(blocks, blockLimit, padFor, output, offset);
}
digest64HashObjects: (left, right, parent) => { byteArrayIntoHashObject(digest64(hashObjectToUint8Array(left), hashObjectToUint8Array(right)), 0, parent); }, - merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void { - return doMerkleizeInto(data, padFor, output, offset, hashInto); + merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void { + return doMerkleizeBlocksBytes(blocksBytes, padFor, output, offset, hashInto); + }, + merkleizeBlockArray(blocks, blockLimit, padFor, output, offset) { + return doMerkleizeBlockArray(blocks, blockLimit, padFor, output, offset, hashInto, buffer); }, digestNLevel(data: Uint8Array, nLevel: number): Uint8Array { return doDigestNLevel(data, nLevel, hashInto); diff --git a/packages/persistent-merkle-tree/src/hasher/types.ts b/packages/persistent-merkle-tree/src/hasher/types.ts index 9f5813f0..e9075e6a 100644 --- a/packages/persistent-merkle-tree/src/hasher/types.ts +++ b/packages/persistent-merkle-tree/src/hasher/types.ts @@ -15,11 +15,24 @@ export type Hasher = { */ digest64HashObjects(left: HashObject, right: HashObject, parent: HashObject): void; /** - * Merkleize n chunk of data, 32 bytes each + * Merkleize n SHA256 blocks in a single Uint8Array, each block is 64 bytes * padFor is maxChunkCount, use it to compute layers to hash - * data is mutated after the function + * blocksBytes is mutated after the function */ - merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void; + merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void; + /** + * Merkleize n SHA256 blocks, each is 64 bytes Uint8Array + * blockLimit is the number of blocks to hash, should be <= blocks.length + * padFor is maxChunkCount, use it to compute layers to hash + * blocks are mutated after the function + */ + merkleizeBlockArray( + blocks: Uint8Array[], + blockLimit: number, + padFor: number, + 
output: Uint8Array, + offset: number + ): void; /** * Hash multiple chunks (1 chunk = 32 bytes) at multiple levels * With nLevel = 3, hash multiple of 256 bytes, return multiple of 32 bytes. diff --git a/packages/persistent-merkle-tree/src/hasher/util.ts b/packages/persistent-merkle-tree/src/hasher/util.ts index a028253c..365307eb 100644 --- a/packages/persistent-merkle-tree/src/hasher/util.ts +++ b/packages/persistent-merkle-tree/src/hasher/util.ts @@ -13,14 +13,18 @@ export function uint8ArrayToHashObject(byteArr: Uint8Array): HashObject { type HashIntoFn = (input: Uint8Array, output: Uint8Array) => void; +/** a SHA256 block is 64 bytes */ +export const BLOCK_SIZE = 64; + /** - * Input data is unsafe because it's modified - * If its chunk count is not even, need to be appended with zero hash at layer 0 so that we don't need - * a new memory allocation here (even through we don't need it if padFor = 1) - * The Uint8Array(32) will be written to output at offset + * Merkleize multiple SHA256 blocks in a single Uint8Array into ${output} at ${offset} + * - if padFor > 1 blocksBytes need to be multiple of 64 bytes. 
+ * - if padFor = 1, blocksBytes need to be at least 32 bytes + * - if padFor = 0, throw error + * blocksBytes is unsafe because it's modified */ -export function doMerkleizeInto( - data: Uint8Array, +export function doMerkleizeBlocksBytes( + blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number, @@ -31,33 +35,35 @@ export function doMerkleizeInto( } const layerCount = Math.ceil(Math.log2(padFor)); - if (data.length === 0) { + if (blocksBytes.length === 0) { output.set(zeroHash(layerCount), offset); return; } - if (data.length % 32 !== 0) { - throw new Error(`Invalid input length, expect to be multiple of 32 bytes, got ${data.length}`); + if (blocksBytes.length % 32 !== 0) { + throw new Error(`Invalid input length, expect to be multiple of 32 bytes, got ${blocksBytes.length}`); } // if padFor = 1, only need 32 bytes - if (padFor > 1 && data.length % 64 !== 0) { - throw new Error(`Invalid input length, expect to be multiple of 64 bytes, got ${data.length}, padFor=${padFor}`); + if (padFor > 1 && blocksBytes.length % BLOCK_SIZE !== 0) { + throw new Error( + `Invalid input length, expect to be multiple of 64 bytes, got ${blocksBytes.length}, padFor=${padFor}` + ); } - let inputLength = data.length; + let inputLength = blocksBytes.length; let outputLength = Math.floor(inputLength / 2); - let bufferIn = data; - // hash into the same buffer - for (let i = 0; i < layerCount; i++) { - const bufferOut = data.subarray(0, outputLength); + let bufferIn = blocksBytes; + // hash into the same buffer to save memory allocation + for (let layer = 0; layer < layerCount; layer++) { + const bufferOut = blocksBytes.subarray(0, outputLength); hashInto(bufferIn, bufferOut); const chunkCount = Math.floor(outputLength / 32); - if (chunkCount % 2 === 1 && i < layerCount - 1) { + if (chunkCount % 2 === 1 && layer < layerCount - 1) { // extend to 1 more chunk inputLength = outputLength + 32; - bufferIn = data.subarray(0, inputLength); - bufferIn.set(zeroHash(i + 1), 
/**
 * Merkleize multiple SHA256 blocks into ${output} at ${offset}.
 *
 * Every layer is hashed in batches through `buffer`, and the resulting 32-byte hashes are
 * packed back into the leading entries of `blocks`, so no memory is allocated per call.
 * When a layer produces an odd number of hashes, the last one is paired with the zero hash
 * of that layer so the next layer again consists of whole 64-byte blocks.
 *
 * NOTE(review): there is no check that `blockLimit` blocks fit within `padFor` chunks; when they
 * do not, more than one hash is left after the last layer and only the first one is written.
 *
 * @param blocks 64-byte blocks; unsafe because the leading entries are overwritten. Every entry
 *   is length-checked, including the ones beyond `blockLimit`
 * @param blockLimit number of leading blocks to hash, must be a non-negative integer
 *   <= blocks.length so that consumer can reuse memory
 * @param padFor is maxChunkCount, must be >= 1; with padFor = 1 nothing is hashed and the first
 *   32 bytes of blocks[0] are written to output
 * @param output the result (32 bytes) is stored here
 * @param offset the offset in output to store the result
 * @param hashInto the hash function of each hasher, called with an input view and an output view
 *   of half its length that aliases the start of the input
 * @param buffer is a temporary buffer of each hasher to work with the hashInto() function,
 *   its length must be a non-zero multiple of 4 * 64 bytes
 * @throws Error if padFor < 1, blockLimit is invalid, any block is not 64 bytes,
 *   or the buffer length is invalid
 */
export function doMerkleizeBlockArray(
  blocks: Uint8Array[],
  blockLimit: number,
  padFor: number,
  output: Uint8Array,
  offset: number,
  hashInto: HashIntoFn,
  buffer: Uint8Array
): void {
  if (padFor < 1) {
    throw new Error(`Invalid padFor, expect to be at least 1, got ${padFor}`);
  }

  // a negative blockLimit would become a negative subarray() end below and silently hash stale data
  if (!Number.isInteger(blockLimit) || blockLimit < 0) {
    throw new Error(`Invalid blockLimit, expect to be a non-negative integer, got ${blockLimit}`);
  }

  if (blockLimit > blocks.length) {
    throw new Error(
      `Invalid blockLimit, expect to be less than or equal blocks.length ${blocks.length}, got ${blockLimit}`
    );
  }

  const layerCount = Math.ceil(Math.log2(padFor));
  if (blockLimit === 0) {
    output.set(zeroHash(layerCount), offset);
    return;
  }

  for (const block of blocks) {
    if (block.length !== BLOCK_SIZE) {
      throw new Error(`Invalid block length, expect to be 64 bytes, got ${block.length}`);
    }
  }

  // as-sha256 has a buffer of 4 * 64 bytes
  // hashtree has a buffer of 16 * 64 bytes
  const bufferLengthUnit = 4 * BLOCK_SIZE;
  if (buffer.length === 0 || buffer.length % bufferLengthUnit !== 0) {
    throw new Error(
      `Invalid buffer length, expect to be multiple of ${bufferLengthUnit} bytes, got ${buffer.length}`
    );
  }

  // size in bytes of one hash, two of them make a block
  const hashSize = BLOCK_SIZE / 2;
  // batchSize is 4 for as-sha256, 16 for hashtree; both divisions are exact thanks to the check above
  const batchSize = buffer.length / BLOCK_SIZE;
  const halfBatchSize = batchSize / 2;
  // a full batch always hashes the whole buffer into its own first half
  const fullBatchOut = buffer.subarray(0, halfBatchSize * BLOCK_SIZE);

  // blocks beyond blockLimit are ignored
  let blockCount = blockLimit;
  // hash into the same blocks to save memory allocation
  for (let layer = 0; layer < layerCount; layer++) {
    // the write index never overtakes the read index because each batch emits half of what it reads
    let outBlockIndex = 0;

    const fullBatchCount = Math.floor(blockCount / batchSize);
    for (let batch = 0; batch < fullBatchCount; batch++) {
      const firstBlockIndex = batch * batchSize;
      for (let j = 0; j < batchSize; j++) {
        buffer.set(blocks[firstBlockIndex + j], j * BLOCK_SIZE);
      }

      hashInto(buffer, fullBatchOut);

      for (let j = 0; j < halfBatchSize; j++) {
        blocks[outBlockIndex].set(fullBatchOut.subarray(j * BLOCK_SIZE, (j + 1) * BLOCK_SIZE));
        outBlockIndex++;
      }
    }

    // blocks that do not fill a whole batch; skipped entirely when the layer is batch-aligned
    const remainingBlocks = blockCount % batchSize;
    if (remainingBlocks > 0) {
      const firstBlockIndex = fullBatchCount * batchSize;
      const tailIn = buffer.subarray(0, remainingBlocks * BLOCK_SIZE);
      const tailOut = buffer.subarray(0, remainingBlocks * hashSize);
      for (let j = 0; j < remainingBlocks; j++) {
        tailIn.set(blocks[firstBlockIndex + j], j * BLOCK_SIZE);
      }

      hashInto(tailIn, tailOut);

      // tailOut holds remainingBlocks hashes, i.e. possibly half a block at the end
      const wholeOutBlocks = Math.floor(remainingBlocks / 2);
      for (let j = 0; j < wholeOutBlocks; j++) {
        blocks[outBlockIndex].set(tailOut.subarray(j * BLOCK_SIZE, (j + 1) * BLOCK_SIZE));
        outBlockIndex++;
      }

      if (remainingBlocks % 2 === 1) {
        // pair the dangling hash with the zero hash of the layer that was just produced
        const lastBlock = blocks[outBlockIndex];
        lastBlock.set(tailOut.subarray(tailOut.length - hashSize, tailOut.length), 0);
        lastBlock.set(zeroHash(layer + 1), hashSize);
        outBlockIndex++;
      }
    }

    // end of layer: the packed output blocks are the input of the next layer
    blockCount = outBlockIndex;
  }

  // the end result stays in blocks[0]
  output.set(blocks[0].subarray(0, 32), offset);
}
Buffer.concat([data, zeroHash(0)]) : data; - hasher.merkleizeInto(padData, chunkCount, output, 0); + hasher.merkleizeBlocksBytes(padData, chunkCount, output, 0); + const depth = Math.ceil(Math.log2(chunkCount)); + const root = subtreeFillToContents(nodes, depth).root; + expectEqualHex(output, root); + }); + } + } +}); + +/** + * The same to the previous test, but using the merkleizeBlockArray method + */ +describe("hasher.merkleizeBlockArray", function () { + for (const hasher of [nobleHasher, hashtreeHasher, asSha256Hasher]) { + it (`${hasher.name} should throw error if invalid blockLimit`, () => { + const data = Buffer.alloc(64, 0); + const output = Buffer.alloc(32); + expect(() => hasher.merkleizeBlockArray([data], 2, 2, output, 0)).to.throw("Invalid blockLimit, expect to be less than or equal blocks.length 1, got 2"); + }); + + it (`${hasher.name} should throw error if not multiple of 64 bytes`, () => { + const data = Buffer.alloc(63, 0); + const output = Buffer.alloc(32); + expect(() => hasher.merkleizeBlockArray([data], 1, 2, output, 0)).to.throw("Invalid block length, expect to be 64 bytes, got 63"); + }); + + it (`${hasher.name} should throw error if chunkCount < 1`, () => { + const data = Buffer.alloc(64, 0); + const output = Buffer.alloc(32); + const chunkCount = 0; + expect(() => hasher.merkleizeBlockArray([data], 1, chunkCount, output, 0)).to.throw("Invalid padFor, expect to be at least 1, got 0"); + }); + + // hashtree has a buffer of 16 * 64 bytes = 32 nodes + const numNodes = [64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79]; + for (const numNode of numNodes) { + it(`${hasher.name}.merkleizeBlockArray for ${numNode} nodes`, () => { + + const nodes = Array.from({length: numNode}, (_, i) => LeafNode.fromRoot(Buffer.alloc(32, i))); + const data = Buffer.concat(nodes.map((node) => node.root)); + const output = Buffer.alloc(32); + // depth of 79 nodes are 7, make it 10 to test the padding + const chunkCount = Math.max(numNode, 10); + const 
padData = numNode % 2 === 1 ? Buffer.concat([data, zeroHash(0)]) : data; + expect(padData.length % 64).to.equal(0); + const blocks: Uint8Array[] = []; + for (let i = 0; i < padData.length; i += 64) { + blocks.push(padData.slice(i, i + 64)); + } + const blockLimit = blocks.length; + // should be able to run with above blocks, however add some redundant blocks similar to the consumer + blocks.push(Buffer.alloc(64, 1)); + blocks.push(Buffer.alloc(64, 2)); + hasher.merkleizeBlockArray(blocks, blockLimit, chunkCount, output, 0); const depth = Math.ceil(Math.log2(chunkCount)); const root = subtreeFillToContents(nodes, depth).root; expectEqualHex(output, root);