// The reader is now definitely exhausted for the purpose it was created. To + // avoid odd effects or accidental misuse, we nil it out. And we do so regardless of error. + + rr.Reader = nil // The caller has no use for this + + // If we read less, truncate the buffer + if read < int64(len(data)) { + data = data[0:read] + } + return rr, data, nil +} diff --git a/mmrblobs/doc.go b/mmrblobs/doc.go new file mode 100644 index 0000000..861138e --- /dev/null +++ b/mmrblobs/doc.go @@ -0,0 +1,7 @@ +package mmrblobs + +/* + +Module for reading mmr blobs (massifs) from blob storage. + +*/ diff --git a/mmrblobs/errors.go b/mmrblobs/errors.go new file mode 100644 index 0000000..ab2c0aa --- /dev/null +++ b/mmrblobs/errors.go @@ -0,0 +1,7 @@ +package mmrblobs + +import "errors" + +var ( + ErrNotleaf = errors.New("mmr node not a leaf") +) diff --git a/mmrblobs/indexentry.go b/mmrblobs/indexentry.go new file mode 100644 index 0000000..c7dde8c --- /dev/null +++ b/mmrblobs/indexentry.go @@ -0,0 +1,72 @@ +package mmrblobs + +import ( + "encoding/binary" + "errors" + + "github.com/google/uuid" +) + +const ( + IndexEntryBytes = 32 * 2 + KeyBitSizeLogBase2 = 8 + KeyByteSizeLogBase2 = 5 + + EventIDFirst = 0 + EventIDEnd = EventIDFirst + 16 + SnowflakeIdFirst = 24 + SnowflakeIdEnd = SnowflakeIdFirst + 8 + AssetIDFirst = SnowflakeIdEnd + AssetIDEnd = AssetIDFirst + 16 +) + +var ( + ErrIndexEntryBadSize = errors.New("log index size invalid") +) + +// EmptyIndexEntry is a convenience method for unit tests that don't require a valid index entry +func EmptyIndexEntry() []byte { + return make([]byte, IndexEntryBytes) +} + +func SetIndexSnowflakeID( + data []byte, offset uint64, + snowflakeId uint64, +) { + binary.BigEndian.PutUint64(data[offset+SnowflakeIdFirst:offset+SnowflakeIdEnd], snowflakeId) +} + +func GetIndexSnowflakeID( + data []byte, offset uint64, +) uint64 { + return binary.BigEndian.Uint64(data[offset+SnowflakeIdFirst : offset+SnowflakeIdEnd]) +} + +// NewIndexEntry creates an index 
entry directly from the required components +func NewIndexEntry( + assetId uuid.UUID, eventId uuid.UUID, snowflakeId uint64, +) []byte { + index := [IndexEntryBytes]byte{} + + SetIndexEntry(index[:], 0, assetId, eventId, snowflakeId) + return index[:] +} + +// SetIndexEntry populates the mmr blob index entry at the provided data offset +// +// | 0 - 127 | 128 - 183| 184 - 191 | 192 - 255 | +// | event uuid| reserved | reserved (epoch) | snowflakeid| +// | 0 - 15 | 16 - 22| 23 | 24 - 31| +// | 16 | 7 | 1 | 8 | +// | asset uuid| reserved | +// | 256 - 383| 384 - 511 | +// | 16 | 16 | +func SetIndexEntry( + data []byte, offset uint64, + assetId uuid.UUID, eventId uuid.UUID, snowflakeId uint64, +) { + copy(data[offset+EventIDFirst:offset+EventIDEnd], eventId[:]) + copy(data[offset+AssetIDFirst:offset+AssetIDEnd], assetId[:]) + + binary.BigEndian.PutUint64(data[offset+SnowflakeIdFirst:offset+SnowflakeIdEnd], snowflakeId) +} diff --git a/mmrblobs/indexheader.go b/mmrblobs/indexheader.go new file mode 100644 index 0000000..f99a36e --- /dev/null +++ b/mmrblobs/indexheader.go @@ -0,0 +1,29 @@ +package mmrblobs + +const ( +// . | 0 | | 21 - 22 | 23 26|27 27| 28 - 31 | + +) + +// IndexHeader exists to keep track of the number of leaves represented by the +// mmr data. +// +// Background: +// +// By keeping the index and the log together, we guarantee mutual consistency - +// provided the log and the index values are correctly calculated, a single write +// commits the change back to the blob store. +// +// Because the data is combined, we can't use file size as a proxy for the +// membership count. +// +// Regardless of whether we pre-allocate the index data or whether we accumulate +// it as we do the mmr, we need to know how many leaves are in the index. An algorithm to derive a leaf index from an MMR position exists; it is sub-linear but a bit fiddly to get right. Each is both 32 bytes. 
No range checks are performed; out of range access will panic. Given the blob itself and only the 'tail nodes' from the preceding blob, it is possible to generate proofs without knowledge of any further blobs. Massif Root Index identifies the massif root. If Creating is true and Found > 0, this is the Start header of the *previous* massif. As we link the massif blobs by including the root of the previous massif as the value for the first massif entry, we can return it directly. E.g. in the following, the left child of position 15 is the root of massif 0 at position 7, and similarly, the left child of the root of massif 2 will be position 15. As Get works in indices, that will be indices 14 and 6. And we initialise nextAncestor in AddLeafHash to the top of the stack. got != tt.want { + t.Errorf("TreeSize() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestMassifLastLeafIndex(t *testing.T) { + type args struct { + firstIndex uint64 + height uint8 + } + tests := []struct { + name string + args args + want uint64 + }{ + {"m0, height 2", args{0, 2}, 1}, + {"m1, height 2", args{3, 2}, 4}, + {"m2, height 2", args{7, 2}, 8}, + + {"m0, height 3", args{0, 3}, 4}, + {"m1, height 3", args{7, 3}, 11}, + {"m2, height 3", args{15, 3}, 19}, + + {"m0, height 4", args{0, 4}, 11}, + {"m1, height 4", args{15, 4}, 26}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := RangeLastLeafIndex(tt.args.firstIndex, tt.args.height); got != tt.want { + t.Errorf("MassifLastLeafIndex() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestMassifRootIndex(t *testing.T) { + type args struct { + firstIndex uint64 + height uint8 + } + tests := []struct { + name string + args args + want uint64 + }{ + {"m0, height 2", args{0, 2}, 3 - 1}, + {"m1, height 2", args{3, 2}, 6 - 1}, + {"m2, height 2", args{7, 2}, 10 - 1}, + + {"m0, height 3", args{0, 3}, 7 - 1}, + {"m1, height 3", args{7, 3}, 14 - 1}, + {"m2, height 3", args{15, 3}, 22 - 1}, + + {"m0, height 4", args{0, 4}, 15 - 
1}, + {"m1, height 4", args{15, 4}, 30 - 1}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := RangeRootIndex(tt.args.firstIndex, tt.args.height); got != tt.want { + t.Errorf("MassifRootIndex() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/mmrblobs/massifstart.go b/mmrblobs/massifstart.go new file mode 100644 index 0000000..c1a4d12 --- /dev/null +++ b/mmrblobs/massifstart.go @@ -0,0 +1,187 @@ +package mmrblobs + +// Massif blobs are strictly sized as multiples of 32 bytes in order to +// facilitate simple content independent arithmetic operations over the whole +// MMR. +// +// Knowing only the relative resource name of the blob (which includes its +// epoch), and the size of the blob, all information necessary to place it in the +// overall MMR can be derived computationally (and efficiently). +// +// The massifstart is a 32 byte field encoding the small amount of bookkeeping +// required in a blob to allow for efficient correctness checks. This field is followed by the root hashes from preceding blobs that will be necessary to complete the blob. These are maintained in a stack. Neither the stack length nor a mapping of the positions it contains are stored; all of this information is recovered computationally, based on the blob's position in the MMR. The value is always considered as a big endian large integer. Lexical ordering is defined only for padded hex representations of the key value. The reserved zero bytes can be used in later versions. Because if we shift the version field left, even without incrementing it, the resulting key is numerically larger than all of those for previous versions. This last is an iterative call but it is sub-linear. 
Essentially its O(tree height) (not massif height ofc) got != tt.want { + t.Errorf("MassifFirstLeaf() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestMassifStartKeyRoundTrip(t *testing.T) { + type args struct { + version uint16 + epoch uint32 + massifHeight uint8 + massifIndex uint32 + firstIndex uint64 + } + tests := []struct { + name string + args args + }{ + {"a", args{1, 2, 2, 2, 7}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + encoded := EncodeMassifStart(tt.args.version, tt.args.epoch, tt.args.massifHeight, tt.args.massifIndex) + encoded = append(encoded, make([]byte, 32)...) + got := MassifStart{} + err := got.UnmarshalBinary(encoded) + assert.Nil(t, err) + assert.Equal(t, got.Version, tt.args.version) + assert.Equal(t, got.Epoch, tt.args.epoch) + assert.Equal(t, got.MassifHeight, tt.args.massifHeight) + assert.Equal(t, got.MassifIndex, tt.args.massifIndex) + assert.Equal(t, got.FirstIndex, tt.args.firstIndex) + }) + } +} diff --git a/mmrblobs/masssifreader.go b/mmrblobs/masssifreader.go new file mode 100644 index 0000000..91e195c --- /dev/null +++ b/mmrblobs/masssifreader.go @@ -0,0 +1,117 @@ +package mmrblobs + +import ( + "context" + "time" + + "github.com/datatrails/go-datatrails-common/azblob" + "github.com/datatrails/go-datatrails-common/logger" + "github.com/datatrails/go-datatrails-merklelog/mmr" +) + +type massifReader interface { + Reader( + ctx context.Context, + identity string, + opts ...azblob.Option, + ) (*azblob.ReaderResponse, error) + + List(ctx context.Context, opts ...azblob.Option) (*azblob.ListerResponse, error) +} + +type MassifReader struct { + log logger.Logger + store massifReader +} + +func NewMassifReader(log logger.Logger, store massifReader) *MassifReader { + r := &MassifReader{ + log: log, + store: store, + } + return r +} + +func (mr *MassifReader) GetMassif( + ctx context.Context, tenantIdentity string, massifIndex uint64, + opts ...azblob.Option, +) (MassifContext, error) { + + var err 
error + var rr *azblob.ReaderResponse + mc := MassifContext{ + TenantIdentity: tenantIdentity, + BlobPath: TenantMassifBlobPath(tenantIdentity, massifIndex), + } + + rr, mc.Data, err = BlobRead(ctx, mc.BlobPath, mr.store, opts...) + if err != nil { + return MassifContext{}, err + } + mc.Tags = rr.Tags + mc.ETag = *rr.ETag + mc.LastRead = time.Now() + mc.LastModfified = *rr.LastModified + + err = mc.Start.UnmarshalBinary(mc.Data) + if err != nil { + return MassifContext{}, err + } + return mc, nil +} + +// MassifIndexFromLeafIndex gets the massif index of the massif that the given leaf is stored in, +// +// given the leaf index of the leaf. +// +// This is found with the given massif height, which is constant for all massifs. +func MassifIndexFromLeafIndex(massifHeight uint8, leafIndex uint64) uint64 { + + // first find how many leaf nodes each massif can hold. + // + // Note: massifHeight starts at index 1, whereas height index for HeightIndexLeafCount starts at 0. + massifMaxLeaves := mmr.HeightIndexLeafCount(uint64(massifHeight) - 1) + + // now find the massif. + // + // for context, see: https://github.com/datatrails/epic-8120-scalable-proof-mechanisms/blob/main/mmr/forestrie-mmrblobs.md#blob-size + // + // Note: massif indexes start at 0. + // Note: leaf indexes start at 0. + // + // Therefore, given a massif height of 2, that has max leaves of 4; if a leaf index of 3 is given, then it is in massif 0, along with leaves 0, 1 and 2. Where the leaf nodes start on height 1. Derived from: https://github.com/datatrails/epic-8120-scalable-proof-mechanisms/blob/main/mmr/forestrie-mmrblobs.md#blob-size Derived from: https://github.com/datatrails/epic-8120-scalable-proof-mechanisms/blob/main/mmr/forestrie-mmrblobs.md#blob-size