From 358583c1fdcee9218ca1de9aef42a764e76a05e2 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 3 Feb 2025 22:44:43 -0500 Subject: [PATCH 1/3] fixing various Validate() bugs. --- roaring.go | 3 ++ roaring64/roaringarray64.go | 5 --- roaring_test.go | 62 +++++++++++++++++++++++++++++++++--- roaringarray.go | 5 --- runcontainer.go | 10 +++--- serialization_frozen_test.go | 4 +-- 6 files changed, 65 insertions(+), 24 deletions(-) diff --git a/roaring.go b/roaring.go index c7f00af1..9972a51e 100644 --- a/roaring.go +++ b/roaring.go @@ -2133,6 +2133,9 @@ func (rb *Bitmap) Stats() Statistics { return stats } +// Validate checks if the bitmap is internally consistent. +// You may call it after deserialization to check that the bitmap is valid. +// This function returns an error if the bitmap is invalid, nil otherwise. func (rb *Bitmap) Validate() error { return rb.highlowcontainer.validate() } diff --git a/roaring64/roaringarray64.go b/roaring64/roaringarray64.go index 629ad751..993a208a 100644 --- a/roaring64/roaringarray64.go +++ b/roaring64/roaringarray64.go @@ -14,7 +14,6 @@ type roaringArray64 struct { } var ( - ErrEmptyKeys = errors.New("keys were empty") ErrKeySortOrder = errors.New("keys were out of order") ErrCardinalityConstraint = errors.New("size of arrays was not coherent") ) @@ -437,10 +436,6 @@ func (ra *roaringArray64) checkKeysSorted() bool { // validate checks the referential integrity // ensures len(keys) == len(containers), recurses and checks each container type func (ra *roaringArray64) validate() error { - if len(ra.keys) == 0 { - return ErrEmptyKeys - } - if !ra.checkKeysSorted() { return ErrKeySortOrder } diff --git a/roaring_test.go b/roaring_test.go index 2665b14d..e8aac985 100644 --- a/roaring_test.go +++ b/roaring_test.go @@ -2732,8 +2732,6 @@ func TestFromBitSet(t *testing.T) { func TestRoaringArrayValidation(t *testing.T) { a := newRoaringArray() - assert.ErrorIs(t, a.validate(), ErrEmptyKeys) - a.keys = append(a.keys, uint16(3), uint16(1)) assert.ErrorIs(t, a.validate(), ErrKeySortOrder) a.clear() @@ -2805,11 +2803,9 @@ func TestBitMapValidationFromDeserialization(t *testing.T) { bm.AddRange(100, 110) }, corruptor: func(s []byte) { - // 13 is the length of the run - // Setting to zero causes an invalid run s[13] = 0 }, - err: ErrRunIntervalLength, + err: ErrRunIntervalSize, }, { name: "Creates Interval Overlap", @@ -3310,3 +3306,59 @@ func TestIssue467CaseLarge(t *testing.T) { b.RunOptimize() require.NoError(t, b.Validate()) } + +func TestValidateEmpty(t *testing.T) { + require.NoError(t, New().Validate()) +} + +func TestValidate469(t *testing.T) { + b := New() + b.RemoveRange(0, 180) + b.AddRange(0, 180) + require.NoError(t, b.Validate()) + b.RemoveRange(180, 217) + b.AddRange(180, 217) + require.NoError(t, b.Validate()) + b.RemoveRange(217, 2394) + b.RemoveRange(2394, 2427) + b.AddRange(2394, 2427) + require.NoError(t, b.Validate()) + b.RemoveRange(2427, 2428) + b.AddRange(2427, 2428) + require.NoError(t, b.Validate()) + b.RemoveRange(2428, 3345) + require.NoError(t, b.Validate()) + b.RemoveRange(3345, 3346) + require.NoError(t, b.Validate()) + b.RemoveRange(3346, 3597) + require.NoError(t, b.Validate()) + b.RemoveRange(3597, 3815) + require.NoError(t, b.Validate()) + b.RemoveRange(3815, 3816) + require.NoError(t, b.Validate()) + b.AddRange(3815, 3816) + require.NoError(t, b.Validate()) + b.RemoveRange(3816, 3856) + b.RemoveRange(3856, 4067) + b.RemoveRange(4067, 4069) + b.RemoveRange(4069, 4071) + b.RemoveRange(4071, 4095) + b.RemoveRange(4095, 4096) + require.NoError(t, b.Validate()) + b.RunOptimize() + require.False(t, b.IsEmpty()) + require.NoError(t, b.Validate()) +} + +func TestValidateFromV1(t *testing.T) { + v1 := New() + for i := 0; i <= 2; i++ { + v1.Add(uint32(i)) + } + v1.RunOptimize() + b, err := v1.MarshalBinary() + require.NoError(t, err) + v2 := New() + require.NoError(t, v2.UnmarshalBinary(b)) + require.NoError(t, v2.Validate()) +} diff --git a/roaringarray.go b/roaringarray.go index 552c61ce..40be90a5 100644 --- a/roaringarray.go +++ b/roaringarray.go @@ -90,7 +90,6 @@ const ( ) var ( - ErrEmptyKeys = errors.New("keys were empty") ErrKeySortOrder = errors.New("keys were out of order") ErrCardinalityConstraint = errors.New("size of arrays was not coherent") ) @@ -798,10 +797,6 @@ func (ra *roaringArray) checkKeysSorted() bool { // validate checks the referential integrity // ensures len(keys) == len(containers), recurses and checks each container type func (ra *roaringArray) validate() error { - if len(ra.keys) == 0 { - return ErrEmptyKeys - } - if !ra.checkKeysSorted() { return ErrKeySortOrder } diff --git a/runcontainer.go b/runcontainer.go index d5250a0e..c1a03b69 100644 --- a/runcontainer.go +++ b/runcontainer.go @@ -62,7 +62,6 @@ type interval16 struct { var ( ErrRunIntervalsEmpty = errors.New("run contained no interval") ErrRunNonSorted = errors.New("runs were not sorted") - ErrRunIntervalLength = errors.New("interval had zero length") ErrRunIntervalEqual = errors.New("intervals were equal") ErrRunIntervalOverlap = errors.New("intervals overlapped or were continguous") ErrRunIntervalSize = errors.New("too many intervals relative to data") @@ -2757,10 +2756,9 @@ func (rc *runContainer16) validate() error { intervalsSum := 0 for outeridx := range rc.iv { - - if rc.iv[outeridx].length == 0 { - return ErrRunIntervalLength - } + // The length being stored is the actual length - 1. + // So we need to add 1 to get the actual length. + // It is not possible to have a run with length 0. outerInterval := rc.iv[outeridx] @@ -2799,7 +2797,7 @@ func (rc *runContainer16) validate() error { return ErrRunIntervalSize } } else { - if !(len(rc.iv) < (intervalsSum / 2)) { + if !(2*len(rc.iv) < intervalsSum) { return ErrRunIntervalSize } } diff --git a/serialization_frozen_test.go b/serialization_frozen_test.go index 126c9a9b..20bc0e50 100644 --- a/serialization_frozen_test.go +++ b/serialization_frozen_test.go @@ -171,11 +171,9 @@ func TestBitMapValidationFromFrozen(t *testing.T) { bm.AddRange(100, 110) }, corruptor: func(s []byte) { - // 13 is the length of the run - // Setting to zero causes an invalid run s[2] = 0 }, - err: ErrRunIntervalLength, + err: ErrRunIntervalSize, }, { name: "Creates Interval Overlap", From 2561ba1a63f77cb9dbb9fc4da7d53ce1d8be3e9e Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 3 Feb 2025 23:39:35 -0500 Subject: [PATCH 2/3] more fixes... --- roaring64/bsi64_test.go | 4 ++-- roaring64/roaring64_test.go | 2 -- roaring64/roaringarray64.go | 4 +++- roaring_test.go | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/roaring64/bsi64_test.go b/roaring64/bsi64_test.go index 238b538e..98b4aba0 100644 --- a/roaring64/bsi64_test.go +++ b/roaring64/bsi64_test.go @@ -107,8 +107,8 @@ func TestSetAndGetBigTimestamp(t *testing.T) { // Recover and check the known timestamp bv, _ := bsi.GetBigValue(1) seconds, nanoseconds := bigIntToSecondsAndNanos(bv) - ts := time.Unix(seconds, int64(nanoseconds)) - assert.Equal(t, "3024-10-23T16:55:46.763295273Z", ts.Format(time.RFC3339Nano)) + assert.Equal(t, seconds, int64(33286611346)) + assert.Equal(t, nanoseconds, int32(763295273)) assert.Equal(t, 67, bsi.BitCount()) } diff --git a/roaring64/roaring64_test.go b/roaring64/roaring64_test.go index 69588aec..0a3d20c8 100644 --- a/roaring64/roaring64_test.go +++ b/roaring64/roaring64_test.go @@ -2015,8 +2015,6 @@ func Test32As64(t *testing.T) { func TestRoaringArray64Validation(t *testing.T) { a := roaringArray64{} - assert.ErrorIs(t, a.validate(), ErrEmptyKeys) - a.keys = append(a.keys, uint32(3), uint32(1)) assert.ErrorIs(t, a.validate(), ErrKeySortOrder) a.clear() diff --git a/roaring64/roaringarray64.go b/roaring64/roaringarray64.go index 993a208a..09c366ff 100644 --- a/roaring64/roaringarray64.go +++ b/roaring64/roaringarray64.go @@ -449,11 +449,13 @@ func (ra *roaringArray64) validate() error { } for _, maps := range ra.containers { - err := maps.Validate() if err != nil { return err } + if maps.IsEmpty() { + return errors.New("empty container") + } } return nil diff --git a/roaring_test.go b/roaring_test.go index e8aac985..277c2796 100644 --- a/roaring_test.go +++ b/roaring_test.go @@ -2742,7 +2742,7 @@ func TestRoaringArrayValidation(t *testing.T) { a.containers = append(a.containers, &runContainer16{}, &runContainer16{}, &runContainer16{}) assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint) a.needCopyOnWrite = append(a.needCopyOnWrite, true, false, true) - assert.Errorf(t, a.validate(), "zero intervals") + assert.ErrorIs(t, a.validate(), ErrRunIntervalsEmpty) } func TestBitMapValidation(t *testing.T) { From e5a4a6f820ae3cb74fa3211466eee29192967f73 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 4 Feb 2025 00:06:43 -0500 Subject: [PATCH 3/3] more fixes --- runcontainer.go | 23 ++++++++++++++--------- runcontainer_test.go | 3 +-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/runcontainer.go b/runcontainer.go index c1a03b69..4468eebd 100644 --- a/runcontainer.go +++ b/runcontainer.go @@ -2792,15 +2792,20 @@ func (rc *runContainer16) validate() error { check that the number of runs < (number of distinct values) / 2 (otherwise you could use an array container) */ - if MaxIntervalsSum <= intervalsSum { - if !(len(rc.iv) < MaxNumIntervals) { - return ErrRunIntervalSize - } - } else { - if !(2*len(rc.iv) < intervalsSum) { - return ErrRunIntervalSize - } - } + sizeAsRunContainer := runContainer16SerializedSizeInBytes(len(rc.iv)) + sizeAsBitmapContainer := bitmapContainerSizeInBytes() + sizeAsArrayContainer := arrayContainerSizeInBytes(intervalsSum) + fmt.Println(sizeAsRunContainer, sizeAsBitmapContainer, sizeAsArrayContainer) + // this is always ok: + if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) { + return nil + } + if sizeAsRunContainer >= sizeAsBitmapContainer { + return ErrRunIntervalSize + } + if sizeAsRunContainer >= sizeAsArrayContainer { + return ErrRunIntervalSize + } return nil } diff --git a/runcontainer_test.go b/runcontainer_test.go index d3a2e60c..0cbd8b25 100644 --- a/runcontainer_test.go +++ b/runcontainer_test.go @@ -2588,13 +2588,12 @@ func TestIntervalValidationFailing(t *testing.T) { start := -4 for i := 0; i < MaxNumIntervals; i++ { start += 4 - end := start + 2 + end := start + 1 a := newInterval16Range(uint16(start), uint16(end)) rc.iv = append(rc.iv, a) } assert.ErrorIs(t, rc.validate(), ErrRunIntervalSize) - // too many small runs, use array rc = &runContainer16{} start = -3