From 358583c1fdcee9218ca1de9aef42a764e76a05e2 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 3 Feb 2025 22:44:43 -0500
Subject: [PATCH 1/3] fixing various Validate() bugs.

---
 roaring.go                   |  3 ++
 roaring64/roaringarray64.go  |  5 ---
 roaring_test.go              | 62 +++++++++++++++++++++++++++++++++---
 roaringarray.go              |  5 ---
 runcontainer.go              | 10 +++---
 serialization_frozen_test.go |  4 +--
 6 files changed, 65 insertions(+), 24 deletions(-)

diff --git a/roaring.go b/roaring.go
index c7f00af1..9972a51e 100644
--- a/roaring.go
+++ b/roaring.go
@@ -2133,6 +2133,9 @@ func (rb *Bitmap) Stats() Statistics {
 	return stats
 }
 
+// Validate checks if the bitmap is internally consistent.
+// You may call it after deserialization to check that the bitmap is valid.
+// This function returns an error if the bitmap is invalid, nil otherwise.
 func (rb *Bitmap) Validate() error {
 	return rb.highlowcontainer.validate()
 }
diff --git a/roaring64/roaringarray64.go b/roaring64/roaringarray64.go
index 629ad751..993a208a 100644
--- a/roaring64/roaringarray64.go
+++ b/roaring64/roaringarray64.go
@@ -14,7 +14,6 @@ type roaringArray64 struct {
 }
 
 var (
-	ErrEmptyKeys             = errors.New("keys were empty")
 	ErrKeySortOrder          = errors.New("keys were out of order")
 	ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
 )
@@ -437,10 +436,6 @@ func (ra *roaringArray64) checkKeysSorted() bool {
 // validate checks the referential integrity
 // ensures len(keys) == len(containers), recurses and checks each container type
 func (ra *roaringArray64) validate() error {
-	if len(ra.keys) == 0 {
-		return ErrEmptyKeys
-	}
-
 	if !ra.checkKeysSorted() {
 		return ErrKeySortOrder
 	}
diff --git a/roaring_test.go b/roaring_test.go
index 2665b14d..e8aac985 100644
--- a/roaring_test.go
+++ b/roaring_test.go
@@ -2732,8 +2732,6 @@ func TestFromBitSet(t *testing.T) {
 func TestRoaringArrayValidation(t *testing.T) {
 	a := newRoaringArray()
 
-	assert.ErrorIs(t, a.validate(), ErrEmptyKeys)
-
 	a.keys = append(a.keys, uint16(3), uint16(1))
 	assert.ErrorIs(t, a.validate(), ErrKeySortOrder)
 	a.clear()
@@ -2805,11 +2803,9 @@ func TestBitMapValidationFromDeserialization(t *testing.T) {
 				bm.AddRange(100, 110)
 			},
 			corruptor: func(s []byte) {
-				// 13 is the length of the run
-				// Setting to zero causes an invalid run
 				s[13] = 0
 			},
-			err: ErrRunIntervalLength,
+			err: ErrRunIntervalSize,
 		},
 		{
 			name: "Creates Interval Overlap",
@@ -3310,3 +3306,59 @@ func TestIssue467CaseLarge(t *testing.T) {
 	b.RunOptimize()
 	require.NoError(t, b.Validate())
 }
+
+func TestValidateEmpty(t *testing.T) {
+	require.NoError(t, New().Validate())
+}
+
+func TestValidate469(t *testing.T) {
+	b := New()
+	b.RemoveRange(0, 180)
+	b.AddRange(0, 180)
+	require.NoError(t, b.Validate())
+	b.RemoveRange(180, 217)
+	b.AddRange(180, 217)
+	require.NoError(t, b.Validate())
+	b.RemoveRange(217, 2394)
+	b.RemoveRange(2394, 2427)
+	b.AddRange(2394, 2427)
+	require.NoError(t, b.Validate())
+	b.RemoveRange(2427, 2428)
+	b.AddRange(2427, 2428)
+	require.NoError(t, b.Validate())
+	b.RemoveRange(2428, 3345)
+	require.NoError(t, b.Validate())
+	b.RemoveRange(3345, 3346)
+	require.NoError(t, b.Validate())
+	b.RemoveRange(3346, 3597)
+	require.NoError(t, b.Validate())
+	b.RemoveRange(3597, 3815)
+	require.NoError(t, b.Validate())
+	b.RemoveRange(3815, 3816)
+	require.NoError(t, b.Validate())
+	b.AddRange(3815, 3816)
+	require.NoError(t, b.Validate())
+	b.RemoveRange(3816, 3856)
+	b.RemoveRange(3856, 4067)
+	b.RemoveRange(4067, 4069)
+	b.RemoveRange(4069, 4071)
+	b.RemoveRange(4071, 4095)
+	b.RemoveRange(4095, 4096)
+	require.NoError(t, b.Validate())
+	b.RunOptimize()
+	require.False(t, b.IsEmpty())
+	require.NoError(t, b.Validate())
+}
+
+func TestValidateFromV1(t *testing.T) {
+	v1 := New()
+	for i := 0; i <= 2; i++ {
+		v1.Add(uint32(i))
+	}
+	v1.RunOptimize()
+	b, err := v1.MarshalBinary()
+	require.NoError(t, err)
+	v2 := New()
+	require.NoError(t, v2.UnmarshalBinary(b))
+	require.NoError(t, v2.Validate())
+}
diff --git a/roaringarray.go b/roaringarray.go
index 552c61ce..40be90a5 100644
--- a/roaringarray.go
+++ b/roaringarray.go
@@ -90,7 +90,6 @@ const (
 )
 
 var (
-	ErrEmptyKeys             = errors.New("keys were empty")
 	ErrKeySortOrder          = errors.New("keys were out of order")
 	ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
 )
@@ -798,10 +797,6 @@ func (ra *roaringArray) checkKeysSorted() bool {
 // validate checks the referential integrity
 // ensures len(keys) == len(containers), recurses and checks each container type
 func (ra *roaringArray) validate() error {
-	if len(ra.keys) == 0 {
-		return ErrEmptyKeys
-	}
-
 	if !ra.checkKeysSorted() {
 		return ErrKeySortOrder
 	}
diff --git a/runcontainer.go b/runcontainer.go
index d5250a0e..c1a03b69 100644
--- a/runcontainer.go
+++ b/runcontainer.go
@@ -62,7 +62,6 @@ type interval16 struct {
 var (
 	ErrRunIntervalsEmpty  = errors.New("run contained no interval")
 	ErrRunNonSorted       = errors.New("runs were not sorted")
-	ErrRunIntervalLength  = errors.New("interval had zero length")
 	ErrRunIntervalEqual   = errors.New("intervals were equal")
 	ErrRunIntervalOverlap = errors.New("intervals overlapped or were continguous")
 	ErrRunIntervalSize    = errors.New("too many intervals relative to data")
@@ -2757,10 +2756,9 @@ func (rc *runContainer16) validate() error {
 
 	intervalsSum := 0
 	for outeridx := range rc.iv {
-
-		if rc.iv[outeridx].length == 0 {
-			return ErrRunIntervalLength
-		}
+		// The length being stored is the actual length - 1.
+		// So we need to add 1 to get the actual length.
+		// It is not possible to have a run with length 0.
 
 		outerInterval := rc.iv[outeridx]
 
@@ -2799,7 +2797,7 @@ func (rc *runContainer16) validate() error {
 			return ErrRunIntervalSize
 		}
 	} else {
-		if !(len(rc.iv) < (intervalsSum / 2)) {
+		if !(2*len(rc.iv) < intervalsSum) {
 			return ErrRunIntervalSize
 		}
 	}
diff --git a/serialization_frozen_test.go b/serialization_frozen_test.go
index 126c9a9b..20bc0e50 100644
--- a/serialization_frozen_test.go
+++ b/serialization_frozen_test.go
@@ -171,11 +171,9 @@ func TestBitMapValidationFromFrozen(t *testing.T) {
 				bm.AddRange(100, 110)
 			},
 			corruptor: func(s []byte) {
-				// 13 is the length of the run
-				// Setting to zero causes an invalid run
 				s[2] = 0
 			},
-			err: ErrRunIntervalLength,
+			err: ErrRunIntervalSize,
 		},
 		{
 			name: "Creates Interval Overlap",

From 2561ba1a63f77cb9dbb9fc4da7d53ce1d8be3e9e Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 3 Feb 2025 23:39:35 -0500
Subject: [PATCH 2/3] more fixes...

---
 roaring64/bsi64_test.go     | 4 ++--
 roaring64/roaring64_test.go | 2 --
 roaring64/roaringarray64.go | 4 +++-
 roaring_test.go             | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/roaring64/bsi64_test.go b/roaring64/bsi64_test.go
index 238b538e..98b4aba0 100644
--- a/roaring64/bsi64_test.go
+++ b/roaring64/bsi64_test.go
@@ -107,8 +107,8 @@ func TestSetAndGetBigTimestamp(t *testing.T) {
 	// Recover and check the known timestamp
 	bv, _ := bsi.GetBigValue(1)
 	seconds, nanoseconds := bigIntToSecondsAndNanos(bv)
-	ts := time.Unix(seconds, int64(nanoseconds))
-	assert.Equal(t, "3024-10-23T16:55:46.763295273Z", ts.Format(time.RFC3339Nano))
+	assert.Equal(t, seconds, int64(33286611346))
+	assert.Equal(t, nanoseconds, int32(763295273))
 	assert.Equal(t, 67, bsi.BitCount())
 }
 
diff --git a/roaring64/roaring64_test.go b/roaring64/roaring64_test.go
index 69588aec..0a3d20c8 100644
--- a/roaring64/roaring64_test.go
+++ b/roaring64/roaring64_test.go
@@ -2015,8 +2015,6 @@ func Test32As64(t *testing.T) {
 func TestRoaringArray64Validation(t *testing.T) {
 	a := roaringArray64{}
 
-	assert.ErrorIs(t, a.validate(), ErrEmptyKeys)
-
 	a.keys = append(a.keys, uint32(3), uint32(1))
 	assert.ErrorIs(t, a.validate(), ErrKeySortOrder)
 	a.clear()
diff --git a/roaring64/roaringarray64.go b/roaring64/roaringarray64.go
index 993a208a..09c366ff 100644
--- a/roaring64/roaringarray64.go
+++ b/roaring64/roaringarray64.go
@@ -449,11 +449,13 @@ func (ra *roaringArray64) validate() error {
 	}
 
 	for _, maps := range ra.containers {
-
 		err := maps.Validate()
 		if err != nil {
 			return err
 		}
+		if maps.IsEmpty() {
+			return errors.New("empty container")
+		}
 	}
 
 	return nil
diff --git a/roaring_test.go b/roaring_test.go
index e8aac985..277c2796 100644
--- a/roaring_test.go
+++ b/roaring_test.go
@@ -2742,7 +2742,7 @@ func TestRoaringArrayValidation(t *testing.T) {
 	a.containers = append(a.containers, &runContainer16{}, &runContainer16{}, &runContainer16{})
 	assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint)
 	a.needCopyOnWrite = append(a.needCopyOnWrite, true, false, true)
-	assert.Errorf(t, a.validate(), "zero intervals")
+	assert.ErrorIs(t, a.validate(), ErrRunIntervalsEmpty)
 }
 
 func TestBitMapValidation(t *testing.T) {

From e5a4a6f820ae3cb74fa3211466eee29192967f73 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 4 Feb 2025 00:06:43 -0500
Subject: [PATCH 3/3] more fixes

---
 runcontainer.go      | 23 ++++++++++++++---------
 runcontainer_test.go |  3 +--
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/runcontainer.go b/runcontainer.go
index c1a03b69..4468eebd 100644
--- a/runcontainer.go
+++ b/runcontainer.go
@@ -2792,15 +2792,20 @@ func (rc *runContainer16) validate() error {
 		    check that the number of runs < (number of distinct values) / 2
 		    (otherwise you could use an array container)
 	*/
-	if MaxIntervalsSum <= intervalsSum {
-		if !(len(rc.iv) < MaxNumIntervals) {
-			return ErrRunIntervalSize
-		}
-	} else {
-		if !(2*len(rc.iv) < intervalsSum) {
-			return ErrRunIntervalSize
-		}
-	}
 
+	sizeAsRunContainer := runContainer16SerializedSizeInBytes(len(rc.iv))
+	sizeAsBitmapContainer := bitmapContainerSizeInBytes()
+	sizeAsArrayContainer := arrayContainerSizeInBytes(intervalsSum)
+	fmt.Println(sizeAsRunContainer, sizeAsBitmapContainer, sizeAsArrayContainer)
+	// this is always ok:
+	if sizeAsRunContainer < minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
+		return nil
+	}
+	if sizeAsRunContainer >= sizeAsBitmapContainer {
+		return ErrRunIntervalSize
+	}
+	if sizeAsRunContainer >= sizeAsArrayContainer {
+		return ErrRunIntervalSize
+	}
 	return nil
 }
diff --git a/runcontainer_test.go b/runcontainer_test.go
index d3a2e60c..0cbd8b25 100644
--- a/runcontainer_test.go
+++ b/runcontainer_test.go
@@ -2588,13 +2588,12 @@ func TestIntervalValidationFailing(t *testing.T) {
 	start := -4
 	for i := 0; i < MaxNumIntervals; i++ {
 		start += 4
-		end := start + 2
+		end := start + 1
 		a := newInterval16Range(uint16(start), uint16(end))
 		rc.iv = append(rc.iv, a)
 
 	}
 	assert.ErrorIs(t, rc.validate(), ErrRunIntervalSize)
-
 	// too many small runs, use array
 	rc = &runContainer16{}
 	start = -3