forked from filecoin-project/go-hamt-ipld
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
BREAKING CHANGE: introduce Bitmap for handling bitfield operations
- Loading branch information
Showing
7 changed files
with
499 additions
and
156 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
package hamt | ||
|
||
import ( | ||
"fmt" | ||
"math" | ||
"math/bits" | ||
) | ||
|
||
// Bitmap is a managed bitfield whose main job is to record which slots of an
// associated array are occupied. Single bits can be set, cleared and queried,
// and a partial popcount (see Index) converts a bit position into the offset
// of the matching element in the compacted form of that array.
type Bitmap struct {
	// Bytes is the raw backing storage. Bits are addressed from the last byte
	// backward: positions 0-7 live in the final byte of the slice.
	Bytes []byte
}
|
||
// NewBitmap creates a new bitmap for a given bitWidth. The bitmap will hold | ||
// 2^bitWidth bytes. | ||
func NewBitmap(bitWidth int) *Bitmap { | ||
bc := (1 << uint(bitWidth)) / 8 | ||
if bc == 0 { | ||
panic("bitWidth too small") | ||
} | ||
|
||
return NewBitmapFrom(make([]byte, bc)) | ||
} | ||
|
||
// NewBitmapFrom creates a new Bitmap from an existing byte array. It is | ||
// assumed that bytes is the correct length for the bitWidth of this Bitmap. | ||
func NewBitmapFrom(bytes []byte) *Bitmap { | ||
if len(bytes) == 0 { | ||
panic("can't form Bitmap from zero bytes") | ||
} | ||
bm := Bitmap{Bytes: bytes} | ||
return &bm | ||
} | ||
|
||
// BitWidth calculates the bitWidth of this Bitmap by performing a | ||
// log2(bits). The bitWidth is the minimum number of bits required to | ||
// form indexes that address all of this Bitmap. e.g. a bitWidth of 5 can form | ||
// indexes of 0 to 31, i.e. 4 bytes. | ||
func (bm *Bitmap) BitWidth() int { | ||
return int(math.Log2(float64(len(bm.Bytes) * 8))) | ||
} | ||
|
||
func (bm *Bitmap) bindex(in int) int { | ||
// Return `in` to flip the byte addressing order to LE. For BE we address | ||
// from the last byte backward. | ||
bi := len(bm.Bytes) - 1 - in | ||
if bi > len(bm.Bytes) || bi < 0 { | ||
panic(fmt.Sprintf("invalid index for this Bitmap (index: %v, bytes: %v)", in, len(bm.Bytes))) | ||
} | ||
return bi | ||
} | ||
|
||
// IsSet indicates whether the bit at the provided position is set or not. | ||
func (bm *Bitmap) IsSet(position int) bool { | ||
byt := bm.bindex(position / 8) | ||
offset := position % 8 | ||
return (bm.Bytes[byt]>>offset)&1 == 1 | ||
} | ||
|
||
// Set sets or unsets the bit at the given position according. If set is true, | ||
// the bit will be set. If set is false, the bit will be unset. | ||
func (bm *Bitmap) Set(position int, set bool) { | ||
has := bm.IsSet(position) | ||
byt := bm.bindex(position / 8) | ||
offset := position % 8 | ||
|
||
if set && !has { | ||
bm.Bytes[byt] |= 1 << offset | ||
} else if !set && has { | ||
bm.Bytes[byt] ^= 1 << offset | ||
} | ||
} | ||
|
||
// Index performs a limited popcount up to the given position. This calculates | ||
// the number of set bits up to the index of the bitmap. Useful for calculating | ||
// the position of an element in an associated compacted array. | ||
func (bm *Bitmap) Index(position int) int { | ||
t := 0 | ||
eb := position / 8 | ||
byt := 0 | ||
for ; byt < eb; byt++ { | ||
// quick popcount for the full bytes | ||
t += bits.OnesCount(uint(bm.Bytes[bm.bindex(byt)])) | ||
} | ||
eb = eb * 8 | ||
if position > eb { | ||
for i := byt * 8; i < position; i++ { | ||
// manual per-bit check for the remainder <8 bits | ||
if bm.IsSet(i) { | ||
t++ | ||
} | ||
} | ||
} | ||
return t | ||
} | ||
|
||
// Copy creates a clone of the Bitmap, creating a new byte array with the same | ||
// contents as the original. | ||
func (bm *Bitmap) Copy() *Bitmap { | ||
ba := make([]byte, len(bm.Bytes)) | ||
copy(ba, bm.Bytes) | ||
return NewBitmapFrom(ba) | ||
} | ||
|
||
// BitsSetCount counts how many bits are set in the bitmap. | ||
func (bm *Bitmap) BitsSetCount() int { | ||
count := 0 | ||
for _, b := range bm.Bytes { | ||
count += bits.OnesCount(uint(b)) | ||
} | ||
return count | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,250 @@ | ||
package hamt | ||
|
||
import ( | ||
"bytes" | ||
"testing" | ||
) | ||
|
||
// many cases taken from https://github.com/rvagg/iamap/blob/fad95295b013c8b4f0faac6dd5d9be175f6e606c/test/bit-utils-test.js | ||
// but rev() is used to reverse the data in most instances | ||
|
||
// rev returns a reversed copy of in, leaving the input untouched. It is used
// to put test vectors into the BE byte order that Bitmap stores.
func rev(in []byte) []byte {
	n := len(in)
	out := make([]byte, n)
	for i, v := range in {
		out[n-1-i] = v
	}
	return out
}
|
||
// bb turns an 8-character string of '0' and '1' into the byte it spells out,
// most significant bit first. It stands in for binary literals, which old Go
// versions do not have. Any character other than '1' counts as a zero bit.
func bb(s string) byte {
	var out byte
	for i := 0; i < len(s); i++ {
		if s[i] != '1' {
			continue
		}
		out |= 1 << (7 - i)
	}
	return out
}
|
||
func TestBitmapHas(t *testing.T) { | ||
type tcase struct { | ||
bytes []byte | ||
pos int | ||
set bool | ||
} | ||
cases := []tcase{ | ||
{b(0x0), 0, false}, | ||
{b(0x1), 0, true}, | ||
{b(bb("00101010")), 2, false}, | ||
{b(bb("00101010")), 3, true}, | ||
{b(bb("00101010")), 4, false}, | ||
{b(bb("00101010")), 5, true}, | ||
{b(bb("00100000")), 5, true}, | ||
{[]byte{0x0, bb("00100000")}, 8 + 5, true}, | ||
{[]byte{0x0, 0x0, bb("00100000")}, 8*2 + 5, true}, | ||
{[]byte{0x0, 0x0, 0x0, bb("00100000")}, 8*3 + 5, true}, | ||
{[]byte{0x0, 0x0, 0x0, 0x0, bb("00100000")}, 8*4 + 5, true}, | ||
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, bb("00100000")}, 8*5 + 5, true}, | ||
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, bb("00100000")}, 8*4 + 5, false}, | ||
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, bb("00100000")}, 8*3 + 5, false}, | ||
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, bb("00100000")}, 8*2 + 5, false}, | ||
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, bb("00100000")}, 8 + 5, false}, | ||
{[]byte{0x0, 0x0, 0x0, 0x0, 0x0, bb("00100000")}, 5, false}, | ||
} | ||
|
||
for _, c := range cases { | ||
bm := NewBitmapFrom(rev(c.bytes)) | ||
if bm.IsSet(c.pos) != c.set { | ||
t.Fatalf("bitmap %v IsSet(%v) should be %v", c.bytes, c.pos, c.set) | ||
} | ||
} | ||
} | ||
|
||
func TestBitmapBitWidth(t *testing.T) { | ||
for i := 3; i <= 16; i++ { | ||
if NewBitmap(i).BitWidth() != i { | ||
t.Fatal("incorrect bitWidth calculation") | ||
} | ||
if NewBitmapFrom(make([]byte, (1<<i)/8)).BitWidth() != i { | ||
t.Fatal("incorrect bitWidth calculation") | ||
} | ||
} | ||
} | ||
|
||
func TestBitmapIndex(t *testing.T) { | ||
type tcase struct { | ||
bytes []byte | ||
pos int | ||
expected int | ||
} | ||
cases := []tcase{ | ||
{b(bb("00111111")), 0, 0}, | ||
{b(bb("00111111")), 1, 1}, | ||
{b(bb("00111111")), 2, 2}, | ||
{b(bb("00111111")), 4, 4}, | ||
{b(bb("00111100")), 2, 0}, | ||
{b(bb("00111101")), 4, 3}, | ||
{b(bb("00111001")), 4, 2}, | ||
{b(bb("00111000")), 4, 1}, | ||
{b(bb("00110000")), 4, 0}, | ||
{b(bb("00000000")), 0, 0}, | ||
{b(bb("00000000")), 1, 0}, | ||
{b(bb("00000000")), 2, 0}, | ||
{b(bb("00000000")), 3, 0}, | ||
{[]byte{0x0, 0x0, 0x0}, 20, 0}, | ||
{[]byte{0xff, 0xff, 0xff}, 5, 5}, | ||
{[]byte{0xff, 0xff, 0xff}, 7, 7}, | ||
{[]byte{0xff, 0xff, 0xff}, 8, 8}, | ||
{[]byte{0xff, 0xff, 0xff}, 10, 10}, | ||
{[]byte{0xff, 0xff, 0xff}, 20, 20}, | ||
} | ||
|
||
for _, c := range cases { | ||
bm := NewBitmapFrom(rev(c.bytes)) | ||
if bm.Index(c.pos) != c.expected { | ||
t.Fatalf("bitmap %v Index(%v) should be %v", c.bytes, c.pos, c.expected) | ||
} | ||
} | ||
} | ||
|
||
func TestBitmap_32bitFixed(t *testing.T) { | ||
// a 32-byte bitmap and a list of all the bits that are set | ||
byts := []byte{ | ||
bb("00100101"), bb("10000000"), bb("00000000"), bb("01000000"), | ||
bb("00000000"), bb("01000000"), bb("00000000"), bb("01000000"), | ||
bb("00000000"), bb("00100000"), bb("00000000"), bb("01000000"), | ||
bb("00000000"), bb("00010000"), bb("00000000"), bb("01000000"), | ||
bb("00000000"), bb("00001000"), bb("00000000"), bb("01000000"), | ||
bb("00000000"), bb("00000100"), bb("00000000"), bb("01000000"), | ||
bb("00000000"), bb("00000010"), bb("00000000"), bb("01000000"), | ||
bb("00000000"), bb("00000001"), bb("00000000"), bb("01000000"), | ||
} | ||
bm := NewBitmapFrom(rev(byts)) | ||
set := []int{ | ||
0, 2, 5, 8 + 7, 8*3 + 6, | ||
8*5 + 6, 8*7 + 6, | ||
8*9 + 5, 8*11 + 6, | ||
8*13 + 4, 8*15 + 6, | ||
8*17 + 3, 8*19 + 6, | ||
8*21 + 2, 8*23 + 6, | ||
8*25 + 1, 8*27 + 6, | ||
8 * 29, 8*31 + 6} | ||
|
||
c := 0 | ||
for i := 0; i < 256; i++ { | ||
if c < len(set) && i == set[c] { | ||
if !bm.IsSet(i) { | ||
t.Fatalf("IsSet(%v) should be true", i) | ||
} | ||
// the index c of `set` also gives us the translation of Index(i) | ||
if bm.Index(i) != c { | ||
t.Fatalf("Index(%v) should be %v", i, c) | ||
} | ||
c++ | ||
} else { | ||
if bm.IsSet(i) { | ||
t.Fatalf("IsSet(%v) should be false", i) | ||
} | ||
} | ||
} | ||
} | ||
|
||
func TestBitmapSetBytes(t *testing.T) { | ||
newSet := func(bitWidth int, ba []byte, index int, set bool) []byte { | ||
var bm *Bitmap | ||
if ba != nil { | ||
bm = NewBitmapFrom(ba) | ||
} else { | ||
bm = NewBitmap(bitWidth) | ||
} | ||
bm.Set(index, set) | ||
return bm.Bytes | ||
} | ||
|
||
if !bytes.Equal(newSet(3, nil, 0, true), rev([]byte{bb("00000001")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(3, nil, 1, true), rev([]byte{bb("00000010")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(3, nil, 7, true), rev([]byte{bb("10000000")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("11111111")}), 0, true), rev([]byte{bb("11111111")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("11111111")}), 7, true), rev([]byte{bb("11111111")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("01010101")}), 1, true), rev([]byte{bb("01010111")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("01010101")}), 7, true), rev([]byte{bb("11010101")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("11111111")}), 0, false), rev([]byte{bb("11111110")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("11111111")}), 1, false), rev([]byte{bb("11111101")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("11111111")}), 7, false), rev([]byte{bb("01111111")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{0, bb("11111111")}), 8+0, true), rev([]byte{0, bb("11111111")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{0, bb("11111111")}), 8+7, true), rev([]byte{0, bb("11111111")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{0, bb("01010101")}), 8+1, true), rev([]byte{0, bb("01010111")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{0, bb("01010101")}), 8+7, true), rev([]byte{0, bb("11010101")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{0, bb("11111111")}), 8+0, false), rev([]byte{0, bb("11111110")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{0, bb("11111111")}), 8+1, false), rev([]byte{0, bb("11111101")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{0, bb("11111111")}), 8+7, false), rev([]byte{0, bb("01111111")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{0}), 0, false), rev([]byte{bb("00000000")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{0}), 7, false), rev([]byte{bb("00000000")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("01010101")}), 0, false), rev([]byte{bb("01010100")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("01010101")}), 6, false), rev([]byte{bb("00010101")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")}), 0, false), rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")}), 0, true), rev([]byte{bb("11000011"), bb("11010010"), bb("01001010"), bb("00000001")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")}), 12, false), rev([]byte{bb("11000010"), bb("11000010"), bb("01001010"), bb("00000001")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")}), 12, true), rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")}), 24, false), rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000000")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")}), 24, true), rev([]byte{bb("11000010"), bb("11010010"), bb("01001010"), bb("00000001")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
if !bytes.Equal(newSet(0, rev([]byte{0, 0, 0, 0}), 31, true), rev([]byte{0, 0, 0, bb("10000000")})) { | ||
t.Fatal("Failed bytes comparison") | ||
} | ||
} |
Oops, something went wrong.