diff --git a/gen/go/staples/v1/scan.pb.go b/gen/go/staples/v1/scan.pb.go index 637554a1a..60773a049 100644 --- a/gen/go/staples/v1/scan.pb.go +++ b/gen/go/staples/v1/scan.pb.go @@ -489,6 +489,7 @@ type Metadata_Column struct { FstOffset uint32 `protobuf:"varint,4,opt,name=fst_offset,json=fstOffset,proto3" json:"fst_offset,omitempty"` Offset uint64 `protobuf:"varint,5,opt,name=offset,proto3" json:"offset,omitempty"` Size uint32 `protobuf:"varint,6,opt,name=size,proto3" json:"size,omitempty"` + RawSize uint32 `protobuf:"varint,7,opt,name=raw_size,json=rawSize,proto3" json:"raw_size,omitempty"` } func (x *Metadata_Column) Reset() { @@ -558,6 +559,13 @@ func (x *Metadata_Column) GetSize() uint32 { return 0 } +func (x *Metadata_Column) GetRawSize() uint32 { + if x != nil { + return x.RawSize + } + return 0 +} + type Metadata_Chunk struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -672,14 +680,14 @@ var file_staples_v1_scan_proto_rawDesc = []byte{ 0x12, 0x0e, 0x0a, 0x0a, 0x55, 0x74, 0x6d, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x10, 0x16, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x74, 0x6d, 0x4d, 0x65, 0x64, 0x69, 0x75, 0x6d, 0x10, 0x17, 0x12, 0x0d, 0x0a, 0x09, 0x55, 0x74, 0x6d, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x10, 0x18, 0x12, 0x0b, - 0x0a, 0x07, 0x55, 0x74, 0x6d, 0x54, 0x65, 0x72, 0x6d, 0x10, 0x19, 0x22, 0xa7, 0x02, 0x0a, 0x08, + 0x0a, 0x07, 0x55, 0x74, 0x6d, 0x54, 0x65, 0x72, 0x6d, 0x10, 0x19, 0x22, 0xc2, 0x02, 0x0a, 0x08, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x2d, 0x0a, 0x07, 0x63, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x76, 0x31, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x52, 0x07, 0x63, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x69, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x6d, 0x69, 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x6d, 0x61, 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x6d, 0x61, 0x78, 0x12, 0x0e, 0x0a, 0x02, 0x69, - 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x1a, 0x82, 0x01, 0x0a, 0x06, + 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x1a, 0x9d, 0x01, 0x0a, 0x06, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x19, 0x0a, 0x08, 0x6e, 0x75, 0x6d, 0x5f, 0x72, 0x6f, 0x77, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x6e, 0x75, @@ -688,17 +696,19 @@ var file_staples_v1_scan_proto_rawDesc = []byte{ 0x66, 0x73, 0x65, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, - 0x1a, 0x33, 0x0a, 0x05, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x66, 0x66, - 0x73, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, - 0x74, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, - 0x04, 0x73, 0x69, 0x7a, 0x65, 0x42, 0x6c, 0x0a, 0x06, 0x63, 0x6f, 0x6d, 0x2e, 0x76, 0x31, 0x42, - 0x09, 0x53, 0x63, 0x61, 0x6e, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x2f, 0x67, 0x69, - 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x76, 0x69, 0x6e, 0x63, 0x65, 0x61, 0x6e, - 0x61, 0x6c, 0x79, 0x74, 0x69, 0x63, 0x73, 0x2f, 0x74, 0x73, 0x75, 0x2f, 0x67, 0x65, 0x6e, 0x2f, - 0x67, 0x6f, 0x2f, 0x73, 0x74, 0x61, 0x70, 0x6c, 0x65, 0x73, 0x2f, 0x76, 0x31, 0xa2, 0x02, 0x03, - 0x56, 0x58, 0x58, 0xaa, 0x02, 0x02, 0x56, 0x31, 0xca, 0x02, 0x02, 0x56, 0x31, 0xe2, 0x02, 0x0e, - 0x56, 0x31, 0x5c, 0x47, 0x50, 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, - 0x02, 0x56, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x12, 0x19, 0x0a, 0x08, 0x72, 0x61, 0x77, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x0d, 0x52, 0x07, 0x72, 0x61, 0x77, 0x53, 0x69, 0x7a, 0x65, 0x1a, 0x33, 0x0a, 0x05, 0x43, + 0x68, 0x75, 0x6e, 0x6b, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x12, 0x0a, 0x04, + 0x73, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, + 0x42, 0x6c, 0x0a, 0x06, 0x63, 0x6f, 0x6d, 0x2e, 0x76, 0x31, 0x42, 0x09, 0x53, 0x63, 0x61, 0x6e, + 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x50, 0x01, 0x5a, 0x2f, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, + 0x63, 0x6f, 0x6d, 0x2f, 0x76, 0x69, 0x6e, 0x63, 0x65, 0x61, 0x6e, 0x61, 0x6c, 0x79, 0x74, 0x69, + 0x63, 0x73, 0x2f, 0x74, 0x73, 0x75, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x67, 0x6f, 0x2f, 0x73, 0x74, + 0x61, 0x70, 0x6c, 0x65, 0x73, 0x2f, 0x76, 0x31, 0xa2, 0x02, 0x03, 0x56, 0x58, 0x58, 0xaa, 0x02, + 0x02, 0x56, 0x31, 0xca, 0x02, 0x02, 0x56, 0x31, 0xe2, 0x02, 0x0e, 0x56, 0x31, 0x5c, 0x47, 0x50, + 0x42, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0xea, 0x02, 0x02, 0x56, 0x31, 0x62, 0x06, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/index/file.go b/index/file.go index a3727b54a..de5a29fa4 100644 --- a/index/file.go +++ b/index/file.go @@ -113,14 +113,22 @@ func (f *FileIndex) get(name string) (*FullColumn, error) { func readColumn(r ReaderAtSeeker, meta *v1.Metadata_Column) (*FullColumn, error) { buf := get() defer buf.Release() - data := buf.get(int(meta.Size)) - n, err := r.ReadAt(data, int64(meta.Offset)) + + compress := get() + defer compress.Release() + + raw := buf.get(int(meta.Size)) + n, err := r.ReadAt(raw, int64(meta.Offset)) if err != nil { return nil, err } if n != int(meta.Size) { return nil, fmt.Errorf("index: Too little data read want=%d got %d", meta.Size, n) } + data, err := ZSTDDecompress(compress.get(int(meta.RawSize)), raw) + if err != nil { + return nil, err + } o := &FullColumn{ name: meta.Name, numRows: meta.NumRows, @@ -149,6 +157,8 @@ func chuckFromRaw(raw []byte, chunk *v1.Metadata_Chunk) []byte { func WriteFull(w io.Writer, full Full, id string) error { b := buffers.Bytes() defer b.Release() + compress := get() + defer compress.Release() meta := &v1.Metadata{ Id: id, @@ -161,7 +171,14 @@ func WriteFull(w io.Writer, full Full, id string) error { if err != nil { return err } - n, err := w.Write(data) + out, err := ZSTDCompress( + compress.get(ZSTDCompressBound(len(data))), + data, ZSTDCompressionLevel, + ) + if err != nil { + return err + } + n, err := w.Write(out) if err != nil { return err } @@ -171,6 +188,7 @@ func WriteFull(w io.Writer, full Full, id string) error { FstOffset: uint32(offset), Offset: startOffset, Size: uint32(n), + RawSize: uint32(len(data)), }) startOffset += uint64(n) return diff --git a/index/file_test.go b/index/file_test.go index b4172a4bc..cf4bd070a 100644 --- a/index/file_test.go +++ b/index/file_test.go @@ -8,7 +8,7 @@ import ( ) func TestReadIndexFile(t *testing.T) { - f, err := os.Open("testdata/01HPA7QZNP1E8DP8H1SKK253HQ") + f, err := os.Open("testdata/01HPA98Z0TVKPC4QC7HQBTF30Q") if err != nil { t.Fatal(err) } diff --git a/index/testdata/01HPA7QZNP1E8DP8H1SKK253HQ b/index/testdata/01HPA7QZNP1E8DP8H1SKK253HQ deleted file mode 100644 index 8e50864ce..000000000 Binary files a/index/testdata/01HPA7QZNP1E8DP8H1SKK253HQ and /dev/null differ diff --git a/index/testdata/01HPA98Z0TVKPC4QC7HQBTF30Q b/index/testdata/01HPA98Z0TVKPC4QC7HQBTF30Q new file mode 100644 index 000000000..28a3421ee Binary files /dev/null and b/index/testdata/01HPA98Z0TVKPC4QC7HQBTF30Q differ diff --git a/proto/staples/v1/scan.proto b/proto/staples/v1/scan.proto index e79d43491..c0d719f65 100644 --- a/proto/staples/v1/scan.proto +++ b/proto/staples/v1/scan.proto @@ -70,6 +70,7 @@ message Metadata { uint32 fst_offset = 4; uint64 offset = 5; uint32 size = 6; + uint32 raw_size = 7; } message Chunk {