Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: [Do Not Merge] Add Sparse Float Vector support to milvus #29421

Closed
wants to merge 5 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[Sparse Float Vector] add sparse float vector support to different
milvus components, including proxy, data node to receive and write
sparse float vectors to binlog, query node to handle search requests,
index node to build index for sparse float column, etc.

Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
zhengbuqian committed Mar 6, 2024
commit b4217f5344c858e972c1acdf1db30a2a3cb7f70e
2 changes: 1 addition & 1 deletion internal/datacoord/compaction_trigger.go
Original file line number Diff line number Diff line change
@@ -547,7 +547,7 @@ func (t *compactionTrigger) handleSignal(signal *compactionSignal) {
segments := t.getCandidateSegments(channel, partitionID)

if len(segments) == 0 {
log.Info("the length of segments is 0, skip to handle compaction")
log.Info("the number of candidate segments is 0, skip to handle compaction")
return
}

9 changes: 9 additions & 0 deletions internal/datanode/compactor.go
Original file line number Diff line number Diff line change
@@ -792,6 +792,15 @@ func interface2FieldData(schemaDataType schemapb.DataType, content []interface{}
data.Dim = len(data.Data) * 8 / int(numRows)
rst = data

case schemapb.DataType_SparseFloatVector:
data := &storage.SparseFloatVectorFieldData{}
for _, c := range content {
if err := data.AppendRow(c); err != nil {
return nil, fmt.Errorf("failed to append row: %v, %w", err, errTransferType)
}
}
rst = data

default:
return nil, errUnknownDataType
}
8 changes: 8 additions & 0 deletions internal/datanode/compactor_test.go
Original file line number Diff line number Diff line change
@@ -43,6 +43,7 @@ import (
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/testutils"
"github.com/milvus-io/milvus/pkg/util/timerecord"
)

@@ -105,6 +106,13 @@ func TestCompactionTaskInnerMethods(t *testing.T) {
{false, schemapb.DataType_BinaryVector, []interface{}{nil, nil}, "invalid binaryvector"},
{false, schemapb.DataType_Float16Vector, []interface{}{nil, nil}, "invalid float16vector"},
{false, schemapb.DataType_BFloat16Vector, []interface{}{nil, nil}, "invalid bfloat16vector"},

{false, schemapb.DataType_SparseFloatVector, []interface{}{nil, nil}, "invalid sparsefloatvector"},
{false, schemapb.DataType_SparseFloatVector, []interface{}{[]byte{255}, []byte{15}}, "invalid sparsefloatvector"},
{true, schemapb.DataType_SparseFloatVector, []interface{}{
testutils.CreateSparseFloatRow([]uint32{1, 2}, []float32{1.0, 2.0}),
testutils.CreateSparseFloatRow([]uint32{3, 4}, []float32{1.0, 2.0}),
}, "valid sparsefloatvector"},
}

// make sure any new data type that is not handled above throws an unexpected error
4 changes: 4 additions & 0 deletions internal/indexnode/util.go
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@
package indexnode

import (
"errors"
"unsafe"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
@@ -37,5 +38,8 @@ func estimateFieldDataSize(dim int64, numRows int64, dataType schemapb.DataType)
if dataType == schemapb.DataType_BFloat16Vector {
return uint64(dim) * uint64(numRows) * 2, nil
}
if dataType == schemapb.DataType_SparseFloatVector {
return 0, errors.New("could not estimate field data size of SparseFloatVector")
}
return 0, nil
}
2 changes: 2 additions & 0 deletions internal/parser/planparserv2/plan_parser_v2.go
Original file line number Diff line number Diff line change
@@ -137,6 +137,8 @@ func CreateSearchPlan(schema *typeutil.SchemaHelper, exprStr string, vectorField
vectorType = planpb.VectorType_Float16Vector
} else if dataType == schemapb.DataType_BFloat16Vector {
vectorType = planpb.VectorType_BFloat16Vector
} else if dataType == schemapb.DataType_SparseFloatVector {
vectorType = planpb.VectorType_SparseFloatVector
}
planNode := &planpb.PlanNode{
Node: &planpb.PlanNode_VectorAnns{
11 changes: 11 additions & 0 deletions internal/parser/planparserv2/plan_parser_v2_test.go
Original file line number Diff line number Diff line change
@@ -428,6 +428,17 @@ func TestCreateBFloat16earchPlan(t *testing.T) {
assert.NoError(t, err)
}

// TestCreateSparseFloatVectorSearchPlan verifies that a search plan can be
// built against a sparse float vector field using a default (zero-valued)
// QueryInfo and a simple meta-field filter expression.
func TestCreateSparseFloatVectorSearchPlan(t *testing.T) {
	schema := newTestSchema()
	queryInfo := &planpb.QueryInfo{
		Topk:         0,
		MetricType:   "",
		SearchParams: "",
		RoundDecimal: 0,
	}
	_, err := CreateSearchPlan(schema, `$meta["A"] != 10`, "SparseFloatVectorField", queryInfo)
	assert.NoError(t, err)
}

func TestExpr_Invalid(t *testing.T) {
schema := newTestSchema()
helper, err := typeutil.CreateSchemaHelper(schema)
5 changes: 2 additions & 3 deletions internal/proxy/msg_pack.go
Original file line number Diff line number Diff line change
@@ -21,9 +21,6 @@ import (
"strconv"
"time"

"go.uber.org/zap"
"golang.org/x/sync/errgroup"

"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
@@ -36,6 +33,8 @@ import (
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/retry"
"github.com/milvus-io/milvus/pkg/util/typeutil"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
)

func genInsertMsgsByPartition(ctx context.Context,
2 changes: 1 addition & 1 deletion internal/proxy/task.go
Original file line number Diff line number Diff line change
@@ -321,7 +321,7 @@ func (t *createCollectionTask) PreExecute(ctx context.Context) error {
if err := validateFieldName(field.Name); err != nil {
return err
}
// validate vector field type parameters
// validate dense vector field type parameters
if isVectorType(field.DataType) {
err = validateDimension(field)
if err != nil {
34 changes: 14 additions & 20 deletions internal/proxy/task_index.go
Original file line number Diff line number Diff line change
@@ -36,6 +36,7 @@ import (
"github.com/milvus-io/milvus/pkg/util/indexparamcheck"
"github.com/milvus-io/milvus/pkg/util/indexparams"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/metric"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
@@ -174,9 +175,7 @@ func (cit *createIndexTask) parseIndexParams() error {
fmt.Sprintf("create index on %s field", cit.fieldSchema.DataType.String()),
fmt.Sprintf("create index on %s field is not supported", cit.fieldSchema.DataType.String()))
}
}

if isVecIndex {
} else {
specifyIndexType, exist := indexParamsMap[common.IndexTypeKey]
if Params.AutoIndexConfig.Enable.GetAsBool() { // `enable` only for cloud instance.
log.Info("create index trigger AutoIndex",
@@ -258,6 +257,12 @@ func (cit *createIndexTask) parseIndexParams() error {
return err
}
}
if indexType == indexparamcheck.IndexSparseInverted || indexType == indexparamcheck.IndexSparseWand {
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
if !metricTypeExist || metricType != metric.IP {
return fmt.Errorf("only IP is the supported metric type for sparse index")
}
}

err := checkTrain(cit.fieldSchema, indexParamsMap)
if err != nil {
@@ -309,13 +314,7 @@ func (cit *createIndexTask) getIndexedField(ctx context.Context) (*schemapb.Fiel
}

func fillDimension(field *schemapb.FieldSchema, indexParams map[string]string) error {
vecDataTypes := []schemapb.DataType{
schemapb.DataType_FloatVector,
schemapb.DataType_BinaryVector,
schemapb.DataType_Float16Vector,
schemapb.DataType_BFloat16Vector,
}
if !funcutil.SliceContain(vecDataTypes, field.GetDataType()) {
if !isVectorType(field.GetDataType()) {
return nil
}
params := make([]*commonpb.KeyValuePair, 0, len(field.GetTypeParams())+len(field.GetIndexParams()))
@@ -338,14 +337,7 @@ func fillDimension(field *schemapb.FieldSchema, indexParams map[string]string) e

func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) error {
indexType := indexParams[common.IndexTypeKey]
// skip params check of non-vector field.
vecDataTypes := []schemapb.DataType{
schemapb.DataType_FloatVector,
schemapb.DataType_BinaryVector,
schemapb.DataType_Float16Vector,
schemapb.DataType_BFloat16Vector,
}
if !funcutil.SliceContain(vecDataTypes, field.GetDataType()) {
if !isVectorType(field.GetDataType()) {
return indexparamcheck.CheckIndexValid(field.GetDataType(), indexType, indexParams)
}

@@ -355,8 +347,10 @@ func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) erro
return fmt.Errorf("invalid index type: %s", indexType)
}

if err := fillDimension(field, indexParams); err != nil {
return err
if !isSparseVectorType(field.DataType) {
if err := fillDimension(field, indexParams); err != nil {
return err
}
}

if err := checker.CheckValidDataType(field.GetDataType()); err != nil {
70 changes: 70 additions & 0 deletions internal/proxy/task_index_test.go
Original file line number Diff line number Diff line change
@@ -272,6 +272,76 @@ func TestCreateIndexTask_PreExecute(t *testing.T) {
})
}

// Test_sparse_parseIndexParams verifies that parseIndexParams accepts a
// SPARSE_INVERTED_INDEX on a sparse float vector field with the IP metric,
// and that the resulting newIndexParams contain the index type, metric type
// and the index-specific build param, while newTypeParams stay empty
// (sparse vector fields carry no dim type param).
func Test_sparse_parseIndexParams(t *testing.T) {
cit := &createIndexTask{
Condition: nil,
req: &milvuspb.CreateIndexRequest{
Base: nil,
DbName: "",
CollectionName: "",
FieldName: "",
// index type, metric type and JSON-encoded index params as a client would send them
ExtraParams: []*commonpb.KeyValuePair{
{
Key: common.IndexTypeKey,
Value: "SPARSE_INVERTED_INDEX",
},
{
Key: MetricTypeKey,
Value: "IP",
},
{
Key: common.IndexParamsKey,
Value: "{\"drop_ratio_build\": 0.3}",
},
},
IndexName: "",
},
ctx: nil,
rootCoord: nil,
result: nil,
isAutoIndex: false,
newIndexParams: nil,
newTypeParams: nil,
collectionID: 0,
// target field: a sparse float vector column (no dim in TypeParams)
fieldSchema: &schemapb.FieldSchema{
FieldID: 101,
Name: "FieldID",
IsPrimaryKey: false,
Description: "field no.1",
DataType: schemapb.DataType_SparseFloatVector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: MetricTypeKey,
Value: "IP",
},
},
},
}

t.Run("parse index params", func(t *testing.T) {
err := cit.parseIndexParams()
assert.NoError(t, err)

// the JSON index params must be flattened into individual key/value pairs
assert.ElementsMatch(t,
[]*commonpb.KeyValuePair{
{
Key: common.IndexTypeKey,
Value: "SPARSE_INVERTED_INDEX",
},
{
Key: MetricTypeKey,
Value: "IP",
},
{
Key: "drop_ratio_build",
Value: "0.3",
},
}, cit.newIndexParams)
// no type params expected for a sparse vector field
assert.ElementsMatch(t,
[]*commonpb.KeyValuePair{}, cit.newTypeParams)
})
}

func Test_parseIndexParams(t *testing.T) {
cit := &createIndexTask{
Condition: nil,
35 changes: 24 additions & 11 deletions internal/proxy/util.go
Original file line number Diff line number Diff line change
@@ -98,7 +98,12 @@ func isVectorType(dataType schemapb.DataType) bool {
return dataType == schemapb.DataType_FloatVector ||
dataType == schemapb.DataType_BinaryVector ||
dataType == schemapb.DataType_Float16Vector ||
dataType == schemapb.DataType_BFloat16Vector
dataType == schemapb.DataType_BFloat16Vector ||
dataType == schemapb.DataType_SparseFloatVector
}

// isSparseVectorType reports whether dataType is a sparse vector type.
// Currently SparseFloatVector is the only sparse vector data type.
func isSparseVectorType(dataType schemapb.DataType) bool {
	switch dataType {
	case schemapb.DataType_SparseFloatVector:
		return true
	default:
		return false
	}
}

func validateMaxQueryResultWindow(offset int64, limit int64) error {
@@ -307,6 +312,12 @@ func validateDimension(field *schemapb.FieldSchema) error {
break
}
}
if isSparseVectorType(field.DataType) {
if exist {
return fmt.Errorf("dim should not be specified for sparse vector field %s(%d)", field.Name, field.FieldID)
}
return nil
}
if !exist {
return errors.New("dimension is not defined in field type params, check type param `dim` for vector field")
}
@@ -509,7 +520,7 @@ func isVector(dataType schemapb.DataType) (bool, error) {
schemapb.DataType_Float, schemapb.DataType_Double:
return false, nil

case schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector:
case schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector, schemapb.DataType_SparseFloatVector:
return true, nil
}

@@ -520,7 +531,7 @@ func validateMetricType(dataType schemapb.DataType, metricTypeStrRaw string) err
metricTypeStr := strings.ToUpper(metricTypeStrRaw)
switch metricTypeStr {
case metric.L2, metric.IP, metric.COSINE:
if dataType == schemapb.DataType_FloatVector || dataType == schemapb.DataType_Float16Vector || dataType == schemapb.DataType_BFloat16Vector {
if dataType == schemapb.DataType_FloatVector || dataType == schemapb.DataType_Float16Vector || dataType == schemapb.DataType_BFloat16Vector || dataType == schemapb.DataType_SparseFloatVector {
return nil
}
case metric.JACCARD, metric.HAMMING, metric.SUBSTRUCTURE, metric.SUPERSTRUCTURE:
@@ -581,13 +592,15 @@ func validateSchema(coll *schemapb.CollectionSchema) error {
if err2 != nil {
return err2
}
dimStr, ok := typeKv[common.DimKey]
if !ok {
return fmt.Errorf("dim not found in type_params for vector field %s(%d)", field.Name, field.FieldID)
}
dim, err := strconv.Atoi(dimStr)
if err != nil || dim < 0 {
return fmt.Errorf("invalid dim; %s", dimStr)
if !isSparseVectorType(field.DataType) {
dimStr, ok := typeKv[common.DimKey]
if !ok {
return fmt.Errorf("dim not found in type_params for vector field %s(%d)", field.Name, field.FieldID)
}
dim, err := strconv.Atoi(dimStr)
if err != nil || dim < 0 {
return fmt.Errorf("invalid dim; %s", dimStr)
}
}

metricTypeStr, ok := indexKv[common.MetricTypeKey]
@@ -624,7 +637,7 @@ func validateMultipleVectorFields(schema *schemapb.CollectionSchema) error {
for i := range schema.Fields {
name := schema.Fields[i].Name
dType := schema.Fields[i].DataType
isVec := dType == schemapb.DataType_BinaryVector || dType == schemapb.DataType_FloatVector || dType == schemapb.DataType_Float16Vector || dType == schemapb.DataType_BFloat16Vector
isVec := dType == schemapb.DataType_BinaryVector || dType == schemapb.DataType_FloatVector || dType == schemapb.DataType_Float16Vector || dType == schemapb.DataType_BFloat16Vector || dType == schemapb.DataType_SparseFloatVector
if isVec && vecExist && !enableMultipleVectorFields {
return fmt.Errorf(
"multiple vector fields is not supported, fields name: %s, %s",
24 changes: 24 additions & 0 deletions internal/proxy/validate_util.go
Original file line number Diff line number Diff line change
@@ -85,6 +85,10 @@ func (v *validateUtil) Validate(data []*schemapb.FieldData, schema *schemapb.Col
if err := v.checkBinaryVectorFieldData(field, fieldSchema); err != nil {
return err
}
case schemapb.DataType_SparseFloatVector:
if err := v.checkSparseFloatFieldData(field, fieldSchema); err != nil {
return err
}
case schemapb.DataType_VarChar:
if err := v.checkVarCharFieldData(field, fieldSchema); err != nil {
return err
@@ -205,6 +209,13 @@ func (v *validateUtil) checkAligned(data []*schemapb.FieldData, schema *typeutil
if n != numRows {
return errNumRowsMismatch(field.GetFieldName(), n)
}

case schemapb.DataType_SparseFloatVector:
n := uint64(len(field.GetVectors().GetSparseFloatVector().Contents))
if n != numRows {
return errNumRowsMismatch(field.GetFieldName(), n)
}

default:
// error won't happen here.
n, err := funcutil.GetNumRowOfFieldData(field)
@@ -326,6 +337,19 @@ func (v *validateUtil) checkBinaryVectorFieldData(field *schemapb.FieldData, fie
return nil
}

func (v *validateUtil) checkSparseFloatFieldData(field *schemapb.FieldData, fieldSchema *schemapb.FieldSchema) error {
if field.GetVectors() == nil || field.GetVectors().GetSparseFloatVector() == nil {
msg := fmt.Sprintf("sparse float field '%v' is illegal, nil SparseFloatVector", field.GetFieldName())
return merr.WrapErrParameterInvalid("need sparse float array", "got nil", msg)
}
sparseRows := field.GetVectors().GetSparseFloatVector().GetContents()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this have a nil reference?
Uploading image.png…

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe we need to check that each sparse vector has at least one nonzero value here

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what other limitation do we have about sparse embeddings?

if sparseRows == nil {
msg := fmt.Sprintf("sparse float field '%v' is illegal, array type mismatch", field.GetFieldName())
return merr.WrapErrParameterInvalid("need sparse float array", "got nil", msg)
}
return typeutil.ValidateSparseFloatRows(sparseRows...)
}

func (v *validateUtil) checkVarCharFieldData(field *schemapb.FieldData, fieldSchema *schemapb.FieldSchema) error {
strArr := field.GetScalars().GetStringData().GetData()
if strArr == nil && fieldSchema.GetDefaultValue() == nil {
8 changes: 8 additions & 0 deletions internal/querynodev2/segments/utils.go
Original file line number Diff line number Diff line change
@@ -93,6 +93,8 @@ func getPKsFromRowBasedInsertMsg(msg *msgstream.InsertMsg, schema *schemapb.Coll
break
}
}
case schemapb.DataType_SparseFloatVector:
return nil, fmt.Errorf("SparseFloatVector not support in row based message")
}
}

@@ -166,6 +168,10 @@ func fillFloatVecFieldData(ctx context.Context, vcm storage.ChunkManager, dataPa
return nil
}

// fillSparseFloatVecFieldData is a placeholder for reading sparse float
// vector data from chunked storage into fieldData; sparse vectors are not
// yet supported on this path, so it always returns an error.
func fillSparseFloatVecFieldData(ctx context.Context, vcm storage.ChunkManager, dataPath string, fieldData *schemapb.FieldData, i int, offset int64, endian binary.ByteOrder) error {
return fmt.Errorf("fillSparseFloatVecFieldData not implemented")
}

func fillBoolFieldData(ctx context.Context, vcm storage.ChunkManager, dataPath string, fieldData *schemapb.FieldData, i int, offset int64, endian binary.ByteOrder) error {
// read whole file.
// TODO: optimize here.
@@ -274,6 +280,8 @@ func fillFieldData(ctx context.Context, vcm storage.ChunkManager, dataPath strin
return fillBinVecFieldData(ctx, vcm, dataPath, fieldData, i, offset, endian)
case schemapb.DataType_FloatVector:
return fillFloatVecFieldData(ctx, vcm, dataPath, fieldData, i, offset, endian)
case schemapb.DataType_SparseFloatVector:
return fillSparseFloatVecFieldData(ctx, vcm, dataPath, fieldData, i, offset, endian)
case schemapb.DataType_Bool:
return fillBoolFieldData(ctx, vcm, dataPath, fieldData, i, offset, endian)
case schemapb.DataType_String, schemapb.DataType_VarChar:
2 changes: 1 addition & 1 deletion internal/storage/binlog_writer.go
Original file line number Diff line number Diff line change
@@ -157,7 +157,7 @@ func (writer *InsertBinlogWriter) NextInsertEventWriter(dim ...int) (*insertEven

var event *insertEventWriter
var err error
if typeutil.IsVectorType(writer.PayloadDataType) {
if typeutil.IsVectorType(writer.PayloadDataType) && !typeutil.IsSparseVectorType(writer.PayloadDataType) {
if len(dim) != 1 {
return nil, fmt.Errorf("incorrect input numbers")
}
25 changes: 23 additions & 2 deletions internal/storage/data_codec.go
Original file line number Diff line number Diff line change
@@ -203,7 +203,7 @@
return nil, fmt.Errorf("there is no pk field")
}

// Serialize transfer insert data to blob. It will sort insert data by timestamp.
// Serialize transforms insert data to blob. It will sort insert data by timestamp.
// From schema, it gets all fields.
// For each field, it will create a binlog writer, and write an event to the binlog.
// It returns binlog buffer in the end.
@@ -259,6 +259,8 @@
eventWriter, err = writer.NextInsertEventWriter(singleData.(*Float16VectorFieldData).Dim)
case schemapb.DataType_BFloat16Vector:
eventWriter, err = writer.NextInsertEventWriter(singleData.(*BFloat16VectorFieldData).Dim)
case schemapb.DataType_SparseFloatVector:
eventWriter, err = writer.NextInsertEventWriter()
default:
return nil, fmt.Errorf("undefined data type %d", field.DataType)
}
@@ -384,12 +386,15 @@
writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", singleData.(*Float16VectorFieldData).GetMemorySize()))
case schemapb.DataType_BFloat16Vector:
err = eventWriter.AddBFloat16VectorToPayload(singleData.(*BFloat16VectorFieldData).Data, singleData.(*BFloat16VectorFieldData).Dim)
writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", singleData.(*BFloat16VectorFieldData).GetMemorySize()))
case schemapb.DataType_SparseFloatVector:
err = eventWriter.AddSparseFloatVectorToPayload(singleData.(*SparseFloatVectorFieldData))

Check failure on line 391 in internal/storage/data_codec.go

GitHub Actions / Code Checker MacOS 12

impossible type assertion: singleData.(*SparseFloatVectorFieldData)
if err != nil {
eventWriter.Close()
writer.Close()
return nil, err
}
writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", singleData.(*BFloat16VectorFieldData).GetMemorySize()))
writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", singleData.(*SparseFloatVectorFieldData).GetMemorySize()))

Check failure on line 397 in internal/storage/data_codec.go

GitHub Actions / Code Checker MacOS 12

impossible type assertion: singleData.(*SparseFloatVectorFieldData)
default:
return nil, fmt.Errorf("undefined data type %d", field.DataType)
}
@@ -776,6 +781,22 @@
floatVectorFieldData.Dim = dim
insertData.Data[fieldID] = floatVectorFieldData

case schemapb.DataType_SparseFloatVector:
sparseData, _, err := eventReader.GetSparseFloatVectorFromPayload()
if err != nil {
eventReader.Close()
binlogReader.Close()
return InvalidUniqueID, InvalidUniqueID, InvalidUniqueID, err
}
if insertData.Data[fieldID] == nil {
insertData.Data[fieldID] = &SparseFloatVectorFieldData{}

Check failure on line 792 in internal/storage/data_codec.go

GitHub Actions / Code Checker MacOS 12

cannot use &SparseFloatVectorFieldData{} (value of type *SparseFloatVectorFieldData) as FieldData value in assignment: *SparseFloatVectorFieldData does not implement FieldData (missing method GetRowSize)
}
vec := insertData.Data[fieldID].(*SparseFloatVectorFieldData)

Check failure on line 794 in internal/storage/data_codec.go

GitHub Actions / Code Checker MacOS 12

impossible type assertion: insertData.Data[fieldID].(*SparseFloatVectorFieldData)
vec.AppendAllRows(sparseData)

totalLength += sparseData.RowNum()
insertData.Data[fieldID] = vec

Check failure on line 798 in internal/storage/data_codec.go

GitHub Actions / Code Checker MacOS 12

cannot use vec (variable of type *SparseFloatVectorFieldData) as FieldData value in assignment: *SparseFloatVectorFieldData does not implement FieldData (missing method GetRowSize)

default:
eventReader.Close()
binlogReader.Close()
90 changes: 69 additions & 21 deletions internal/storage/data_codec_test.go
Original file line number Diff line number Diff line change
@@ -30,28 +30,30 @@ import (
"github.com/milvus-io/milvus/internal/proto/etcdpb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/testutils"
)

const (
CollectionID = 1
PartitionID = 1
SegmentID = 1
RowIDField = 0
TimestampField = 1
BoolField = 100
Int8Field = 101
Int16Field = 102
Int32Field = 103
Int64Field = 104
FloatField = 105
DoubleField = 106
StringField = 107
BinaryVectorField = 108
FloatVectorField = 109
ArrayField = 110
JSONField = 111
Float16VectorField = 112
BFloat16VectorField = 113
CollectionID = 1
PartitionID = 1
SegmentID = 1
RowIDField = 0
TimestampField = 1
BoolField = 100
Int8Field = 101
Int16Field = 102
Int32Field = 103
Int64Field = 104
FloatField = 105
DoubleField = 106
StringField = 107
BinaryVectorField = 108
FloatVectorField = 109
ArrayField = 110
JSONField = 111
Float16VectorField = 112
BFloat16VectorField = 113
SparseFloatVectorField = 114
)

func genTestCollectionMeta() *etcdpb.CollectionMeta {
@@ -187,6 +189,13 @@ func genTestCollectionMeta() *etcdpb.CollectionMeta {
},
},
},
{
FieldID: SparseFloatVectorField,
Name: "field_sparse_float_vector",
Description: "sparse_float_vector",
DataType: schemapb.DataType_SparseFloatVector,
TypeParams: []*commonpb.KeyValuePair{},
},
},
},
}
@@ -266,6 +275,16 @@ func TestInsertCodec(t *testing.T) {
Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255},
Dim: 4,
},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
},
},
},
},
}

@@ -319,6 +338,16 @@ func TestInsertCodec(t *testing.T) {
Data: []byte{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255},
Dim: 4,
},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 300,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{5, 6, 7}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{15, 26, 37}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{105, 207, 299}, []float32{3.1, 3.2, 3.3}),
},
},
},
ArrayField: &ArrayFieldData{
ElementType: schemapb.DataType_Int32,
Data: []*schemapb.ScalarField{
@@ -359,8 +388,14 @@ func TestInsertCodec(t *testing.T) {
FloatVectorField: &FloatVectorFieldData{[]float32{}, 4},
Float16VectorField: &Float16VectorFieldData{[]byte{}, 4},
BFloat16VectorField: &BFloat16VectorFieldData{[]byte{}, 4},
ArrayField: &ArrayFieldData{schemapb.DataType_Int32, []*schemapb.ScalarField{}},
JSONField: &JSONFieldData{[][]byte{}},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 0,
Contents: [][]byte{},
},
},
ArrayField: &ArrayFieldData{schemapb.DataType_Int32, []*schemapb.ScalarField{}},
JSONField: &JSONFieldData{[][]byte{}},
},
}
b, err := insertCodec.Serialize(PartitionID, SegmentID, insertDataEmpty)
@@ -414,6 +449,19 @@ func TestInsertCodec(t *testing.T) {
0, 255, 0, 255, 0, 255, 0, 255,
}, resultData.Data[BFloat16VectorField].(*BFloat16VectorFieldData).Data)

assert.Equal(t, schemapb.SparseFloatArray{
// merged dim should be max of all dims
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{5, 6, 7}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{15, 26, 37}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{105, 207, 299}, []float32{3.1, 3.2, 3.3}),
testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
},
}, resultData.Data[SparseFloatVectorField].(*SparseFloatVectorFieldData).SparseFloatArray)

int32ArrayList := [][]int32{{1, 2, 3}, {4, 5, 6}, {3, 2, 1}, {6, 5, 4}}
resultArrayList := [][]int32{}
for _, v := range resultData.Data[ArrayField].(*ArrayFieldData).Data {
3 changes: 3 additions & 0 deletions internal/storage/data_sorter.go
Original file line number Diff line number Diff line change
@@ -114,6 +114,9 @@
case schemapb.DataType_JSON:
data := singleData.(*JSONFieldData).Data
data[i], data[j] = data[j], data[i]
case schemapb.DataType_SparseFloatVector:
field_data := singleData.(*SparseFloatVectorFieldData)

Check failure on line 118 in internal/storage/data_sorter.go

GitHub Actions / Code Checker MacOS 12

impossible type assertion: singleData.(*SparseFloatVectorFieldData)
field_data.Contents[i], field_data.Contents[j] = field_data.Contents[j], field_data.Contents[i]
default:
errMsg := "undefined data type " + string(field.DataType)
panic(errMsg)
29 changes: 28 additions & 1 deletion internal/storage/data_sorter_test.go
Original file line number Diff line number Diff line change
@@ -24,6 +24,7 @@ import (

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/etcdpb"
"github.com/milvus-io/milvus/pkg/util/testutils"
)

func TestDataSorter(t *testing.T) {
@@ -132,9 +133,16 @@ func TestDataSorter(t *testing.T) {
FieldID: 111,
Name: "field_bfloat16_vector",
IsPrimaryKey: false,
Description: "description_12",
Description: "description_13",
DataType: schemapb.DataType_BFloat16Vector,
},
{
FieldID: 112,
Name: "field_sparse_float_vector",
IsPrimaryKey: false,
Description: "description_14",
DataType: schemapb.DataType_SparseFloatVector,
},
},
},
}
@@ -188,6 +196,16 @@ func TestDataSorter(t *testing.T) {
Data: []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
Dim: 4,
},
112: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
},
},
},
},
}

@@ -237,6 +255,7 @@ func TestDataSorter(t *testing.T) {
// }
// }

// last row should be moved to the first row
assert.Equal(t, []int64{2, 3, 4}, dataSorter.InsertData.Data[0].(*Int64FieldData).Data)
assert.Equal(t, []int64{5, 3, 4}, dataSorter.InsertData.Data[1].(*Int64FieldData).Data)
assert.Equal(t, []bool{true, true, false}, dataSorter.InsertData.Data[100].(*BoolFieldData).Data)
@@ -251,6 +270,14 @@ func TestDataSorter(t *testing.T) {
assert.Equal(t, []float32{16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, dataSorter.InsertData.Data[109].(*FloatVectorFieldData).Data)
assert.Equal(t, []byte{16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, dataSorter.InsertData.Data[110].(*Float16VectorFieldData).Data)
assert.Equal(t, []byte{16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, dataSorter.InsertData.Data[111].(*BFloat16VectorFieldData).Data)
assert.Equal(t, schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
},
}, dataSorter.InsertData.Data[112].(*SparseFloatVectorFieldData).SparseFloatArray)
}

func TestDataSorter_Len(t *testing.T) {
2 changes: 1 addition & 1 deletion internal/storage/event_writer.go
Original file line number Diff line number Diff line change
@@ -215,7 +215,7 @@ func newDescriptorEvent() *descriptorEvent {
func newInsertEventWriter(dataType schemapb.DataType, dim ...int) (*insertEventWriter, error) {
var payloadWriter PayloadWriterInterface
var err error
if typeutil.IsVectorType(dataType) {
if typeutil.IsVectorType(dataType) && !typeutil.IsSparseVectorType(dataType) {
if len(dim) != 1 {
return nil, fmt.Errorf("incorrect input numbers")
}
84 changes: 68 additions & 16 deletions internal/storage/insert_data.go
Original file line number Diff line number Diff line change
@@ -20,9 +20,11 @@
"encoding/binary"
"fmt"

"github.com/gogo/protobuf/proto"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)

// TODO: fill it
@@ -181,7 +183,8 @@
Data: make([]byte, 0),
Dim: dim,
}, nil

case schemapb.DataType_SparseFloatVector:
return &SparseFloatVectorFieldData{}, nil

Check failure on line 187 in internal/storage/insert_data.go

GitHub Actions / Code Checker MacOS 12

cannot use &SparseFloatVectorFieldData{} (value of type *SparseFloatVectorFieldData) as FieldData value in return statement: *SparseFloatVectorFieldData does not implement FieldData (missing method GetRowSize)
case schemapb.DataType_Bool:
return &BoolFieldData{
Data: make([]bool, 0),
@@ -283,6 +286,20 @@
Dim int
}

// SparseFloatVectorFieldData holds a column of sparse float vectors as the
// embedded schemapb.SparseFloatArray: one serialized []byte per row, plus the
// maximum dimension seen across rows.
type SparseFloatVectorFieldData struct {
	schemapb.SparseFloatArray
}

// AppendAllRows appends every row of src to dst and widens dst.Dim to cover
// src.Dim. A src with no rows is a no-op.
func (dst *SparseFloatVectorFieldData) AppendAllRows(src *SparseFloatVectorFieldData) {
	if len(src.Contents) == 0 {
		return
	}
	if src.Dim > dst.Dim {
		dst.Dim = src.Dim
	}
	dst.Contents = append(dst.Contents, src.Contents...)
}

// RowNum implements FieldData.RowNum
func (data *BoolFieldData) RowNum() int { return len(data.Data) }
func (data *Int8FieldData) RowNum() int { return len(data.Data) }
@@ -300,6 +317,7 @@
func (data *BFloat16VectorFieldData) RowNum() int {
return len(data.Data) / 2 / data.Dim
}
func (data *SparseFloatVectorFieldData) RowNum() int { return len(data.Contents) }

// GetRow implements FieldData.GetRow
func (data *BoolFieldData) GetRow(i int) any { return data.Data[i] }
@@ -312,9 +330,12 @@
func (data *StringFieldData) GetRow(i int) any { return data.Data[i] }
func (data *ArrayFieldData) GetRow(i int) any { return data.Data[i] }
func (data *JSONFieldData) GetRow(i int) any { return data.Data[i] }
func (data *BinaryVectorFieldData) GetRow(i int) interface{} {
func (data *BinaryVectorFieldData) GetRow(i int) any {
return data.Data[i*data.Dim/8 : (i+1)*data.Dim/8]
}
// GetRow implements FieldData.GetRow: returns the serialized []byte form of
// the i-th sparse float vector row.
func (data *SparseFloatVectorFieldData) GetRow(i int) interface{} {
	return data.Contents[i]
}

func (data *FloatVectorFieldData) GetRow(i int) interface{} {
return data.Data[i*data.Dim : (i+1)*data.Dim]
@@ -328,20 +349,21 @@
return data.Data[i*data.Dim*2 : (i+1)*data.Dim*2]
}

func (data *BoolFieldData) GetRows() any { return data.Data }
func (data *Int8FieldData) GetRows() any { return data.Data }
func (data *Int16FieldData) GetRows() any { return data.Data }
func (data *Int32FieldData) GetRows() any { return data.Data }
func (data *Int64FieldData) GetRows() any { return data.Data }
func (data *FloatFieldData) GetRows() any { return data.Data }
func (data *DoubleFieldData) GetRows() any { return data.Data }
func (data *StringFieldData) GetRows() any { return data.Data }
func (data *ArrayFieldData) GetRows() any { return data.Data }
func (data *JSONFieldData) GetRows() any { return data.Data }
func (data *BinaryVectorFieldData) GetRows() any { return data.Data }
func (data *FloatVectorFieldData) GetRows() any { return data.Data }
func (data *Float16VectorFieldData) GetRows() any { return data.Data }
func (data *BFloat16VectorFieldData) GetRows() any { return data.Data }
func (data *BoolFieldData) GetRows() any { return data.Data }
func (data *Int8FieldData) GetRows() any { return data.Data }
func (data *Int16FieldData) GetRows() any { return data.Data }
func (data *Int32FieldData) GetRows() any { return data.Data }
func (data *Int64FieldData) GetRows() any { return data.Data }
func (data *FloatFieldData) GetRows() any { return data.Data }
func (data *DoubleFieldData) GetRows() any { return data.Data }
func (data *StringFieldData) GetRows() any { return data.Data }
func (data *ArrayFieldData) GetRows() any { return data.Data }
func (data *JSONFieldData) GetRows() any { return data.Data }
func (data *BinaryVectorFieldData) GetRows() any { return data.Data }
func (data *FloatVectorFieldData) GetRows() any { return data.Data }
func (data *Float16VectorFieldData) GetRows() any { return data.Data }
func (data *BFloat16VectorFieldData) GetRows() any { return data.Data }
func (data *SparseFloatVectorFieldData) GetRows() any { return data.Contents }

// AppendRow implements FieldData.AppendRow
func (data *BoolFieldData) AppendRow(row interface{}) error {
@@ -470,6 +492,22 @@
return nil
}

// AppendRow implements FieldData.AppendRow for sparse float vectors. The row
// must be the serialized []byte form of a sparse row; it is validated before
// being appended, and the field's Dim is widened to cover the row's dimension.
func (data *SparseFloatVectorFieldData) AppendRow(row interface{}) error {
	raw, ok := row.([]byte)
	if !ok {
		return merr.WrapErrParameterInvalid("SparseFloatVectorRowData", row, "Wrong row type")
	}
	if err := typeutil.ValidateSparseFloatRows(raw); err != nil {
		return err
	}
	if dim := typeutil.SparseFloatRowDim(raw); dim > data.Dim {
		data.Dim = dim
	}
	data.Contents = append(data.Contents, raw)
	return nil
}

func (data *BoolFieldData) AppendRows(rows interface{}) error {
v, ok := rows.([]bool)
if !ok {
@@ -612,6 +650,11 @@
return nil
}

// AppendRows implements FieldData.AppendRows.
// Bulk append of sparse rows is not implemented yet; callers should use
// AppendRow per row instead.
func (data *SparseFloatVectorFieldData) AppendRows(rows interface{}) error {
	// TODO(SPARSE): implement bulk append.
	return fmt.Errorf("not implemented")
}

// GetMemorySize implements FieldData.GetMemorySize
func (data *BoolFieldData) GetMemorySize() int { return binary.Size(data.Data) }
func (data *Int8FieldData) GetMemorySize() int { return binary.Size(data.Data) }
@@ -627,6 +670,11 @@
return binary.Size(data.Data) + 4
}

// GetMemorySize implements FieldData.GetMemorySize, reporting the
// proto-serialized size of the underlying SparseFloatArray.
// TODO(SPARSE): decide whether this should be the in-memory size or the
// serialized size.
func (data *SparseFloatVectorFieldData) GetMemorySize() int {
	return proto.Size(&data.SparseFloatArray)
}

// GetDataType implements FieldData.GetDataType
func (data *BoolFieldData) GetDataType() schemapb.DataType { return schemapb.DataType_Bool }
func (data *Int8FieldData) GetDataType() schemapb.DataType { return schemapb.DataType_Int8 }
@@ -654,6 +702,10 @@
return schemapb.DataType_BFloat16Vector
}

// GetDataType implements FieldData.GetDataType.
func (data *SparseFloatVectorFieldData) GetDataType() schemapb.DataType {
	return schemapb.DataType_SparseFloatVector
}

// why not binary.Size(data) directly? binary.Size(data) return -1
// binary.Size returns how many bytes Write would generate to encode the value v, which
// must be a fixed-size value or a slice of fixed-size values, or a pointer to such data.
63 changes: 33 additions & 30 deletions internal/storage/insert_data_test.go
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/testutils"
)

func TestInsertDataSuite(t *testing.T) {
@@ -84,11 +85,11 @@ func (s *InsertDataSuite) TestInsertData() {

s.False(s.iDataOneRow.IsEmpty())
s.Equal(1, s.iDataOneRow.GetRowNum())
s.Equal(151, s.iDataOneRow.GetMemorySize())
s.Equal(179, s.iDataOneRow.GetMemorySize())

s.False(s.iDataTwoRows.IsEmpty())
s.Equal(2, s.iDataTwoRows.GetRowNum())
s.Equal(286, s.iDataTwoRows.GetMemorySize())
s.Equal(340, s.iDataTwoRows.GetMemorySize())

for _, field := range s.iDataTwoRows.Data {
s.Equal(2, field.RowNum())
@@ -187,20 +188,21 @@ func (s *InsertDataSuite) SetupTest() {
s.Equal(16, s.iDataEmpty.GetMemorySize())

row1 := map[FieldID]interface{}{
RowIDField: int64(3),
TimestampField: int64(3),
BoolField: true,
Int8Field: int8(3),
Int16Field: int16(3),
Int32Field: int32(3),
Int64Field: int64(3),
FloatField: float32(3),
DoubleField: float64(3),
StringField: "str",
BinaryVectorField: []byte{0},
FloatVectorField: []float32{4, 5, 6, 7},
Float16VectorField: []byte{0, 0, 0, 0, 255, 255, 255, 255},
BFloat16VectorField: []byte{0, 0, 0, 0, 255, 255, 255, 255},
RowIDField: int64(3),
TimestampField: int64(3),
BoolField: true,
Int8Field: int8(3),
Int16Field: int16(3),
Int32Field: int32(3),
Int64Field: int64(3),
FloatField: float32(3),
DoubleField: float64(3),
StringField: "str",
BinaryVectorField: []byte{0},
FloatVectorField: []float32{4, 5, 6, 7},
Float16VectorField: []byte{0, 0, 0, 0, 255, 255, 255, 255},
BFloat16VectorField: []byte{0, 0, 0, 0, 255, 255, 255, 255},
SparseFloatVectorField: testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{4, 5, 6}),
ArrayField: &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{Data: []int32{1, 2, 3}},
@@ -219,20 +221,21 @@ func (s *InsertDataSuite) SetupTest() {
}

row2 := map[FieldID]interface{}{
RowIDField: int64(1),
TimestampField: int64(1),
BoolField: false,
Int8Field: int8(1),
Int16Field: int16(1),
Int32Field: int32(1),
Int64Field: int64(1),
FloatField: float32(1),
DoubleField: float64(1),
StringField: string("str"),
BinaryVectorField: []byte{0},
FloatVectorField: []float32{4, 5, 6, 7},
Float16VectorField: []byte{1, 2, 3, 4, 5, 6, 7, 8},
BFloat16VectorField: []byte{1, 2, 3, 4, 5, 6, 7, 8},
RowIDField: int64(1),
TimestampField: int64(1),
BoolField: false,
Int8Field: int8(1),
Int16Field: int16(1),
Int32Field: int32(1),
Int64Field: int64(1),
FloatField: float32(1),
DoubleField: float64(1),
StringField: string("str"),
BinaryVectorField: []byte{0},
FloatVectorField: []float32{4, 5, 6, 7},
Float16VectorField: []byte{1, 2, 3, 4, 5, 6, 7, 8},
BFloat16VectorField: []byte{1, 2, 3, 4, 5, 6, 7, 8},
SparseFloatVectorField: testutils.CreateSparseFloatRow([]uint32{2, 3, 4}, []float32{4, 5, 6}),
ArrayField: &schemapb.ScalarField{
Data: &schemapb.ScalarField_IntData{
IntData: &schemapb.IntArray{Data: []int32{1, 2, 3}},
2 changes: 2 additions & 0 deletions internal/storage/payload.go
Original file line number Diff line number Diff line change
@@ -42,6 +42,7 @@ type PayloadWriterInterface interface {
AddFloatVectorToPayload(binVec []float32, dim int) error
AddFloat16VectorToPayload(binVec []byte, dim int) error
AddBFloat16VectorToPayload(binVec []byte, dim int) error
AddSparseFloatVectorToPayload(data *SparseFloatVectorFieldData) error
FinishPayloadWriter() error
GetPayloadBufferFromWriter() ([]byte, error)
GetPayloadLengthFromWriter() (int, error)
@@ -67,6 +68,7 @@ type PayloadReaderInterface interface {
GetFloat16VectorFromPayload() ([]byte, int, error)
GetBFloat16VectorFromPayload() ([]byte, int, error)
GetFloatVectorFromPayload() ([]float32, int, error)
GetSparseFloatVectorFromPayload() (*SparseFloatVectorFieldData, int, error)
GetPayloadLengthFromReader() (int, error)

GetByteArrayDataSet() (*DataSet[parquet.ByteArray, *file.ByteArrayColumnChunkReader], error)
33 changes: 33 additions & 0 deletions internal/storage/payload_reader.go
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@ import (
"github.com/golang/protobuf/proto"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)

// PayloadReader reads data from payload
@@ -73,6 +74,8 @@ func (r *PayloadReader) GetDataFromPayload() (interface{}, int, error) {
return r.GetFloat16VectorFromPayload()
case schemapb.DataType_BFloat16Vector:
return r.GetBFloat16VectorFromPayload()
case schemapb.DataType_SparseFloatVector:
return r.GetSparseFloatVectorFromPayload()
case schemapb.DataType_String, schemapb.DataType_VarChar:
val, err := r.GetStringFromPayload()
return val, 0, err
@@ -429,6 +432,36 @@ func (r *PayloadReader) GetFloatVectorFromPayload() ([]float32, int, error) {
return ret, dim, nil
}

func (r *PayloadReader) GetSparseFloatVectorFromPayload() (*SparseFloatVectorFieldData, int, error) {
if !typeutil.IsSparseVectorType(r.colType) {
return nil, -1, fmt.Errorf("failed to get sparse float vector from datatype %v", r.colType.String())
}
values := make([]parquet.ByteArray, r.numRows)
valuesRead, err := ReadDataFromAllRowGroups[parquet.ByteArray, *file.ByteArrayColumnChunkReader](r.reader, values, 0, r.numRows)
if err != nil {
return nil, -1, err
}
if valuesRead != r.numRows {
return nil, -1, fmt.Errorf("expect %d binary, but got = %d", r.numRows, valuesRead)
}

fieldData := &SparseFloatVectorFieldData{}

for _, value := range values {
if len(value)%8 != 0 {
return nil, -1, fmt.Errorf("invalid bytesData length")
}

fieldData.Contents = append(fieldData.Contents, value)
rowDim := typeutil.SparseFloatRowDim(value)
if rowDim > fieldData.Dim {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this dimension useful anywhere? or it just used as a meta information?

fieldData.Dim = rowDim
}
}

return fieldData, int(fieldData.Dim), nil
}

func (r *PayloadReader) GetPayloadLengthFromReader() (int, error) {
return int(r.numRows), nil
}
250 changes: 250 additions & 0 deletions internal/storage/payload_test.go
Original file line number Diff line number Diff line change
@@ -26,6 +26,7 @@ import (
"github.com/stretchr/testify/require"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/util/testutils"
)

func TestPayload_ReaderAndWriter(t *testing.T) {
@@ -619,6 +620,170 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
defer r.ReleasePayloadReader()
})

t.Run("TestSparseFloatVector", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector)
require.Nil(t, err)
require.NotNil(t, w)

err = w.AddSparseFloatVectorToPayload(&SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
},
},
})
assert.NoError(t, err)
err = w.AddSparseFloatVectorToPayload(&SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
},
})
assert.NoError(t, err)
err = w.FinishPayloadWriter()
assert.NoError(t, err)

length, err := w.GetPayloadLengthFromWriter()
assert.NoError(t, err)
assert.Equal(t, 6, length)
defer w.ReleasePayloadWriter()

buffer, err := w.GetPayloadBufferFromWriter()
assert.NoError(t, err)

r, err := NewPayloadReader(schemapb.DataType_SparseFloatVector, buffer)
require.Nil(t, err)
length, err = r.GetPayloadLengthFromReader()
assert.NoError(t, err)
assert.Equal(t, length, 6)

floatVecs, dim, err := r.GetSparseFloatVectorFromPayload()
assert.NoError(t, err)
assert.Equal(t, 600, dim)
assert.Equal(t, 6, len(floatVecs.Contents))
assert.Equal(t, schemapb.SparseFloatArray{
// merged dim should be max of all dims
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{0, 1, 2}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
}, floatVecs.SparseFloatArray)

ifloatVecs, dim, err := r.GetDataFromPayload()
assert.NoError(t, err)
assert.Equal(t, floatVecs, ifloatVecs.(*SparseFloatVectorFieldData))
assert.Equal(t, 600, dim)
defer r.ReleasePayloadReader()
})

testSparse_OneBatch := func(t *testing.T, rows [][]byte, actualDim int) {
w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector)
require.Nil(t, err)
require.NotNil(t, w)

err = w.AddSparseFloatVectorToPayload(&SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: int64(actualDim),
Contents: rows,
},
})
assert.NoError(t, err)
err = w.FinishPayloadWriter()
assert.NoError(t, err)

length, err := w.GetPayloadLengthFromWriter()
assert.NoError(t, err)
assert.Equal(t, 3, length)
defer w.ReleasePayloadWriter()

buffer, err := w.GetPayloadBufferFromWriter()
assert.NoError(t, err)

r, err := NewPayloadReader(schemapb.DataType_SparseFloatVector, buffer)
require.Nil(t, err)
length, err = r.GetPayloadLengthFromReader()
assert.NoError(t, err)
assert.Equal(t, length, 3)

floatVecs, dim, err := r.GetSparseFloatVectorFromPayload()
assert.NoError(t, err)
assert.Equal(t, actualDim, dim)
assert.Equal(t, 3, len(floatVecs.Contents))
assert.Equal(t, schemapb.SparseFloatArray{
Dim: int64(dim),
Contents: rows,
}, floatVecs.SparseFloatArray)

ifloatVecs, dim, err := r.GetDataFromPayload()
assert.NoError(t, err)
assert.Equal(t, floatVecs, ifloatVecs.(*SparseFloatVectorFieldData))
assert.Equal(t, actualDim, dim)
defer r.ReleasePayloadReader()
}

t.Run("TestSparseFloatVector_emptyRow", func(t *testing.T) {
testSparse_OneBatch(t, [][]byte{
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, 3.2, 3.3}),
}, 600)
testSparse_OneBatch(t, [][]byte{
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
}, 0)
})

t.Run("TestSparseFloatVector_largeRow", func(t *testing.T) {
nnz := 100000
// generate an int slice with nnz random sorted elements
indices := make([]uint32, nnz)
values := make([]float32, nnz)
for i := 0; i < nnz; i++ {
indices[i] = uint32(i * 6)
values[i] = float32(i)
}
dim := int(indices[nnz-1]) + 1
testSparse_OneBatch(t, [][]byte{
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow(indices, values),
}, dim)
})

t.Run("TestSparseFloatVector_negativeValues", func(t *testing.T) {
testSparse_OneBatch(t, [][]byte{
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{-2.1, 2.2, -2.3}),
testutils.CreateSparseFloatRow([]uint32{100, 200, 599}, []float32{3.1, -3.2, 3.3}),
}, 600)
})

// even though SPARSE_INVERTED_INDEX and SPARSE_WAND index do not support
// arbitrarily large dimensions, HNSW does, so we still need to test it.
// Dimension range we support is 0 to positive int32 max - 1(to leave room
// for dim).
t.Run("TestSparseFloatVector_largeIndex", func(t *testing.T) {
int32Max := uint32(math.MaxInt32)
testSparse_OneBatch(t, [][]byte{
testutils.CreateSparseFloatRow([]uint32{}, []float32{}),
testutils.CreateSparseFloatRow([]uint32{10, 20, 30}, []float32{-2.1, 2.2, -2.3}),
testutils.CreateSparseFloatRow([]uint32{100, int32Max / 2, int32Max - 1}, []float32{3.1, -3.2, 3.3}),
}, int(int32Max))
})

// t.Run("TestAddDataToPayload", func(t *testing.T) {
// w, err := NewPayloadWriter(schemapb.DataType_Bool)
// w.colType = 999
@@ -863,6 +1028,37 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
err = w.AddBFloat16VectorToPayload([]byte{1, 0, 0, 0, 0, 0, 0, 0}, 8)
assert.Error(t, err)
})
t.Run("TestAddSparseFloatVectorAfterFinish", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector)
require.Nil(t, err)
require.NotNil(t, w)
defer w.Close()

err = w.FinishPayloadWriter()
assert.NoError(t, err)

err = w.AddSparseFloatVectorToPayload(&SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 53,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
},
},
})
assert.Error(t, err)
err = w.AddSparseFloatVectorToPayload(&SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
},
},
})
assert.Error(t, err)

err = w.FinishPayloadWriter()
assert.Error(t, err)
})
t.Run("TestNewReadError", func(t *testing.T) {
buffer := []byte{0}
r, err := NewPayloadReader(999, buffer)
@@ -1388,6 +1584,60 @@ func TestPayload_ReaderAndWriter(t *testing.T) {
assert.Error(t, err)
})

t.Run("TestGetSparseFloatVectorError", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_Bool)
require.Nil(t, err)
require.NotNil(t, w)

err = w.AddBoolToPayload([]bool{false, true, true})
assert.NoError(t, err)

err = w.FinishPayloadWriter()
assert.NoError(t, err)

buffer, err := w.GetPayloadBufferFromWriter()
assert.NoError(t, err)

r, err := NewPayloadReader(schemapb.DataType_SparseFloatVector, buffer)
assert.NoError(t, err)

_, _, err = r.GetSparseFloatVectorFromPayload()
assert.Error(t, err)

r.colType = 999
_, _, err = r.GetSparseFloatVectorFromPayload()
assert.Error(t, err)
})

t.Run("TestGetSparseFloatVectorError2", func(t *testing.T) {
w, err := NewPayloadWriter(schemapb.DataType_SparseFloatVector)
require.Nil(t, err)
require.NotNil(t, w)

err = w.AddSparseFloatVectorToPayload(&SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 53,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
},
},
})
assert.NoError(t, err)

err = w.FinishPayloadWriter()
assert.NoError(t, err)

buffer, err := w.GetPayloadBufferFromWriter()
assert.NoError(t, err)

r, err := NewPayloadReader(schemapb.DataType_SparseFloatVector, buffer)
assert.NoError(t, err)

r.numRows = 99
_, _, err = r.GetSparseFloatVectorFromPayload()
assert.Error(t, err)
})

t.Run("TestWriteLargeSizeData", func(t *testing.T) {
t.Skip("Large data skip for online ut")
size := 1 << 29 // 512M
28 changes: 27 additions & 1 deletion internal/storage/payload_writer.go
Original file line number Diff line number Diff line change
@@ -50,7 +50,8 @@ type NativePayloadWriter struct {

func NewPayloadWriter(colType schemapb.DataType, dim ...int) (PayloadWriterInterface, error) {
var arrowType arrow.DataType
if typeutil.IsVectorType(colType) {
// writer for sparse float vector doesn't require dim
if typeutil.IsVectorType(colType) && !typeutil.IsSparseVectorType(colType) {
if len(dim) != 1 {
return nil, fmt.Errorf("incorrect input numbers")
}
@@ -164,6 +165,12 @@ func (w *NativePayloadWriter) AddDataToPayload(data interface{}, dim ...int) err
return errors.New("incorrect data type")
}
return w.AddBFloat16VectorToPayload(val, dim[0])
case schemapb.DataType_SparseFloatVector:
val, ok := data.(*SparseFloatVectorFieldData)
if !ok {
return errors.New("incorrect data type")
}
return w.AddSparseFloatVectorToPayload(val)
default:
return errors.New("incorrect datatype")
}
@@ -475,6 +482,23 @@ func (w *NativePayloadWriter) AddBFloat16VectorToPayload(data []byte, dim int) e
return nil
}

// AddSparseFloatVectorToPayload appends every serialized sparse row in data to
// the writer's underlying binary column builder. It fails once the writer has
// been finished.
func (w *NativePayloadWriter) AddSparseFloatVectorToPayload(data *SparseFloatVectorFieldData) error {
	if w.finished {
		return errors.New("can't append data to finished writer")
	}
	builder, ok := w.builder.(*array.BinaryBuilder)
	if !ok {
		return errors.New("failed to cast BinaryBuilder")
	}
	rows := data.SparseFloatArray.Contents
	// reserve once up front to avoid repeated builder growth.
	builder.Reserve(len(rows))
	for _, row := range rows {
		builder.Append(row)
	}

	return nil
}

func (w *NativePayloadWriter) FinishPayloadWriter() error {
if w.finished {
return errors.New("can't reuse a finished writer")
@@ -574,6 +598,8 @@ func milvusDataTypeToArrowType(dataType schemapb.DataType, dim int) arrow.DataTy
return &arrow.FixedSizeBinaryType{
ByteWidth: dim * 2,
}
case schemapb.DataType_SparseFloatVector:
return &arrow.BinaryType{}
default:
panic("unsupported data type")
}
12 changes: 12 additions & 0 deletions internal/storage/print_binlog.go
Original file line number Diff line number Diff line change
@@ -334,6 +334,18 @@ func printPayloadValues(colType schemapb.DataType, reader PayloadReaderInterface
for i := 0; i < rows; i++ {
fmt.Printf("\t\t%d : %s\n", i, val[i])
}
case schemapb.DataType_SparseFloatVector:
sparseData, _, err := reader.GetSparseFloatVectorFromPayload()
if err != nil {
return err
}
fmt.Println("======= SparseFloatVectorFieldData =======")
fmt.Println("row num:", len(sparseData.Contents))
fmt.Println("dim:", sparseData.Dim)
for _, v := range sparseData.Contents {
fmt.Println(v)
}
fmt.Println("===== SparseFloatVectorFieldData end =====")
default:
return errors.New("undefined data type")
}
2 changes: 1 addition & 1 deletion internal/storage/stats.go
Original file line number Diff line number Diff line change
@@ -187,7 +187,7 @@ func (stats *PrimaryKeyStats) UpdateMinMax(pk PrimaryKey) {

func NewPrimaryKeyStats(fieldID, pkType, rowNum int64) (*PrimaryKeyStats, error) {
if rowNum <= 0 {
return nil, merr.WrapErrParameterInvalidMsg("non zero & non negative row num", rowNum)
return nil, merr.WrapErrParameterInvalidMsg("zero or negative row num", rowNum)
}
return &PrimaryKeyStats{
FieldID: fieldID,
29 changes: 29 additions & 0 deletions internal/storage/utils.go
Original file line number Diff line number Diff line change
@@ -422,6 +422,8 @@
Data: vecs,
Dim: dim,
}
case schemapb.DataType_SparseFloatVector:
return nil, fmt.Errorf("Sparse Float Vector is not supported in row based data")

case schemapb.DataType_Bool:
idata.Data[field.FieldID] = &BoolFieldData{
@@ -556,6 +558,11 @@
Dim: dim,
}

case schemapb.DataType_SparseFloatVector:
fieldData = &SparseFloatVectorFieldData{

Check failure on line 562 in internal/storage/utils.go

GitHub Actions / Code Checker MacOS 12

cannot use &SparseFloatVectorFieldData{…} (value of type *SparseFloatVectorFieldData) as FieldData value in assignment: *SparseFloatVectorFieldData does not implement FieldData (missing method GetRowSize)
SparseFloatArray: *srcFields[field.FieldID].GetVectors().GetSparseFloatVector(),
}

case schemapb.DataType_Bool:
srcData := srcField.GetScalars().GetBoolData().GetData()

@@ -823,6 +830,14 @@
fieldData.Data = append(fieldData.Data, field.Data...)
}

func mergeSparseFloatVectorField(data *InsertData, fid FieldID, field *SparseFloatVectorFieldData) {
if _, ok := data.Data[fid]; !ok {
data.Data[fid] = &SparseFloatVectorFieldData{}

Check failure on line 835 in internal/storage/utils.go

GitHub Actions / Code Checker MacOS 12

cannot use &SparseFloatVectorFieldData{} (value of type *SparseFloatVectorFieldData) as FieldData value in assignment: *SparseFloatVectorFieldData does not implement FieldData (missing method GetRowSize)
}
fieldData := data.Data[fid].(*SparseFloatVectorFieldData)

Check failure on line 837 in internal/storage/utils.go

GitHub Actions / Code Checker MacOS 12

impossible type assertion: data.Data[fid].(*SparseFloatVectorFieldData)
fieldData.AppendAllRows(field)
}

// MergeFieldData merge field into data.
func MergeFieldData(data *InsertData, fid FieldID, field FieldData) {
if field == nil {
@@ -857,6 +872,8 @@
mergeFloat16VectorField(data, fid, field)
case *BFloat16VectorFieldData:
mergeBFloat16VectorField(data, fid, field)
case *SparseFloatVectorFieldData:
mergeSparseFloatVectorField(data, fid, field)
}
}

@@ -1182,6 +1199,18 @@
},
},
}
case *SparseFloatVectorFieldData:
fieldData = &schemapb.FieldData{
Type: schemapb.DataType_SparseFloatVector,
FieldId: fieldID,
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Data: &schemapb.VectorField_SparseFloatVector{
SparseFloatVector: &rawData.SparseFloatArray,
},
},
},
}
default:
return insertRecord, fmt.Errorf("unsupported data type when transter storage.InsertData to internalpb.InsertRecord")
}
50 changes: 50 additions & 0 deletions internal/storage/utils_test.go
Original file line number Diff line number Diff line change
@@ -35,6 +35,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/mq/msgstream"
"github.com/milvus-io/milvus/pkg/util/testutils"
)

func TestCheckTsField(t *testing.T) {
@@ -900,6 +901,25 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim
for nrows := 0; nrows < numRows; nrows++ {
columns[idx] = append(columns[idx], data[nrows*bf16VecDim*2:(nrows+1)*bf16VecDim*2])
}
case schemapb.DataType_SparseFloatVector:
data := testutils.GenerateSparseFloatVectors(numRows)
f := &schemapb.FieldData{
Type: schemapb.DataType_BFloat16Vector,
FieldName: field.Name,
Field: &schemapb.FieldData_Vectors{
Vectors: &schemapb.VectorField{
Dim: int64(data.Dim),
Data: &schemapb.VectorField_SparseFloatVector{
SparseFloatVector: data,
},
},
},
FieldId: field.FieldID,
}
msg.FieldsData = append(msg.FieldsData, f)
for nrows := 0; nrows < numRows; nrows++ {
columns[idx] = append(columns[idx], data.Contents[idx])
}

case schemapb.DataType_Array:
data := generateInt32ArrayList(numRows)
@@ -1246,6 +1266,15 @@ func TestMergeInsertData(t *testing.T) {
Data: []byte{0, 1},
Dim: 1,
},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
},
},
},
ArrayField: &ArrayFieldData{
Data: []*schemapb.ScalarField{
{
@@ -1311,6 +1340,14 @@ func TestMergeInsertData(t *testing.T) {
Data: []byte{2, 3},
Dim: 1,
},
SparseFloatVectorField: &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
},
},
ArrayField: &ArrayFieldData{
Data: []*schemapb.ScalarField{
{
@@ -1387,6 +1424,19 @@ func TestMergeInsertData(t *testing.T) {
assert.True(t, ok)
assert.Equal(t, []byte{0, 1, 2, 3}, f.(*BFloat16VectorFieldData).Data)

f, ok = d1.Data[SparseFloatVectorField]
assert.True(t, ok)
assert.Equal(t, &SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Dim: 600,
Contents: [][]byte{
testutils.CreateSparseFloatRow([]uint32{30, 41, 52}, []float32{1.1, 1.2, 1.3}),
testutils.CreateSparseFloatRow([]uint32{60, 80, 230}, []float32{2.1, 2.2, 2.3}),
testutils.CreateSparseFloatRow([]uint32{170, 300, 579}, []float32{3.1, 3.2, 3.3}),
},
},
}, f.(*SparseFloatVectorFieldData))

f, ok = d1.Data[ArrayField]
assert.True(t, ok)
assert.Equal(t, []int32{1, 2, 3}, f.(*ArrayFieldData).Data[0].GetIntData().GetData())
12 changes: 12 additions & 0 deletions internal/util/indexcgowrapper/dataset.go
Original file line number Diff line number Diff line change
@@ -41,6 +41,16 @@ func GenBFloat16VecDataset(vectors []byte) *Dataset {
}
}

// GenSparseFloatVecDataset builds a Dataset describing sparse float vector
// data. Only the DType of the returned Dataset is consumed by current callers;
// the raw rows in data are intentionally not attached.
// TODO(SPARSE): if this Dataset is ever handed to knowhere for index building,
// knowhere::sparse::SparseRow will need to be exposed to Go and the rows
// populated here.
func GenSparseFloatVecDataset(data *storage.SparseFloatVectorFieldData) *Dataset {
	return &Dataset{
		DType: schemapb.DataType_SparseFloatVector,
	}
}

func GenBinaryVecDataset(vectors []byte) *Dataset {
return &Dataset{
DType: schemapb.DataType_BinaryVector,
@@ -116,6 +126,8 @@ func GenDataset(data storage.FieldData) *Dataset {
return GenFloat16VecDataset(f.Data)
case *storage.BFloat16VectorFieldData:
return GenBFloat16VecDataset(f.Data)
case *storage.SparseFloatVectorFieldData:
return GenSparseFloatVecDataset(f)
default:
return &Dataset{
DType: schemapb.DataType_None,
9 changes: 9 additions & 0 deletions internal/util/indexcgowrapper/index.go
Original file line number Diff line number Diff line change
@@ -49,6 +49,7 @@ type CgoIndex struct {
close bool
}

// used only in test
// TODO: use proto.Marshal instead of proto.MarshalTextString for better compatibility.
func NewCgoIndex(dtype schemapb.DataType, typeParams, indexParams map[string]string) (CodecIndex, error) {
protoTypeParams := &indexcgopb.TypeParams{
@@ -123,6 +124,8 @@ func CreateIndexV2(ctx context.Context, buildIndexInfo *BuildIndexInfo) (CodecIn
return index, nil
}

// TODO: this seems to be used only for test. We should mark the method
// name with ForTest, or maybe move to test file.
func (index *CgoIndex) Build(dataset *Dataset) error {
switch dataset.DType {
case schemapb.DataType_None:
@@ -176,6 +179,12 @@ func (index *CgoIndex) buildBFloat16VecIndex(dataset *Dataset) error {
return HandleCStatus(&status, "failed to build bfloat16 vector index")
}

// buildSparseFloatVecIndex feeds the raw serialized sparse rows stored under
// keyRawArr to the cgo index builder and converts the returned C status into a
// Go error.
// NOTE(review): &vectors[0] panics when the dataset holds an empty byte slice,
// and the type assertion panics if keyRawArr is missing or not []byte —
// confirm callers always populate keyRawArr with non-empty data, or add guards.
func (index *CgoIndex) buildSparseFloatVecIndex(dataset *Dataset) error {
	vectors := dataset.Data[keyRawArr].([]byte)
	status := C.BuildSparseFloatVecIndex(index.indexPtr, (C.int64_t)(len(vectors)), (C.int64_t)(0), (*C.uint8_t)(&vectors[0]))
	return HandleCStatus(&status, "failed to build sparse float vector index")
}

func (index *CgoIndex) buildBinaryVecIndex(dataset *Dataset) error {
vectors := dataset.Data[keyRawArr].([]byte)
status := C.BuildBinaryVecIndex(index.indexPtr, (C.int64_t)(len(vectors)), (*C.uint8_t)(&vectors[0]))
4 changes: 3 additions & 1 deletion pkg/util/funcutil/func.go
Original file line number Diff line number Diff line change
@@ -146,7 +146,7 @@ func CheckCtxValid(ctx context.Context) bool {
func GetVecFieldIDs(schema *schemapb.CollectionSchema) []int64 {
var vecFieldIDs []int64
for _, field := range schema.Fields {
if field.DataType == schemapb.DataType_BinaryVector || field.DataType == schemapb.DataType_FloatVector || field.DataType == schemapb.DataType_Float16Vector {
if field.DataType == schemapb.DataType_BinaryVector || field.DataType == schemapb.DataType_FloatVector || field.DataType == schemapb.DataType_Float16Vector || field.DataType == schemapb.DataType_SparseFloatVector {
vecFieldIDs = append(vecFieldIDs, field.FieldID)
}
}
@@ -335,6 +335,8 @@ func GetNumRowOfFieldData(fieldData *schemapb.FieldData) (uint64, error) {
if err != nil {
return 0, err
}
case *schemapb.VectorField_SparseFloatVector:
fieldNumRows = uint64(len(vectorField.GetSparseFloatVector().GetContents()))
default:
return 0, fmt.Errorf("%s is not supported now", vectorFieldType)
}
17 changes: 17 additions & 0 deletions pkg/util/funcutil/placeholdergroup.go
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@ package funcutil

import (
"encoding/binary"
"fmt"
"math"

"github.com/cockroachdb/errors"
@@ -76,6 +77,22 @@ func fieldDataToPlaceholderValue(fieldData *schemapb.FieldData) (*commonpb.Place
Values: flattenedFloat16VectorsToByteVectors(x.Bfloat16Vector, int(vectors.Dim)),
}
return placeholderValue, nil
case schemapb.DataType_SparseFloatVector:
vectors, ok := fieldData.GetVectors().GetData().(*schemapb.VectorField_SparseFloatVector)
if !ok {
return nil, errors.New("vector data is not schemapb.VectorField_SparseFloatVector")
}
vec := vectors.SparseFloatVector
bytes, err := proto.Marshal(vec)
if err != nil {
return nil, fmt.Errorf("failed to marshal schemapb.SparseFloatArray to bytes: %w", err)
}
placeholderValue := &commonpb.PlaceholderValue{
Tag: "$0",
Type: commonpb.PlaceholderType_SparseFloatVector,
Values: [][]byte{bytes},
}
return placeholderValue, nil
default:
return nil, errors.New("field is not a vector field")
}
2 changes: 1 addition & 1 deletion pkg/util/gc/gc_tuner.go
Original file line number Diff line number Diff line change
@@ -87,7 +87,7 @@ func optimizeGOGC() {

// currently we assume 20 ms as long gc pause
if (m.PauseNs[(m.NumGC+255)%256] / uint64(time.Millisecond)) < 20 {
log.Info("GC Tune done", zap.Uint32("previous GOGC", previousGOGC),
log.Debug("GC Tune done", zap.Uint32("previous GOGC", previousGOGC),
zap.Uint64("heapuse ", toMB(heapuse)),
zap.Uint64("total memory", toMB(totaluse)),
zap.Uint64("next GC", toMB(m.NextGC)),
4 changes: 4 additions & 0 deletions pkg/util/indexparamcheck/conf_adapter_mgr.go
Original file line number Diff line number Diff line change
@@ -56,6 +56,10 @@ func (mgr *indexCheckerMgrImpl) registerIndexChecker() {
mgr.checkers[IndexFaissBinIvfFlat] = newBinIVFFlatChecker()
mgr.checkers[IndexHNSW] = newHnswChecker()
mgr.checkers[IndexDISKANN] = newDiskannChecker()
mgr.checkers[IndexSparseInverted] = newSparseInvertedIndexChecker()
// WAND doesn't have more index params than sparse inverted index, thus
// using the same checker.
mgr.checkers[IndexSparseWand] = newSparseInvertedIndexChecker()
}

func newIndexCheckerMgr() *indexCheckerMgrImpl {
9 changes: 7 additions & 2 deletions pkg/util/indexparamcheck/constraints.go
Original file line number Diff line number Diff line change
@@ -41,6 +41,9 @@ const (

CargaBuildAlgoIVFPQ = "IVF_PQ"
CargaBuildAlgoNNDESCENT = "NN_DESCENT"

// Sparse Index Param
SparseDropRatioBuild = "drop_ratio_build"
)

// METRICS is a set of all metrics types supported for float vector.
@@ -55,9 +58,11 @@ var (
CagraBuildAlgoTypes = []string{CargaBuildAlgoIVFPQ, CargaBuildAlgoNNDESCENT}
supportDimPerSubQuantizer = []int{32, 28, 24, 20, 16, 12, 10, 8, 6, 4, 3, 2, 1} // const
supportSubQuantizer = []int{96, 64, 56, 48, 40, 32, 28, 24, 20, 16, 12, 8, 4, 3, 2, 1} // const
SparseMetrics = []string{metric.IP} // const
)

const (
FloatVectorDefaultMetricType = metric.IP
BinaryVectorDefaultMetricType = metric.JACCARD
FloatVectorDefaultMetricType = metric.IP
SparseFloatVectorDefaultMetricType = metric.IP
BinaryVectorDefaultMetricType = metric.JACCARD
)
1 change: 1 addition & 0 deletions pkg/util/indexparamcheck/hnsw_checker.go
Original file line number Diff line number Diff line change
@@ -31,6 +31,7 @@ func (c hnswChecker) CheckTrain(params map[string]string) error {
}

func (c hnswChecker) CheckValidDataType(dType schemapb.DataType) error {
// TODO(SPARSE) we'll add sparse vector support in HNSW later in cardinal
if dType != schemapb.DataType_FloatVector && dType != schemapb.DataType_BinaryVector && dType != schemapb.DataType_Float16Vector && dType != schemapb.DataType_BFloat16Vector {
return fmt.Errorf("only support float vector or binary vector")
}
2 changes: 2 additions & 0 deletions pkg/util/indexparamcheck/index_type.go
Original file line number Diff line number Diff line change
@@ -30,6 +30,8 @@ const (
IndexFaissBinIvfFlat IndexType = "BIN_IVF_FLAT"
IndexHNSW IndexType = "HNSW"
IndexDISKANN IndexType = "DISKANN"
IndexSparseInverted IndexType = "SPARSE_INVERTED_INDEX"
IndexSparseWand IndexType = "SPARSE_WAND"
)

func IsGpuIndex(indexType IndexType) bool {
47 changes: 47 additions & 0 deletions pkg/util/indexparamcheck/sparse_float_vector_base_checker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package indexparamcheck

import (
"fmt"
"strconv"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common"
)

// sparseFloatVectorBaseChecker is the shared param checker for sparse float
// vector index types. Sparse vectors have no fixed dimension, so unlike other
// vector checkers it deliberately does not embed baseChecker (which checks dim).
type sparseFloatVectorBaseChecker struct{}

// StaticCheck verifies that the metric type param is present and is one of
// the metrics supported for sparse float vectors.
func (c sparseFloatVectorBaseChecker) StaticCheck(params map[string]string) error {
	if CheckStrByValues(params, Metric, SparseMetrics) {
		return nil
	}
	return fmt.Errorf("metric type not found or not supported, supported: %v", SparseMetrics)
}

// CheckTrain validates the index build params: drop_ratio_build, when
// present, must parse as a float in the range [0, 1).
func (c sparseFloatVectorBaseChecker) CheckTrain(params map[string]string) error {
	dropRatioBuildStr, ok := params[SparseDropRatioBuild]
	if !ok {
		return nil
	}
	ratio, err := strconv.ParseFloat(dropRatioBuildStr, 64)
	if err != nil || ratio < 0 || ratio >= 1 {
		return fmt.Errorf("invalid drop_ratio_build: %s, must be in range [0, 1)", dropRatioBuildStr)
	}
	return nil
}

// CheckValidDataType rejects any field data type other than sparse float
// vector, since sparse index types only operate on sparse float vectors.
func (c sparseFloatVectorBaseChecker) CheckValidDataType(dType schemapb.DataType) error {
	if dType != schemapb.DataType_SparseFloatVector {
		// fixed typo in the user-facing error message: "tpye" -> "type"
		return fmt.Errorf("only sparse float vector is supported for the specified index type")
	}
	return nil
}

// SetDefaultMetricTypeIfNotExist fills in the default metric type for sparse
// float vectors (IP, per SparseFloatVectorDefaultMetricType) when the caller
// did not specify one.
func (c sparseFloatVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string) {
	setDefaultIfNotExist(params, common.MetricTypeKey, SparseFloatVectorDefaultMetricType)
}

// newSparseFloatVectorBaseChecker returns the shared checker used by sparse
// float vector index types.
func newSparseFloatVectorBaseChecker() IndexChecker {
	return &sparseFloatVectorBaseChecker{}
}
9 changes: 9 additions & 0 deletions pkg/util/indexparamcheck/sparse_inverted_index_checker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package indexparamcheck

// sparseInvertedIndexChecker validates index params for sparse inverted
// indexes; all checks come from the embedded sparseFloatVectorBaseChecker.
type sparseInvertedIndexChecker struct {
	sparseFloatVectorBaseChecker
}

// newSparseInvertedIndexChecker returns the checker for SPARSE_INVERTED_INDEX
// (registered for SPARSE_WAND as well, which shares the same params).
func newSparseInvertedIndexChecker() *sparseInvertedIndexChecker {
	return &sparseInvertedIndexChecker{}
}
84 changes: 84 additions & 0 deletions pkg/util/testutils/sparse_test_utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package testutils

import (
"encoding/binary"
"math"
"math/rand"
"sort"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
)

// SparseFloatRowSetAt writes the (idx, value) pair into the pos-th 8-byte
// element slot of a sparse float vector row: a little-endian uint32 index
// followed by the little-endian IEEE-754 bits of the float32 value.
func SparseFloatRowSetAt(row []byte, pos int, idx uint32, value float32) {
	offset := pos * 8
	binary.LittleEndian.PutUint32(row[offset:], idx)
	binary.LittleEndian.PutUint32(row[offset+4:], math.Float32bits(value))
}

// CreateSparseFloatRow serializes parallel indices/values slices into a
// sparse float vector row: one 8-byte slot per element, each holding a
// little-endian uint32 index followed by the float32 value bits.
func CreateSparseFloatRow(indices []uint32, values []float32) []byte {
	row := make([]byte, 8*len(indices))
	for i, idx := range indices {
		binary.LittleEndian.PutUint32(row[i*8:], idx)
		binary.LittleEndian.PutUint32(row[i*8+4:], math.Float32bits(values[i]))
	}
	return row
}

// GenerateSparseFloatVectors generates numRows random sparse float vector
// rows for testing. Per row, between 1 and avgNnz*2 candidate indices are
// drawn from [0, dim), then deduplicated and sorted; values are random
// float32s in [0, 1). The returned Dim is the max index + 1 over all rows.
func GenerateSparseFloatVectors(numRows int) *schemapb.SparseFloatArray {
	const (
		dim    = 700
		avgNnz = 20
	)
	var contents [][]byte
	maxDim := 0

	// uniqueAndSort removes duplicate indices and returns them ascending.
	uniqueAndSort := func(indices []uint32) []uint32 {
		seen := make(map[uint32]bool, len(indices))
		result := make([]uint32, 0, len(indices))
		for _, value := range indices {
			if !seen[value] {
				seen[value] = true
				result = append(result, value)
			}
		}
		sort.Slice(result, func(i, j int) bool {
			return result[i] < result[j]
		})
		return result
	}

	for i := 0; i < numRows; i++ {
		// at least one non-zero per row, on average avgNnz
		nnz := rand.Intn(avgNnz*2) + 1
		indices := make([]uint32, 0, nnz)
		for j := 0; j < nnz; j++ {
			indices = append(indices, uint32(rand.Intn(dim)))
		}
		indices = uniqueAndSort(indices)
		values := make([]float32, 0, len(indices))
		for j := 0; j < len(indices); j++ {
			values = append(values, rand.Float32())
		}
		// indices are sorted, so the last one determines this row's dim
		if len(indices) > 0 && int(indices[len(indices)-1])+1 > maxDim {
			maxDim = int(indices[len(indices)-1]) + 1
		}
		rowBytes := CreateSparseFloatRow(indices, values)

		contents = append(contents, rowBytes)
	}
	return &schemapb.SparseFloatArray{
		Dim:      int64(maxDim),
		Contents: contents,
	}
}
22 changes: 22 additions & 0 deletions pkg/util/typeutil/gen_empty_field_data.go
Original file line number Diff line number Diff line change
@@ -207,6 +207,26 @@ func genEmptyBFloat16VectorFieldData(field *schemapb.FieldSchema) (*schemapb.Fie
}, nil
}

// genEmptySparseFloatVectorFieldData builds a FieldData for a sparse float
// vector field containing zero rows. Sparse vectors carry no fixed dimension,
// so both Dim fields are left at 0.
func genEmptySparseFloatVectorFieldData(field *schemapb.FieldSchema) (*schemapb.FieldData, error) {
	emptySparse := &schemapb.SparseFloatArray{
		Dim:      0,
		Contents: make([][]byte, 0),
	}
	return &schemapb.FieldData{
		Type:      field.GetDataType(),
		FieldName: field.GetName(),
		Field: &schemapb.FieldData_Vectors{
			Vectors: &schemapb.VectorField{
				Dim:  0,
				Data: &schemapb.VectorField_SparseFloatVector{SparseFloatVector: emptySparse},
			},
		},
		FieldId:   field.GetFieldID(),
		IsDynamic: field.GetIsDynamic(),
	}, nil
}

func GenEmptyFieldData(field *schemapb.FieldSchema) (*schemapb.FieldData, error) {
dataType := field.GetDataType()
switch dataType {
@@ -234,6 +254,8 @@ func GenEmptyFieldData(field *schemapb.FieldSchema) (*schemapb.FieldData, error)
return genEmptyFloat16VectorFieldData(field)
case schemapb.DataType_BFloat16Vector:
return genEmptyBFloat16VectorFieldData(field)
case schemapb.DataType_SparseFloatVector:
return genEmptySparseFloatVectorFieldData(field)
default:
return nil, fmt.Errorf("unsupported data type: %s", dataType.String())
}
3 changes: 3 additions & 0 deletions pkg/util/typeutil/get_dim.go
Original file line number Diff line number Diff line change
@@ -13,6 +13,9 @@ func GetDim(field *schemapb.FieldSchema) (int64, error) {
if !IsVectorType(field.GetDataType()) {
return 0, fmt.Errorf("%s is not of vector type", field.GetDataType())
}
if IsSparseVectorType(field.GetDataType()) {
return 0, fmt.Errorf("typeutil.GetDim should not invoke on sparse vector type")
}
h := NewKvPairs(append(field.GetIndexParams(), field.GetTypeParams()...))
dimStr, err := h.Get(common.DimKey)
if err != nil {
130 changes: 128 additions & 2 deletions pkg/util/typeutil/schema.go
Original file line number Diff line number Diff line change
@@ -159,6 +159,12 @@ func estimateSizeBy(schema *schemapb.CollectionSchema, policy getVariableFieldLe
break
}
}
case schemapb.DataType_SparseFloatVector:
		// TODO(SPARSE, zhengbuqian): size of sparse float vector
// varies depending on the number of non-zeros. Using sparse vector
// generated by SPLADE as reference and returning size of a sparse
// vector with 150 non-zeros.
res += 1200
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is dangerous.
We should not depend on this kind of estimation, but let's keep it for now.
Make this configurable.

}
}
return res, nil
@@ -235,6 +241,11 @@ func EstimateEntitySize(fieldsData []*schemapb.FieldData, rowOffset int) (int, e
res += int(fs.GetVectors().GetDim())
case schemapb.DataType_FloatVector:
res += int(fs.GetVectors().GetDim() * 4)
case schemapb.DataType_SparseFloatVector:
vec := fs.GetVectors().GetSparseFloatVector()
// counting only the size of the vector data, ignoring other
// bytes used in proto.
res += len(vec.Contents[rowOffset])
}
}
return res, nil
@@ -359,13 +370,17 @@ func (helper *SchemaHelper) GetVectorDimFromID(fieldID int64) (int, error) {
// IsVectorType returns true if input is a vector type, otherwise false
func IsVectorType(dataType schemapb.DataType) bool {
switch dataType {
case schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector:
case schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, schemapb.DataType_BFloat16Vector, schemapb.DataType_SparseFloatVector:
return true
default:
return false
}
}

func IsSparseVectorType(dataType schemapb.DataType) bool {
return dataType == schemapb.DataType_SparseFloatVector
}

// IsIntegerType returns true if input is an integer type, otherwise false
func IsIntegerType(dataType schemapb.DataType) bool {
switch dataType {
@@ -516,6 +531,15 @@ func PrepareResultFieldData(sample []*schemapb.FieldData, topK int64) []*schemap
vectors.Vectors.Data = &schemapb.VectorField_BinaryVector{
BinaryVector: make([]byte, 0, topK*dim/8),
}
case *schemapb.VectorField_SparseFloatVector:
vectors.Vectors.Data = &schemapb.VectorField_SparseFloatVector{
SparseFloatVector: &schemapb.SparseFloatArray{
// dim to be updated when appending data.
Dim: 0,
Contents: make([][]byte, 0, topK),
},
}
vectors.Vectors.Dim = 0
}
fd.Field = vectors
}
@@ -525,7 +549,7 @@ func PrepareResultFieldData(sample []*schemapb.FieldData, topK int64) []*schemap
}

// AppendFieldData appends fields data of specified index from src to dst
func AppendFieldData(dst []*schemapb.FieldData, src []*schemapb.FieldData, idx int64) (appendSize int64) {
func AppendFieldData(dst, src []*schemapb.FieldData, idx int64) (appendSize int64) {
for i, fieldData := range src {
switch fieldType := fieldData.Field.(type) {
case *schemapb.FieldData_Scalars:
@@ -711,6 +735,18 @@ func AppendFieldData(dst []*schemapb.FieldData, src []*schemapb.FieldData, idx i
}
/* #nosec G103 */
appendSize += int64(unsafe.Sizeof(srcVector.Bfloat16Vector[idx*(dim*2) : (idx+1)*(dim*2)]))
case *schemapb.VectorField_SparseFloatVector:
if dstVector.GetSparseFloatVector() == nil {
dstVector.Data = &schemapb.VectorField_SparseFloatVector{
SparseFloatVector: &schemapb.SparseFloatArray{
Dim: 0,
Contents: make([][]byte, 0),
},
}
dstVector.Dim = int64(srcVector.SparseFloatVector.Dim)
}
vec := dstVector.Data.(*schemapb.VectorField_SparseFloatVector).SparseFloatVector
appendSize += appendSparseFloatArraySingleRow(vec, srcVector.SparseFloatVector, idx)
default:
log.Error("Not supported field type", zap.String("field type", fieldData.Type.String()))
}
@@ -767,6 +803,8 @@ func DeleteFieldData(dst []*schemapb.FieldData) {
case *schemapb.VectorField_Bfloat16Vector:
dstBfloat16Vector := dstVector.Data.(*schemapb.VectorField_Bfloat16Vector)
dstBfloat16Vector.Bfloat16Vector = dstBfloat16Vector.Bfloat16Vector[:len(dstBfloat16Vector.Bfloat16Vector)-int(dim*2)]
case *schemapb.VectorField_SparseFloatVector:
trimSparseFloatArray(dstVector.GetSparseFloatVector())
default:
log.Error("wrong field type added", zap.String("field type", fieldData.Type.String()))
}
@@ -929,6 +967,14 @@ func MergeFieldData(dst []*schemapb.FieldData, src []*schemapb.FieldData) error
} else {
dstVector.GetFloatVector().Data = append(dstVector.GetFloatVector().Data, srcVector.FloatVector.Data...)
}
case *schemapb.VectorField_SparseFloatVector:
if dstVector.GetSparseFloatVector() == nil {
dstVector.Data = &schemapb.VectorField_SparseFloatVector{
SparseFloatVector: srcVector.SparseFloatVector,
}
} else {
appendSparseFloatArray(dstVector.GetSparseFloatVector(), srcVector.SparseFloatVector)
}
default:
log.Error("Not supported data type", zap.String("data type", srcFieldData.Type.String()))
return errors.New("unsupported data type: " + srcFieldData.Type.String())
@@ -1166,6 +1212,8 @@ func GetData(field *schemapb.FieldData, idx int) interface{} {
dim := int(field.GetVectors().GetDim())
dataBytes := dim * 2
return field.GetVectors().GetBfloat16Vector()[idx*dataBytes : (idx+1)*dataBytes]
case schemapb.DataType_SparseFloatVector:
return field.GetVectors().GetSparseFloatVector().Contents[idx]
}
return nil
}
@@ -1325,3 +1373,81 @@ func AppendGroupByValue(dstResData *schemapb.SearchResultData,
}
return nil
}

// appendSparseFloatArray appends every row of src to dst, widening dst.Dim
// when src holds larger-dimensioned rows. A src with no rows is a no-op.
func appendSparseFloatArray(dst, src *schemapb.SparseFloatArray) {
	if len(src.Contents) == 0 {
		return
	}
	if src.Dim > dst.Dim {
		dst.Dim = src.Dim
	}
	dst.Contents = append(dst.Contents, src.Contents...)
}

// appendSparseFloatArraySingleRow appends the idx-th row of src to dst,
// widening dst.Dim when the appended row's dim is larger. It returns the byte
// size of the appended row (its serialized indices + values), or 0 for an
// empty row.
func appendSparseFloatArraySingleRow(dst, src *schemapb.SparseFloatArray, idx int64) int64 {
	row := src.Contents[idx]
	dst.Contents = append(dst.Contents, row)
	rowDim := SparseFloatRowDim(row)
	if rowDim == 0 {
		// empty row: nothing to account for, dim unchanged
		return 0
	}
	if dst.Dim < rowDim {
		dst.Dim = rowDim
	}
	return int64(len(row))
}

// trimSparseFloatArray drops the last row of vec. Dim is deliberately not
// recomputed: finding the new max dim would require scanning every remaining
// row, and an overestimated Dim does not affect correctness.
func trimSparseFloatArray(vec *schemapb.SparseFloatArray) {
	n := len(vec.Contents)
	if n == 0 {
		return
	}
	vec.Contents = vec.Contents[:n-1]
}

// ValidateSparseFloatRows checks that each given sparse float vector row is
// non-empty, a multiple of the 8-byte element size, has its indices sorted in
// ascending order, and contains only valid float values. It returns the first
// violation found, or nil when all rows are valid.
func ValidateSparseFloatRows(rows ...[]byte) error {
	for _, row := range rows {
		if len(row) == 0 {
			return errors.New("empty sparse float vector row")
		}
		if len(row)%8 != 0 {
			return fmt.Errorf("invalid data length in sparse float vector: %d", len(row))
		}
		for i := 0; i < SparseFloatRowElementCount(row); i++ {
			if i > 0 && SparseFloatRowIndexAt(row, i) < SparseFloatRowIndexAt(row, i-1) {
				return errors.New("unsorted indices in sparse float vector")
			}
			// BUG FIX: the error returned by VerifyFloat was previously
			// discarded, so rows containing NaN/Inf values passed validation.
			if err := VerifyFloat(float64(SparseFloatRowValueAt(row, i))); err != nil {
				return err
			}
		}
	}
	return nil
}

// SparseFloatRowElementCount returns the number of (index, value) elements in
// a sparse float vector row. Each element occupies 8 bytes: a uint32 index
// followed by a float32 value. A nil row has zero elements — the explicit nil
// check was removed since len(nil) is already 0.
func SparseFloatRowElementCount(row []byte) int {
	return len(row) / 8
}

// SparseFloatRowIndexAt returns the uint32 dimension index of the idx-th
// element of a sparse float vector row. It does not bounds-check idx; callers
// must ensure idx < SparseFloatRowElementCount(row).
func SparseFloatRowIndexAt(row []byte, idx int) uint32 {
	return common.Endian.Uint32(row[idx*8:])
}

// SparseFloatRowValueAt returns the float32 value of the idx-th element of a
// sparse float vector row (stored as raw IEEE-754 bits after the uint32
// index). It does not bounds-check idx.
func SparseFloatRowValueAt(row []byte, idx int) float32 {
	return math.Float32frombits(common.Endian.Uint32(row[idx*8+4:]))
}

// SparseFloatRowDim returns the dim of a sparse float vector row, defined as
// its largest index + 1. Since indices are stored sorted, that is the last
// element's index + 1. An empty or nil row has dim 0. The redundant
// `row == nil ||` guard was dropped: len(nil) == 0 already covers it
// (staticcheck S1009).
func SparseFloatRowDim(row []byte) int64 {
	if len(row) == 0 {
		return 0
	}
	return int64(SparseFloatRowIndexAt(row, SparseFloatRowElementCount(row)-1)) + 1
}
316 changes: 281 additions & 35 deletions pkg/util/typeutil/schema_test.go

Large diffs are not rendered by default.