Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: [Do Not Merge] Add Sparse Float Vector support to milvus #29421

Closed
wants to merge 5 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[Sparse Float Vector] added some integration tests
Signed-off-by: Buqian Zheng <zhengbuqian@gmail.com>
zhengbuqian committed Mar 6, 2024
commit 80a7531adb5501e34d2a224d358dfa03e9ec0e1c
2 changes: 1 addition & 1 deletion internal/proxy/util.go
Original file line number Diff line number Diff line change
@@ -1196,7 +1196,7 @@ func fillFieldsDataBySchema(schema *schemapb.CollectionSchema, insertMsg *msgstr
}

if len(insertMsg.FieldsData) != requiredFieldsNum {
log.Warn("the number of fields is less than needed",
log.Warn("the number of fields is not the same as needed",
zap.Int("fieldNum", len(insertMsg.FieldsData)),
zap.Int("requiredFieldNum", requiredFieldsNum),
zap.String("collection", schema.GetName()))
2 changes: 2 additions & 0 deletions pkg/common/common.go
Original file line number Diff line number Diff line change
@@ -101,6 +101,8 @@ const (
DimKey = "dim"
MaxLengthKey = "max_length"
MaxCapacityKey = "max_capacity"

DropRatioBuildKey = "drop_ratio_build"
)

// Collection properties key
77 changes: 69 additions & 8 deletions tests/integration/getvector/get_vector_test.go
Original file line number Diff line number Diff line change
@@ -86,19 +86,23 @@ func (s *TestGetVectorSuite) run() {
IndexParams: nil,
AutoID: false,
}
typeParams := []*commonpb.KeyValuePair{}
if !typeutil.IsSparseVectorType(s.vecType) {
typeParams = []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: fmt.Sprintf("%d", dim),
},
}
}
fVec := &schemapb.FieldSchema{
FieldID: 101,
Name: vecFieldName,
IsPrimaryKey: false,
Description: "",
DataType: s.vecType,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: fmt.Sprintf("%d", dim),
},
},
IndexParams: nil,
TypeParams: typeParams,
IndexParams: nil,
}
schema := integration.ConstructSchema(collection, dim, false, pk, fVec)
marshaledSchema, err := proto.Marshal(schema)
@@ -126,6 +130,8 @@ func (s *TestGetVectorSuite) run() {
vecFieldData = integration.NewFloat16VectorFieldData(vecFieldName, NB, dim)
// } else if s.vecType == schemapb.DataType_BFloat16Vector {
// vecFieldData = integration.NewBFloat16VectorFieldData(vecFieldName, NB, dim)
} else if typeutil.IsSparseVectorType(s.vecType) {
vecFieldData = integration.NewSparseFloatVectorFieldData(vecFieldName, NB)
} else {
vecFieldData = integration.NewBinaryVectorFieldData(vecFieldName, NB, dim)
}
@@ -193,7 +199,7 @@ func (s *TestGetVectorSuite) run() {

searchResp, err := s.Cluster.Proxy.Search(ctx, searchReq)
s.Require().NoError(err)
s.Require().Equal(searchResp.GetStatus().GetErrorCode(), commonpb.ErrorCode_Success)
s.Require().Equal(commonpb.ErrorCode_Success, searchResp.GetStatus().GetErrorCode())

result := searchResp.GetResults()
if s.pkType == schemapb.DataType_Int64 {
@@ -253,6 +259,21 @@ func (s *TestGetVectorSuite) run() {
// }
// }
} else if s.vecType == schemapb.DataType_BFloat16Vector {
} else if s.vecType == schemapb.DataType_SparseFloatVector {
s.Require().Len(result.GetFieldsData()[vecFieldIndex].GetVectors().GetSparseFloatVector().GetContents(), nq*topk)
rawData := vecFieldData.GetVectors().GetSparseFloatVector().GetContents()
resData := result.GetFieldsData()[vecFieldIndex].GetVectors().GetSparseFloatVector().GetContents()
if s.pkType == schemapb.DataType_Int64 {
for i, id := range result.GetIds().GetIntId().GetData() {
s.Require().Equal(*rawData[id], *resData[i])
}
} else {
for i, idStr := range result.GetIds().GetStrId().GetData() {
id, err := strconv.Atoi(idStr)
s.Require().NoError(err)
s.Require().Equal(*rawData[id], *resData[i])
}
}
} else {
s.Require().Len(result.GetFieldsData()[vecFieldIndex].GetVectors().GetBinaryVector(), nq*topk*dim/8)
rawData := vecFieldData.GetVectors().GetBinaryVector()
@@ -430,6 +451,46 @@ func (s *TestGetVectorSuite) TestGetVector_With_DB_Name() {
s.run()
}

// TestGetVector_Sparse_SPARSE_INVERTED_INDEX retrieves vectors from a sparse
// float vector field indexed with SPARSE_INVERTED_INDEX, using an Int64 PK.
func (s *TestGetVectorSuite) TestGetVector_Sparse_SPARSE_INVERTED_INDEX() {
	s.vecType = schemapb.DataType_SparseFloatVector
	s.pkType = schemapb.DataType_Int64
	s.indexType = integration.IndexSparseInvertedIndex
	s.metricType = metric.IP
	s.topK = 10
	s.nq = 10
	s.run()
}

// TestGetVector_Sparse_SPARSE_INVERTED_INDEX_StrPK is the VarChar-PK variant
// of the SPARSE_INVERTED_INDEX get-vector test.
func (s *TestGetVectorSuite) TestGetVector_Sparse_SPARSE_INVERTED_INDEX_StrPK() {
	s.vecType = schemapb.DataType_SparseFloatVector
	s.pkType = schemapb.DataType_VarChar
	s.indexType = integration.IndexSparseInvertedIndex
	s.metricType = metric.IP
	s.topK = 10
	s.nq = 10
	s.run()
}

// TestGetVector_Sparse_SPARSE_WAND retrieves vectors from a sparse float
// vector field indexed with SPARSE_WAND, using an Int64 PK.
func (s *TestGetVectorSuite) TestGetVector_Sparse_SPARSE_WAND() {
	s.vecType = schemapb.DataType_SparseFloatVector
	s.pkType = schemapb.DataType_Int64
	s.indexType = integration.IndexSparseWand
	s.metricType = metric.IP
	s.topK = 10
	s.nq = 10
	s.run()
}

// TestGetVector_Sparse_SPARSE_WAND_StrPK is the VarChar-PK variant of the
// SPARSE_WAND get-vector test.
func (s *TestGetVectorSuite) TestGetVector_Sparse_SPARSE_WAND_StrPK() {
	s.vecType = schemapb.DataType_SparseFloatVector
	s.pkType = schemapb.DataType_VarChar
	s.indexType = integration.IndexSparseWand
	s.metricType = metric.IP
	s.topK = 10
	s.nq = 10
	s.run()
}

//func (s *TestGetVectorSuite) TestGetVector_DISKANN_L2() {
// s.nq = 10
// s.topK = 10
46 changes: 38 additions & 8 deletions tests/integration/hellomilvus/hello_milvus_test.go
Original file line number Diff line number Diff line change
@@ -37,9 +37,13 @@ import (

type HelloMilvusSuite struct {
integration.MiniClusterSuite

indexType string
metricType string
vecType schemapb.DataType
}

func (s *HelloMilvusSuite) TestHelloMilvus() {
func (s *HelloMilvusSuite) run() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
c := s.Cluster
@@ -52,7 +56,7 @@ func (s *HelloMilvusSuite) TestHelloMilvus() {

collectionName := "TestHelloMilvus" + funcutil.GenRandomStr()

schema := integration.ConstructSchema(collectionName, dim, true)
schema := integration.ConstructSchemaOfVecDataType(collectionName, dim, true, s.vecType)
marshaledSchema, err := proto.Marshal(schema)
s.NoError(err)

@@ -74,7 +78,12 @@ func (s *HelloMilvusSuite) TestHelloMilvus() {
s.Equal(showCollectionsResp.GetStatus().GetErrorCode(), commonpb.ErrorCode_Success)
log.Info("ShowCollections result", zap.Any("showCollectionsResp", showCollectionsResp))

fVecColumn := integration.NewFloatVectorFieldData(integration.FloatVecField, rowNum, dim)
var fVecColumn *schemapb.FieldData
if s.vecType == schemapb.DataType_SparseFloatVector {
fVecColumn = integration.NewSparseFloatVectorFieldData(integration.SparseFloatVecField, rowNum)
} else {
fVecColumn = integration.NewFloatVectorFieldData(integration.SparseFloatVecField, rowNum, dim)
}
hashKeys := integration.GenerateHashKeys(rowNum)
insertResult, err := c.Proxy.Insert(ctx, &milvuspb.InsertRequest{
DbName: dbName,
@@ -110,17 +119,17 @@ func (s *HelloMilvusSuite) TestHelloMilvus() {
// create index
createIndexStatus, err := c.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{
CollectionName: collectionName,
FieldName: integration.FloatVecField,
FieldName: integration.SparseFloatVecField,
IndexName: "_default",
ExtraParams: integration.ConstructIndexParam(dim, integration.IndexFaissIvfFlat, metric.L2),
ExtraParams: integration.ConstructIndexParam(dim, s.indexType, s.metricType),
})
if createIndexStatus.GetErrorCode() != commonpb.ErrorCode_Success {
log.Warn("createIndexStatus fail reason", zap.String("reason", createIndexStatus.GetReason()))
}
s.NoError(err)
s.Equal(commonpb.ErrorCode_Success, createIndexStatus.GetErrorCode())

s.WaitForIndexBuilt(ctx, collectionName, integration.FloatVecField)
s.WaitForIndexBuilt(ctx, collectionName, integration.SparseFloatVecField)

// load
loadStatus, err := c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{
@@ -140,9 +149,9 @@ func (s *HelloMilvusSuite) TestHelloMilvus() {
topk := 10
roundDecimal := -1

params := integration.GetSearchParams(integration.IndexFaissIvfFlat, metric.L2)
params := integration.GetSearchParams(s.indexType, s.metricType)
searchReq := integration.ConstructSearchRequest("", collectionName, expr,
integration.FloatVecField, schemapb.DataType_FloatVector, nil, metric.L2, params, nq, dim, topk, roundDecimal)
integration.SparseFloatVecField, s.vecType, nil, s.metricType, params, nq, dim, topk, roundDecimal)

searchResult, err := c.Proxy.Search(ctx, searchReq)

@@ -155,6 +164,27 @@ func (s *HelloMilvusSuite) TestHelloMilvus() {
log.Info("TestHelloMilvus succeed")
}

// TestHelloMilvus_basic runs the hello-milvus flow with a dense float vector
// field, an IVF_FLAT index, and L2 as the metric.
func (s *HelloMilvusSuite) TestHelloMilvus_basic() {
	s.vecType = schemapb.DataType_FloatVector
	s.metricType = metric.L2
	s.indexType = integration.IndexFaissIvfFlat
	s.run()
}

// TestHelloMilvus_sparse_basic runs the hello-milvus flow with a sparse float
// vector field, a SPARSE_INVERTED_INDEX, and IP as the metric.
func (s *HelloMilvusSuite) TestHelloMilvus_sparse_basic() {
	s.vecType = schemapb.DataType_SparseFloatVector
	s.metricType = metric.IP
	s.indexType = integration.IndexSparseInvertedIndex
	s.run()
}

// TestHelloMilvus_sparse_wand_basic runs the hello-milvus flow with a sparse
// float vector field, a SPARSE_WAND index, and IP as the metric.
func (s *HelloMilvusSuite) TestHelloMilvus_sparse_wand_basic() {
	s.vecType = schemapb.DataType_SparseFloatVector
	s.metricType = metric.IP
	s.indexType = integration.IndexSparseWand
	s.run()
}

func TestHelloMilvus(t *testing.T) {
suite.Run(t, new(HelloMilvusSuite))
}
37 changes: 32 additions & 5 deletions tests/integration/hybridsearch/hybridsearch_test.go
Original file line number Diff line number Diff line change
@@ -42,6 +42,7 @@ func (s *HybridSearchSuite) TestHybridSearch() {
&schemapb.FieldSchema{Name: integration.Int64Field, DataType: schemapb.DataType_Int64, IsPrimaryKey: true, AutoID: true},
&schemapb.FieldSchema{Name: integration.FloatVecField, DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: common.DimKey, Value: "128"}}},
&schemapb.FieldSchema{Name: integration.BinVecField, DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: common.DimKey, Value: "128"}}},
&schemapb.FieldSchema{Name: integration.SparseFloatVecField, DataType: schemapb.DataType_SparseFloatVector},
)
marshaledSchema, err := proto.Marshal(schema)
s.NoError(err)
@@ -67,11 +68,12 @@ func (s *HybridSearchSuite) TestHybridSearch() {

fVecColumn := integration.NewFloatVectorFieldData(integration.FloatVecField, rowNum, dim)
bVecColumn := integration.NewBinaryVectorFieldData(integration.BinVecField, rowNum, dim)
sparseVecColumn := integration.NewSparseFloatVectorFieldData(integration.SparseFloatVecField, rowNum)
hashKeys := integration.GenerateHashKeys(rowNum)
insertResult, err := c.Proxy.Insert(ctx, &milvuspb.InsertRequest{
DbName: dbName,
CollectionName: collectionName,
FieldsData: []*schemapb.FieldData{fVecColumn, bVecColumn},
FieldsData: []*schemapb.FieldData{fVecColumn, bVecColumn, sparseVecColumn},
HashKeys: hashKeys,
NumRows: uint32(rowNum),
})
@@ -143,6 +145,28 @@ func (s *HybridSearchSuite) TestHybridSearch() {
}
s.WaitForIndexBuiltWithIndexName(ctx, collectionName, integration.BinVecField, "_default_binary")

// load with index on partial vector fields
loadStatus, err = c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{
DbName: dbName,
CollectionName: collectionName,
})
s.NoError(err)
s.Error(merr.Error(loadStatus))

// create index for sparse float vector
createIndexStatus, err = c.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{
CollectionName: collectionName,
FieldName: integration.SparseFloatVecField,
IndexName: "_default_sparse",
ExtraParams: integration.ConstructIndexParam(dim, integration.IndexSparseInvertedIndex, metric.IP),
})
s.NoError(err)
err = merr.Error(createIndexStatus)
if err != nil {
log.Warn("createIndexStatus fail reason", zap.Error(err))
}
s.WaitForIndexBuiltWithIndexName(ctx, collectionName, integration.SparseFloatVecField, "_default_sparse")

// load with index on all vector fields
loadStatus, err = c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{
DbName: dbName,
@@ -163,18 +187,21 @@ func (s *HybridSearchSuite) TestHybridSearch() {

fParams := integration.GetSearchParams(integration.IndexFaissIvfFlat, metric.L2)
bParams := integration.GetSearchParams(integration.IndexFaissBinIvfFlat, metric.L2)
sParams := integration.GetSearchParams(integration.IndexSparseInvertedIndex, metric.IP)
fSearchReq := integration.ConstructSearchRequest("", collectionName, expr,
integration.FloatVecField, schemapb.DataType_FloatVector, nil, metric.L2, fParams, nq, dim, topk, roundDecimal)

bSearchReq := integration.ConstructSearchRequest("", collectionName, expr,
integration.BinVecField, schemapb.DataType_BinaryVector, nil, metric.JACCARD, bParams, nq, dim, topk, roundDecimal)

sSearchReq := integration.ConstructSearchRequest("", collectionName, expr,
integration.SparseFloatVecField, schemapb.DataType_SparseFloatVector, nil, metric.IP, sParams, nq, dim, topk, roundDecimal)
hSearchReq := &milvuspb.HybridSearchRequest{
Base: nil,
DbName: dbName,
CollectionName: collectionName,
PartitionNames: nil,
Requests: []*milvuspb.SearchRequest{fSearchReq, bSearchReq},
Requests: []*milvuspb.SearchRequest{fSearchReq, bSearchReq, sSearchReq},
OutputFields: []string{integration.FloatVecField, integration.BinVecField},
}

@@ -196,7 +223,7 @@ func (s *HybridSearchSuite) TestHybridSearch() {

// weighted rank hybrid search
weightsParams := make(map[string][]float64)
weightsParams[proxy.WeightsParamsKey] = []float64{0.5, 0.2}
weightsParams[proxy.WeightsParamsKey] = []float64{0.5, 0.2, 0.1}
b, err = json.Marshal(weightsParams)
s.NoError(err)

@@ -206,8 +233,8 @@ func (s *HybridSearchSuite) TestHybridSearch() {
DbName: dbName,
CollectionName: collectionName,
PartitionNames: nil,
Requests: []*milvuspb.SearchRequest{fSearchReq, bSearchReq},
OutputFields: []string{integration.FloatVecField, integration.BinVecField},
Requests: []*milvuspb.SearchRequest{fSearchReq, bSearchReq, sSearchReq},
OutputFields: []string{integration.FloatVecField, integration.BinVecField, integration.SparseFloatVecField},
}
hSearchReq.RankParams = []*commonpb.KeyValuePair{
{Key: proxy.RankTypeKey, Value: "weighted"},
13 changes: 12 additions & 1 deletion tests/integration/indexstat/get_index_statistics_test.go
Original file line number Diff line number Diff line change
@@ -19,9 +19,13 @@ import (

type GetIndexStatisticsSuite struct {
integration.MiniClusterSuite

indexType string
metricType string
vecType schemapb.DataType
}

func (s *GetIndexStatisticsSuite) TestGetIndexStatistics() {
func (s *GetIndexStatisticsSuite) run() {
c := s.Cluster
ctx, cancel := context.WithCancel(c.GetContext())
defer cancel()
@@ -153,6 +157,13 @@ func (s *GetIndexStatisticsSuite) TestGetIndexStatistics() {
log.Info("TestGetIndexStatistics succeed")
}

// TestGetIndexStatistics_float runs the index-statistics flow for a dense
// float vector field with an IVF_FLAT index and the L2 metric.
func (s *GetIndexStatisticsSuite) TestGetIndexStatistics_float() {
	s.vecType = schemapb.DataType_FloatVector
	s.metricType = metric.L2
	s.indexType = integration.IndexFaissIvfFlat
	s.run()
}

func TestGetIndexStat(t *testing.T) {
suite.Run(t, new(GetIndexStatisticsSuite))
}
1 change: 1 addition & 0 deletions tests/integration/insert/insert_test.go
Original file line number Diff line number Diff line change
@@ -38,6 +38,7 @@ type InsertSuite struct {
integration.MiniClusterSuite
}

// insert request with duplicate field data should fail
func (s *InsertSuite) TestInsert() {
c := s.Cluster
ctx, cancel := context.WithCancel(c.GetContext())
548 changes: 548 additions & 0 deletions tests/integration/sparse/sparse_test.go

Large diffs are not rendered by default.

29 changes: 18 additions & 11 deletions tests/integration/util_index.go
Original file line number Diff line number Diff line change
@@ -30,17 +30,19 @@ import (
)

const (
IndexRaftIvfFlat = indexparamcheck.IndexRaftIvfFlat
IndexRaftIvfPQ = indexparamcheck.IndexRaftIvfPQ
IndexFaissIDMap = indexparamcheck.IndexFaissIDMap
IndexFaissIvfFlat = indexparamcheck.IndexFaissIvfFlat
IndexFaissIvfPQ = indexparamcheck.IndexFaissIvfPQ
IndexScaNN = indexparamcheck.IndexScaNN
IndexFaissIvfSQ8 = indexparamcheck.IndexFaissIvfSQ8
IndexFaissBinIDMap = indexparamcheck.IndexFaissBinIDMap
IndexFaissBinIvfFlat = indexparamcheck.IndexFaissBinIvfFlat
IndexHNSW = indexparamcheck.IndexHNSW
IndexDISKANN = indexparamcheck.IndexDISKANN
IndexRaftIvfFlat = indexparamcheck.IndexRaftIvfFlat
IndexRaftIvfPQ = indexparamcheck.IndexRaftIvfPQ
IndexFaissIDMap = indexparamcheck.IndexFaissIDMap
IndexFaissIvfFlat = indexparamcheck.IndexFaissIvfFlat
IndexFaissIvfPQ = indexparamcheck.IndexFaissIvfPQ
IndexScaNN = indexparamcheck.IndexScaNN
IndexFaissIvfSQ8 = indexparamcheck.IndexFaissIvfSQ8
IndexFaissBinIDMap = indexparamcheck.IndexFaissBinIDMap
IndexFaissBinIvfFlat = indexparamcheck.IndexFaissBinIvfFlat
IndexHNSW = indexparamcheck.IndexHNSW
IndexDISKANN = indexparamcheck.IndexDISKANN
IndexSparseInvertedIndex = indexparamcheck.IndexSparseInverted
IndexSparseWand = indexparamcheck.IndexSparseWand
)

func (s *MiniClusterSuite) WaitForIndexBuiltWithDB(ctx context.Context, dbName, collection, field string) {
@@ -166,6 +168,8 @@ func ConstructIndexParam(dim int, indexType string, metricType string) []*common
Key: "efConstruction",
Value: "200",
})
case IndexSparseInvertedIndex:
case IndexSparseWand:
case IndexDISKANN:
default:
panic(fmt.Sprintf("unimplemented index param for %s, please help to improve it", indexType))
@@ -184,6 +188,9 @@ func GetSearchParams(indexType string, metricType string) map[string]any {
params["ef"] = 200
case IndexDISKANN:
params["search_list"] = 20
case IndexSparseInvertedIndex:
case IndexSparseWand:
params["drop_ratio_search"] = 0.1
default:
panic(fmt.Sprintf("unimplemented search param for %s, please help to improve it", indexType))
}
21 changes: 21 additions & 0 deletions tests/integration/util_insert.go
Original file line number Diff line number Diff line change
@@ -24,6 +24,7 @@ import (

"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/util/testutils"
)

func (s *MiniClusterSuite) WaitForFlush(ctx context.Context, segIDs []int64, flushTs uint64, dbName, collectionName string) {
@@ -144,6 +145,22 @@ func NewBinaryVectorFieldData(fieldName string, numRows, dim int) *schemapb.Fiel
}
}

// NewSparseFloatVectorFieldData builds a FieldData message for the named field
// containing numRows generated sparse float vectors.
func NewSparseFloatVectorFieldData(fieldName string, numRows int) *schemapb.FieldData {
	contents := GenerateSparseFloatArray(numRows)
	vectors := &schemapb.VectorField{
		// Dim is taken from the generated array; sparse rows share this value.
		Dim: contents.Dim,
		Data: &schemapb.VectorField_SparseFloatVector{
			SparseFloatVector: contents,
		},
	}
	return &schemapb.FieldData{
		Type:      schemapb.DataType_SparseFloatVector,
		FieldName: fieldName,
		Field:     &schemapb.FieldData_Vectors{Vectors: vectors},
	}
}

func GenerateInt64Array(numRows int) []int64 {
ret := make([]int64, numRows)
for i := 0; i < numRows; i++ {
@@ -189,6 +206,10 @@ func GenerateFloat16Vectors(numRows, dim int) []byte {
return ret
}

// GenerateSparseFloatArray returns numRows generated sparse float vectors,
// delegating to the shared testutils generator.
func GenerateSparseFloatArray(numRows int) *schemapb.SparseFloatArray {
	return testutils.GenerateSparseFloatVectors(numRows)
}

// func GenerateBFloat16Vectors(numRows, dim int) []byte {
// total := numRows * dim * 2
// ret := make([]byte, total)
12 changes: 12 additions & 0 deletions tests/integration/util_query.go
Original file line number Diff line number Diff line change
@@ -128,6 +128,7 @@ func ConstructSearchRequest(
},
TravelTimestamp: 0,
GuaranteeTimestamp: 0,
Nq: int64(nq),
}
}

@@ -243,6 +244,17 @@ func constructPlaceholderGroup(nq, dim int, vectorType schemapb.DataType) *commo
// }
// values = append(values, ret)
// }
case schemapb.DataType_SparseFloatVector:
// for sparse, all query rows are encoded in a single byte array
values = make([][]byte, 0, 1)
placeholderType = commonpb.PlaceholderType_SparseFloatVector
sparseVecs := GenerateSparseFloatArray(nq)
bs, err := proto.Marshal(sparseVecs)
if err != nil {
panic(err)
}
values = append(values, bs)

default:
panic("invalid vector data type")
}
71 changes: 58 additions & 13 deletions tests/integration/util_schema.go
Original file line number Diff line number Diff line change
@@ -25,19 +25,20 @@ import (
)

const (
BoolField = "boolField"
Int8Field = "int8Field"
Int16Field = "int16Field"
Int32Field = "int32Field"
Int64Field = "int64Field"
FloatField = "floatField"
DoubleField = "doubleField"
VarCharField = "varCharField"
JSONField = "jsonField"
FloatVecField = "floatVecField"
BinVecField = "binVecField"
Float16VecField = "float16VecField"
BFloat16VecField = "bfloat16VecField"
BoolField = "boolField"
Int8Field = "int8Field"
Int16Field = "int16Field"
Int32Field = "int32Field"
Int64Field = "int64Field"
FloatField = "floatField"
DoubleField = "doubleField"
VarCharField = "varCharField"
JSONField = "jsonField"
FloatVecField = "floatVecField"
BinVecField = "binVecField"
Float16VecField = "float16VecField"
BFloat16VecField = "bfloat16VecField"
SparseFloatVecField = "sparseFloatVecField"
)

func ConstructSchema(collection string, dim int, autoID bool, fields ...*schemapb.FieldSchema) *schemapb.CollectionSchema {
@@ -81,3 +82,47 @@ func ConstructSchema(collection string, dim int, autoID bool, fields ...*schemap
Fields: []*schemapb.FieldSchema{pk, fVec},
}
}

// ConstructSchemaOfVecDataType builds a two-field collection schema: an Int64
// primary key (field ID 100) plus a single vector field (field ID 101) of the
// requested dataType.
//
// For DataType_FloatVector the vector field gets a "dim" type param; for
// DataType_SparseFloatVector no type params are set, since the visible sparse
// code paths configure no dimension for sparse fields. Any other data type
// panics, so callers must pass one of the two supported vector types.
//
// NOTE: parameter renamed data_type -> dataType for Go MixedCaps convention;
// Go callers pass arguments positionally, so this is call-compatible.
func ConstructSchemaOfVecDataType(collection string, dim int, autoID bool, dataType schemapb.DataType) *schemapb.CollectionSchema {
	pk := &schemapb.FieldSchema{
		FieldID:      100,
		Name:         Int64Field,
		IsPrimaryKey: true,
		Description:  "",
		DataType:     schemapb.DataType_Int64,
		TypeParams:   nil,
		IndexParams:  nil,
		AutoID:       autoID,
	}
	var name string
	var typeParams []*commonpb.KeyValuePair
	switch dataType {
	case schemapb.DataType_FloatVector:
		name = FloatVecField
		typeParams = []*commonpb.KeyValuePair{
			{
				Key:   common.DimKey,
				Value: fmt.Sprintf("%d", dim),
			},
		}
	case schemapb.DataType_SparseFloatVector:
		name = SparseFloatVecField
		typeParams = nil // sparse float vectors carry no fixed dim param
	default:
		panic("unsupported data type")
	}
	fVec := &schemapb.FieldSchema{
		FieldID:      101,
		Name:         name,
		IsPrimaryKey: false,
		Description:  "",
		DataType:     dataType,
		TypeParams:   typeParams,
		IndexParams:  nil,
	}
	return &schemapb.CollectionSchema{
		Name:   collection,
		AutoID: autoID,
		Fields: []*schemapb.FieldSchema{pk, fVec},
	}
}

Unchanged files with check annotations Beta

err = eventWriter.AddBFloat16VectorToPayload(singleData.(*BFloat16VectorFieldData).Data, singleData.(*BFloat16VectorFieldData).Dim)
writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", singleData.(*BFloat16VectorFieldData).GetMemorySize()))
case schemapb.DataType_SparseFloatVector:
err = eventWriter.AddSparseFloatVectorToPayload(singleData.(*SparseFloatVectorFieldData))

Check failure on line 391 in internal/storage/data_codec.go

GitHub Actions / Code Checker MacOS 12

impossible type assertion: singleData.(*SparseFloatVectorFieldData)
if err != nil {
eventWriter.Close()
writer.Close()
return nil, err
}
writer.AddExtra(originalSizeKey, fmt.Sprintf("%v", singleData.(*SparseFloatVectorFieldData).GetMemorySize()))

Check failure on line 397 in internal/storage/data_codec.go

GitHub Actions / Code Checker MacOS 12

impossible type assertion: singleData.(*SparseFloatVectorFieldData)
default:
return nil, fmt.Errorf("undefined data type %d", field.DataType)
}
return InvalidUniqueID, InvalidUniqueID, InvalidUniqueID, err
}
if insertData.Data[fieldID] == nil {
insertData.Data[fieldID] = &SparseFloatVectorFieldData{}

Check failure on line 792 in internal/storage/data_codec.go

GitHub Actions / Code Checker MacOS 12

cannot use &SparseFloatVectorFieldData{} (value of type *SparseFloatVectorFieldData) as FieldData value in assignment: *SparseFloatVectorFieldData does not implement FieldData (missing method GetRowSize)
}
vec := insertData.Data[fieldID].(*SparseFloatVectorFieldData)

Check failure on line 794 in internal/storage/data_codec.go

GitHub Actions / Code Checker MacOS 12

impossible type assertion: insertData.Data[fieldID].(*SparseFloatVectorFieldData)
vec.AppendAllRows(sparseData)
totalLength += sparseData.RowNum()
insertData.Data[fieldID] = vec

Check failure on line 798 in internal/storage/data_codec.go

GitHub Actions / Code Checker MacOS 12

cannot use vec (variable of type *SparseFloatVectorFieldData) as FieldData value in assignment: *SparseFloatVectorFieldData does not implement FieldData (missing method GetRowSize)
default:
eventReader.Close()
data := singleData.(*JSONFieldData).Data
data[i], data[j] = data[j], data[i]
case schemapb.DataType_SparseFloatVector:
field_data := singleData.(*SparseFloatVectorFieldData)

Check failure on line 118 in internal/storage/data_sorter.go

GitHub Actions / Code Checker MacOS 12

impossible type assertion: singleData.(*SparseFloatVectorFieldData)
field_data.Contents[i], field_data.Contents[j] = field_data.Contents[j], field_data.Contents[i]
default:
errMsg := "undefined data type " + string(field.DataType)
Dim: dim,
}, nil
case schemapb.DataType_SparseFloatVector:
return &SparseFloatVectorFieldData{}, nil

Check failure on line 187 in internal/storage/insert_data.go

GitHub Actions / Code Checker MacOS 12

cannot use &SparseFloatVectorFieldData{} (value of type *SparseFloatVectorFieldData) as FieldData value in return statement: *SparseFloatVectorFieldData does not implement FieldData (missing method GetRowSize)
case schemapb.DataType_Bool:
return &BoolFieldData{
Data: make([]bool, 0),
}
case schemapb.DataType_SparseFloatVector:
fieldData = &SparseFloatVectorFieldData{

Check failure on line 562 in internal/storage/utils.go

GitHub Actions / Code Checker MacOS 12

cannot use &SparseFloatVectorFieldData{…} (value of type *SparseFloatVectorFieldData) as FieldData value in assignment: *SparseFloatVectorFieldData does not implement FieldData (missing method GetRowSize)
SparseFloatArray: *srcFields[field.FieldID].GetVectors().GetSparseFloatVector(),
}
func mergeSparseFloatVectorField(data *InsertData, fid FieldID, field *SparseFloatVectorFieldData) {
if _, ok := data.Data[fid]; !ok {
data.Data[fid] = &SparseFloatVectorFieldData{}

Check failure on line 835 in internal/storage/utils.go

GitHub Actions / Code Checker MacOS 12

cannot use &SparseFloatVectorFieldData{} (value of type *SparseFloatVectorFieldData) as FieldData value in assignment: *SparseFloatVectorFieldData does not implement FieldData (missing method GetRowSize)
}
fieldData := data.Data[fid].(*SparseFloatVectorFieldData)

Check failure on line 837 in internal/storage/utils.go

GitHub Actions / Code Checker MacOS 12

impossible type assertion: data.Data[fid].(*SparseFloatVectorFieldData)
fieldData.AppendAllRows(field)
}