From a2fc6321613e789ecbd7d62e5ed002f0258a18c0 Mon Sep 17 00:00:00 2001 From: Cai Yudong Date: Tue, 24 Dec 2024 11:28:35 +0800 Subject: [PATCH] feat: Support restful & go sdk for Int8Vector Signed-off-by: Cai Yudong --- client/column/columns.go | 24 +++++++ client/column/conversion.go | 14 +++- client/column/vector.go | 33 +++++++++ client/column/vector_test.go | 55 +++++++++++++++ client/entity/field.go | 4 ++ client/entity/vectors.go | 22 +++++- client/entity/vectors_test.go | 11 +++ client/go.mod | 6 +- client/go.sum | 8 +-- client/milvusclient/write_options.go | 10 ++- client/milvusclient/write_test.go | 48 +++++++++++-- client/row/data.go | 8 +++ client/row/data_test.go | 14 +++- client/row/schema.go | 2 + .../proxy/httpserver/handler_v1_test.go | 24 ++++--- .../proxy/httpserver/handler_v2_test.go | 27 +++++--- .../distributed/proxy/httpserver/utils.go | 69 +++++++++++++++++++ .../proxy/httpserver/utils_test.go | 63 ++++++++++++++++- .../proxy/httpserver/wrap_request.go | 31 +++++++++ .../proxy/httpserver/wrap_request_test.go | 57 +++++++++++++++ tests/go_client/go.mod | 4 +- tests/go_client/go.sum | 8 +-- 22 files changed, 500 insertions(+), 42 deletions(-) diff --git a/client/column/columns.go b/client/column/columns.go index 3e04557d95d51..79669d8f279dd 100644 --- a/client/column/columns.go +++ b/client/column/columns.go @@ -284,6 +284,7 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { vector = append(vector, v) } return NewColumnBFloat16Vector(fd.GetFieldName(), dim, vector), nil + case schemapb.DataType_SparseFloatVector: sparseVectors := fd.GetVectors().GetSparseFloatVector() if sparseVectors == nil { @@ -303,6 +304,29 @@ func FieldDataColumn(fd *schemapb.FieldData, begin, end int) (Column, error) { vectors = append(vectors, vector) } return NewColumnSparseVectors(fd.GetFieldName(), vectors), nil + + case schemapb.DataType_Int8Vector: + vectors := fd.GetVectors() + x, ok := 
vectors.GetData().(*schemapb.VectorField_Int8Vector) + if !ok { + return nil, errFieldDataTypeNotMatch + } + data := x.Int8Vector + dim := int(vectors.GetDim()) + if end < 0 { + end = len(data) / dim + } + vector := make([][]int8, 0, end-begin) // shall not have remnant + // TODO caiyd: is there a better way to convert []byte to []int8? + for i := begin; i < end; i++ { + v := make([]int8, dim) + for j := 0; j < dim; j++ { + v[j] = int8(data[i*dim+j]) + } + vector = append(vector, v) + } + return NewColumnInt8Vector(fd.GetFieldName(), dim, vector), nil + default: return nil, fmt.Errorf("unsupported data type %s", fd.GetType()) } diff --git a/client/column/conversion.go b/client/column/conversion.go index 0eedccadbecdd..7fac3fda29613 100644 --- a/client/column/conversion.go +++ b/client/column/conversion.go @@ -126,7 +126,8 @@ func values2FieldData[T any](values []T, fieldType entity.FieldType, dim int) *s entity.FieldTypeFloat16Vector, entity.FieldTypeBFloat16Vector, entity.FieldTypeBinaryVector, - entity.FieldTypeSparseVector: + entity.FieldTypeSparseVector, + entity.FieldTypeInt8Vector: fd.Field = &schemapb.FieldData_Vectors{ Vectors: values2Vectors(values, fieldType, int64(dim)), } @@ -265,8 +266,17 @@ func values2Vectors[T any](values []T, fieldType entity.FieldType, dim int64) *s Contents: data, }, } + case entity.FieldTypeInt8Vector: + var vectors []entity.Int8Vector + vectors, ok = any(values).([]entity.Int8Vector) + data := make([]byte, 0, int64(len(vectors))*dim) + for _, vector := range vectors { + data = append(data, vector.Serialize()...) 
+ } + vectorField.Data = &schemapb.VectorField_Int8Vector{ + Int8Vector: data, + } } - if !ok { panic(fmt.Sprintf("unexpected values type(%T) of fieldType %v", values, fieldType)) } diff --git a/client/column/vector.go b/client/column/vector.go index e1f7882cbb730..3f912cdfd18db 100644 --- a/client/column/vector.go +++ b/client/column/vector.go @@ -213,3 +213,36 @@ func (c *ColumnBFloat16Vector) Slice(start, end int) Column { vectorBase: c.vectorBase.slice(start, end), } } + +/* int8 vector */ + +type ColumnInt8Vector struct { + *vectorBase[entity.Int8Vector] +} + +func NewColumnInt8Vector(fieldName string, dim int, data [][]int8) *ColumnInt8Vector { + vectors := lo.Map(data, func(row []int8, _ int) entity.Int8Vector { return entity.Int8Vector(row) }) + return &ColumnInt8Vector{ + vectorBase: newVectorBase(fieldName, dim, vectors, entity.FieldTypeInt8Vector), + } +} + +// AppendValue appends vector value into values. +// override default type constraints, add `[]int8` conversion +func (c *ColumnInt8Vector) AppendValue(i interface{}) error { + switch vector := i.(type) { + case entity.Int8Vector: + c.values = append(c.values, vector) + case []int8: + c.values = append(c.values, vector) + default: + return errors.Newf("unexpected append value type %T, field type %v", vector, c.fieldType) + } + return nil +} + +func (c *ColumnInt8Vector) Slice(start, end int) Column { + return &ColumnInt8Vector{ + vectorBase: c.vectorBase.slice(start, end), + } +} diff --git a/client/column/vector_test.go b/client/column/vector_test.go index 70acebb12119d..31c63c3dbbc0d 100644 --- a/client/column/vector_test.go +++ b/client/column/vector_test.go @@ -26,6 +26,7 @@ import ( "github.com/stretchr/testify/suite" "github.com/milvus-io/milvus/client/v2/entity" + "github.com/milvus-io/milvus/pkg/util/typeutil" ) type VectorSuite struct { @@ -187,6 +188,38 @@ func (s *VectorSuite) TestBasic() { } } }) + + s.Run("int8_vector", func() { + name := fmt.Sprintf("field_%d", rand.Intn(1000)) + n := 3 
+ dim := rand.Intn(10) + 2 + data := make([][]int8, 0, n) + for i := 0; i < n; i++ { + row := lo.RepeatBy(dim, func(i int) int8 { + return int8(rand.Intn(256) - 128) + }) + data = append(data, row) + } + column := NewColumnInt8Vector(name, dim, data) + s.Equal(entity.FieldTypeInt8Vector, column.Type()) + s.Equal(name, column.Name()) + s.Equal(lo.Map(data, func(row []int8, _ int) entity.Int8Vector { return entity.Int8Vector(row) }), column.Data()) + s.Equal(dim, column.Dim()) + + fd := column.FieldData() + s.Equal(name, fd.GetFieldName()) + s.Equal(typeutil.Int8ArrayToBytes(lo.Flatten(data)), fd.GetVectors().GetInt8Vector()) + + result, err := FieldDataColumn(fd, 0, -1) + s.NoError(err) + parsed, ok := result.(*ColumnInt8Vector) + if s.True(ok) { + s.Equal(entity.FieldTypeInt8Vector, parsed.Type()) + s.Equal(name, parsed.Name()) + s.Equal(lo.Map(data, func(row []int8, _ int) entity.Int8Vector { return entity.Int8Vector(row) }), parsed.Data()) + s.Equal(dim, parsed.Dim()) + } + }) } func (s *VectorSuite) TestSlice() { @@ -277,6 +310,28 @@ func (s *VectorSuite) TestSlice() { s.Equal(lo.Map(data[:l], func(row []byte, _ int) entity.BFloat16Vector { return entity.BFloat16Vector(row) }), slicedColumn.Data()) } }) + + s.Run("int8_vector", func() { + name := fmt.Sprintf("field_%d", rand.Intn(1000)) + n := 100 + dim := rand.Intn(10) + 2 + data := make([][]int8, 0, n) + for i := 0; i < n; i++ { + row := lo.RepeatBy(dim, func(i int) int8 { + return int8(rand.Intn(256) - 128) + }) + data = append(data, row) + } + column := NewColumnInt8Vector(name, dim, data) + + l := rand.Intn(n) + sliced := column.Slice(0, l) + slicedColumn, ok := sliced.(*ColumnInt8Vector) + if s.True(ok) { + s.Equal(dim, slicedColumn.Dim()) + s.Equal(lo.Map(data[:l], func(row []int8, _ int) entity.Int8Vector { return entity.Int8Vector(row) }), slicedColumn.Data()) + } + }) } func TestVectors(t *testing.T) { diff --git a/client/entity/field.go b/client/entity/field.go index fa910d8e5c17a..9ef956ff8b32f 
100644 --- a/client/entity/field.go +++ b/client/entity/field.go @@ -62,6 +62,8 @@ func (t FieldType) Name() string { return "Float16Vector" case FieldTypeBFloat16Vector: return "BFloat16Vector" + case FieldTypeInt8Vector: + return "Int8Vector" default: return "undefined" } @@ -100,6 +102,8 @@ func (t FieldType) String() string { return "[]byte" case FieldTypeBFloat16Vector: return "[]byte" + case FieldTypeInt8Vector: + return "[]int8" default: return "undefined" } diff --git a/client/entity/vectors.go b/client/entity/vectors.go index b8e101bc49c17..0b7bf002c4d27 100644 --- a/client/entity/vectors.go +++ b/client/entity/vectors.go @@ -56,7 +56,7 @@ func (fv FloatVector) ToBFloat16Vector() BFloat16Vector { return typeutil.Float32ArrayToBFloat16Bytes(fv) } -// FloatVector float32 vector wrapper. +// Float16Vector float16 vector wrapper. type Float16Vector []byte // Dim returns vector dimension. @@ -77,7 +77,7 @@ func (fv Float16Vector) ToFloat32Vector() FloatVector { return typeutil.Float16BytesToFloat32Vector(fv) } -// FloatVector float32 vector wrapper. +// BFloat16Vector bfloat16 vector wrapper. type BFloat16Vector []byte // Dim returns vector dimension. @@ -131,3 +131,21 @@ func (t Text) FieldType() FieldType { func (t Text) Serialize() []byte { return []byte(t) } + +// Int8Vector []int8 vector wrapper +type Int8Vector []int8 + +// Dim returns vector dimension +func (iv Int8Vector) Dim() int { + return len(iv) +} + +// Serialize returns the vector as bytes +func (iv Int8Vector) Serialize() []byte { + return typeutil.Int8ArrayToBytes(iv) +} + +// FieldType returns corresponding field type. 
+func (iv Int8Vector) FieldType() FieldType { + return FieldTypeInt8Vector +} diff --git a/client/entity/vectors_test.go b/client/entity/vectors_test.go index 861ab56563336..0c6eabbd2e5b3 100644 --- a/client/entity/vectors_test.go +++ b/client/entity/vectors_test.go @@ -92,4 +92,15 @@ func TestVectors(t *testing.T) { assert.Equal(t, dim*8, bv.Dim()) assert.ElementsMatch(t, raw, bv.Serialize()) }) + + t.Run("test int8 vector", func(t *testing.T) { + raw := make([]int8, dim) + for i := 0; i < dim; i++ { + raw[i] = int8(rand.Intn(256) - 128) + } + + iv := Int8Vector(raw) + assert.Equal(t, dim, iv.Dim()) + assert.Equal(t, dim, len(iv.Serialize())) + }) } diff --git a/client/go.mod b/client/go.mod index 07492c7b2bd9b..4a50c97f1ae50 100644 --- a/client/go.mod +++ b/client/go.mod @@ -6,14 +6,13 @@ require ( github.com/blang/semver/v4 v4.0.0 github.com/cockroachdb/errors v1.9.1 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 - github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b - github.com/milvus-io/milvus/pkg v0.0.2-0.20241126032235-cb6542339e84 + github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f + github.com/milvus-io/milvus/pkg v0.0.2-0.20250115044500-f5234c3c11a3 github.com/quasilyte/go-ruleguard/dsl v0.3.22 github.com/samber/lo v1.27.0 github.com/stretchr/testify v1.9.0 github.com/tidwall/gjson v1.17.1 go.uber.org/atomic v1.10.0 - golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 google.golang.org/grpc v1.65.0 google.golang.org/protobuf v1.34.2 ) @@ -99,6 +98,7 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/crypto v0.31.0 // indirect + golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 // indirect golang.org/x/net v0.33.0 // indirect golang.org/x/sync v0.10.0 // indirect golang.org/x/sys v0.28.0 // indirect diff --git a/client/go.sum b/client/go.sum index d4d7e5a1601a5..70171495c36d1 100644 --- a/client/go.sum +++ b/client/go.sum @@ 
-318,10 +318,10 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfr github.com/mediocregopher/radix/v3 v3.4.2/go.mod h1:8FL3F6UQRXHXIBSPUs5h0RybMF8i4n7wVopoX3x7Bv8= github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b h1:iPPhnFx+s7FF53UeWj7A4EYhPRMFPL6mHqyQw7qRjeQ= -github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs= -github.com/milvus-io/milvus/pkg v0.0.2-0.20241126032235-cb6542339e84 h1:EAFxmxUVp5yYFDCrX1MQoSxkTO+ycy8NXEqEDEB3cRM= -github.com/milvus-io/milvus/pkg v0.0.2-0.20241126032235-cb6542339e84/go.mod h1:RATa0GS4jhkPpsYOvQ/QvcNz8rd+TlRPDiSyXQnMMxs= +github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f h1:So6RKU5wqP/8EaKogicJP8gZ2SrzzS/JprusBaE3RKc= +github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs= +github.com/milvus-io/milvus/pkg v0.0.2-0.20250115044500-f5234c3c11a3 h1:WF9BkNk1XjLtwMbaB/cniRBMMNLnqG6e+AUbK8DciHQ= +github.com/milvus-io/milvus/pkg v0.0.2-0.20250115044500-f5234c3c11a3/go.mod h1:nxnHkDFB3jh27nTQJBaC4azAQO8chT03DkmoiZ5086s= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= diff --git a/client/milvusclient/write_options.go b/client/milvusclient/write_options.go index 0a47fcef8833c..6d960a7cf4c87 100644 --- a/client/milvusclient/write_options.go +++ b/client/milvusclient/write_options.go @@ -97,7 +97,8 @@ func (opt *columnBasedDataOption) processInsertColumns(colSchema 
*entity.Schema, return nil, 0, fmt.Errorf("param column %s has type %v but collection field definition is %v", col.Name(), col.Type(), field.DataType) } if field.DataType == entity.FieldTypeFloatVector || field.DataType == entity.FieldTypeBinaryVector || - field.DataType == entity.FieldTypeFloat16Vector || field.DataType == entity.FieldTypeBFloat16Vector { + field.DataType == entity.FieldTypeFloat16Vector || field.DataType == entity.FieldTypeBFloat16Vector || + field.DataType == entity.FieldTypeInt8Vector { dim := 0 switch column := col.(type) { case *column.ColumnFloatVector: @@ -108,6 +109,8 @@ func (opt *columnBasedDataOption) processInsertColumns(colSchema *entity.Schema, dim = column.Dim() case *column.ColumnBFloat16Vector: dim = column.Dim() + case *column.ColumnInt8Vector: + dim = column.Dim() } if fmt.Sprintf("%d", dim) != field.TypeParams[entity.TypeParamDim] { return nil, 0, fmt.Errorf("params column %s vector dim %d not match collection definition, which has dim of %s", field.Name, dim, field.TypeParams[entity.TypeParamDim]) @@ -234,6 +237,11 @@ func (opt *columnBasedDataOption) WithBinaryVectorColumn(colName string, dim int return opt.WithColumns(column) } +func (opt *columnBasedDataOption) WithInt8VectorColumn(colName string, dim int, data [][]int8) *columnBasedDataOption { + column := column.NewColumnInt8Vector(colName, dim, data) + return opt.WithColumns(column) +} + func (opt *columnBasedDataOption) WithPartition(partitionName string) *columnBasedDataOption { opt.partitionName = partitionName return opt diff --git a/client/milvusclient/write_test.go b/client/milvusclient/write_test.go index 3e47d068d83fe..85fcf18726626 100644 --- a/client/milvusclient/write_test.go +++ b/client/milvusclient/write_test.go @@ -19,6 +19,7 @@ package milvusclient import ( "context" "fmt" + "math" "math/rand" "testing" @@ -45,13 +46,15 @@ func (s *WriteSuite) SetupSuite() { 
WithField(entity.NewField().WithName("id").WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)). WithField(entity.NewField().WithName("vector").WithDataType(entity.FieldTypeFloatVector).WithDim(128)). WithField(entity.NewField().WithName("fp16_vector").WithDataType(entity.FieldTypeFloat16Vector).WithDim(128)). - WithField(entity.NewField().WithName("bf16_vector").WithDataType(entity.FieldTypeBFloat16Vector).WithDim(128)) + WithField(entity.NewField().WithName("bf16_vector").WithDataType(entity.FieldTypeBFloat16Vector).WithDim(128)). + WithField(entity.NewField().WithName("int8_vector").WithDataType(entity.FieldTypeInt8Vector).WithDim(128)) s.schemaDyn = entity.NewSchema().WithDynamicFieldEnabled(true). WithField(entity.NewField().WithName("id").WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)). WithField(entity.NewField().WithName("vector").WithDataType(entity.FieldTypeFloatVector).WithDim(128)). WithField(entity.NewField().WithName("fp16_vector").WithDataType(entity.FieldTypeFloat16Vector).WithDim(128)). - WithField(entity.NewField().WithName("bf16_vector").WithDataType(entity.FieldTypeBFloat16Vector).WithDim(128)) + WithField(entity.NewField().WithName("bf16_vector").WithDataType(entity.FieldTypeBFloat16Vector).WithDim(128)). 
+ WithField(entity.NewField().WithName("int8_vector").WithDataType(entity.FieldTypeInt8Vector).WithDim(128)) } func (s *WriteSuite) TestInsert() { @@ -66,7 +69,7 @@ func (s *WriteSuite) TestInsert() { s.mock.EXPECT().Insert(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, ir *milvuspb.InsertRequest) (*milvuspb.MutationResult, error) { s.Equal(collName, ir.GetCollectionName()) s.Equal(partName, ir.GetPartitionName()) - s.Require().Len(ir.GetFieldsData(), 4) + s.Require().Len(ir.GetFieldsData(), 5) s.EqualValues(3, ir.GetNumRows()) return &milvuspb.MutationResult{ Status: merr.Success(), @@ -91,6 +94,9 @@ func (s *WriteSuite) TestInsert() { WithBFloat16VectorColumn("bf16_vector", 128, lo.RepeatBy(3, func(i int) []float32 { return lo.RepeatBy(128, func(i int) float32 { return rand.Float32() }) })). + WithInt8VectorColumn("int8_vector", 128, lo.RepeatBy(3, func(i int) []int8 { + return lo.RepeatBy(128, func(i int) int8 { return int8(rand.Intn(math.MaxUint8) - 128) }) + })). WithInt64Column("id", []int64{1, 2, 3}).WithPartition(partName)) s.NoError(err) s.EqualValues(3, result.InsertCount) @@ -104,7 +110,7 @@ func (s *WriteSuite) TestInsert() { s.mock.EXPECT().Insert(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, ir *milvuspb.InsertRequest) (*milvuspb.MutationResult, error) { s.Equal(collName, ir.GetCollectionName()) s.Equal(partName, ir.GetPartitionName()) - s.Require().Len(ir.GetFieldsData(), 5) + s.Require().Len(ir.GetFieldsData(), 6) s.EqualValues(3, ir.GetNumRows()) return &milvuspb.MutationResult{ Status: merr.Success(), @@ -129,6 +135,9 @@ func (s *WriteSuite) TestInsert() { WithBFloat16VectorColumn("bf16_vector", 128, lo.RepeatBy(3, func(i int) []float32 { return lo.RepeatBy(128, func(i int) float32 { return rand.Float32() }) })). + WithInt8VectorColumn("int8_vector", 128, lo.RepeatBy(3, func(i int) []int8 { + return lo.RepeatBy(128, func(i int) int8 { return int8(rand.Intn(math.MaxUint8) - 128) }) + })). 
WithVarcharColumn("extra", []string{"a", "b", "c"}). WithInt64Column("id", []int64{1, 2, 3}).WithPartition(partName)) s.NoError(err) @@ -165,6 +174,9 @@ func (s *WriteSuite) TestInsert() { })). WithBFloat16VectorColumn("bf16_vector", 128, lo.RepeatBy(3, func(i int) []float32 { return lo.RepeatBy(128, func(i int) float32 { return rand.Float32() }) + })). + WithInt8VectorColumn("int8_vector", 128, lo.RepeatBy(3, func(i int) []int8 { + return lo.RepeatBy(128, func(i int) int8 { return int8(rand.Intn(math.MaxUint8) - 128) }) })), }, { @@ -179,6 +191,9 @@ func (s *WriteSuite) TestInsert() { })). WithBFloat16VectorColumn("bf16_vector", 128, lo.RepeatBy(3, func(i int) []float32 { return lo.RepeatBy(128, func(i int) float32 { return rand.Float32() }) + })). + WithInt8VectorColumn("int8_vector", 128, lo.RepeatBy(3, func(i int) []int8 { + return lo.RepeatBy(128, func(i int) int8 { return int8(rand.Intn(math.MaxUint8) - 128) }) })), }, } @@ -207,6 +222,9 @@ func (s *WriteSuite) TestInsert() { WithBFloat16VectorColumn("bf16_vector", 128, lo.RepeatBy(3, func(i int) []float32 { return lo.RepeatBy(128, func(i int) float32 { return rand.Float32() }) })). + WithInt8VectorColumn("int8_vector", 128, lo.RepeatBy(3, func(i int) []int8 { + return lo.RepeatBy(128, func(i int) int8 { return int8(rand.Intn(math.MaxUint8) - 128) }) + })). 
WithInt64Column("id", []int64{1, 2, 3})) s.Error(err) }) @@ -224,7 +242,7 @@ func (s *WriteSuite) TestUpsert() { s.mock.EXPECT().Upsert(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, ur *milvuspb.UpsertRequest) (*milvuspb.MutationResult, error) { s.Equal(collName, ur.GetCollectionName()) s.Equal(partName, ur.GetPartitionName()) - s.Require().Len(ur.GetFieldsData(), 4) + s.Require().Len(ur.GetFieldsData(), 5) s.EqualValues(3, ur.GetNumRows()) return &milvuspb.MutationResult{ Status: merr.Success(), @@ -249,6 +267,9 @@ func (s *WriteSuite) TestUpsert() { WithBFloat16VectorColumn("bf16_vector", 128, lo.RepeatBy(3, func(i int) []float32 { return lo.RepeatBy(128, func(i int) float32 { return rand.Float32() }) })). + WithInt8VectorColumn("int8_vector", 128, lo.RepeatBy(3, func(i int) []int8 { + return lo.RepeatBy(128, func(i int) int8 { return int8(rand.Intn(math.MaxUint8) - 128) }) + })). WithInt64Column("id", []int64{1, 2, 3}).WithPartition(partName)) s.NoError(err) s.EqualValues(3, result.UpsertCount) @@ -262,7 +283,7 @@ func (s *WriteSuite) TestUpsert() { s.mock.EXPECT().Upsert(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, ur *milvuspb.UpsertRequest) (*milvuspb.MutationResult, error) { s.Equal(collName, ur.GetCollectionName()) s.Equal(partName, ur.GetPartitionName()) - s.Require().Len(ur.GetFieldsData(), 5) + s.Require().Len(ur.GetFieldsData(), 6) s.EqualValues(3, ur.GetNumRows()) return &milvuspb.MutationResult{ Status: merr.Success(), @@ -287,6 +308,9 @@ func (s *WriteSuite) TestUpsert() { WithBFloat16VectorColumn("bf16_vector", 128, lo.RepeatBy(3, func(i int) []float32 { return lo.RepeatBy(128, func(i int) float32 { return rand.Float32() }) })). + WithInt8VectorColumn("int8_vector", 128, lo.RepeatBy(3, func(i int) []int8 { + return lo.RepeatBy(128, func(i int) int8 { return int8(rand.Intn(math.MaxUint8) - 128) }) + })). WithVarcharColumn("extra", []string{"a", "b", "c"}). 
WithInt64Column("id", []int64{1, 2, 3}).WithPartition(partName)) s.NoError(err) @@ -314,6 +338,9 @@ func (s *WriteSuite) TestUpsert() { })). WithBFloat16VectorColumn("bf16_vector", 128, lo.RepeatBy(3, func(i int) []float32 { return lo.RepeatBy(128, func(i int) float32 { return rand.Float32() }) + })). + WithInt8VectorColumn("int8_vector", 128, lo.RepeatBy(3, func(i int) []int8 { + return lo.RepeatBy(128, func(i int) int8 { return int8(rand.Intn(math.MaxUint8) - 128) }) })), }, { @@ -329,6 +356,9 @@ func (s *WriteSuite) TestUpsert() { })). WithBFloat16VectorColumn("bf16_vector", 128, lo.RepeatBy(3, func(i int) []float32 { return lo.RepeatBy(128, func(i int) float32 { return rand.Float32() }) + })). + WithInt8VectorColumn("int8_vector", 128, lo.RepeatBy(3, func(i int) []int8 { + return lo.RepeatBy(128, func(i int) int8 { return int8(rand.Intn(math.MaxUint8) - 128) }) })), }, { @@ -343,6 +373,9 @@ func (s *WriteSuite) TestUpsert() { })). WithBFloat16VectorColumn("bf16_vector", 128, lo.RepeatBy(3, func(i int) []float32 { return lo.RepeatBy(128, func(i int) float32 { return rand.Float32() }) + })). + WithInt8VectorColumn("int8_vector", 128, lo.RepeatBy(3, func(i int) []int8 { + return lo.RepeatBy(128, func(i int) int8 { return int8(rand.Intn(math.MaxUint8) - 128) }) })), }, } @@ -371,6 +404,9 @@ func (s *WriteSuite) TestUpsert() { WithBFloat16VectorColumn("bf16_vector", 128, lo.RepeatBy(3, func(i int) []float32 { return lo.RepeatBy(128, func(i int) float32 { return rand.Float32() }) })). + WithInt8VectorColumn("int8_vector", 128, lo.RepeatBy(3, func(i int) []int8 { + return lo.RepeatBy(128, func(i int) int8 { return int8(rand.Intn(math.MaxUint8) - 128) }) + })). 
WithInt64Column("id", []int64{1, 2, 3})) s.Error(err) }) diff --git a/client/row/data.go b/client/row/data.go index 448509be911e4..998efe7bebbe3 100644 --- a/client/row/data.go +++ b/client/row/data.go @@ -171,6 +171,14 @@ func AnyToColumns(rows []interface{}, schemas ...*entity.Schema) ([]column.Colum data := make([]entity.SparseEmbedding, 0, rowsLen) col := column.NewColumnSparseVectors(field.Name, data) nameColumns[field.Name] = col + case entity.FieldTypeInt8Vector: + data := make([][]int8, 0, rowsLen) + dim, err := field.GetDim() + if err != nil { + return []column.Column{}, err + } + col := column.NewColumnInt8Vector(field.Name, int(dim), data) + nameColumns[field.Name] = col } } diff --git a/client/row/data_test.go b/client/row/data_test.go index 87e73946d8d11..064475dcbe4ce 100644 --- a/client/row/data_test.go +++ b/client/row/data_test.go @@ -61,7 +61,7 @@ func (s *RowsSuite) TestRowsToColumns() { s.Equal("Vector", columns[0].Name()) }) - s.Run("fp16", func() { + s.Run("bf16", func() { type BF16Struct struct { ID int64 `milvus:"primary_key;auto_id"` Vector []byte `milvus:"dim:16;vector_type:bf16"` @@ -85,6 +85,18 @@ func (s *RowsSuite) TestRowsToColumns() { s.Equal(entity.FieldTypeFloat16Vector, columns[0].Type()) }) + s.Run("int8", func() { + type Int8Struct struct { + ID int64 `milvus:"primary_key;auto_id"` + Vector []int8 `milvus:"dim:16;vector_type:int8"` + } + columns, err := AnyToColumns([]any{&Int8Struct{}}) + s.Nil(err) + s.Require().Equal(1, len(columns)) + s.Equal("Vector", columns[0].Name()) + s.Equal(entity.FieldTypeInt8Vector, columns[0].Type()) + }) + s.Run("invalid_cases", func() { // empty input _, err := AnyToColumns([]any{}) diff --git a/client/row/schema.go b/client/row/schema.go index ab1f57bb007e3..bd0ee77256597 100644 --- a/client/row/schema.go +++ b/client/row/schema.go @@ -147,6 +147,8 @@ func ParseSchema(r interface{}) (*entity.Schema, error) { } case reflect.Float32: field.DataType = entity.FieldTypeFloatVector + case 
reflect.Int8: + field.DataType = entity.FieldTypeInt8Vector default: return nil, fmt.Errorf("field %s is slice of %v, which is not supported", f.Name, elemType) } diff --git a/internal/distributed/proxy/httpserver/handler_v1_test.go b/internal/distributed/proxy/httpserver/handler_v1_test.go index 00d7ee7a3fef3..973c32590cc1e 100644 --- a/internal/distributed/proxy/httpserver/handler_v1_test.go +++ b/internal/distributed/proxy/httpserver/handler_v1_test.go @@ -1277,7 +1277,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3.0], "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -1296,7 +1297,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3.0], "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -1315,7 +1317,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3], "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -1333,7 +1336,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": "AQIDBA==", "bfloat16Vector": "AQIDBA==", - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -1351,7 +1355,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3.0, 3], "bfloat16Vector": [4.4, 44], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -1370,7 +1375,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3.0], "bfloat16Vector": [4.4, 442, 44], - "sparseFloatVector": {"1": 0.1, "2": 0.44} 
+ "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -1389,7 +1395,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": "AQIDBA==", "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] }, { "book_id": 1, @@ -1398,7 +1405,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3.1, 3.1], "bfloat16Vector": "AQIDBA==", - "sparseFloatVector": {"3": 1.1, "2": 0.44} + "sparseFloatVector": {"3": 1.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), diff --git a/internal/distributed/proxy/httpserver/handler_v2_test.go b/internal/distributed/proxy/httpserver/handler_v2_test.go index 2cbdb22131741..28422ce1c06ed 100644 --- a/internal/distributed/proxy/httpserver/handler_v2_test.go +++ b/internal/distributed/proxy/httpserver/handler_v2_test.go @@ -2160,10 +2160,13 @@ func generateCollectionSchemaWithVectorFields() *schemapb.CollectionSchema { bfloat16VectorField.Name = "bfloat16Vector" sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector) sparseFloatVectorField.Name = "sparseFloatVector" + int8VectorField := generateVectorFieldSchema(schemapb.DataType_Int8Vector) + int8VectorField.Name = "int8Vector" collSchema.Fields = append(collSchema.Fields, binaryVectorField) collSchema.Fields = append(collSchema.Fields, float16VectorField) collSchema.Fields = append(collSchema.Fields, bfloat16VectorField) collSchema.Fields = append(collSchema.Fields, sparseFloatVectorField) + collSchema.Fields = append(collSchema.Fields, int8VectorField) return collSchema } @@ -2191,7 +2194,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3.0], "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -2210,7 +2214,8 @@ func TestFp16Bf16VectorsV2(t 
*testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3.0], "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -2228,7 +2233,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3], "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -2245,7 +2251,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": "AQIDBA==", "bfloat16Vector": "AQIDBA==", - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -2262,7 +2269,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3.0, 3], "bfloat16Vector": [4.4, 44], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -2281,7 +2289,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3, 3.0], "bfloat16Vector": [4.4, 442, 44], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), @@ -2300,7 +2309,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": "AQIDBA==", "bfloat16Vector": [4.4, 442], - "sparseFloatVector": {"1": 0.1, "2": 0.44} + "sparseFloatVector": {"1": 0.1, "2": 0.44}, + "int8Vector": [1, 2] }, { "book_id": 1, @@ -2309,7 +2319,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) { "binaryVector": "AQ==", "float16Vector": [3.1, 3.1], "bfloat16Vector": "AQIDBA==", - "sparseFloatVector": {"3": 1.1, "2": 0.44} + "sparseFloatVector": {"3": 1.1, "2": 0.44}, + "int8Vector": [1, 2] } ] }`), diff --git a/internal/distributed/proxy/httpserver/utils.go b/internal/distributed/proxy/httpserver/utils.go index f7276679b9ec3..7141cc8a7730d 
100644 --- a/internal/distributed/proxy/httpserver/utils.go +++ b/internal/distributed/proxy/httpserver/utils.go @@ -397,6 +397,16 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, } else { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, "invalid vector field: "+fieldName), reallyDataArray, validDataMap } + case schemapb.DataType_Int8Vector: + if dataString == "" { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray, validDataMap + } + var vectorArray []int8 + err := json.Unmarshal([]byte(dataString), &vectorArray) + if err != nil { + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap + } + reallyData[fieldName] = vectorArray case schemapb.DataType_Bool: result, err := cast.ToBoolE(dataString) if err != nil { @@ -664,6 +674,20 @@ func convertBinaryVectorToArray(vector [][]byte, dim int64, dataType schemapb.Da return binaryArray, nil } +func convertInt8VectorToArray(vector [][]int8, dim int64) ([]byte, error) { + byteArray := make([]byte, 0) + for _, arr := range vector { + if int64(len(arr)) != dim { + return nil, fmt.Errorf("[]int8 size %d doesn't equal to vector dimension %d of %s", + len(arr), dim, schemapb.DataType_name[int32(schemapb.DataType_Int8Vector)]) + } + for i := int64(0); i < dim; i++ { + byteArray = append(byteArray, byte(arr[i])) + } + } + return byteArray, nil +} + type fieldCandi struct { name string v reflect.Value @@ -770,6 +794,10 @@ func anyToColumns(rows []map[string]interface{}, validDataMap map[string][]bool, case schemapb.DataType_SparseFloatVector: data = make([][]byte, 0, rowsLen) nameDims[field.Name] = int64(0) + case schemapb.DataType_Int8Vector: + data = make([][]int8, 0, rowsLen) + dim, _ := getDim(field) + nameDims[field.Name] = dim default: return nil, fmt.Errorf("the type(%v) of field(%v) is 
not supported, use other sdk please", field.DataType, field.Name) } @@ -871,6 +899,8 @@ func anyToColumns(rows []map[string]interface{}, validDataMap map[string][]bool, nameDims[field.Name] = rowSparseDim } nameColumns[field.Name] = append(nameColumns[field.Name].([][]byte), content) + case schemapb.DataType_Int8Vector: + nameColumns[field.Name] = append(nameColumns[field.Name].([][]int8), candi.v.Interface().([]int8)) default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", field.DataType, field.Name) } @@ -1074,6 +1104,20 @@ func anyToColumns(rows []map[string]interface{}, validDataMap map[string][]bool, }, }, } + case schemapb.DataType_Int8Vector: + dim := nameDims[name] + arr, err := convertInt8VectorToArray(column.([][]int8), dim) + if err != nil { + return nil, err + } + colData.Field = &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: dim, + Data: &schemapb.VectorField_Int8Vector{ + Int8Vector: arr, + }, + }, + } default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", colData.Type, name) } @@ -1163,6 +1207,24 @@ func serializeSparseFloatVectors(vectors []gjson.Result, dataType schemapb.DataT return values, nil } +func serializeInt8Vectors(vectorStr string, dataType schemapb.DataType, dimension int64, int8ArrayToBytesFunc func([]int8) []byte) ([][]byte, error) { + var int8Values [][]int8 + err := json.Unmarshal([]byte(vectorStr), &int8Values) + if err != nil { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vectorStr, err.Error()) + } + values := make([][]byte, 0, len(int8Values)) + for _, vectorArray := range int8Values { + if int64(len(vectorArray)) != dimension { + return nil, merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(dataType)], vectorStr, + fmt.Sprintf("dimension: %d, but length of []int8: %d", dimension, len(vectorArray))) + } + vectorBytes := int8ArrayToBytesFunc(vectorArray) + values = 
append(values, vectorBytes) + } + return values, nil +} + func convertQueries2Placeholder(body string, dataType schemapb.DataType, dimension int64) (*commonpb.PlaceholderValue, error) { var valueType commonpb.PlaceholderType var values [][]byte @@ -1183,6 +1245,9 @@ func convertQueries2Placeholder(body string, dataType schemapb.DataType, dimensi case schemapb.DataType_SparseFloatVector: valueType = commonpb.PlaceholderType_SparseFloatVector values, err = serializeSparseFloatVectors(gjson.Get(body, HTTPRequestData).Array(), dataType) + case schemapb.DataType_Int8Vector: + valueType = commonpb.PlaceholderType_Int8Vector + values, err = serializeInt8Vectors(gjson.Get(body, HTTPRequestData).Raw, dataType, dimension, typeutil.Int8ArrayToBytes) case schemapb.DataType_VarChar: valueType = commonpb.PlaceholderType_VarChar res := gjson.Get(body, HTTPRequestData).Array() @@ -1280,6 +1345,8 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap rowsNum = int64(len(fieldDataList[0].GetVectors().GetBfloat16Vector())/2) / fieldDataList[0].GetVectors().GetDim() case schemapb.DataType_SparseFloatVector: rowsNum = int64(len(fieldDataList[0].GetVectors().GetSparseFloatVector().Contents)) + case schemapb.DataType_Int8Vector: + rowsNum = int64(len(fieldDataList[0].GetVectors().GetInt8Vector())) / fieldDataList[0].GetVectors().GetDim() default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", fieldDataList[0].Type, fieldDataList[0].FieldName) } @@ -1374,6 +1441,8 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetBfloat16Vector()[i*(fieldDataList[j].GetVectors().GetDim()*2) : (i+1)*(fieldDataList[j].GetVectors().GetDim()*2)] case schemapb.DataType_SparseFloatVector: row[fieldDataList[j].FieldName] = typeutil.SparseFloatBytesToMap(fieldDataList[j].GetVectors().GetSparseFloatVector().Contents[i]) + case 
schemapb.DataType_Int8Vector: + row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetInt8Vector()[i*fieldDataList[j].GetVectors().GetDim() : (i+1)*fieldDataList[j].GetVectors().GetDim()] case schemapb.DataType_Array: if len(fieldDataList[j].ValidData) != 0 && !fieldDataList[j].ValidData[i] { row[fieldDataList[j].FieldName] = nil diff --git a/internal/distributed/proxy/httpserver/utils_test.go b/internal/distributed/proxy/httpserver/utils_test.go index 9ac84c07e25a5..0b87e5661e394 100644 --- a/internal/distributed/proxy/httpserver/utils_test.go +++ b/internal/distributed/proxy/httpserver/utils_test.go @@ -287,6 +287,20 @@ func generateVectorFieldData(vectorType schemapb.DataType) schemapb.FieldData { }, IsDynamic: false, } + case schemapb.DataType_Int8Vector: + return schemapb.FieldData{ + Type: schemapb.DataType_Int8Vector, + FieldName: FieldBookIntro, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: 2, + Data: &schemapb.VectorField_Int8Vector{ + Int8Vector: []byte{0x00, 0x1, 0x2, 0x3, 0x4, 0x5}, + }, + }, + }, + IsDynamic: false, + } default: panic("unsupported vector type") } @@ -735,6 +749,8 @@ func TestCheckAndSetData(t *testing.T) { float16VectorField.Name = "float16Vector" bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector) bfloat16VectorField.Name = "bfloat16Vector" + int8VectorField := generateVectorFieldSchema(schemapb.DataType_Int8Vector) + int8VectorField.Name = "int8Vector" err, _, _ = checkAndSetData(body, &schemapb.CollectionSchema{ Name: DefaultCollectionName, Fields: []*schemapb.FieldSchema{ @@ -771,6 +787,15 @@ func TestCheckAndSetData(t *testing.T) { }) assert.Error(t, err) assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field")) + err, _, _ = checkAndSetData(body, &schemapb.CollectionSchema{ + Name: DefaultCollectionName, + Fields: []*schemapb.FieldSchema{ + primaryField, int8VectorField, + }, + EnableDynamicField: true, + }) + assert.Error(t, err) + 
assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field")) }) t.Run("with pk when autoID == True when upsert", func(t *testing.T) { @@ -974,6 +999,27 @@ func TestSerialize(t *testing.T) { }) assert.Nil(t, err) } + + { + request := map[string]interface{}{ + HTTPRequestData: []interface{}{ + []int8{1, 2}, + }, + } + requestBody, _ := json.Marshal(request) + values, err = serializeInt8Vectors(gjson.Get(string(requestBody), HTTPRequestData).Raw, schemapb.DataType_Int8Vector, 2, typeutil.Int8ArrayToBytes) + assert.Nil(t, err) + placeholderValue = &commonpb.PlaceholderValue{ + Tag: "$0", + Values: values, + } + _, err = proto.Marshal(&commonpb.PlaceholderGroup{ + Placeholders: []*commonpb.PlaceholderValue{ + placeholderValue, + }, + }) + assert.Nil(t, err) + } } func TestConvertQueries2Placeholder(t *testing.T) { @@ -1611,6 +1657,9 @@ func newFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schemapb.Data case schemapb.DataType_BFloat16Vector: vectorField := generateVectorFieldData(firstFieldType) return []*schemapb.FieldData{&vectorField} + case schemapb.DataType_Int8Vector: + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} case schemapb.DataType_Array: return []*schemapb.FieldData{&fieldData10} case schemapb.DataType_JSON: @@ -1850,6 +1899,9 @@ func newNullableFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schem case schemapb.DataType_BFloat16Vector: vectorField := generateVectorFieldData(firstFieldType) return []*schemapb.FieldData{&vectorField} + case schemapb.DataType_Int8Vector: + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} case schemapb.DataType_Array: return []*schemapb.FieldData{&fieldData10} case schemapb.DataType_JSON: @@ -2047,6 +2099,7 @@ func TestVector(t *testing.T) { float16Vector := "vector-float16" bfloat16Vector := "vector-bfloat16" sparseFloatVector := "vector-sparse-float" + int8Vector := "vector-int8" 
testcaseRows := []map[string]interface{}{ { FieldBookID: int64(1), @@ -2055,6 +2108,7 @@ func TestVector(t *testing.T) { float16Vector: []byte{1, 1, 11, 11}, bfloat16Vector: []byte{1, 1, 11, 11}, sparseFloatVector: map[uint32]float32{0: 0.1, 1: 0.11}, + int8Vector: []int8{1, 11}, }, { FieldBookID: int64(2), @@ -2063,6 +2117,7 @@ func TestVector(t *testing.T) { float16Vector: []byte{2, 2, 22, 22}, bfloat16Vector: []byte{2, 2, 22, 22}, sparseFloatVector: map[uint32]float32{1000: 0.3, 200: 0.44}, + int8Vector: []int8{2, 22}, }, { FieldBookID: int64(3), @@ -2071,6 +2126,7 @@ func TestVector(t *testing.T) { float16Vector: []byte{3, 3, 33, 33}, bfloat16Vector: []byte{3, 3, 33, 33}, sparseFloatVector: map[uint32]float32{987621: 32190.31, 32189: 0.0001}, + int8Vector: []int8{3, 33}, }, { FieldBookID: int64(4), @@ -2079,6 +2135,7 @@ func TestVector(t *testing.T) { float16Vector: []float32{0.4, 0.44}, bfloat16Vector: []float32{0.4, 0.44}, sparseFloatVector: map[uint32]float32{25: 0.1, 1: 0.11}, + int8Vector: []int8{4, 44}, }, { FieldBookID: int64(5), @@ -2087,6 +2144,7 @@ func TestVector(t *testing.T) { float16Vector: []int64{99999999, -99999999}, bfloat16Vector: []int64{99999999, -99999999}, sparseFloatVector: map[uint32]float32{1121: 0.1, 3: 0.11}, + int8Vector: []int8{-128, 127}, }, } body, err := wrapRequestBody(testcaseRows) @@ -2102,6 +2160,8 @@ func TestVector(t *testing.T) { bfloat16VectorField.Name = bfloat16Vector sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector) sparseFloatVectorField.Name = sparseFloatVector + int8VectorField := generateVectorFieldSchema(schemapb.DataType_Int8Vector) + int8VectorField.Name = int8Vector collectionSchema := &schemapb.CollectionSchema{ Name: DefaultCollectionName, Description: "", @@ -2167,7 +2227,8 @@ func TestBuildQueryResps(t *testing.T) { } dataTypes := []schemapb.DataType{ - schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, 
schemapb.DataType_BFloat16Vector, schemapb.DataType_SparseFloatVector, + schemapb.DataType_FloatVector, schemapb.DataType_BinaryVector, schemapb.DataType_Float16Vector, + schemapb.DataType_BFloat16Vector, schemapb.DataType_SparseFloatVector, schemapb.DataType_Int8Vector, schemapb.DataType_Bool, schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32, schemapb.DataType_Float, schemapb.DataType_Double, schemapb.DataType_String, schemapb.DataType_VarChar, diff --git a/internal/distributed/proxy/httpserver/wrap_request.go b/internal/distributed/proxy/httpserver/wrap_request.go index 2536d0fcd3678..3d04c886879b3 100644 --- a/internal/distributed/proxy/httpserver/wrap_request.go +++ b/internal/distributed/proxy/httpserver/wrap_request.go @@ -310,6 +310,37 @@ func (f *FieldData) AsSchemapb() (*schemapb.FieldData, error) { }, }, } + case schemapb.DataType_Int8Vector: + wrappedData := [][]int8{} + err := json.Unmarshal(raw, &wrappedData) + if err != nil { + return nil, newFieldDataError(f.FieldName, err) + } + if len(wrappedData) < 1 { + return nil, errors.New("at least one row for insert") + } + array0 := wrappedData[0] + dim := len(array0) + if dim < 1 { + return nil, errors.New("dim must >= 1") + } + data := make([]byte, len(wrappedData)*dim) + + var i int + for _, dataArray := range wrappedData { + for _, v := range dataArray { + data[i] = byte(v) + i++ + } + } + ret.Field = &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: int64(dim), + Data: &schemapb.VectorField_Int8Vector{ + Int8Vector: data, + }, + }, + } default: return nil, errors.New("unsupported data type") } diff --git a/internal/distributed/proxy/httpserver/wrap_request_test.go b/internal/distributed/proxy/httpserver/wrap_request_test.go index e4026119029dd..0f05039bff344 100644 --- a/internal/distributed/proxy/httpserver/wrap_request_test.go +++ b/internal/distributed/proxy/httpserver/wrap_request_test.go @@ -345,6 +345,63 @@ func TestFieldData_AsSchemapb(t *testing.T) { 
_, err := fieldData.AsSchemapb() assert.Error(t, err) }) + + t.Run("int8vector_ok_1", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_Int8Vector, + Field: []byte(`[ + [1, 2, 3, 4], + [-11, -52, 37, 121], + [-128, -35, 31, 127] + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.NoError(t, err) + }) + t.Run("int8vector_ok_1", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_Int8Vector, + Field: []byte(`[ + [-200, 141] + ]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + t.Run("int8vector_empty_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_Int8Vector, + Field: []byte(""), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + t.Run("int8vector_dim0_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_Int8Vector, + Field: []byte(`[]`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) + t.Run("int8vector_datatype_err", func(t *testing.T) { + fieldData := FieldData{ + Type: schemapb.DataType_Int8Vector, + Field: []byte(`['a', 'b', 'c']`), + } + raw, _ := json.Marshal(fieldData) + json.Unmarshal(raw, &fieldData) + _, err := fieldData.AsSchemapb() + assert.Error(t, err) + }) } func Test_vector2Bytes(t *testing.T) { diff --git a/tests/go_client/go.mod b/tests/go_client/go.mod index 26d4204b02752..29f330bf52d36 100644 --- a/tests/go_client/go.mod +++ b/tests/go_client/go.mod @@ -6,7 +6,7 @@ toolchain go1.21.11 require ( github.com/milvus-io/milvus/client/v2 v2.0.0-20241125024034-0b9edb62a92d - github.com/milvus-io/milvus/pkg v0.0.2-0.20241126032235-cb6542339e84 + github.com/milvus-io/milvus/pkg v0.0.2-0.20250115044500-f5234c3c11a3 
github.com/quasilyte/go-ruleguard/dsl v0.3.22 github.com/stretchr/testify v1.9.0 github.com/x448/float16 v0.8.4 @@ -52,7 +52,7 @@ require ( github.com/kr/text v0.2.0 // indirect github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect - github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b // indirect + github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/opencontainers/runtime-spec v1.0.2 // indirect diff --git a/tests/go_client/go.sum b/tests/go_client/go.sum index d4d7e5a1601a5..70171495c36d1 100644 --- a/tests/go_client/go.sum +++ b/tests/go_client/go.sum @@ -318,10 +318,10 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfr github.com/mediocregopher/radix/v3 v3.4.2/go.mod h1:8FL3F6UQRXHXIBSPUs5h0RybMF8i4n7wVopoX3x7Bv8= github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b h1:iPPhnFx+s7FF53UeWj7A4EYhPRMFPL6mHqyQw7qRjeQ= -github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs= -github.com/milvus-io/milvus/pkg v0.0.2-0.20241126032235-cb6542339e84 h1:EAFxmxUVp5yYFDCrX1MQoSxkTO+ycy8NXEqEDEB3cRM= -github.com/milvus-io/milvus/pkg v0.0.2-0.20241126032235-cb6542339e84/go.mod h1:RATa0GS4jhkPpsYOvQ/QvcNz8rd+TlRPDiSyXQnMMxs= +github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f h1:So6RKU5wqP/8EaKogicJP8gZ2SrzzS/JprusBaE3RKc= +github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f/go.mod 
h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs= +github.com/milvus-io/milvus/pkg v0.0.2-0.20250115044500-f5234c3c11a3 h1:WF9BkNk1XjLtwMbaB/cniRBMMNLnqG6e+AUbK8DciHQ= +github.com/milvus-io/milvus/pkg v0.0.2-0.20250115044500-f5234c3c11a3/go.mod h1:nxnHkDFB3jh27nTQJBaC4azAQO8chT03DkmoiZ5086s= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=