Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support row based Array & fix varchar array type #621

Merged
merged 1 commit into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions entity/columns.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,14 @@ func FieldDataColumn(fd *schema.FieldData, begin, end int) (Column, error) {
if data == nil {
return nil, errFieldDataTypeNotMatch
}
return parseArrayData(fd.GetFieldName(), data)
var arrayData []*schema.ScalarField
if end < 0 {
arrayData = data.GetData()[begin:]
} else {
arrayData = data.GetData()[begin:end]
}

return parseArrayData(fd.GetFieldName(), data.GetElementType(), arrayData)

case schema.DataType_JSON:
data, ok := fd.GetScalars().GetData().(*schema.ScalarField_JsonData)
Expand Down Expand Up @@ -304,9 +311,7 @@ func FieldDataColumn(fd *schema.FieldData, begin, end int) (Column, error) {
}
}

func parseArrayData(fieldName string, array *schema.ArrayArray) (Column, error) {
fieldDataList := array.Data
elementType := array.ElementType
func parseArrayData(fieldName string, elementType schema.DataType, fieldDataList []*schema.ScalarField) (Column, error) {

switch elementType {
case schema.DataType_Bool:
Expand Down Expand Up @@ -368,11 +373,17 @@ func parseArrayData(fieldName string, array *schema.ArrayArray) (Column, error)
}
return NewColumnDoubleArray(fieldName, data), nil

case schema.DataType_VarChar:
case schema.DataType_VarChar, schema.DataType_String:
var data [][][]byte
for _, fd := range fieldDataList {
data = append(data, fd.GetBytesData().GetData())
strs := fd.GetStringData().GetData()
bytesData := make([][]byte, 0, len(strs))
for _, str := range strs {
bytesData = append(bytesData, []byte(str))
congqixia marked this conversation as resolved.
Show resolved Hide resolved
}
data = append(data, bytesData)
}

return NewColumnVarCharArray(fieldName, data), nil

default:
Expand Down
107 changes: 107 additions & 0 deletions entity/columns_array.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package entity

import (
"fmt"

"github.com/cockroachdb/errors"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
)

// ColumnVarCharArray is the column type holding arrays of VarChar values.
// Each entry of the column is one array, kept as a [][]byte; FieldData
// converts every []byte element to a string when building the protobuf form.
// NOTE(review): the previous comment described this type as "generated"; the
// generator no longer emits the VarChar variant, so this looks hand-written — confirm.
type ColumnVarCharArray struct {
ColumnBase
// name is the column name reported by Name and written to FieldData.FieldName.
name string
// values holds one [][]byte per column entry.
values [][][]byte
}

// Name reports the name this column was constructed with.
func (c *ColumnVarCharArray) Name() string {
	columnName := c.name
	return columnName
}

// Type reports the FieldType of this column, which is always FieldTypeArray.
func (c *ColumnVarCharArray) Type() FieldType {
	var columnType FieldType = FieldTypeArray
	return columnType
}

// Len reports the number of entries currently stored in the column.
func (c *ColumnVarCharArray) Len() int {
	stored := c.values
	return len(stored)
}

// Get returns the value at idx boxed as interface{}.
// On success the dynamic type of the result is [][]byte.
// An "index out of range" error is returned when idx is negative or not
// smaller than Len(); the accompanying value is then a nil [][]byte.
func (c *ColumnVarCharArray) Get(idx int) (interface{}, error) {
	// The zero value uses the same type as the stored entries (and as
	// ValueByIdx) rather than an unrelated []string.
	var r [][]byte
	if idx < 0 || idx >= c.Len() {
		return r, errors.New("index out of range")
	}
	return c.values[idx], nil
}

// FieldData returns the column data mapped to schemapb.FieldData.
// Every stored entry is converted into a ScalarField carrying a StringArray
// (each []byte element becomes a string), and the entries are wrapped in an
// ArrayArray whose ElementType is DataType_VarChar. The resulting FieldData
// has Type DataType_Array and FieldName set to the column name.
func (c *ColumnVarCharArray) FieldData() *schemapb.FieldData {
	fd := &schemapb.FieldData{
		Type:      schemapb.DataType_Array,
		FieldName: c.name,
	}

	data := make([]*schemapb.ScalarField, 0, c.Len())
	for _, arr := range c.values {
		// Pre-size by the number of elements in this entry, not by the
		// number of entries in the column.
		converted := make([]string, 0, len(arr))
		for _, elem := range arr {
			converted = append(converted, string(elem))
		}
		data = append(data, &schemapb.ScalarField{
			Data: &schemapb.ScalarField_StringData{
				StringData: &schemapb.StringArray{
					Data: converted,
				},
			},
		})
	}
	fd.Field = &schemapb.FieldData_Scalars{
		Scalars: &schemapb.ScalarField{
			Data: &schemapb.ScalarField_ArrayData{
				ArrayData: &schemapb.ArrayArray{
					Data:        data,
					ElementType: schemapb.DataType_VarChar,
				},
			},
		},
	}
	return fd
}

// ValueByIdx fetches the entry stored at position idx.
// When idx is negative or not smaller than Len(), a nil slice and an
// "index out of range" error are returned.
func (c *ColumnVarCharArray) ValueByIdx(idx int) ([][]byte, error) {
	if inRange := idx >= 0 && idx < c.Len(); !inRange {
		return nil, errors.New("index out of range")
	}
	return c.values[idx], nil
}

// AppendValue appends one entry to the column.
// i must have dynamic type [][]byte; any other type leaves the column
// unchanged and yields an error naming the type that was received.
func (c *ColumnVarCharArray) AppendValue(i interface{}) error {
	v, ok := i.([][]byte)
	if !ok {
		// Report the type the assertion actually requires.
		return fmt.Errorf("invalid type, expected [][]byte, got %T", i)
	}
	c.values = append(c.values, v)

	return nil
}

// Data exposes the column's stored values; the slice is returned as-is,
// without copying.
func (c *ColumnVarCharArray) Data() [][][]byte {
	stored := c.values
	return stored
}

// NewColumnVarCharArray builds a ColumnVarCharArray with the given column
// name and initial values. The values slice is stored directly, not copied.
func NewColumnVarCharArray(name string, values [][][]byte) *ColumnVarCharArray {
	col := new(ColumnVarCharArray)
	col.name = name
	col.values = values
	return col
}
98 changes: 0 additions & 98 deletions entity/columns_array_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions entity/columns_varchar_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ func TestColumnVarChar(t *testing.T) {
t.Run("test meta", func(t *testing.T) {
ft := FieldTypeVarChar
assert.Equal(t, "VarChar", ft.Name())
assert.Equal(t, "[]byte", ft.String())
assert.Equal(t, "string", ft.String())
pbName, pbType := ft.PbFieldType()
assert.Equal(t, "Bytes", pbName)
assert.Equal(t, "[]byte", pbType)
assert.Equal(t, "VarChar", pbName)
assert.Equal(t, "string", pbType)
})

t.Run("test column attribute", func(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion entity/genarray/gen_array.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func main() {
entity.FieldTypeInt64,
entity.FieldTypeFloat,
entity.FieldTypeDouble,
entity.FieldTypeVarChar,
// entity.FieldTypeVarChar, change to hand written
}

pf := func(ft entity.FieldType) interface{} {
Expand Down
40 changes: 39 additions & 1 deletion entity/rows.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@ package entity

import (
"encoding/json"
"errors"
"fmt"
"go/ast"
"reflect"
"strconv"
"strings"

"github.com/cockroachdb/errors"
)

const (
Expand Down Expand Up @@ -324,6 +325,12 @@ func AnyToColumns(rows []interface{}, schemas ...*Schema) ([]Column, error) {
data := make([][]byte, 0, rowsLen)
col := NewColumnJSONBytes(field.Name, data)
nameColumns[field.Name] = col
case FieldTypeArray:
col := NewArrayColumn(field)
if col == nil {
return nil, errors.Errorf("unsupported element type %s for Array", field.ElementType.String())
}
nameColumns[field.Name] = col
case FieldTypeFloatVector:
data := make([][]float32, 0, rowsLen)
dimStr, has := field.TypeParams[TypeParamDim]
Expand Down Expand Up @@ -415,6 +422,37 @@ func AnyToColumns(rows []interface{}, schemas ...*Schema) ([]Column, error) {
return columns, nil
}

// NewArrayColumn creates the array column matching f.ElementType, named after
// f.Name and constructed with nil values. It returns nil when the element
// type has no corresponding case (Bool, Int8, Int16, Int32, Int64, Float,
// Double and VarChar are handled).
func NewArrayColumn(f *Field) Column {
	var col Column

	switch elem := f.ElementType; elem {
	case FieldTypeBool:
		col = NewColumnBoolArray(f.Name, nil)
	case FieldTypeInt8:
		col = NewColumnInt8Array(f.Name, nil)
	case FieldTypeInt16:
		col = NewColumnInt16Array(f.Name, nil)
	case FieldTypeInt32:
		col = NewColumnInt32Array(f.Name, nil)
	case FieldTypeInt64:
		col = NewColumnInt64Array(f.Name, nil)
	case FieldTypeFloat:
		col = NewColumnFloatArray(f.Name, nil)
	case FieldTypeDouble:
		col = NewColumnDoubleArray(f.Name, nil)
	case FieldTypeVarChar:
		col = NewColumnVarCharArray(f.Name, nil)
	}

	// For an unhandled element type col is still the nil interface value.
	return col
}

// RowsToColumns rows to columns
func RowsToColumns(rows []Row, schemas ...*Schema) ([]Column, error) {
anys := make([]interface{}, 0, len(rows))
Expand Down
4 changes: 2 additions & 2 deletions entity/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ func (t FieldType) String() string {
case FieldTypeString:
return "string"
case FieldTypeVarChar:
return "[]byte"
return "string"
case FieldTypeArray:
return "Array"
case FieldTypeJSON:
Expand Down Expand Up @@ -425,7 +425,7 @@ func (t FieldType) PbFieldType() (string, string) {
case FieldTypeString:
return "String", "string"
case FieldTypeVarChar:
return "Bytes", "[]byte"
return "VarChar", "string"
case FieldTypeJSON:
return "JSON", "JSON"
case FieldTypeBinaryVector:
Expand Down
Loading