From c4ae9f4eced088dc0d669da74cff78ac401db36a Mon Sep 17 00:00:00 2001 From: sthuang <167743503+shaoting-huang@users.noreply.github.com> Date: Fri, 24 Jan 2025 17:21:13 +0800 Subject: [PATCH] feat: introduce third-party milvus-storage (#39418) related: https://github.com/milvus-io/milvus/issues/39173 Signed-off-by: shaoting-huang --- DEVELOPMENT.md | 3 +- go.mod | 18 +- go.sum | 32 +- internal/core/conanfile.py | 3 +- internal/core/src/CMakeLists.txt | 2 + internal/core/src/segcore/packed_reader_c.cpp | 97 ++ internal/core/src/segcore/packed_reader_c.h | 64 + internal/core/src/segcore/packed_writer_c.cpp | 81 + internal/core/src/segcore/packed_writer_c.h | 41 + internal/core/src/storage/PayloadReader.cpp | 2 +- internal/core/thirdparty/CMakeLists.txt | 1 + .../thirdparty/milvus-storage/CMakeLists.txt | 51 + .../milvus-storage/milvus-storage.pc.in | 9 + internal/core/unittest/CMakeLists.txt | 3 + internal/storagev2/OWNERS | 7 + .../storagev2/common/arrowutil/arrow_util.go | 80 + .../storagev2/common/constant/constant.go | 31 + internal/storagev2/common/errors/errors.go | 27 + internal/storagev2/common/log/field.go | 70 + internal/storagev2/common/log/log.go | 106 ++ internal/storagev2/common/log/log_test.go | 33 + internal/storagev2/common/log/options.go | 34 + internal/storagev2/common/utils/utils.go | 404 +++++ internal/storagev2/docs/layout.md | 22 + internal/storagev2/file/blob/blob.go | 41 + .../storagev2/file/fragment/deletefragment.go | 45 + internal/storagev2/file/fragment/fragment.go | 76 + .../storagev2/filter/conjunction_filter.go | 84 + internal/storagev2/filter/constant_filter.go | 151 ++ internal/storagev2/filter/filter.go | 48 + .../io/format/parquet/file_reader.go | 220 +++ .../io/format/parquet/file_writer.go | 61 + internal/storagev2/io/format/reader.go | 24 + internal/storagev2/io/format/writer.go | 23 + internal/storagev2/io/fs/factory.go | 40 + internal/storagev2/io/fs/file/file.go | 25 + internal/storagev2/io/fs/file/local_file.go | 52 + 
internal/storagev2/io/fs/file/memory_file.go | 116 ++ internal/storagev2/io/fs/file/minio_file.go | 73 + internal/storagev2/io/fs/fs.go | 34 + internal/storagev2/io/fs/fs_util.go | 42 + internal/storagev2/io/fs/local_fs.go | 95 ++ internal/storagev2/io/fs/memory_fs.go | 78 + internal/storagev2/io/fs/minio_fs.go | 201 +++ internal/storagev2/packed/arrow/c/abi.h | 95 ++ internal/storagev2/packed/arrow/c/helpers.h | 115 ++ internal/storagev2/packed/packed_reader.go | 84 + internal/storagev2/packed/packed_test.go | 156 ++ internal/storagev2/packed/packed_writer.go | 77 + internal/storagev2/packed/type.go | 46 + .../reader/commonreader/delete_reader.go | 65 + .../reader/commonreader/filter_reader.go | 84 + .../reader/commonreader/projection_reader.go | 35 + .../recordreader/filter_query_record.go | 49 + .../recordreader/merge_record_reader.go | 77 + .../multi_files_sequential_reader.go | 119 ++ .../reader/recordreader/record_reader.go | 93 + .../reader/recordreader/scan_record.go | 151 ++ .../storagev2/storage/lock/lock_manager.go | 98 ++ internal/storagev2/storage/manifest/commit.go | 80 + .../storagev2/storage/manifest/commit_op.go | 68 + .../storagev2/storage/manifest/manifest.go | 243 +++ .../storage/manifest/reader_writer.go | 119 ++ internal/storagev2/storage/options/options.go | 144 ++ internal/storagev2/storage/schema/schema.go | 150 ++ .../storagev2/storage/schema/schema_option.go | 97 ++ .../storagev2/storage/schema/schema_test.go | 53 + internal/storagev2/storage/space.go | 220 +++ .../storage/transaction/transaction.go | 327 ++++ pkg/proto/storagev2.proto | 131 ++ pkg/proto/storagev2pb/storagev2.pb.go | 1519 +++++++++++++++++ scripts/core_build.sh | 4 +- scripts/generate_proto.sh | 2 + 73 files changed, 7222 insertions(+), 29 deletions(-) create mode 100644 internal/core/src/segcore/packed_reader_c.cpp create mode 100644 internal/core/src/segcore/packed_reader_c.h create mode 100644 internal/core/src/segcore/packed_writer_c.cpp create mode 100644 
internal/core/src/segcore/packed_writer_c.h create mode 100644 internal/core/thirdparty/milvus-storage/CMakeLists.txt create mode 100644 internal/core/thirdparty/milvus-storage/milvus-storage.pc.in create mode 100644 internal/storagev2/OWNERS create mode 100644 internal/storagev2/common/arrowutil/arrow_util.go create mode 100644 internal/storagev2/common/constant/constant.go create mode 100644 internal/storagev2/common/errors/errors.go create mode 100644 internal/storagev2/common/log/field.go create mode 100644 internal/storagev2/common/log/log.go create mode 100644 internal/storagev2/common/log/log_test.go create mode 100644 internal/storagev2/common/log/options.go create mode 100644 internal/storagev2/common/utils/utils.go create mode 100644 internal/storagev2/docs/layout.md create mode 100644 internal/storagev2/file/blob/blob.go create mode 100644 internal/storagev2/file/fragment/deletefragment.go create mode 100644 internal/storagev2/file/fragment/fragment.go create mode 100644 internal/storagev2/filter/conjunction_filter.go create mode 100644 internal/storagev2/filter/constant_filter.go create mode 100644 internal/storagev2/filter/filter.go create mode 100644 internal/storagev2/io/format/parquet/file_reader.go create mode 100644 internal/storagev2/io/format/parquet/file_writer.go create mode 100644 internal/storagev2/io/format/reader.go create mode 100644 internal/storagev2/io/format/writer.go create mode 100644 internal/storagev2/io/fs/factory.go create mode 100644 internal/storagev2/io/fs/file/file.go create mode 100644 internal/storagev2/io/fs/file/local_file.go create mode 100644 internal/storagev2/io/fs/file/memory_file.go create mode 100644 internal/storagev2/io/fs/file/minio_file.go create mode 100644 internal/storagev2/io/fs/fs.go create mode 100644 internal/storagev2/io/fs/fs_util.go create mode 100644 internal/storagev2/io/fs/local_fs.go create mode 100644 internal/storagev2/io/fs/memory_fs.go create mode 100644 internal/storagev2/io/fs/minio_fs.go 
create mode 100644 internal/storagev2/packed/arrow/c/abi.h create mode 100644 internal/storagev2/packed/arrow/c/helpers.h create mode 100644 internal/storagev2/packed/packed_reader.go create mode 100644 internal/storagev2/packed/packed_test.go create mode 100644 internal/storagev2/packed/packed_writer.go create mode 100644 internal/storagev2/packed/type.go create mode 100644 internal/storagev2/reader/commonreader/delete_reader.go create mode 100644 internal/storagev2/reader/commonreader/filter_reader.go create mode 100644 internal/storagev2/reader/commonreader/projection_reader.go create mode 100644 internal/storagev2/reader/recordreader/filter_query_record.go create mode 100644 internal/storagev2/reader/recordreader/merge_record_reader.go create mode 100644 internal/storagev2/reader/recordreader/multi_files_sequential_reader.go create mode 100644 internal/storagev2/reader/recordreader/record_reader.go create mode 100644 internal/storagev2/reader/recordreader/scan_record.go create mode 100644 internal/storagev2/storage/lock/lock_manager.go create mode 100644 internal/storagev2/storage/manifest/commit.go create mode 100644 internal/storagev2/storage/manifest/commit_op.go create mode 100644 internal/storagev2/storage/manifest/manifest.go create mode 100644 internal/storagev2/storage/manifest/reader_writer.go create mode 100644 internal/storagev2/storage/options/options.go create mode 100644 internal/storagev2/storage/schema/schema.go create mode 100644 internal/storagev2/storage/schema/schema_option.go create mode 100644 internal/storagev2/storage/schema/schema_test.go create mode 100644 internal/storagev2/storage/space.go create mode 100644 internal/storagev2/storage/transaction/transaction.go create mode 100644 pkg/proto/storagev2.proto create mode 100644 pkg/proto/storagev2pb/storagev2.pb.go diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 9953c9aecf0ae..0335ea16871ec 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -307,7 +307,8 @@ ${CMAKE_EXTRA_ARGS} \ 
-DUSE_DYNAMIC_SIMD=${USE_DYNAMIC_SIMD} \ -DCPU_ARCH=${CPU_ARCH} \ -DINDEX_ENGINE=${INDEX_ENGINE} \ - -DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} " + -DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} \ + -DENABLE_AZURE_FS=${ENABLE_AZURE_FS} " if [ -z "$BUILD_WITHOUT_AZURE" ]; then CMAKE_CMD=${CMAKE_CMD}"-DAZURE_BUILD_DIR=${AZURE_BUILD_DIR} \ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} " diff --git a/go.mod b/go.mod index f3e3de1e8bb9c..be8747e564870 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,7 @@ require ( github.com/gin-gonic/gin v1.9.1 github.com/go-playground/validator/v10 v10.14.0 github.com/gofrs/flock v0.8.1 - github.com/golang/protobuf v1.5.4 // indirect + github.com/golang/protobuf v1.5.4 github.com/google/btree v1.1.2 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 github.com/klauspost/compress v1.17.9 @@ -101,9 +101,9 @@ require ( github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible // indirect github.com/alibabacloud-go/debug v0.0.0-20190504072949-9472017b5c68 // indirect github.com/alibabacloud-go/tea v1.1.8 // indirect - github.com/andybalholm/brotli v1.0.4 // indirect + github.com/andybalholm/brotli v1.1.0 // indirect github.com/apache/pulsar-client-go v0.6.1-0.20210728062540-29414db801a7 // indirect - github.com/apache/thrift v0.18.1 // indirect + github.com/apache/thrift v0.19.0 // indirect github.com/ardielle/ardielle-go v1.5.2 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21 // indirect @@ -158,7 +158,7 @@ require ( github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/mock v1.6.0 // indirect github.com/golang/snappy v0.0.4 // indirect - github.com/google/flatbuffers v2.0.8+incompatible // indirect + github.com/google/flatbuffers v24.3.25+incompatible // indirect github.com/google/s2a-go v0.1.7 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect 
github.com/googleapis/gax-go/v2 v2.12.5 // indirect @@ -205,7 +205,7 @@ require ( github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5 // indirect github.com/pierrec/lz4 v2.5.2+incompatible // indirect - github.com/pierrec/lz4/v4 v4.1.18 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 // indirect github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 // indirect @@ -260,13 +260,13 @@ require ( go.opentelemetry.io/proto/otlp v1.0.0 // indirect go.uber.org/automaxprocs v1.5.3 // indirect golang.org/x/arch v0.3.0 // indirect - golang.org/x/mod v0.17.0 // indirect + golang.org/x/mod v0.18.0 // indirect golang.org/x/sys v0.28.0 // indirect golang.org/x/term v0.27.0 // indirect golang.org/x/time v0.5.0 // indirect - golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect - golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect - gonum.org/v1/gonum v0.11.0 // indirect + golang.org/x/tools v0.22.0 // indirect + golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect + gonum.org/v1/gonum v0.14.0 // indirect google.golang.org/genproto v0.0.0-20240624140628-dc46fd24d27d // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240730163845-b1a4ccb954bf // indirect diff --git a/go.sum b/go.sum index b121ef559b18f..4282f7fa5d329 100644 --- a/go.sum +++ b/go.sum @@ -104,14 +104,14 @@ github.com/alibabacloud-go/tea v1.1.8 h1:vFF0707fqjGiQTxrtMnIXRjOCvQXf49CuDVRtTo github.com/alibabacloud-go/tea v1.1.8/go.mod h1:/tmnEaQMyb4Ky1/5D+SE1BAsa5zj/KeGOFfwYm3N/p4= github.com/aliyun/credentials-go v1.2.7 h1:gLtFylxLZ1TWi1pStIt1O6a53GFU1zkNwjtJir2B4ow= github.com/aliyun/credentials-go v1.2.7/go.mod 
h1:/KowD1cfGSLrLsH28Jr8W+xwoId0ywIy5lNzDz6O1vw= -github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= -github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= +github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ= github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= -github.com/apache/thrift v0.18.1 h1:lNhK/1nqjbwbiOPDBPFJVKxgDEGSepKuTh6OLiXW8kg= -github.com/apache/thrift v0.18.1/go.mod h1:rdQn/dCcDKEWjjylUeueum4vQEjG2v8v2PqriUnbr+I= +github.com/apache/thrift v0.19.0 h1:sOqkWPzMj7w6XaYbJQG7m4sGqVolaW/0D28Ln7yPzMk= +github.com/apache/thrift v0.19.0/go.mod h1:SUALL216IiaOw2Oy+5Vs9lboJ/t9g40C+G07Dc0QC1I= github.com/ardielle/ardielle-go v1.5.2 h1:TilHTpHIQJ27R1Tl/iITBzMwiUGSlVfiVhwDNGM3Zj4= github.com/ardielle/ardielle-go v1.5.2/go.mod h1:I4hy1n795cUhaVt/ojz83SNVCYIGsAFAONtv2Dr7HUI= github.com/ardielle/ardielle-tools v1.5.4/go.mod h1:oZN+JRMnqGiIhrzkRN9l26Cej9dEx4jeNG6A+AdkShk= @@ -427,8 +427,8 @@ github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= -github.com/google/flatbuffers v2.0.8+incompatible h1:ivUb1cGomAB101ZM1T0nOiWz9pSrTMoa9+EiY7igmkM= -github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= 
+github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= +github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.2.1-0.20190312032427-6f77996f0c42/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -742,8 +742,8 @@ github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2 github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI= github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= -github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTmyFqUwr+jcCvpVkK7sumiz+ko5H9eq4= @@ -1122,8 +1122,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= -golang.org/x/mod v0.17.0/go.mod 
h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= +golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -1367,19 +1367,19 @@ golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= +golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 
h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= -gonum.org/v1/gonum v0.11.0 h1:f1IJhK4Km5tBJmaiJXtk/PkL4cdVX6J+tGiM187uT5E= -gonum.org/v1/gonum v0.11.0/go.mod h1:fSG4YDCxxUZQJ7rKsQrj0gMOg00Il0Z96/qMA4bVQhA= +gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0= +gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY= diff --git a/internal/core/conanfile.py b/internal/core/conanfile.py index 9978fe53f0c32..c2bf8b2fd4a41 100644 --- a/internal/core/conanfile.py +++ b/internal/core/conanfile.py @@ -13,7 +13,7 @@ class MilvusConan(ConanFile): "lz4/1.9.4#c5afb86edd69ac0df30e3a9e192e43db", "snappy/1.1.9#0519333fef284acd04806243de7d3070", "lzo/2.10#9517fc1bcc4d4cc229a79806003a1baa", - "arrow/15.0.0#0456d916ff25d509e0724c5b219b4c45", + "arrow/17.0.0#8cea917a6e06ca17c28411966d6fcdd7", "openssl/3.1.2#02594c4c0a6e2b4feb3cd15119993597", "aws-sdk-cpp/1.9.234#28d6d2c175975900ce292bafe8022c88", "googleapis/cci.20221108#65604e1b3b9a6b363044da625b201a2a", @@ -72,6 +72,7 @@ class MilvusConan(ConanFile): "aws-sdk-cpp:transfer": False, "gtest:build_gmock": False, "boost:without_locale": False, + "boost:without_test": True, "glog:with_gflags": True, "glog:shared": True, "prometheus-cpp:with_pull": False, diff --git a/internal/core/src/CMakeLists.txt 
b/internal/core/src/CMakeLists.txt index 0c17d074bd224..1b1baa28b235b 100644 --- a/internal/core/src/CMakeLists.txt +++ b/internal/core/src/CMakeLists.txt @@ -32,6 +32,7 @@ include_directories( ${SIMDJSON_INCLUDE_DIR} ${TANTIVY_INCLUDE_DIR} ${CONAN_INCLUDE_DIRS} + ${MILVUS_STORAGE_INCLUDE_DIR} ) add_subdirectory( pb ) @@ -73,6 +74,7 @@ set(LINK_TARGETS simdjson tantivy_binding knowhere + milvus-storage ${OpenMP_CXX_FLAGS} ${CONAN_LIBS}) diff --git a/internal/core/src/segcore/packed_reader_c.cpp b/internal/core/src/segcore/packed_reader_c.cpp new file mode 100644 index 0000000000000..56aaf0e1977c5 --- /dev/null +++ b/internal/core/src/segcore/packed_reader_c.cpp @@ -0,0 +1,97 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "segcore/packed_reader_c.h" +#include "milvus-storage/packed/reader.h" +#include "milvus-storage/common/log.h" +#include "milvus-storage/filesystem/fs.h" +#include "milvus-storage/common/config.h" + +#include +#include +#include +#include + +int +NewPackedReader(const char* path, + struct ArrowSchema* schema, + const int64_t buffer_size, + CPackedReader* c_packed_reader) { + try { + auto truePath = std::string(path); + auto factory = std::make_shared(); + auto conf = milvus_storage::StorageConfig(); + conf.uri = "file:///tmp/"; + auto trueFs = factory->BuildFileSystem(conf, &truePath).value(); + auto trueSchema = arrow::ImportSchema(schema).ValueOrDie(); + std::set needed_columns; + for (int i = 0; i < trueSchema->num_fields(); i++) { + needed_columns.emplace(i); + } + auto reader = std::make_unique( + *trueFs, path, trueSchema, needed_columns, buffer_size); + *c_packed_reader = reader.release(); + return 0; + } catch (std::exception& e) { + return -1; + } +} + +int +ReadNext(CPackedReader c_packed_reader, + CArrowArray* out_array, + CArrowSchema* out_schema) { + try { + auto packed_reader = + static_cast( + c_packed_reader); + std::shared_ptr record_batch; + auto status = packed_reader->ReadNext(&record_batch); + if (!status.ok()) { + return -1; + } + if (record_batch == nullptr) { + // end of file + return 0; + } else { + std::unique_ptr arr = std::make_unique(); + std::unique_ptr schema = + std::make_unique(); + auto status = arrow::ExportRecordBatch( + *record_batch, arr.get(), schema.get()); + if (!status.ok()) { + return -1; + } + *out_array = arr.release(); + *out_schema = schema.release(); + return 0; + } + return 0; + } catch (std::exception& e) { + return -1; + } +} + +int +CloseReader(CPackedReader c_packed_reader) { + try { + auto packed_reader = + static_cast( + c_packed_reader); + delete packed_reader; + return 0; + } catch (std::exception& e) { + return -1; + } +} \ No newline at end of file diff --git 
a/internal/core/src/segcore/packed_reader_c.h b/internal/core/src/segcore/packed_reader_c.h new file mode 100644 index 0000000000000..7a5c90cf16e3c --- /dev/null +++ b/internal/core/src/segcore/packed_reader_c.h @@ -0,0 +1,64 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef void* CPackedReader; +typedef void* CArrowArray; +typedef void* CArrowSchema; + +/** + * @brief Open a packed reader to read needed columns in the specified path. + * + * @param path The root path of the packed files to read. + * @param schema The original schema of data. + * @param buffer_size The max buffer size of the packed reader. + * @param c_packed_reader The output pointer of the packed reader. + */ +int +NewPackedReader(const char* path, + struct ArrowSchema* schema, + const int64_t buffer_size, + CPackedReader* c_packed_reader); + +/** + * @brief Read the next record batch from the packed reader. + * By default, the maximum return batch is 1024 rows. + * + * @param c_packed_reader The packed reader to read. + * @param out_array The output pointer of the arrow array. + * @param out_schema The output pointer of the arrow schema. + */ +int +ReadNext(CPackedReader c_packed_reader, + CArrowArray* out_array, + CArrowSchema* out_schema); + +/** + * @brief Close the packed reader and release the resources. + * + * @param c_packed_reader The packed reader to close. 
+ */ +int +CloseReader(CPackedReader c_packed_reader); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/internal/core/src/segcore/packed_writer_c.cpp b/internal/core/src/segcore/packed_writer_c.cpp new file mode 100644 index 0000000000000..613e21d78013a --- /dev/null +++ b/internal/core/src/segcore/packed_writer_c.cpp @@ -0,0 +1,81 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "segcore/packed_writer_c.h" +#include "milvus-storage/packed/writer.h" +#include "milvus-storage/common/log.h" +#include "milvus-storage/common/config.h" +#include "milvus-storage/filesystem/fs.h" + +#include +#include + +int +NewPackedWriter(const char* path, + struct ArrowSchema* schema, + const int64_t buffer_size, + CPackedWriter* c_packed_writer) { + try { + auto truePath = std::string(path); + auto factory = std::make_shared(); + auto conf = milvus_storage::StorageConfig(); + conf.uri = "file:///tmp/"; + auto trueFs = factory->BuildFileSystem(conf, &truePath).value(); + auto trueSchema = arrow::ImportSchema(schema).ValueOrDie(); + auto writer = std::make_unique( + buffer_size, trueSchema, trueFs, truePath, conf); + + *c_packed_writer = writer.release(); + return 0; + } catch (std::exception& e) { + return -1; + } +} + +int +WriteRecordBatch(CPackedWriter c_packed_writer, + struct ArrowArray* array, + struct ArrowSchema* schema) { + try { + auto packed_writer = + static_cast( + c_packed_writer); + auto 
record_batch = + arrow::ImportRecordBatch(array, schema).ValueOrDie(); + auto status = packed_writer->Write(record_batch); + if (!status.ok()) { + return -1; + } + return 0; + } catch (std::exception& e) { + return -1; + } +} + +int +CloseWriter(CPackedWriter c_packed_writer) { + try { + auto packed_writer = + static_cast( + c_packed_writer); + auto status = packed_writer->Close(); + delete packed_writer; + if (!status.ok()) { + return -1; + } + return 0; + } catch (std::exception& e) { + return -1; + } +} \ No newline at end of file diff --git a/internal/core/src/segcore/packed_writer_c.h b/internal/core/src/segcore/packed_writer_c.h new file mode 100644 index 0000000000000..207aba502d468 --- /dev/null +++ b/internal/core/src/segcore/packed_writer_c.h @@ -0,0 +1,41 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef void* CPackedWriter; + +int +NewPackedWriter(const char* path, + struct ArrowSchema* schema, + const int64_t buffer_size, + CPackedWriter* c_packed_writer); + +int +WriteRecordBatch(CPackedWriter c_packed_writer, + struct ArrowArray* array, + struct ArrowSchema* schema); + +int +CloseWriter(CPackedWriter c_packed_writer); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/internal/core/src/storage/PayloadReader.cpp b/internal/core/src/storage/PayloadReader.cpp index 4d38ac69bfbe2..41baed93b37d9 100644 --- a/internal/core/src/storage/PayloadReader.cpp +++ b/internal/core/src/storage/PayloadReader.cpp @@ -43,7 +43,7 @@ PayloadReader::init(std::shared_ptr input, // Configure general Parquet reader settings auto reader_properties = parquet::ReaderProperties(pool); reader_properties.set_buffer_size(4096 * 4); - reader_properties.enable_buffered_stream(); + // reader_properties.enable_buffered_stream(); // Configure Arrow-specific Parquet reader settings auto arrow_reader_props = parquet::ArrowReaderProperties(); diff --git a/internal/core/thirdparty/CMakeLists.txt b/internal/core/thirdparty/CMakeLists.txt index 5fe44881ad3e8..391528d679540 100644 --- a/internal/core/thirdparty/CMakeLists.txt +++ b/internal/core/thirdparty/CMakeLists.txt @@ -45,3 +45,4 @@ if (LINUX) add_subdirectory(jemalloc) endif() +add_subdirectory(milvus-storage) \ No newline at end of file diff --git a/internal/core/thirdparty/milvus-storage/CMakeLists.txt b/internal/core/thirdparty/milvus-storage/CMakeLists.txt new file mode 100644 index 0000000000000..a847c41a47597 --- /dev/null +++ b/internal/core/thirdparty/milvus-storage/CMakeLists.txt @@ -0,0 +1,51 @@ +#------------------------------------------------------------------------------- +# Copyright (C) 2019-2020 Zilliz. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under the License. +#------------------------------------------------------------------------------- + +# Update milvus-storage_VERSION for the first occurrence +milvus_add_pkg_config("milvus-storage") +set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "") +set( milvus-storage_VERSION 7475494 ) +set( GIT_REPOSITORY "https://github.com/milvus-io/milvus-storage.git") +message(STATUS "milvus-storage repo: ${GIT_REPOSITORY}") +message(STATUS "milvus-storage version: ${milvus-storage_VERSION}") + +message(STATUS "Building milvus-storage-${milvus-storage_SOURCE_VER} from source") +message(STATUS ${CMAKE_BUILD_TYPE}) + +if ( ENABLE_AZURE_FS STREQUAL "ON" ) + set(WITH_AZURE_FS ON CACHE BOOL "" FORCE ) +else () + set(WITH_AZURE_FS OFF CACHE BOOL "" FORCE ) +endif () + +set( CMAKE_PREFIX_PATH ${CONAN_BOOST_ROOT} ) +FetchContent_Declare( + milvus-storage + GIT_REPOSITORY ${GIT_REPOSITORY} + GIT_TAG ${milvus-storage_VERSION} + SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-src + BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/milvus-storage-build + SOURCE_SUBDIR cpp + DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} ) + +FetchContent_GetProperties( milvus-storage ) +if ( NOT milvus-storage_POPULATED ) + FetchContent_Populate( milvus-storage ) + + # Adding the following target: + # milvus-storage + add_subdirectory( ${milvus-storage_SOURCE_DIR}/cpp + ${milvus-storage_BINARY_DIR} ) +endif() + +set( MILVUS_STORAGE_INCLUDE_DIR 
${milvus-storage_SOURCE_DIR}/cpp/include CACHE INTERNAL "Path to milvus-storage include directory" ) \ No newline at end of file diff --git a/internal/core/thirdparty/milvus-storage/milvus-storage.pc.in b/internal/core/thirdparty/milvus-storage/milvus-storage.pc.in new file mode 100644 index 0000000000000..e72695474cbf8 --- /dev/null +++ b/internal/core/thirdparty/milvus-storage/milvus-storage.pc.in @@ -0,0 +1,9 @@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ + +Name: Milvus Storage +Description: Storage modules for Milvus +Version: @MILVUS_VERSION@ + +Libs: -L${libdir} -lmilvus-storage +Cflags: -I${includedir} diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index 172bc4a05c6c9..7724b574ebcee 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -16,6 +16,7 @@ include_directories( ${SIMDJSON_INCLUDE_DIR} ${TANTIVY_INCLUDE_DIR} ${CONAN_INCLUDE_DIRS} + ${MILVUS_STORAGE_INCLUDE_DIR} ) add_definitions(-DMILVUS_TEST_SEGCORE_YAML_PATH="${CMAKE_SOURCE_DIR}/unittest/test_utils/test_segcore.yaml") @@ -157,6 +158,7 @@ if (LINUX) gtest milvus_core knowhere + milvus-storage ) install(TARGETS index_builder_test DESTINATION unittest) endif() @@ -169,6 +171,7 @@ target_link_libraries(all_tests gtest milvus_core knowhere + milvus-storage ) install(TARGETS all_tests DESTINATION unittest) diff --git a/internal/storagev2/OWNERS b/internal/storagev2/OWNERS new file mode 100644 index 0000000000000..43bbbe7016716 --- /dev/null +++ b/internal/storagev2/OWNERS @@ -0,0 +1,7 @@ +reviewers: + - tedxu + - shaoting-huang + - sunby + +approvers: + - maintainers \ No newline at end of file diff --git a/internal/storagev2/common/arrowutil/arrow_util.go b/internal/storagev2/common/arrowutil/arrow_util.go new file mode 100644 index 0000000000000..a67ce3ea2ca21 --- /dev/null +++ b/internal/storagev2/common/arrowutil/arrow_util.go @@ -0,0 +1,80 @@ +// Copyright 2023 Zilliz +// +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package arrowutil + +import ( + "context" + + "github.com/apache/arrow/go/v12/arrow/array" + "github.com/apache/arrow/go/v12/arrow/memory" + "github.com/apache/arrow/go/v12/parquet/file" + "github.com/apache/arrow/go/v12/parquet/pqarrow" + + "github.com/milvus-io/milvus/internal/storagev2/common/constant" + "github.com/milvus-io/milvus/internal/storagev2/io/fs" + "github.com/milvus-io/milvus/internal/storagev2/storage/options" +) + +func MakeArrowFileReader(fs fs.Fs, filePath string) (*pqarrow.FileReader, error) { + f, err := fs.OpenFile(filePath) + if err != nil { + return nil, err + } + parquetReader, err := file.NewParquetReader(f) + if err != nil { + return nil, err + } + return pqarrow.NewFileReader(parquetReader, pqarrow.ArrowReadProperties{BatchSize: constant.ReadBatchSize}, memory.DefaultAllocator) +} + +func MakeArrowRecordReader(reader *pqarrow.FileReader, opts *options.ReadOptions) (array.RecordReader, error) { + var rowGroupsIndices []int + var columnIndices []int + metadata := reader.ParquetReader().MetaData() + for _, c := range opts.Columns { + columnIndices = append(columnIndices, metadata.Schema.ColumnIndexByName(c)) + } + for _, f := range opts.Filters { + columnIndices = append(columnIndices, metadata.Schema.ColumnIndexByName(f.GetColumnName())) + } + + for i := 0; i < len(metadata.RowGroups); i++ { + rg := metadata.RowGroup(i) + var canIgnored bool + for _, filter := range opts.Filters { + 
columnIndex := rg.Schema.ColumnIndexByName(filter.GetColumnName()) + columnChunk, err := rg.ColumnChunk(columnIndex) + if err != nil { + return nil, err + } + columnStats, err := columnChunk.Statistics() + if err != nil { + return nil, err + } + if columnStats == nil || !columnStats.HasMinMax() { + continue + } + if filter.CheckStatistics(columnStats) { + canIgnored = true + break + } + } + if !canIgnored { + rowGroupsIndices = append(rowGroupsIndices, i) + } + } + + return reader.GetRecordReader(context.TODO(), columnIndices, rowGroupsIndices) +} diff --git a/internal/storagev2/common/constant/constant.go b/internal/storagev2/common/constant/constant.go new file mode 100644 index 0000000000000..8a72710a95970 --- /dev/null +++ b/internal/storagev2/common/constant/constant.go @@ -0,0 +1,31 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
package constant

// Layout and naming constants shared by the storagev2 packages.
const (
	// ReadBatchSize is the Arrow record-batch size used when reading parquet files.
	ReadBatchSize = 1024

	// Manifest files live under ManifestDir; a manifest is first written with
	// the temporary suffix and then renamed to the final one.
	ManifestTempFileSuffix = ".manifest.tmp"
	ManifestFileSuffix     = ".manifest"
	ManifestDir            = "versions"

	// BlobDir is the directory that holds standalone blob files.
	BlobDir = "blobs"

	// ParquetDataFileSuffix is the extension of data files written by this layer.
	ParquetDataFileSuffix = ".parquet"

	// OffsetFieldName is the reserved column name carrying row offsets.
	OffsetFieldName = "__offset"

	// Per-kind data subdirectories inside a space path.
	VectorDataDir = "vector"
	ScalarDataDir = "scalar"
	DeleteDataDir = "delete"

	// LatestManifestVersion selects the newest manifest version when reading.
	LatestManifestVersion = -1

	// EndpointOverride is the option key used to override the object-store endpoint.
	EndpointOverride = "endpoint_override"
)
package errors

import "github.com/cockroachdb/errors"

// Sentinel errors returned by the storagev2 storage layer.
// Compare against them with errors.Is.
var (
	ErrSchemaIsNil      = errors.New("schema is nil")
	ErrBlobAlreadyExist = errors.New("blob already exist")
	ErrBlobNotExist     = errors.New("blob not exist")
	ErrSchemaNotMatch   = errors.New("schema not match")
	ErrColumnNotExist   = errors.New("column not exist")
	ErrInvalidPath      = errors.New("invalid path")
	ErrNoEndpoint       = errors.New("no endpoint is specified")
)
package log

import "go.uber.org/zap"

// Re-exported zap field constructors so callers can build structured log
// fields without importing zap directly.
var (
	// not lint
	Skip        = zap.Skip
	Binary      = zap.Binary
	Bool        = zap.Bool
	Boolp       = zap.Boolp
	ByteString  = zap.ByteString
	Complex128  = zap.Complex128
	Complex128p = zap.Complex128p
	Complex64   = zap.Complex64
	Complex64p  = zap.Complex64p
	Float64     = zap.Float64
	Float64p    = zap.Float64p
	Float32     = zap.Float32
	Float32p    = zap.Float32p
	Int         = zap.Int
	Intp        = zap.Intp
	Int64       = zap.Int64
	Int64p      = zap.Int64p
	Int32       = zap.Int32
	Int32p      = zap.Int32p
	Int16       = zap.Int16
	Int16p      = zap.Int16p
	Int8        = zap.Int8
	Int8p       = zap.Int8p
	String      = zap.String
	Stringp     = zap.Stringp
	Uint        = zap.Uint
	Uintp       = zap.Uintp
	Uint64      = zap.Uint64
	Uint64p     = zap.Uint64p
	Uint32      = zap.Uint32
	Uint32p     = zap.Uint32p
	Uint16      = zap.Uint16
	Uint16p     = zap.Uint16p
	Uint8       = zap.Uint8
	Uint8p      = zap.Uint8p
	Uintptr     = zap.Uintptr
	Uintptrp    = zap.Uintptrp
	Reflect     = zap.Reflect
	Namespace   = zap.Namespace
	Stringer    = zap.Stringer
	Time        = zap.Time
	Timep       = zap.Timep
	Stack       = zap.Stack
	StackSkip   = zap.StackSkip
	Duration    = zap.Duration
	Durationp   = zap.Durationp
	Object      = zap.Object
	Inline      = zap.Inline
	Any         = zap.Any
)
+ +package log + +import ( + "io" + "os" + + "go.uber.org/zap" + "go.uber.org/zap/zapcore" +) + +type Level = zapcore.Level + +const ( + DebugLevel = zapcore.DebugLevel + InfoLevel = zapcore.InfoLevel + WarnLevel = zapcore.WarnLevel + ErrorLevel = zapcore.ErrorLevel + PanicLevel = zapcore.PanicLevel + FatalLevel = zapcore.FatalLevel +) + +type Logger struct { + l *zap.Logger + al *zap.AtomicLevel +} + +func New(out io.Writer, level Level) *Logger { + if out == nil { + out = os.Stderr + } + + al := zap.NewAtomicLevelAt(level) + cfg := zap.NewDevelopmentEncoderConfig() + + core := zapcore.NewCore( + zapcore.NewConsoleEncoder(cfg), + zapcore.AddSync(out), + al, + ) + return &Logger{l: zap.New(core, zap.AddCaller(), zap.AddCallerSkip(2)), al: &al} +} + +func (l *Logger) SetLevel(level Level) { + if l.al != nil { + l.al.SetLevel(level) + } +} + +type Field = zap.Field + +func (l *Logger) Debug(msg string, fields ...Field) { + l.l.Debug(msg, fields...) +} + +func (l *Logger) Info(msg string, fields ...Field) { + l.l.Info(msg, fields...) +} + +func (l *Logger) Warn(msg string, fields ...Field) { + l.l.Warn(msg, fields...) +} + +func (l *Logger) Error(msg string, fields ...Field) { + l.l.Error(msg, fields...) +} + +func (l *Logger) Panic(msg string, fields ...Field) { + l.l.Panic(msg, fields...) +} + +func (l *Logger) Fatal(msg string, fields ...Field) { + l.l.Fatal(msg, fields...) +} + +func (l *Logger) Sync() error { + return l.l.Sync() +} + +var std = New(os.Stderr, DebugLevel) + +func Default() *Logger { return std } +func ReplaceDefault(l *Logger) { std = l } +func SetLevel(level Level) { std.SetLevel(level) } + +func Debug(msg string, fields ...Field) { std.Debug(msg, fields...) } +func Info(msg string, fields ...Field) { std.Info(msg, fields...) } +func Warn(msg string, fields ...Field) { std.Warn(msg, fields...) } +func Error(msg string, fields ...Field) { std.Error(msg, fields...) } +func Panic(msg string, fields ...Field) { std.Panic(msg, fields...) 
} +func Fatal(msg string, fields ...Field) { std.Fatal(msg, fields...) } + +func Sync() error { return std.Sync() } diff --git a/internal/storagev2/common/log/log_test.go b/internal/storagev2/common/log/log_test.go new file mode 100644 index 0000000000000..646e1323e68e6 --- /dev/null +++ b/internal/storagev2/common/log/log_test.go @@ -0,0 +1,33 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package log + +import ( + "testing" +) + +func TestLogger(t *testing.T) { + defer Sync() + Info("Testing") + Debug("Testing") + Warn("Testing") + Error("Testing") + defer func() { + if err := recover(); err != nil { + Debug("logPanic recover") + } + }() + Panic("Testing") +} diff --git a/internal/storagev2/common/log/options.go b/internal/storagev2/common/log/options.go new file mode 100644 index 0000000000000..3e93d1beaea99 --- /dev/null +++ b/internal/storagev2/common/log/options.go @@ -0,0 +1,34 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
package log

import "go.uber.org/zap"

// Option aliases zap.Option so callers can configure loggers without
// importing zap directly.
type Option = zap.Option

// Re-exported zap logger options.
var (
	WrapCore      = zap.WrapCore
	Hooks         = zap.Hooks
	Fields        = zap.Fields
	ErrorOutput   = zap.ErrorOutput
	Development   = zap.Development
	AddCaller     = zap.AddCaller
	WithCaller    = zap.WithCaller
	AddCallerSkip = zap.AddCallerSkip
	AddStacktrace = zap.AddStacktrace
	IncreaseLevel = zap.IncreaseLevel
	WithFatalHook = zap.WithFatalHook
	WithClock     = zap.WithClock
)
+ +package utils + +import ( + "fmt" + "path/filepath" + "strconv" + "strings" + + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/endian" + "github.com/cockroachdb/errors" + "github.com/google/uuid" + + "github.com/milvus-io/milvus/internal/storagev2/common/constant" + "github.com/milvus-io/milvus/internal/storagev2/common/log" + "github.com/milvus-io/milvus/pkg/proto/storagev2pb" +) + +var ErrInvalidArgument = errors.New("invalid argument") + +func ToProtobufType(dataType arrow.Type) (storagev2pb.LogicType, error) { + typeId := int(dataType) + if typeId < 0 || typeId >= int(storagev2pb.LogicType_MAX_ID) { + return storagev2pb.LogicType_NA, fmt.Errorf("parse data type %v: %w", dataType, ErrInvalidArgument) + } + return storagev2pb.LogicType(typeId), nil +} + +func ToProtobufMetadata(metadata *arrow.Metadata) (*storagev2pb.KeyValueMetadata, error) { + keys := metadata.Keys() + values := metadata.Values() + return &storagev2pb.KeyValueMetadata{Keys: keys, Values: values}, nil +} + +func ToProtobufDataType(dataType arrow.DataType) (*storagev2pb.DataType, error) { + protoType := &storagev2pb.DataType{} + err := SetTypeValues(protoType, dataType) + if err != nil { + return nil, err + } + logicType, err := ToProtobufType(dataType.ID()) + if err != nil { + return nil, err + } + protoType.LogicType = logicType + + if len(GetFields(dataType)) > 0 { + for _, field := range GetFields(dataType) { + fieldCopy := field + protoFieldType, err := ToProtobufField(&fieldCopy) + if err != nil { + return nil, err + } + protoType.Children = append(protoType.Children, protoFieldType) + } + } + + return protoType, nil +} + +// GetFields TODO CHECK MORE TYPES +func GetFields(dataType arrow.DataType) []arrow.Field { + switch dataType.ID() { + case arrow.LIST: + listType, _ := dataType.(*arrow.ListType) + return listType.Fields() + case arrow.STRUCT: + structType, _ := dataType.(*arrow.StructType) + return structType.Fields() + case arrow.MAP: + mapType, _ := 
dataType.(*arrow.MapType) + return mapType.Fields() + case arrow.FIXED_SIZE_LIST: + listType, _ := dataType.(*arrow.FixedSizeListType) + return listType.Fields() + default: + return nil + } +} + +func ToProtobufField(field *arrow.Field) (*storagev2pb.Field, error) { + protoField := &storagev2pb.Field{} + protoField.Name = field.Name + protoField.Nullable = field.Nullable + + if field.Metadata.Len() != 0 { + fieldMetadata, err := ToProtobufMetadata(&field.Metadata) + if err != nil { + return nil, fmt.Errorf("convert to protobuf field: %w", err) + } + protoField.Metadata = fieldMetadata + } + + dataType, err := ToProtobufDataType(field.Type) + if err != nil { + return nil, fmt.Errorf("convert to protobuf field: %w", err) + } + protoField.DataType = dataType + return protoField, nil +} + +func SetTypeValues(protoType *storagev2pb.DataType, dataType arrow.DataType) error { + switch dataType.ID() { + case arrow.FIXED_SIZE_BINARY: + realType, ok := dataType.(*arrow.FixedSizeBinaryType) + if !ok { + return fmt.Errorf("convert to fixed size binary type: %w", ErrInvalidArgument) + } + fixedSizeBinaryType := &storagev2pb.FixedSizeBinaryType{} + fixedSizeBinaryType.ByteWidth = int32(realType.ByteWidth) + protoType.TypeRelatedValues = &storagev2pb.DataType_FixedSizeBinaryType{FixedSizeBinaryType: fixedSizeBinaryType} + case arrow.FIXED_SIZE_LIST: + realType, ok := dataType.(*arrow.FixedSizeListType) + if !ok { + return fmt.Errorf("convert to fixed size list type: %w", ErrInvalidArgument) + } + fixedSizeListType := &storagev2pb.FixedSizeListType{} + fixedSizeListType.ListSize = realType.Len() + protoType.TypeRelatedValues = &storagev2pb.DataType_FixedSizeListType{FixedSizeListType: fixedSizeListType} + case arrow.DICTIONARY: + realType, ok := dataType.(*arrow.DictionaryType) + if !ok { + return fmt.Errorf("convert to dictionary type: %w", ErrInvalidArgument) + } + dictionaryType := &storagev2pb.DictionaryType{} + indexType, err := ToProtobufDataType(realType.IndexType) + if err 
!= nil { + return err + } + dictionaryType.IndexType = indexType + valueType, err := ToProtobufDataType(realType.ValueType) + if err != nil { + return err + } + dictionaryType.ValueType = valueType + dictionaryType.Ordered = realType.Ordered + protoType.TypeRelatedValues = &storagev2pb.DataType_DictionaryType{DictionaryType: dictionaryType} + + case arrow.MAP: + realType, ok := dataType.(*arrow.MapType) + if !ok { + return fmt.Errorf("convert to map type: %w", ErrInvalidArgument) + } + mapType := &storagev2pb.MapType{} + mapType.KeysSorted = realType.KeysSorted + protoType.TypeRelatedValues = &storagev2pb.DataType_MapType{MapType: mapType} + + default: + } + + return nil +} + +func ToProtobufSchema(schema *arrow.Schema) (*storagev2pb.ArrowSchema, error) { + protoSchema := &storagev2pb.ArrowSchema{} + for _, field := range schema.Fields() { + fieldCopy := field + protoField, err := ToProtobufField(&fieldCopy) + if err != nil { + return nil, err + } + protoSchema.Fields = append(protoSchema.Fields, protoField) + } + if schema.Endianness() == endian.LittleEndian { + protoSchema.Endianness = storagev2pb.Endianness_Little + } else if schema.Endianness() == endian.BigEndian { + protoSchema.Endianness = storagev2pb.Endianness_Big + } + + // TODO FIX ME: golang proto not support proto_schema->mutable_metadata()->add_keys(key); + if schema.HasMetadata() && !schema.HasMetadata() { + for _, key := range schema.Metadata().Keys() { + protoKeyValue := protoSchema.GetMetadata() + protoKeyValue.Keys = append(protoKeyValue.Keys, key) + } + for _, value := range schema.Metadata().Values() { + protoKeyValue := protoSchema.GetMetadata() + protoKeyValue.Values = append(protoKeyValue.Values, value) + } + } + + return protoSchema, nil +} + +func FromProtobufSchema(schema *storagev2pb.ArrowSchema) (*arrow.Schema, error) { + fields := make([]arrow.Field, 0, len(schema.Fields)) + for _, field := range schema.Fields { + tmp, err := FromProtobufField(field) + if err != nil { + return nil, err 
+ } + fields = append(fields, *tmp) + } + tmp, err := FromProtobufKeyValueMetadata(schema.Metadata) + if err != nil { + return nil, err + } + newSchema := arrow.NewSchema(fields, tmp) + return newSchema, nil +} + +func FromProtobufField(field *storagev2pb.Field) (*arrow.Field, error) { + datatype, err := FromProtobufDataType(field.DataType) + if err != nil { + return nil, err + } + + metadata, err := FromProtobufKeyValueMetadata(field.GetMetadata()) + if err != nil { + return nil, err + } + + return &arrow.Field{Name: field.Name, Type: datatype, Nullable: field.Nullable, Metadata: *metadata}, nil +} + +func FromProtobufKeyValueMetadata(metadata *storagev2pb.KeyValueMetadata) (*arrow.Metadata, error) { + keys := make([]string, 0) + values := make([]string, 0) + if metadata != nil { + keys = metadata.Keys + values = metadata.Values + } + newMetadata := arrow.NewMetadata(keys, values) + return &newMetadata, nil +} + +func FromProtobufDataType(dataType *storagev2pb.DataType) (arrow.DataType, error) { + switch dataType.LogicType { + case storagev2pb.LogicType_NA: + return &arrow.NullType{}, nil + case storagev2pb.LogicType_BOOL: + return &arrow.BooleanType{}, nil + case storagev2pb.LogicType_UINT8: + return &arrow.Uint8Type{}, nil + case storagev2pb.LogicType_INT8: + return &arrow.Int8Type{}, nil + case storagev2pb.LogicType_UINT16: + return &arrow.Uint16Type{}, nil + case storagev2pb.LogicType_INT16: + return &arrow.Int16Type{}, nil + case storagev2pb.LogicType_UINT32: + return &arrow.Uint32Type{}, nil + case storagev2pb.LogicType_INT32: + return &arrow.Int32Type{}, nil + case storagev2pb.LogicType_UINT64: + return &arrow.Uint64Type{}, nil + case storagev2pb.LogicType_INT64: + return &arrow.Int64Type{}, nil + case storagev2pb.LogicType_HALF_FLOAT: + return &arrow.Float16Type{}, nil + case storagev2pb.LogicType_FLOAT: + return &arrow.Float32Type{}, nil + case storagev2pb.LogicType_DOUBLE: + return &arrow.Float64Type{}, nil + case storagev2pb.LogicType_STRING: + return 
&arrow.StringType{}, nil + case storagev2pb.LogicType_BINARY: + return &arrow.BinaryType{}, nil + + case storagev2pb.LogicType_LIST: + fieldType, err := FromProtobufField(dataType.Children[0]) + if err != nil { + return nil, err + } + listType := arrow.ListOf(fieldType.Type) + return listType, nil + + case storagev2pb.LogicType_STRUCT: + fields := make([]arrow.Field, 0, len(dataType.Children)) + for _, child := range dataType.Children { + field, err := FromProtobufField(child) + if err != nil { + return nil, err + } + fields = append(fields, *field) + } + structType := arrow.StructOf(fields...) + return structType, nil + + case storagev2pb.LogicType_DICTIONARY: + keyType, err := FromProtobufField(dataType.Children[0]) + if err != nil { + return nil, err + } + valueType, err := FromProtobufField(dataType.Children[1]) + if err != nil { + return nil, err + } + dictType := &arrow.DictionaryType{ + IndexType: keyType.Type, + ValueType: valueType.Type, + } + return dictType, nil + + case storagev2pb.LogicType_MAP: + fieldType, err := FromProtobufField(dataType.Children[0]) + if err != nil { + return nil, err + } + // TODO FIX ME + return arrow.MapOf(fieldType.Type, fieldType.Type), nil + + case storagev2pb.LogicType_FIXED_SIZE_BINARY: + + sizeBinaryType := arrow.FixedSizeBinaryType{ByteWidth: int(dataType.GetFixedSizeBinaryType().ByteWidth)} + return &sizeBinaryType, nil + + case storagev2pb.LogicType_FIXED_SIZE_LIST: + fieldType, err := FromProtobufField(dataType.Children[0]) + if err != nil { + return nil, err + } + fixedSizeListType := arrow.FixedSizeListOf(int32(int(dataType.GetFixedSizeListType().ListSize)), fieldType.Type) + return fixedSizeListType, nil + + default: + return nil, fmt.Errorf("parse protobuf datatype: %w", ErrInvalidArgument) + } +} + +func GetNewParquetFilePath(path string) string { + scalarFileId := uuid.New() + path = filepath.Join(path, scalarFileId.String()+constant.ParquetDataFileSuffix) + return path +} + +func GetManifestFilePath(path 
string, version int64) string { + path = filepath.Join(path, constant.ManifestDir, strconv.FormatInt(version, 10)+constant.ManifestFileSuffix) + return path +} + +func GetManifestTmpFilePath(path string, version int64) string { + path = filepath.Join(path, constant.ManifestDir, strconv.FormatInt(version, 10)+constant.ManifestTempFileSuffix) + return path +} + +func GetBlobFilePath(path string) string { + blobId := uuid.New() + return filepath.Join(GetBlobDir(path), blobId.String()) +} + +func GetManifestDir(path string) string { + path = filepath.Join(path, constant.ManifestDir) + return path +} + +func GetVectorDataDir(path string) string { + return filepath.Join(path, constant.VectorDataDir) +} + +func GetScalarDataDir(path string) string { + return filepath.Join(path, constant.ScalarDataDir) +} + +func GetBlobDir(path string) string { + return filepath.Join(path, constant.BlobDir) +} + +func GetDeleteDataDir(path string) string { + return filepath.Join(path, constant.DeleteDataDir) +} + +func ParseVersionFromFileName(path string) int64 { + pos := strings.Index(path, constant.ManifestFileSuffix) + if pos == -1 || !strings.HasSuffix(path, constant.ManifestFileSuffix) { + log.Warn("manifest file suffix not match", log.String("path", path)) + return -1 + } + version := path[0:pos] + versionInt, err := strconv.ParseInt(version, 10, 64) + if err != nil { + log.Error("parse version from file name error", log.String("path", path), log.String("version", version)) + return -1 + } + return versionInt +} + +func ProjectSchema(sc *arrow.Schema, columns []string) *arrow.Schema { + var fields []arrow.Field + for _, field := range sc.Fields() { + for _, column := range columns { + if field.Name == column { + fields = append(fields, field) + break + } + } + } + + return arrow.NewSchema(fields, nil) +} diff --git a/internal/storagev2/docs/layout.md b/internal/storagev2/docs/layout.md new file mode 100644 index 0000000000000..776d0cd39d26e --- /dev/null +++ 
# Storage v2 layout

**Storage layer interface**: supplies the storage reader/writer, including read options. Maintains storage metadata and handles atomic reads/writes across multiple files (which may have different formats) on disk.

---

**File reader/writer interface**: receives data and read options from the layer above and converts the raw bytes into the defined data structures.

---

**File format reader/writer**: format-specific readers/writers (e.g. parquet, raw, or others such as ORC).

---

**File system interface**: supports different file systems (e.g. in-memory, AWS, MinIO, POSIX, Windows).
+ +package blob + +import ( + "github.com/milvus-io/milvus/pkg/proto/storagev2pb" +) + +type Blob struct { + Name string + Size int64 + File string +} + +func (b Blob) ToProtobuf() *storagev2pb.Blob { + blob := &storagev2pb.Blob{} + blob.Name = b.Name + blob.Size = b.Size + blob.File = b.File + return blob +} + +func FromProtobuf(blob *storagev2pb.Blob) Blob { + return Blob{ + Name: blob.Name, + Size: blob.Size, + File: blob.File, + } +} diff --git a/internal/storagev2/file/fragment/deletefragment.go b/internal/storagev2/file/fragment/deletefragment.go new file mode 100644 index 0000000000000..2d7329adf5a8b --- /dev/null +++ b/internal/storagev2/file/fragment/deletefragment.go @@ -0,0 +1,45 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
package fragment

import (
	"github.com/milvus-io/milvus/internal/storagev2/io/fs"
	"github.com/milvus-io/milvus/internal/storagev2/storage/schema"
)

type (
	// pkType is the dynamic type of a primary-key value.
	pkType any
	// DeleteFragmentVector is a collection of delete fragments.
	DeleteFragmentVector []DeleteFragment
	// DeleteFragment records deleted rows of a fragment: for each primary key,
	// the offsets of its deleted rows.
	DeleteFragment struct {
		id     int64
		schema *schema.Schema
		fs     fs.Fs
		// data maps a primary key to the offsets of its deleted rows.
		data map[pkType][]int64
	}
)

// NewDeleteFragment returns an empty DeleteFragment for the given fragment id,
// bound to the given schema and filesystem.
func NewDeleteFragment(id int64, schema *schema.Schema, fs fs.Fs) *DeleteFragment {
	return &DeleteFragment{
		id:     id,
		schema: schema,
		fs:     fs,
		data:   make(map[pkType][]int64),
	}
}

// Make is intended to build a DeleteFragment from the files of frag.
// It is an unimplemented stub and currently panics when called.
func Make(f fs.Fs, s *schema.Schema, frag Fragment) DeleteFragment {
	// TODO: implement
	panic("implement me")
}
+ } + return files +} + +func NewFragment() Fragment { + return Fragment{ + files: make([]string, 0), + } +} + +func (f *Fragment) AddFile(file string) { + f.files = append(f.files, file) +} + +func (f *Fragment) Files() []string { + return f.files +} + +func (f *Fragment) FragmentId() int64 { + return f.fragmentId +} + +func (f *Fragment) SetFragmentId(fragmentId int64) { + f.fragmentId = fragmentId +} + +func (f *Fragment) ToProtobuf() *storagev2pb.Fragment { + fragment := &storagev2pb.Fragment{} + fragment.Id = f.fragmentId + fragment.Files = append(fragment.Files, f.files...) + return fragment +} + +func FromProtobuf(fragment *storagev2pb.Fragment) Fragment { + newFragment := NewFragment() + newFragment.SetFragmentId(fragment.GetId()) + newFragment.files = append(newFragment.files, fragment.Files...) + return newFragment +} diff --git a/internal/storagev2/filter/conjunction_filter.go b/internal/storagev2/filter/conjunction_filter.go new file mode 100644 index 0000000000000..30d9bcb44cf74 --- /dev/null +++ b/internal/storagev2/filter/conjunction_filter.go @@ -0,0 +1,84 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package filter + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/parquet/metadata" + "github.com/bits-and-blooms/bitset" +) + +type ConjunctionAndFilter struct { + filters []Filter + columnName string +} + +func (f *ConjunctionAndFilter) GetColumnName() string { + return f.columnName +} + +// FIXME: should have 3 cases. +// 1. all records satisfy the filter, this group dont need to check filter again. +// 2. no record satisfies the filter. +// 3. some records satisfy the filter, this group should check filter again. +func (f *ConjunctionAndFilter) CheckStatistics(stats metadata.TypedStatistics) bool { + for _, filter := range f.filters { + if filter.CheckStatistics(stats) { + return true + } + } + return false +} + +func (f *ConjunctionAndFilter) Type() FilterType { + return And +} + +func (f *ConjunctionAndFilter) Apply(colData arrow.Array, filterBitSet *bitset.BitSet) { + for i := 0; i < len(f.filters); i++ { + f.filters[i].Apply(colData, filterBitSet) + } +} + +type ConjunctionOrFilter struct { + filters []Filter +} + +func (f *ConjunctionOrFilter) CheckStatistics(stats metadata.TypedStatistics) bool { + for _, filter := range f.filters { + if !filter.CheckStatistics(stats) { + return false + } + } + return true +} + +func (f *ConjunctionOrFilter) Apply(colData arrow.Array, filterBitSet *bitset.BitSet) { + orBitSet := bitset.New(filterBitSet.Len()) + for i := 1; i < len(f.filters); i++ { + childBitSet := filterBitSet.Clone() + f.filters[i].Apply(colData, childBitSet) + orBitSet.Intersection(childBitSet) + } + filterBitSet.Union(orBitSet) +} + +func (f *ConjunctionOrFilter) Type() FilterType { + return Or +} + +func NewConjunctionAndFilter(filters ...Filter) *ConjunctionAndFilter { + return &ConjunctionAndFilter{filters: filters} +} diff --git a/internal/storagev2/filter/constant_filter.go b/internal/storagev2/filter/constant_filter.go new file mode 100644 index 0000000000000..63a94b307a1b8 --- /dev/null +++ 
b/internal/storagev2/filter/constant_filter.go @@ -0,0 +1,151 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package filter + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + "github.com/apache/arrow/go/v12/parquet" + "github.com/apache/arrow/go/v12/parquet/metadata" + "github.com/bits-and-blooms/bitset" +) + +type ConstantFilter struct { + cmpType ComparisonType + value interface{} + columnName string +} + +func (f *ConstantFilter) GetColumnName() string { + return f.columnName +} + +func (f *ConstantFilter) CheckStatistics(stats metadata.TypedStatistics) bool { + // FIXME: value may be int8/uint8/...., we should encapsulate the value type, now we just do type assertion for prototype + switch stats.Type() { + case parquet.Types.Int32: + i32stats := stats.(*metadata.Int32Statistics) + if i32stats.HasMinMax() { + return checkStats(f.value.(int32), i32stats.Min(), i32stats.Max(), f.cmpType) + } + case parquet.Types.Int64: + i64stats := stats.(*metadata.Int64Statistics) + if i64stats.HasMinMax() { + return checkStats(f.value.(int64), i64stats.Min(), i64stats.Max(), f.cmpType) + } + case parquet.Types.Float: + floatstats := stats.(*metadata.Float32Statistics) + if floatstats.HasMinMax() { + return checkStats(f.value.(float32), floatstats.Min(), floatstats.Max(), f.cmpType) + } + case parquet.Types.Double: + doublestats := stats.(*metadata.Float64Statistics) + if 
doublestats.HasMinMax() { + return checkStats(f.value.(float64), doublestats.Min(), doublestats.Max(), f.cmpType) + } + } + return false +} + +type comparableValue interface { + int32 | int64 | float32 | float64 +} + +func checkStats[T comparableValue](value, min, max T, cmpType ComparisonType) bool { + switch cmpType { + case Equal: + return value < min || value > max + case NotEqual: + return value == min && value == max + case LessThan: + return value <= min + case LessThanOrEqual: + return value < min + case GreaterThan: + return value >= max + case GreaterThanOrEqual: + return value > max + default: + return false + } +} + +func (f *ConstantFilter) Apply(colData arrow.Array, filterBitSet *bitset.BitSet) { + switch data := colData.(type) { + case *array.Int8: + filterColumn(f.value.(int8), data.Int8Values(), f.cmpType, filterBitSet) + case *array.Uint8: + filterColumn(f.value.(uint8), data.Uint8Values(), f.cmpType, filterBitSet) + case *array.Int16: + filterColumn(f.value.(int16), data.Int16Values(), f.cmpType, filterBitSet) + case *array.Uint16: + filterColumn(f.value.(uint16), data.Uint16Values(), f.cmpType, filterBitSet) + case *array.Int32: + filterColumn(f.value.(int32), data.Int32Values(), f.cmpType, filterBitSet) + case *array.Uint32: + filterColumn(f.value.(uint32), data.Uint32Values(), f.cmpType, filterBitSet) + case *array.Int64: + filterColumn(f.value.(int64), data.Int64Values(), f.cmpType, filterBitSet) + case *array.Uint64: + filterColumn(f.value.(uint64), data.Uint64Values(), f.cmpType, filterBitSet) + case *array.Float32: + filterColumn(f.value.(float32), data.Float32Values(), f.cmpType, filterBitSet) + case *array.Float64: + filterColumn(f.value.(float64), data.Float64Values(), f.cmpType, filterBitSet) + } +} + +type comparableColumnType interface { + int8 | uint8 | int16 | uint16 | int32 | uint32 | int64 | uint64 | float32 | float64 +} + +func filterColumn[T comparableColumnType](value T, targets []T, cmpType ComparisonType, filterBitSet 
*bitset.BitSet) { + for i, target := range targets { + if checkColumn(value, target, cmpType) { + filterBitSet.Set(uint(i)) + } + } +} + +func checkColumn[T comparableColumnType](value, target T, cmpType ComparisonType) bool { + switch cmpType { + case Equal: + return value != target + case NotEqual: + return value == target + case LessThan: + return value <= target + case LessThanOrEqual: + return value < target + case GreaterThan: + return value >= target + case GreaterThanOrEqual: + return value > target + default: + return false + } +} + +func (f *ConstantFilter) Type() FilterType { + return Constant +} + +func NewConstantFilter(cmpType ComparisonType, columnName string, value interface{}) *ConstantFilter { + return &ConstantFilter{ + cmpType: cmpType, + columnName: columnName, + value: value, + } +} diff --git a/internal/storagev2/filter/filter.go b/internal/storagev2/filter/filter.go new file mode 100644 index 0000000000000..4b803fcdd3352 --- /dev/null +++ b/internal/storagev2/filter/filter.go @@ -0,0 +1,48 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package filter + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/parquet/metadata" + "github.com/bits-and-blooms/bitset" +) + +type FilterType int8 + +const ( + And FilterType = iota + Or + Constant + Range +) + +type Filter interface { + CheckStatistics(metadata.TypedStatistics) bool + Type() FilterType + Apply(colData arrow.Array, filterBitSet *bitset.BitSet) + GetColumnName() string +} + +type ComparisonType int8 + +const ( + Equal ComparisonType = iota + NotEqual + LessThan + LessThanOrEqual + GreaterThan + GreaterThanOrEqual +) diff --git a/internal/storagev2/io/format/parquet/file_reader.go b/internal/storagev2/io/format/parquet/file_reader.go new file mode 100644 index 0000000000000..5e4b689758fb4 --- /dev/null +++ b/internal/storagev2/io/format/parquet/file_reader.go @@ -0,0 +1,220 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package parquet + +import ( + "context" + + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + "github.com/apache/arrow/go/v12/arrow/memory" + "github.com/apache/arrow/go/v12/parquet/file" + "github.com/apache/arrow/go/v12/parquet/metadata" + "github.com/apache/arrow/go/v12/parquet/pqarrow" + "github.com/bits-and-blooms/bitset" + + "github.com/milvus-io/milvus/internal/storagev2/common/constant" + "github.com/milvus-io/milvus/internal/storagev2/filter" + "github.com/milvus-io/milvus/internal/storagev2/io/fs" + "github.com/milvus-io/milvus/internal/storagev2/storage/options" +) + +type FileReader struct { + reader *pqarrow.FileReader + options *options.ReadOptions + recReader pqarrow.RecordReader +} + +// When the Reader reaches the end of the underlying stream, it returns (nil, io.EOF) +func (r *FileReader) Read() (arrow.Record, error) { + if r.recReader == nil { + // lazy init + if err := r.initRecReader(); err != nil { + return nil, err + } + } + rec, err := r.recReader.Read() + if err != nil { + return nil, err + } + + return applyFilters(rec, r.options.Filters), nil +} + +func applyFilters(rec arrow.Record, filters map[string]filter.Filter) arrow.Record { + filterBitSet := bitset.New(uint(rec.NumRows())) + for col, f := range filters { + colIndices := rec.Schema().FieldIndices(col) + if len(colIndices) == 0 { + panic("column not found") + } + colIndex := colIndices[0] + arr := rec.Column(colIndex) + f.Apply(arr, filterBitSet) + } + + if filterBitSet.None() { + return rec + } + + var cols []arrow.Array + for i := 0; i < int(rec.NumCols()); i++ { + col := rec.Column(i) + switch t := col.(type) { + case *array.Int8: + builder := array.NewInt8Builder(memory.DefaultAllocator) + filtered := filterRecord(t.Int8Values(), filterBitSet) + builder.AppendValues(filtered, nil) + cols = append(cols, builder.NewArray()) + case *array.Uint8: + builder := array.NewUint8Builder(memory.DefaultAllocator) + filtered := 
filterRecord(t.Uint8Values(), filterBitSet) + builder.AppendValues(filtered, nil) + cols = append(cols, builder.NewArray()) + case *array.Int16: + builder := array.NewInt16Builder(memory.DefaultAllocator) + filtered := filterRecord(t.Int16Values(), filterBitSet) + builder.AppendValues(filtered, nil) + cols = append(cols, builder.NewArray()) + case *array.Uint16: + builder := array.NewUint16Builder(memory.DefaultAllocator) + filtered := filterRecord(t.Uint16Values(), filterBitSet) + builder.AppendValues(filtered, nil) + cols = append(cols, builder.NewArray()) + case *array.Int32: + builder := array.NewInt32Builder(memory.DefaultAllocator) + filtered := filterRecord(t.Int32Values(), filterBitSet) + builder.AppendValues(filtered, nil) + cols = append(cols, builder.NewArray()) + case *array.Uint32: + builder := array.NewUint32Builder(memory.DefaultAllocator) + filtered := filterRecord(t.Uint32Values(), filterBitSet) + builder.AppendValues(filtered, nil) + cols = append(cols, builder.NewArray()) + case *array.Int64: + builder := array.NewInt64Builder(memory.DefaultAllocator) + filtered := filterRecord(t.Int64Values(), filterBitSet) + builder.AppendValues(filtered, nil) + cols = append(cols, builder.NewArray()) + case *array.Uint64: + builder := array.NewUint64Builder(memory.DefaultAllocator) + filtered := filterRecord(t.Uint64Values(), filterBitSet) + builder.AppendValues(filtered, nil) + cols = append(cols, builder.NewArray()) + default: + panic("unsupported type") + } + } + + return array.NewRecord(rec.Schema(), cols, int64(cols[0].Len())) +} + +type comparableColumnType interface { + int8 | uint8 | int16 | uint16 | int32 | uint32 | int64 | uint64 | float32 | float64 +} + +func filterRecord[T comparableColumnType](targets []T, filterBitSet *bitset.BitSet) []T { + var res []T + for i := 0; i < int(filterBitSet.Len()); i++ { + if !filterBitSet.Test(uint(i)) { + res = append(res, targets[i]) + } + } + return res +} + +func (r *FileReader) initRecReader() error { + var ( 
+ filters map[string]filter.Filter = r.options.Filters + columns []string = r.options.Columns + ) + + var ( + rowGroupNum int = r.reader.ParquetReader().NumRowGroups() + fileMetaData *metadata.FileMetaData = r.reader.ParquetReader().MetaData() + ) + + var rowGroups []int + var colIndices []int + // filters check column statistics +x1: + for i := 0; i < rowGroupNum; i++ { + rowGroupMetaData := fileMetaData.RowGroup(i) + for col, filter := range filters { + if checkColumnStats(rowGroupMetaData, col, filter) { + // ignore the row group + break x1 + } + } + rowGroups = append(rowGroups, i) + } + + for _, col := range columns { + colIndex := fileMetaData.Schema.Root().FieldIndexByName(col) + if colIndex == -1 { + panic("column not found") + } + colIndices = append(colIndices, colIndex) + } + + recReader, err := r.reader.GetRecordReader(context.TODO(), colIndices, rowGroups) + if err != nil { + return err + } + r.recReader = recReader + return nil +} + +func checkColumnStats(rowGroupMetaData *metadata.RowGroupMetaData, col string, f filter.Filter) bool { + colIndex := rowGroupMetaData.Schema.Root().FieldIndexByName(col) + if colIndex == -1 { + panic("column not found") + } + colMetaData, err := rowGroupMetaData.ColumnChunk(colIndex) + if err != nil { + panic(err) + } + + stats, err := colMetaData.Statistics() + if err != nil || stats == nil { + return false + } + return f.CheckStatistics(stats) +} + +func (r *FileReader) Close() error { + if r.recReader != nil { + r.recReader.Release() + } + return nil +} + +func NewFileReader(fs fs.Fs, filePath string, options *options.ReadOptions) (*FileReader, error) { + f, err := fs.OpenFile(filePath) + if err != nil { + return nil, err + } + + parquetReader, err := file.NewParquetReader(f) + if err != nil { + return nil, err + } + + reader, err := pqarrow.NewFileReader(parquetReader, pqarrow.ArrowReadProperties{BatchSize: constant.ReadBatchSize}, memory.DefaultAllocator) + if err != nil { + return nil, err + } + return 
&FileReader{reader: reader, options: options}, nil +} diff --git a/internal/storagev2/io/format/parquet/file_writer.go b/internal/storagev2/io/format/parquet/file_writer.go new file mode 100644 index 0000000000000..2132343ca06a9 --- /dev/null +++ b/internal/storagev2/io/format/parquet/file_writer.go @@ -0,0 +1,61 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package parquet + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/parquet" + "github.com/apache/arrow/go/v12/parquet/pqarrow" + + "github.com/milvus-io/milvus/internal/storagev2/io/format" + "github.com/milvus-io/milvus/internal/storagev2/io/fs" +) + +var _ format.Writer = (*FileWriter)(nil) + +type FileWriter struct { + writer *pqarrow.FileWriter + count int64 +} + +func (f *FileWriter) Write(record arrow.Record) error { + if err := f.writer.Write(record); err != nil { + return err + } + f.count += record.NumRows() + return nil +} + +func (f *FileWriter) Count() int64 { + return f.count +} + +func (f *FileWriter) Close() error { + return f.writer.Close() +} + +func NewFileWriter(schema *arrow.Schema, fs fs.Fs, filePath string) (*FileWriter, error) { + file, err := fs.OpenFile(filePath) + if err != nil { + return nil, err + } + + w, err := pqarrow.NewFileWriter(schema, file, parquet.NewWriterProperties(), pqarrow.DefaultWriterProps()) + if err != nil { + return nil, err + } + + return &FileWriter{writer: w}, nil +} diff --git 
a/internal/storagev2/io/format/reader.go b/internal/storagev2/io/format/reader.go new file mode 100644 index 0000000000000..7907020b29da6 --- /dev/null +++ b/internal/storagev2/io/format/reader.go @@ -0,0 +1,24 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package format + +import ( + "github.com/apache/arrow/go/v12/arrow" +) + +type Reader interface { + Read() (arrow.Record, error) + Close() error +} diff --git a/internal/storagev2/io/format/writer.go b/internal/storagev2/io/format/writer.go new file mode 100644 index 0000000000000..04ec6df738e90 --- /dev/null +++ b/internal/storagev2/io/format/writer.go @@ -0,0 +1,23 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package format + +import "github.com/apache/arrow/go/v12/arrow" + +type Writer interface { + Write(record arrow.Record) error + Count() int64 + Close() error +} diff --git a/internal/storagev2/io/fs/factory.go b/internal/storagev2/io/fs/factory.go new file mode 100644 index 0000000000000..7271115ec7167 --- /dev/null +++ b/internal/storagev2/io/fs/factory.go @@ -0,0 +1,40 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +import ( + "net/url" + + "github.com/milvus-io/milvus/internal/storagev2/storage/options" +) + +type Factory struct{} + +func (f *Factory) Create(fsType options.FsType, uri *url.URL) (Fs, error) { + switch fsType { + case options.InMemory: + return NewMemoryFs(), nil + case options.LocalFS: + return NewLocalFs(uri), nil + case options.S3: + return NewMinioFs(uri) + default: + panic("unknown fs type") + } +} + +func NewFsFactory() *Factory { + return &Factory{} +} diff --git a/internal/storagev2/io/fs/file/file.go b/internal/storagev2/io/fs/file/file.go new file mode 100644 index 0000000000000..d29729a587449 --- /dev/null +++ b/internal/storagev2/io/fs/file/file.go @@ -0,0 +1,25 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package file + +import "io" + +type File interface { + io.Writer + io.ReaderAt + io.Seeker + io.Reader + io.Closer +} diff --git a/internal/storagev2/io/fs/file/local_file.go b/internal/storagev2/io/fs/file/local_file.go new file mode 100644 index 0000000000000..035c411c3718a --- /dev/null +++ b/internal/storagev2/io/fs/file/local_file.go @@ -0,0 +1,52 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package file + +import ( + "io" + "os" +) + +var EOF = io.EOF + +type LocalFile struct { + file os.File +} + +func (l *LocalFile) Read(p []byte) (n int, err error) { + return l.file.Read(p) +} + +func (l *LocalFile) Write(p []byte) (n int, err error) { + return l.file.Write(p) +} + +func (l *LocalFile) ReadAt(p []byte, off int64) (n int, err error) { + return l.file.ReadAt(p, off) +} + +func (l *LocalFile) Seek(offset int64, whence int) (int64, error) { + return l.file.Seek(offset, whence) +} + +func (l *LocalFile) Close() error { + return l.file.Close() +} + +func NewLocalFile(f *os.File) *LocalFile { + return &LocalFile{ + file: *f, + } +} diff --git a/internal/storagev2/io/fs/file/memory_file.go b/internal/storagev2/io/fs/file/memory_file.go new file mode 100644 index 0000000000000..3e0ebfa963854 --- /dev/null +++ b/internal/storagev2/io/fs/file/memory_file.go @@ -0,0 +1,116 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
var errInvalid = errors.New("invalid argument")

// MemoryFile is an in-memory implementation of the file.File interface:
// a growable byte buffer supporting Read/Write/Seek/ReadAt/Close.
type MemoryFile struct {
	b []byte // backing buffer
	i int    // current read/write cursor into b
}

// Close is a no-op; there is no underlying resource to release.
func (f *MemoryFile) Close() error {
	return nil
}

// Read copies bytes from the cursor forward, advancing the cursor.
// It returns io.EOF once the cursor is at or past the end of the buffer.
func (f *MemoryFile) Read(p []byte) (n int, err error) {
	if f.i >= len(f.b) {
		return 0, io.EOF
	}
	n = copy(p, f.b[f.i:])
	f.i += n
	return n, nil
}

// Write writes at the current cursor, growing the buffer as needed, and
// advances the cursor by the number of bytes written.
func (f *MemoryFile) Write(b []byte) (int, error) {
	n, err := f.writeAt(b, int64(f.i))
	f.i += n
	return n, err
}

// writeAt writes b starting at offset off. Writing past the current end
// zero-fills the gap first (sparse-file semantics). The previous version
// ignored the error returned by truncate; it is now propagated.
func (f *MemoryFile) writeAt(b []byte, off int64) (int, error) {
	if off < 0 || int64(int(off)) < off {
		return 0, errInvalid
	}
	if off > int64(len(f.b)) {
		if err := f.truncate(off); err != nil {
			return 0, err
		}
	}
	n := copy(f.b[off:], b)
	f.b = append(f.b, b[n:]...)
	return len(b), nil
}

// truncate resizes the buffer to n bytes, zero-filling when growing.
func (f *MemoryFile) truncate(n int64) error {
	switch {
	case n < 0 || int64(int(n)) < n:
		return errInvalid
	case n <= int64(len(f.b)):
		f.b = f.b[:n]
		return nil
	default:
		f.b = append(f.b, make([]byte, int(n)-len(f.b))...)
		return nil
	}
}

// ReadAt reads len(b) bytes starting at offset off. Per the io.ReaderAt
// contract it does NOT move the cursor — the previous version advanced
// f.i here, which broke interleaved Read/ReadAt usage.
func (f *MemoryFile) ReadAt(b []byte, off int64) (n int, err error) {
	if off < 0 || int64(int(off)) < off {
		return 0, errInvalid
	}
	if off > int64(len(f.b)) {
		return 0, io.EOF
	}
	n = copy(b, f.b[off:])
	if n < len(b) {
		return n, io.EOF
	}
	return n, nil
}

// Seek repositions the cursor relative to the start, the current position,
// or the end, following the io.Seeker convention.
func (f *MemoryFile) Seek(offset int64, whence int) (int64, error) {
	var abs int64
	switch whence {
	case io.SeekStart:
		abs = offset
	case io.SeekCurrent:
		abs = int64(f.i) + offset
	case io.SeekEnd:
		abs = int64(len(f.b)) + offset
	default:
		return 0, errInvalid
	}
	if abs < 0 {
		return 0, errInvalid
	}
	f.i = int(abs)
	return abs, nil
}

// Bytes returns the backing buffer (not a copy).
func (f *MemoryFile) Bytes() []byte {
	return f.b
}

// NewMemoryFile creates a MemoryFile over b; b is used directly, not copied.
func NewMemoryFile(b []byte) *MemoryFile {
	return &MemoryFile{b: b}
}
+ +package file + +import ( + "bytes" + "context" + + "github.com/minio/minio-go/v7" +) + +var _ File = (*MinioFile)(nil) + +type MinioFile struct { + *minio.Object + writer *MemoryFile + client *minio.Client + fileName string + bucketName string +} + +func (f *MinioFile) Write(b []byte) (int, error) { + return f.writer.Write(b) +} + +func (f *MinioFile) Close() error { + if len(f.writer.b) == 0 { + return nil + } + _, err := f.client.PutObject(context.TODO(), f.bucketName, f.fileName, bytes.NewReader(f.writer.b), int64(len(f.writer.b)), minio.PutObjectOptions{}) + return err +} + +func NewMinioFile(client *minio.Client, fileName string, bucketName string) (*MinioFile, error) { + _, err := client.StatObject(context.TODO(), bucketName, fileName, minio.StatObjectOptions{}) + if err != nil { + eresp := minio.ToErrorResponse(err) + if eresp.Code != "NoSuchKey" { + return nil, err + } + return &MinioFile{ + writer: NewMemoryFile(nil), + client: client, + fileName: fileName, + bucketName: bucketName, + }, nil + } + + object, err := client.GetObject(context.TODO(), bucketName, fileName, minio.GetObjectOptions{}) + if err != nil { + return nil, err + } + + return &MinioFile{ + Object: object, + writer: NewMemoryFile(nil), + client: client, + fileName: fileName, + bucketName: bucketName, + }, nil +} diff --git a/internal/storagev2/io/fs/fs.go b/internal/storagev2/io/fs/fs.go new file mode 100644 index 0000000000000..ef605de4da3d0 --- /dev/null +++ b/internal/storagev2/io/fs/fs.go @@ -0,0 +1,34 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +import ( + "github.com/milvus-io/milvus/internal/storagev2/io/fs/file" +) + +type Fs interface { + OpenFile(path string) (file.File, error) + Rename(src string, dst string) error + DeleteFile(path string) error + CreateDir(path string) error + List(path string) ([]FileEntry, error) + ReadFile(path string) ([]byte, error) + Exist(path string) (bool, error) + Path() string + MkdirAll(dir string, i int) error +} +type FileEntry struct { + Path string +} diff --git a/internal/storagev2/io/fs/fs_util.go b/internal/storagev2/io/fs/fs_util.go new file mode 100644 index 0000000000000..117923f6d8d52 --- /dev/null +++ b/internal/storagev2/io/fs/fs_util.go @@ -0,0 +1,42 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package fs + +import ( + "fmt" + "net/url" + + "github.com/cockroachdb/errors" + + "github.com/milvus-io/milvus/internal/storagev2/storage/options" +) + +var ErrInvalidFsType = errors.New("invalid fs type") + +func BuildFileSystem(uri string) (Fs, error) { + parsedURI, err := url.Parse(uri) + if err != nil { + return nil, fmt.Errorf("build file system with uri %s: %w", uri, err) + } + switch parsedURI.Scheme { + case "file": + return NewFsFactory().Create(options.LocalFS, parsedURI) + case "s3": + return NewFsFactory().Create(options.S3, parsedURI) + + default: + return nil, fmt.Errorf("build file system with uri %s: %w", uri, ErrInvalidFsType) + } +} diff --git a/internal/storagev2/io/fs/local_fs.go b/internal/storagev2/io/fs/local_fs.go new file mode 100644 index 0000000000000..a08a82f610d3a --- /dev/null +++ b/internal/storagev2/io/fs/local_fs.go @@ -0,0 +1,95 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package fs + +import ( + "net/url" + "os" + "path/filepath" + + "github.com/milvus-io/milvus/internal/storagev2/common/log" + "github.com/milvus-io/milvus/internal/storagev2/io/fs/file" +) + +type LocalFS struct { + path string +} + +func (l *LocalFS) MkdirAll(dir string, i int) error { + return os.MkdirAll(dir, os.FileMode(i)) +} + +func (l *LocalFS) OpenFile(path string) (file.File, error) { + // Extract the directory from the path + dir := filepath.Dir(path) + // Create the directory (including all necessary parent directories) + err := os.MkdirAll(dir, os.ModePerm) + if err != nil { + return nil, err + } + open, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0o666) + if err != nil { + return nil, err + } + return file.NewLocalFile(open), nil +} + +// Rename renames (moves) a file. If newpath already exists and is not a directory, Rename replaces it. +func (l *LocalFS) Rename(src string, dst string) error { + return os.Rename(src, dst) +} + +func (l *LocalFS) DeleteFile(path string) error { + return os.Remove(path) +} + +func (l *LocalFS) CreateDir(path string) error { + err := os.MkdirAll(path, os.ModePerm) + if err != nil && !os.IsExist(err) { + log.Error(err.Error()) + } + return nil +} + +func (l *LocalFS) List(path string) ([]FileEntry, error) { + entries, err := os.ReadDir(path) + if err != nil { + log.Error(err.Error()) + return nil, err + } + + ret := make([]FileEntry, 0, len(entries)) + for _, entry := range entries { + ret = append(ret, FileEntry{Path: filepath.Join(path, entry.Name())}) + } + + return ret, nil +} + +func (l *LocalFS) ReadFile(path string) ([]byte, error) { + return os.ReadFile(path) +} + +func (l *LocalFS) Exist(path string) (bool, error) { + panic("not implemented") +} + +func (l *LocalFS) Path() string { + return l.path +} + +func NewLocalFs(uri *url.URL) *LocalFS { + return &LocalFS{uri.Path} +} diff --git a/internal/storagev2/io/fs/memory_fs.go b/internal/storagev2/io/fs/memory_fs.go new file mode 100644 index 
0000000000000..5bebc3c40f13d --- /dev/null +++ b/internal/storagev2/io/fs/memory_fs.go @@ -0,0 +1,78 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +import ( + "github.com/milvus-io/milvus/internal/storagev2/io/fs/file" +) + +type MemoryFs struct { + files map[string]*file.MemoryFile +} + +func (m *MemoryFs) MkdirAll(dir string, i int) error { + // TODO implement me + panic("implement me") +} + +func (m *MemoryFs) List(path string) ([]FileEntry, error) { + // TODO implement me + panic("implement me") +} + +func (m *MemoryFs) OpenFile(path string) (file.File, error) { + if f, ok := m.files[path]; ok { + return file.NewMemoryFile(f.Bytes()), nil + } + f := file.NewMemoryFile(nil) + m.files[path] = f + return f, nil +} + +func (m *MemoryFs) Rename(path string, path2 string) error { + if _, ok := m.files[path]; !ok { + return nil + } + m.files[path2] = m.files[path] + delete(m.files, path) + return nil +} + +func (m *MemoryFs) DeleteFile(path string) error { + delete(m.files, path) + return nil +} + +func (m *MemoryFs) CreateDir(path string) error { + return nil +} + +func (m *MemoryFs) ReadFile(path string) ([]byte, error) { + panic("implement me") +} + +func (m *MemoryFs) Exist(path string) (bool, error) { + panic("not implemented") +} + +func (m *MemoryFs) Path() string { + panic("not implemented") +} + +func NewMemoryFs() *MemoryFs { + return &MemoryFs{ + files: make(map[string]*file.MemoryFile), + } +} diff 
--git a/internal/storagev2/io/fs/minio_fs.go b/internal/storagev2/io/fs/minio_fs.go new file mode 100644 index 0000000000000..bd7ce235377e6 --- /dev/null +++ b/internal/storagev2/io/fs/minio_fs.go @@ -0,0 +1,201 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +import ( + "context" + "fmt" + "io" + "net/url" + "path" + "strings" + + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" + "go.uber.org/zap" + + "github.com/milvus-io/milvus/internal/storagev2/common/constant" + "github.com/milvus-io/milvus/internal/storagev2/common/errors" + "github.com/milvus-io/milvus/internal/storagev2/common/log" + "github.com/milvus-io/milvus/internal/storagev2/io/fs/file" +) + +type MinioFs struct { + client *minio.Client + bucketName string + path string +} + +func (fs *MinioFs) MkdirAll(dir string, i int) error { + // TODO implement me + panic("implement me") +} + +func (fs *MinioFs) OpenFile(path string) (file.File, error) { + err, bucket, path := getRealPath(path) + if err != nil { + return nil, err + } + return file.NewMinioFile(fs.client, path, bucket) +} + +func (fs *MinioFs) Rename(src string, dst string) error { + err, dstBucket, dst := getRealPath(dst) + if err != nil { + return err + } + err, srcBucket, src := getRealPath(src) + if err != nil { + return err + } + _, err = fs.client.CopyObject(context.TODO(), minio.CopyDestOptions{Bucket: dstBucket, Object: dst}, minio.CopySrcOptions{Bucket: 
srcBucket, Object: src}) + if err != nil { + return err + } + err = fs.client.RemoveObject(context.TODO(), srcBucket, src, minio.RemoveObjectOptions{}) + if err != nil { + log.Warn("failed to remove source object", log.String("source", src)) + } + return nil +} + +func (fs *MinioFs) DeleteFile(path string) error { + err, bucket, path := getRealPath(path) + if err != nil { + return err + } + return fs.client.RemoveObject(context.TODO(), bucket, path, minio.RemoveObjectOptions{}) +} + +func (fs *MinioFs) CreateDir(path string) error { + return nil +} + +func (fs *MinioFs) List(prefix string) ([]FileEntry, error) { + err, bucket, prefix := getRealPath(prefix) + if err != nil { + return nil, err + } + ret := make([]FileEntry, 0) + for objInfo := range fs.client.ListObjects(context.TODO(), bucket, minio.ListObjectsOptions{Prefix: prefix, Recursive: true}) { + if objInfo.Err != nil { + log.Warn("list object error", zap.Error(objInfo.Err)) + return nil, objInfo.Err + } + ret = append(ret, FileEntry{Path: path.Join(bucket, objInfo.Key)}) + } + return ret, nil +} + +func (fs *MinioFs) ReadFile(path string) ([]byte, error) { + err, bucket, path := getRealPath(path) + if err != nil { + return nil, err + } + obj, err := fs.client.GetObject(context.TODO(), bucket, path, minio.GetObjectOptions{}) + if err != nil { + return nil, err + } + + stat, err := obj.Stat() + if err != nil { + return nil, err + } + + buf := make([]byte, stat.Size) + n, err := obj.Read(buf) + if err != nil && err != io.EOF { + return nil, err + } + if n != int(stat.Size) { + return nil, fmt.Errorf("failed to read full file, expect: %d, actual: %d", stat.Size, n) + } + return buf, nil +} + +func (fs *MinioFs) Exist(path string) (bool, error) { + err, bucket, path := getRealPath(path) + if err != nil { + return false, err + } + _, err = fs.client.StatObject(context.TODO(), bucket, path, minio.StatObjectOptions{}) + if err != nil { + resp := minio.ToErrorResponse(err) + if resp.Code == "NoSuchKey" { + return 
false, nil + } + return false, err + } + return true, nil +} + +func (fs *MinioFs) Path() string { + return path.Join(fs.bucketName, strings.TrimPrefix(fs.path, "/")) +} + +// uri should be s3://username:password@bucket/path?endpoint_override=localhost%3A9000 +func NewMinioFs(uri *url.URL) (*MinioFs, error) { + accessKey := uri.User.Username() + secretAccessKey, set := uri.User.Password() + if !set { + log.Warn("secret access key not set") + } + + endpoints, ok := uri.Query()[constant.EndpointOverride] + if !ok || len(endpoints) == 0 { + return nil, errors.ErrNoEndpoint + } + + cli, err := minio.New(endpoints[0], &minio.Options{ + BucketLookup: minio.BucketLookupAuto, + Creds: credentials.NewStaticV4(accessKey, secretAccessKey, ""), + }) + if err != nil { + return nil, err + } + + bucket := uri.Host + path := uri.Path + + log.Info("minio fs infos", zap.String("endpoint", endpoints[0]), zap.String("bucket", bucket), zap.String("path", path)) + + exist, err := cli.BucketExists(context.TODO(), bucket) + if err != nil { + return nil, err + } + + if !exist { + if err = cli.MakeBucket(context.TODO(), bucket, minio.MakeBucketOptions{}); err != nil { + return nil, err + } + } + + return &MinioFs{ + client: cli, + bucketName: bucket, + path: path, + }, nil +} + +func getRealPath(path string) (error, string, string) { + if strings.HasPrefix(path, "/") { + return fmt.Errorf("Invalid path, %s should not start with '/'", path), "", "" + } + words := strings.SplitN(path, "/", 2) + if (len(words)) != 2 { + return fmt.Errorf("Invalid path, %s should contains at least one '/'", path), "", "" + } + return nil, words[0], words[1] +} diff --git a/internal/storagev2/packed/arrow/c/abi.h b/internal/storagev2/packed/arrow/c/abi.h new file mode 100644 index 0000000000000..002e3d8c57786 --- /dev/null +++ b/internal/storagev2/packed/arrow/c/abi.h @@ -0,0 +1,95 @@ + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef ARROW_C_DATA_INTERFACE +#define 
ARROW_C_DATA_INTERFACE + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DATA_INTERFACE + +#ifndef ARROW_C_STREAM_INTERFACE +#define ARROW_C_STREAM_INTERFACE + +struct ArrowArrayStream { + // Callback to get the stream type + // (will be the same for all arrays in the stream). + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowArray must be released independently from the stream. + int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. + // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. 
+ // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowArrayStream*); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowArrayStream*); + + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_STREAM_INTERFACE + +#ifdef __cplusplus +} +#endif diff --git a/internal/storagev2/packed/arrow/c/helpers.h b/internal/storagev2/packed/arrow/c/helpers.h new file mode 100644 index 0000000000000..aa33d7ada4019 --- /dev/null +++ b/internal/storagev2/packed/arrow/c/helpers.h @@ -0,0 +1,115 @@ + +#pragma once + +#include +#include + +#include "arrow/c/abi.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/// Query whether the C schema is released +static inline int +ArrowSchemaIsReleased(const struct ArrowSchema* schema) { + return schema->release == NULL; +} + +/// Mark the C schema released (for use in release callbacks) +static inline void +ArrowSchemaMarkReleased(struct ArrowSchema* schema) { + schema->release = NULL; +} + +/// Move the C schema from `src` to `dest` +/// +/// Note `dest` must *not* point to a valid schema already, otherwise there +/// will be a memory leak. 
+static inline void +ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dest) { + assert(dest != src); + assert(!ArrowSchemaIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowSchema)); + ArrowSchemaMarkReleased(src); +} + +/// Release the C schema, if necessary, by calling its release callback +static inline void +ArrowSchemaRelease(struct ArrowSchema* schema) { + if (!ArrowSchemaIsReleased(schema)) { + schema->release(schema); + assert(ArrowSchemaIsReleased(schema)); + } +} + +/// Query whether the C array is released +static inline int +ArrowArrayIsReleased(const struct ArrowArray* array) { + return array->release == NULL; +} + +/// Mark the C array released (for use in release callbacks) +static inline void +ArrowArrayMarkReleased(struct ArrowArray* array) { + array->release = NULL; +} + +/// Move the C array from `src` to `dest` +/// +/// Note `dest` must *not* point to a valid array already, otherwise there +/// will be a memory leak. +static inline void +ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) { + assert(dest != src); + assert(!ArrowArrayIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowArray)); + ArrowArrayMarkReleased(src); +} + +/// Release the C array, if necessary, by calling its release callback +static inline void +ArrowArrayRelease(struct ArrowArray* array) { + if (!ArrowArrayIsReleased(array)) { + array->release(array); + assert(ArrowArrayIsReleased(array)); + } +} + +/// Query whether the C array stream is released +static inline int +ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) { + return stream->release == NULL; +} + +/// Mark the C array stream released (for use in release callbacks) +static inline void +ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) { + stream->release = NULL; +} + +/// Move the C array stream from `src` to `dest` +/// +/// Note `dest` must *not* point to a valid stream already, otherwise there +/// will be a memory leak. 
+static inline void +ArrowArrayStreamMove(struct ArrowArrayStream* src, + struct ArrowArrayStream* dest) { + assert(dest != src); + assert(!ArrowArrayStreamIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowArrayStream)); + ArrowArrayStreamMarkReleased(src); +} + +/// Release the C array stream, if necessary, by calling its release callback +static inline void +ArrowArrayStreamRelease(struct ArrowArrayStream* stream) { + if (!ArrowArrayStreamIsReleased(stream)) { + stream->release(stream); + assert(ArrowArrayStreamIsReleased(stream)); + } +} + +#ifdef __cplusplus +} +#endif diff --git a/internal/storagev2/packed/packed_reader.go b/internal/storagev2/packed/packed_reader.go new file mode 100644 index 0000000000000..8623ea3b312f8 --- /dev/null +++ b/internal/storagev2/packed/packed_reader.go @@ -0,0 +1,84 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
package packed

/*
#cgo pkg-config: milvus_core

#include <stdlib.h>
#include "segcore/packed_reader_c.h"
#include "arrow/c/abi.h"
#include "arrow/c/helpers.h"
*/
import "C"

import (
	"fmt"
	"unsafe"

	"github.com/apache/arrow/go/v12/arrow"
	"github.com/apache/arrow/go/v12/arrow/cdata"
	"github.com/cockroachdb/errors"
)

// NewPackedReader opens a C++-side packed reader over the files at path,
// interpreting records with the given arrow schema and a read buffer of
// bufferSize bytes. Non-zero C status codes are surfaced as errors.
func NewPackedReader(path string, schema *arrow.Schema, bufferSize int) (*PackedReader, error) {
	// Export the Go schema through the Arrow C data interface so the
	// C++ reader can interpret the record layout.
	var cas cdata.CArrowSchema
	cdata.ExportArrowSchema(schema, &cas)
	cSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(&cas))

	// C-owned copy of the path string; freed after the C call returns.
	cPath := C.CString(path)
	defer C.free(unsafe.Pointer(cPath))

	cBufferSize := C.int64_t(bufferSize)

	var cPackedReader C.CPackedReader
	status := C.NewPackedReader(cPath, cSchema, cBufferSize, &cPackedReader)
	if status != 0 {
		return nil, fmt.Errorf("failed to new packed reader: %s, status: %d", path, status)
	}
	return &PackedReader{cPackedReader: cPackedReader, schema: schema}, nil
}

// ReadNext returns the next record batch from the packed stream, or
// (nil, nil) once the stream is exhausted. The record is imported via the
// Arrow C data interface; the caller is responsible for releasing it.
func (pr *PackedReader) ReadNext() (arrow.Record, error) {
	var cArr C.CArrowArray
	var cSchema C.CArrowSchema
	status := C.ReadNext(pr.cPackedReader, &cArr, &cSchema)
	if status != 0 {
		return nil, fmt.Errorf("ReadNext failed with error code %d", status)
	}

	if cArr == nil {
		return nil, nil // end of stream, no more records to read
	}

	// Convert ArrowArray to Go RecordBatch using cdata
	goCArr := (*cdata.CArrowArray)(unsafe.Pointer(cArr))
	goCSchema := (*cdata.CArrowSchema)(unsafe.Pointer(cSchema))
	recordBatch, err := cdata.ImportCRecordBatch(goCArr, goCSchema)
	if err != nil {
		return nil, fmt.Errorf("failed to convert ArrowArray to Record: %w", err)
	}

	// Return the RecordBatch as an arrow.Record
	return recordBatch, nil
}

// Close releases the underlying C reader and its resources.
func (pr *PackedReader) Close() error {
	status := C.CloseReader(pr.cPackedReader)
	if status != 0 {
		return errors.New("PackedReader: failed to close file")
	}
	return nil
}
b/internal/storagev2/packed/packed_test.go new file mode 100644 index 0000000000000..3043e0a80fbd2 --- /dev/null +++ b/internal/storagev2/packed/packed_test.go @@ -0,0 +1,156 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package packed + +import ( + "testing" + + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + "github.com/apache/arrow/go/v12/arrow/memory" + "github.com/stretchr/testify/suite" + "golang.org/x/exp/rand" +) + +func TestPackedReadAndWrite(t *testing.T) { + suite.Run(t, new(PackedTestSuite)) +} + +type PackedTestSuite struct { + suite.Suite + schema *arrow.Schema + rec arrow.Record +} + +func (suite *PackedTestSuite) SetupTest() { + schema := arrow.NewSchema([]arrow.Field{ + {Name: "a", Type: arrow.PrimitiveTypes.Int32}, + {Name: "b", Type: arrow.PrimitiveTypes.Int64}, + {Name: "c", Type: arrow.BinaryTypes.String}, + }, nil) + suite.schema = schema + + b := array.NewRecordBuilder(memory.DefaultAllocator, schema) + defer b.Release() + for idx := range schema.Fields() { + switch idx { + case 0: + b.Field(idx).(*array.Int32Builder).AppendValues( + []int32{int32(1), int32(2), int32(3)}, nil, + ) + case 1: + b.Field(idx).(*array.Int64Builder).AppendValues( + []int64{int64(4), int64(5), int64(6)}, nil, + ) + case 2: + b.Field(idx).(*array.StringBuilder).AppendValues( + []string{"a", "b", "c"}, nil, + ) + } + } + rec := b.NewRecord() + suite.rec = rec +} + +func (suite 
*PackedTestSuite) TestPackedOneFile() { + batches := 100 + + path := "/tmp" + bufferSize := 10 * 1024 * 1024 // 10MB + pw, err := NewPackedWriter(path, suite.schema, bufferSize) + suite.NoError(err) + for i := 0; i < batches; i++ { + err = pw.WriteRecordBatch(suite.rec) + suite.NoError(err) + } + err = pw.Close() + suite.NoError(err) + + reader, err := NewPackedReader(path, suite.schema, bufferSize) + suite.NoError(err) + rr, err := reader.ReadNext() + suite.NoError(err) + defer rr.Release() + suite.Equal(int64(3*batches), rr.NumRows()) +} + +func (suite *PackedTestSuite) TestPackedMultiFiles() { + batches := 1000 + + b := array.NewRecordBuilder(memory.DefaultAllocator, suite.schema) + strLen := 1000 + arrLen := 30 + defer b.Release() + for idx := range suite.schema.Fields() { + switch idx { + case 0: + values := make([]int32, arrLen) + for i := 0; i < arrLen; i++ { + values[i] = int32(i + 1) + } + b.Field(idx).(*array.Int32Builder).AppendValues(values, nil) + case 1: + values := make([]int64, arrLen) + for i := 0; i < arrLen; i++ { + values[i] = int64(i + 1) + } + b.Field(idx).(*array.Int64Builder).AppendValues(values, nil) + case 2: + values := make([]string, arrLen) + for i := 0; i < arrLen; i++ { + values[i] = randomString(strLen) + } + b.Field(idx).(*array.StringBuilder).AppendValues(values, nil) + } + } + rec := b.NewRecord() + defer rec.Release() + path := "/tmp" + bufferSize := 10 * 1024 * 1024 // 10MB + pw, err := NewPackedWriter(path, suite.schema, bufferSize) + suite.NoError(err) + for i := 0; i < batches; i++ { + err = pw.WriteRecordBatch(rec) + suite.NoError(err) + } + err = pw.Close() + suite.NoError(err) + + reader, err := NewPackedReader(path, suite.schema, bufferSize) + suite.NoError(err) + var rows int64 = 0 + var rr arrow.Record + for { + rr, err = reader.ReadNext() + suite.NoError(err) + if rr == nil { + // end of file + break + } + + rows += rr.NumRows() + } + + suite.Equal(int64(arrLen*batches), rows) +} + +func randomString(length int) string 
{ + const charset = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + result := make([]byte, length) + for i := range result { + result[i] = charset[rand.Intn(len(charset))] + } + return string(result) +} diff --git a/internal/storagev2/packed/packed_writer.go b/internal/storagev2/packed/packed_writer.go new file mode 100644 index 0000000000000..bca82da0a1cf9 --- /dev/null +++ b/internal/storagev2/packed/packed_writer.go @@ -0,0 +1,77 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
package packed

/*
#cgo pkg-config: milvus_core

#include <stdlib.h>
#include "segcore/packed_writer_c.h"
#include "arrow/c/abi.h"
#include "arrow/c/helpers.h"
*/
import "C"

import (
	"fmt"
	"unsafe"

	"github.com/apache/arrow/go/v12/arrow"
	"github.com/apache/arrow/go/v12/arrow/cdata"
	"github.com/cockroachdb/errors"
)

// NewPackedWriter opens a C++-side packed writer targeting path, laying out
// records with the given arrow schema and a write buffer of bufferSize
// bytes. Non-zero C status codes are surfaced as errors.
func NewPackedWriter(path string, schema *arrow.Schema, bufferSize int) (*PackedWriter, error) {
	// Export the Go schema through the Arrow C data interface so the
	// C++ writer knows the record layout.
	var cas cdata.CArrowSchema
	cdata.ExportArrowSchema(schema, &cas)
	cSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(&cas))

	// C-owned copy of the path string; freed after the C call returns.
	cPath := C.CString(path)
	defer C.free(unsafe.Pointer(cPath))

	cBufferSize := C.int64_t(bufferSize)

	var cPackedWriter C.CPackedWriter
	status := C.NewPackedWriter(cPath, cSchema, cBufferSize, &cPackedWriter)
	if status != 0 {
		return nil, fmt.Errorf("failed to new packed writer: %s, status: %d", path, status)
	}
	return &PackedWriter{cPackedWriter: cPackedWriter}, nil
}

// WriteRecordBatch hands one record batch to the C writer via the Arrow C
// data interface. Ownership of the exported array/schema transfers to the
// C side through the interface's release callbacks.
func (pw *PackedWriter) WriteRecordBatch(recordBatch arrow.Record) error {
	var caa cdata.CArrowArray
	var cas cdata.CArrowSchema

	cdata.ExportArrowRecordBatch(recordBatch, &caa, &cas)

	cArr := (*C.struct_ArrowArray)(unsafe.Pointer(&caa))
	cSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(&cas))

	status := C.WriteRecordBatch(pw.cPackedWriter, cArr, cSchema)
	if status != 0 {
		return errors.New("PackedWriter: failed to write record batch")
	}

	return nil
}

// Close flushes and releases the underlying C writer.
func (pw *PackedWriter) Close() error {
	status := C.CloseWriter(pw.cPackedWriter)
	if status != 0 {
		return errors.New("PackedWriter: failed to close file")
	}
	return nil
}
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package packed + +/* +#include +#include "arrow/c/abi.h" +#include "arrow/c/helpers.h" +#include "segcore/packed_reader_c.h" +#include "segcore/packed_writer_c.h" +*/ +import "C" + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/cdata" +) + +type PackedWriter struct { + cPackedWriter C.CPackedWriter +} + +type PackedReader struct { + cPackedReader C.CPackedReader + arr *cdata.CArrowArray + schema *arrow.Schema +} + +type ( + // CArrowSchema is the C Data Interface for ArrowSchemas + CArrowSchema = C.struct_ArrowSchema + // CArrowArray is the C Data Interface object for Arrow Arrays as defined in abi.h + CArrowArray = C.struct_ArrowArray +) diff --git a/internal/storagev2/reader/commonreader/delete_reader.go b/internal/storagev2/reader/commonreader/delete_reader.go new file mode 100644 index 0000000000000..f245a951c0a23 --- /dev/null +++ b/internal/storagev2/reader/commonreader/delete_reader.go @@ -0,0 +1,65 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package commonreader + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + + "github.com/milvus-io/milvus/internal/storagev2/file/fragment" + "github.com/milvus-io/milvus/internal/storagev2/storage/options" + "github.com/milvus-io/milvus/internal/storagev2/storage/schema" +) + +type DeleteReader struct { + recordReader array.RecordReader + schemaOptions *schema.SchemaOptions + deleteFragments fragment.DeleteFragmentVector + options *options.ReadOptions +} + +func (d DeleteReader) Retain() { + // TODO implement me + panic("implement me") +} + +func (d DeleteReader) Release() { + // TODO implement me + panic("implement me") +} + +func (d DeleteReader) Schema() *arrow.Schema { + // TODO implement me + panic("implement me") +} + +func (d DeleteReader) Next() bool { + // TODO implement me + panic("implement me") +} + +func (d DeleteReader) Record() arrow.Record { + // TODO implement me + panic("implement me") +} + +func (d DeleteReader) Err() error { + // TODO implement me + panic("implement me") +} + +func NewDeleteReader(recordReader array.RecordReader, schemaOptions *schema.SchemaOptions, deleteFragments fragment.DeleteFragmentVector, options *options.ReadOptions) *DeleteReader { + return &DeleteReader{recordReader: recordReader, schemaOptions: schemaOptions, deleteFragments: deleteFragments, options: options} +} diff --git a/internal/storagev2/reader/commonreader/filter_reader.go b/internal/storagev2/reader/commonreader/filter_reader.go new file mode 100644 index 0000000000000..3f5bd325a309d --- /dev/null +++ b/internal/storagev2/reader/commonreader/filter_reader.go @@ -0,0 +1,84 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package commonreader + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + + "github.com/milvus-io/milvus/internal/storagev2/storage/options" +) + +type FilterReader struct { + recordReader array.RecordReader + option *options.ReadOptions + currentFilteredBatchReader array.RecordReader +} + +func (r *FilterReader) Retain() { + // TODO implement me + panic("implement me") +} + +func (r *FilterReader) Release() { + // TODO implement me + panic("implement me") +} + +func (r *FilterReader) Schema() *arrow.Schema { + // TODO implement me + panic("implement me") +} + +func (r *FilterReader) Record() arrow.Record { + // TODO implement me + panic("implement me") +} + +func (r *FilterReader) Err() error { + // TODO implement me + panic("implement me") +} + +func MakeFilterReader(recordReader array.RecordReader, option *options.ReadOptions) *FilterReader { + return &FilterReader{ + recordReader: recordReader, + option: option, + } +} + +func (r *FilterReader) Next() bool { + //for { + // if r.currentFilteredBatchReader != nil { + // filteredBatch := r.currentFilteredBatchReader.Next() + // if err != nil { + // return false + // } + // if filteredBatch == nil { + // r.currentFilteredBatchReader = nil + // continue + // } + // return filteredBatch, nil + // } + // err := r.NextFilteredBatchReader() + // if err != nil { + // return nil + // } + // if r.currentFilteredBatchReader == nil { + // return nil + // } + //} + return false +} diff --git a/internal/storagev2/reader/commonreader/projection_reader.go 
b/internal/storagev2/reader/commonreader/projection_reader.go new file mode 100644 index 0000000000000..5a9ca6cb55693 --- /dev/null +++ b/internal/storagev2/reader/commonreader/projection_reader.go @@ -0,0 +1,35 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package commonreader + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + + "github.com/milvus-io/milvus/internal/storagev2/common/utils" + "github.com/milvus-io/milvus/internal/storagev2/storage/options" +) + +type ProjectionReader struct { + array.RecordReader + reader array.RecordReader + options *options.ReadOptions + schema *arrow.Schema +} + +func NewProjectionReader(reader array.RecordReader, options *options.ReadOptions, schema *arrow.Schema) array.RecordReader { + projectionSchema := utils.ProjectSchema(schema, options.Columns) + return &ProjectionReader{reader: reader, options: options, schema: projectionSchema} +} diff --git a/internal/storagev2/reader/recordreader/filter_query_record.go b/internal/storagev2/reader/recordreader/filter_query_record.go new file mode 100644 index 0000000000000..bf1b932e457a9 --- /dev/null +++ b/internal/storagev2/reader/recordreader/filter_query_record.go @@ -0,0 +1,49 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package recordreader + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + + "github.com/milvus-io/milvus/internal/storagev2/file/fragment" + "github.com/milvus-io/milvus/internal/storagev2/io/fs" + "github.com/milvus-io/milvus/internal/storagev2/storage/options" + "github.com/milvus-io/milvus/internal/storagev2/storage/schema" +) + +type FilterQueryRecordReader struct { + // TODO implement me + ref int64 + schema *schema.Schema + options *options.ReadOptions + fs fs.Fs + scalarFragment fragment.FragmentVector + vectorFragment fragment.FragmentVector + deleteFragments fragment.DeleteFragmentVector + record arrow.Record +} + +func NewFilterQueryReader( + s *schema.Schema, + options *options.ReadOptions, + f fs.Fs, + scalarFragment fragment.FragmentVector, + vectorFragment fragment.FragmentVector, + deleteFragments fragment.DeleteFragmentVector, +) array.RecordReader { + // TODO implement me + panic("implement me") +} diff --git a/internal/storagev2/reader/recordreader/merge_record_reader.go b/internal/storagev2/reader/recordreader/merge_record_reader.go new file mode 100644 index 0000000000000..1348c8c6fecc5 --- /dev/null +++ b/internal/storagev2/reader/recordreader/merge_record_reader.go @@ -0,0 +1,77 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// MergeRecordReader is the planned reader that merges scalar and vector
// fragments when filters only involve the primary-key/version columns.
// Every method is an unimplemented stub that panics.
type MergeRecordReader struct {
	ref             int64
	schema          *schema.Schema
	options         *options.ReadOptions
	fs              fs.Fs
	scalarFragments fragment.FragmentVector
	vectorFragments fragment.FragmentVector
	deleteFragments fragment.DeleteFragmentVector
	record          arrow.Record
}

// Retain is an unimplemented stub.
func (m MergeRecordReader) Retain() {
	// TODO implement me
	panic("implement me")
}

// Release is an unimplemented stub.
func (m MergeRecordReader) Release() {
	// TODO implement me
	panic("implement me")
}

// Schema is an unimplemented stub.
func (m MergeRecordReader) Schema() *arrow.Schema {
	// TODO implement me
	panic("implement me")
}

// Next is an unimplemented stub.
func (m MergeRecordReader) Next() bool {
	// TODO implement me
	panic("implement me")
}

// Record is an unimplemented stub.
func (m MergeRecordReader) Record() arrow.Record {
	// TODO implement me
	panic("implement me")
}

// Err is an unimplemented stub.
func (m MergeRecordReader) Err() error {
	// TODO implement me
	panic("implement me")
}

// NewMergeRecordReader is a placeholder constructor that currently panics.
// NOTE(review): reachable from MakeRecordReader — confirm callers tolerate
// the panic until this is implemented.
func NewMergeRecordReader(
	s *schema.Schema,
	options *options.ReadOptions,
	f fs.Fs,
	scalarFragment fragment.FragmentVector,
	vectorFragment fragment.FragmentVector,
	deleteFragments fragment.DeleteFragmentVector,
) *MergeRecordReader {
	// TODO implement me
	panic("implement me")
}
// MultiFilesSequentialReader presents the records of several parquet files,
// read one after another, as a single arrow RecordReader.
type MultiFilesSequentialReader struct {
	fs      fs.Fs
	schema  *arrow.Schema
	files   []string // flattened file list gathered from the input fragments
	nextPos int      // index of the next file to open
	options *options.ReadOptions
	// currReader reads the file most recently opened; nil between files.
	currReader array.RecordReader
	err        error
	ref        int64 // reference count; resources are freed when it hits zero
}

// Retain increments the reference count.
func (m *MultiFilesSequentialReader) Retain() {
	atomic.AddInt64(&m.ref, 1)
}

// Release decrements the reference count and, at zero, releases the
// currently open file reader.
func (m *MultiFilesSequentialReader) Release() {
	if atomic.AddInt64(&m.ref, -1) == 0 {
		if m.currReader != nil {
			m.currReader.Release()
			m.currReader = nil
		}
	}
}

// Schema returns the arrow schema shared by all underlying files.
func (m *MultiFilesSequentialReader) Schema() *arrow.Schema {
	return m.schema
}

// Next advances to the next record, transparently rolling over to the next
// file when the current one is exhausted. It returns false at end of input
// or on error; check Err afterwards to distinguish the two.
func (m *MultiFilesSequentialReader) Next() bool {
	for {
		if m.currReader == nil {
			if m.nextPos >= len(m.files) {
				// All files consumed.
				return false
			}

			m.nextReader()
			if m.err != nil {
				return false
			}
			m.nextPos++
		}
		if m.currReader.Next() {
			return true
		}
		if m.currReader.Err() != nil {
			m.err = m.currReader.Err()
			return false
		}
		// Current file exhausted without error: release it and try the next.
		if m.currReader != nil {
			m.currReader.Release()
			m.currReader = nil
		}
	}
}

// Record returns the record produced by the last successful Next call, or
// nil when no file reader is currently open.
func (m *MultiFilesSequentialReader) Record() arrow.Record {
	if m.currReader != nil {
		return m.currReader.Record()
	}
	return nil
}

// Err returns the first error hit while opening or reading any file.
func (m *MultiFilesSequentialReader) Err() error {
	return m.err
}

// nextReader opens files[nextPos] and installs a record reader over it,
// recording any failure in m.err rather than returning it.
func (m *MultiFilesSequentialReader) nextReader() {
	var fileReader *pqarrow.FileReader
	fileReader, m.err = arrowutil.MakeArrowFileReader(m.fs, m.files[m.nextPos])
	if m.err != nil {
		return
	}
	m.currReader, m.err = arrowutil.MakeArrowRecordReader(fileReader, m.options)
}

// NewMultiFilesSequentialReader flattens the files of all fragments and
// returns a reader over them with an initial reference count of one.
func NewMultiFilesSequentialReader(fs fs.Fs, fragments fragment.FragmentVector, schema *arrow.Schema, options *options.ReadOptions) *MultiFilesSequentialReader {
	files := make([]string, 0, len(fragments))
	for _, f := range fragments {
		files = append(files, f.Files()...)
	}

	return &MultiFilesSequentialReader{
		fs:      fs,
		schema:  schema,
		options: options,
		files:   files,
		nextPos: 0,
		ref:     1,
	}
}
+// See the License for the specific language governing permissions and +// limitations under the License. + +package recordreader + +import ( + "github.com/apache/arrow/go/v12/arrow/array" + + "github.com/milvus-io/milvus/internal/storagev2/file/fragment" + "github.com/milvus-io/milvus/internal/storagev2/filter" + "github.com/milvus-io/milvus/internal/storagev2/io/fs" + "github.com/milvus-io/milvus/internal/storagev2/storage/manifest" + "github.com/milvus-io/milvus/internal/storagev2/storage/options" + "github.com/milvus-io/milvus/internal/storagev2/storage/schema" +) + +func MakeRecordReader( + m *manifest.Manifest, + s *schema.Schema, + f fs.Fs, + deleteFragments fragment.DeleteFragmentVector, + options *options.ReadOptions, +) array.RecordReader { + relatedColumns := make([]string, 0) + relatedColumns = append(relatedColumns, options.Columns...) + + for _, filter := range options.Filters { + relatedColumns = append(relatedColumns, filter.GetColumnName()) + } + + scalarData := m.GetScalarFragments() + vectorData := m.GetVectorFragments() + + onlyScalar := onlyContainScalarColumns(s, relatedColumns) + onlyVector := onlyContainVectorColumns(s, relatedColumns) + + if onlyScalar || onlyVector { + var dataFragments fragment.FragmentVector + if onlyScalar { + dataFragments = scalarData + } else { + dataFragments = vectorData + } + return NewScanRecordReader(s, options, f, dataFragments, deleteFragments) + } + if len(options.Filters) > 0 && filtersOnlyContainPKAndVersion(s, options.FiltersV2) { + return NewMergeRecordReader(s, options, f, scalarData, vectorData, deleteFragments) + } + return NewFilterQueryReader(s, options, f, scalarData, vectorData, deleteFragments) +} + +func onlyContainVectorColumns(schema *schema.Schema, relatedColumns []string) bool { + for _, column := range relatedColumns { + if schema.Options().VectorColumn != column && schema.Options().PrimaryColumn != column && schema.Options().VersionColumn != column { + return false + } + } + return true +} 
+ +func onlyContainScalarColumns(schema *schema.Schema, relatedColumns []string) bool { + for _, column := range relatedColumns { + if schema.Options().VectorColumn == column { + return false + } + } + return true +} + +func filtersOnlyContainPKAndVersion(s *schema.Schema, filters []filter.Filter) bool { + for _, f := range filters { + if f.GetColumnName() != s.Options().PrimaryColumn && + f.GetColumnName() != s.Options().VersionColumn { + return false + } + } + return true +} + +func MakeScanDeleteReader(manifest *manifest.Manifest, fs fs.Fs) array.RecordReader { + return NewMultiFilesSequentialReader(fs, manifest.GetDeleteFragments(), manifest.GetSchema().DeleteSchema(), options.NewReadOptions()) +} diff --git a/internal/storagev2/reader/recordreader/scan_record.go b/internal/storagev2/reader/recordreader/scan_record.go new file mode 100644 index 0000000000000..b6846184fcc51 --- /dev/null +++ b/internal/storagev2/reader/recordreader/scan_record.go @@ -0,0 +1,151 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// ScanRecordReader sequentially scans every data file of the given fragments
// and yields their non-empty record batches.
type ScanRecordReader struct {
	ref             int64 // reference count; resources freed at zero
	schema          *schema.Schema
	options         *options.ReadOptions
	fs              fs.Fs
	dataFragments   fragment.FragmentVector
	deleteFragments fragment.DeleteFragmentVector
	rec             arrow.Record  // record returned by the last Next
	curReader       format.Reader // reader over the current data file; nil between files
	// reader is unused in the visible code — presumably reserved for the
	// MakeInnerReader pipeline; confirm before removing.
	reader  array.RecordReader
	nextPos int // index of the next data file to open
	err     error
}

// NewScanRecordReader builds a scan reader over dataFragments with an
// initial reference count of one.
func NewScanRecordReader(
	s *schema.Schema,
	options *options.ReadOptions,
	f fs.Fs,
	dataFragments fragment.FragmentVector,
	deleteFragments fragment.DeleteFragmentVector,
) *ScanRecordReader {
	return &ScanRecordReader{
		ref:             1,
		schema:          s,
		options:         options,
		fs:              f,
		dataFragments:   dataFragments,
		deleteFragments: deleteFragments,
	}
}

// Schema returns the space schema projected to the requested output columns.
func (r *ScanRecordReader) Schema() *arrow.Schema {
	return utils.ProjectSchema(r.schema.Schema(), r.options.OutputColumns())
}

// Retain increments the reference count.
func (r *ScanRecordReader) Retain() {
	atomic.AddInt64(&r.ref, 1)
}

// Release decrements the reference count and, at zero, releases the held
// record and closes the open file reader.
func (r *ScanRecordReader) Release() {
	if atomic.AddInt64(&r.ref, -1) == 0 {
		if r.rec != nil {
			r.rec.Release()
			r.rec = nil
		}
		if r.curReader != nil {
			r.curReader.Close()
			r.curReader = nil
		}
	}
}

// Next advances to the next non-empty record, opening data files one by one
// and skipping empty batches. Returns false at end of input or on error
// (check Err). The previously returned record is released first, so callers
// must not hold on to it across calls without retaining it.
func (r *ScanRecordReader) Next() bool {
	// NOTE(review): the file list is recomputed on every Next call.
	datafiles := fragment.ToFilesVector(r.dataFragments)
	log.Debug("ScanRecordReader Next", zap.Any("datafiles", datafiles))
	if r.rec != nil {
		r.rec.Release()
		r.rec = nil
	}
	for {
		if r.curReader == nil {
			if r.nextPos >= len(datafiles) {
				// All data files consumed.
				return false
			}
			// FIXME: nil options
			reader, err := parquet.NewFileReader(r.fs, datafiles[r.nextPos], r.options)
			if err != nil {
				r.err = err
				return false
			}
			r.nextPos++
			r.curReader = reader
		}

		rec, err := r.curReader.Read()
		if err != nil {
			if err == io.EOF {
				// Current file exhausted; move on to the next one.
				r.curReader.Close()
				r.curReader = nil
				continue
			}
			// if error occurs in the middle of reading, return false
			r.curReader.Close()
			r.curReader = nil
			r.err = err
			return false
		}

		// Skip empty batches instead of surfacing them.
		if rec.NumRows() == 0 {
			continue
		}

		r.rec = rec
		return true
	}
}

// Record returns the record produced by the last successful Next call.
func (r *ScanRecordReader) Record() arrow.Record {
	return r.rec
}

// Err returns the first error encountered during scanning.
func (r *ScanRecordReader) Err() error {
	return r.err
}

// MakeInnerReader assembles the full read pipeline: sequential file scan ->
// filtering -> delete application -> column projection.
func (r *ScanRecordReader) MakeInnerReader() array.RecordReader {
	// TODO implement me
	reader := NewMultiFilesSequentialReader(r.fs, r.dataFragments, r.Schema(), r.options)

	filterReader := commonreader.MakeFilterReader(reader, r.options)

	deleteReader := commonreader.NewDeleteReader(filterReader, r.schema.Options(), r.deleteFragments, r.options)

	res := commonreader.NewProjectionReader(deleteReader, r.options, r.schema.Schema())
	return res
}
// LockManager serializes concurrent manifest mutations. Acquire/Release
// bracket a commit attempt.
type LockManager interface {
	// Acquire the lock, wait until the lock is available, return the version to be modified or use the newest version
	Acquire() (version int64, useLatestVersion bool, err error)
	// Release the lock, accepts the new allocated manifest version and success state of operations between Acquire and Release as parameters
	Release(version int64, success bool) error
}

// EmptyLockManager is a no-op LockManager: Acquire always succeeds against
// the latest manifest version and Release does nothing. It is the default
// when callers configure no locking.
type EmptyLockManager struct{}

// Acquire always grants the lock on the latest manifest version.
func (h *EmptyLockManager) Acquire() (version int64, useLatestVersion bool, err error) {
	return constant.LatestManifestVersion, true, nil
}

// Release is a no-op and never fails.
func (h *EmptyLockManager) Release(_ int64, _ bool) error {
	return nil
}
+ } + m.locks[version] = true + log.Info("acquire lock", zap.Int64("version", version), zap.Bool("useLatestVersion", useLatestVersion)) + + return version, useLatestVersion, nil +} + +func (m *MemoryLockManager) Release(version int64, success bool) error { + m.mu.Lock() + defer m.mu.Unlock() + + realVersion := int64(0) + realVersion = version - 1 + if !m.locks[realVersion] { + return errors.New("lock is already released or does not exist") + } + m.locks[realVersion] = false + log.Info("release lock", zap.Int64("version", realVersion), zap.Bool("success", success)) + if success { + m.nextVersion = version + } else { + m.nextVersion = constant.LatestManifestVersion + } + + return nil +} diff --git a/internal/storagev2/storage/manifest/commit.go b/internal/storagev2/storage/manifest/commit.go new file mode 100644 index 0000000000000..33267de859493 --- /dev/null +++ b/internal/storagev2/storage/manifest/commit.go @@ -0,0 +1,80 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package manifest + +import ( + "github.com/milvus-io/milvus/internal/storagev2/common/constant" + "github.com/milvus-io/milvus/internal/storagev2/storage/lock" +) + +type ManifestCommit struct { + ops []ManifestCommitOp + lock lock.LockManager + rw ManifestReaderWriter +} + +func (m *ManifestCommit) AddOp(op ...ManifestCommitOp) { + m.ops = append(m.ops, op...) 
+} + +func (m ManifestCommit) Commit() (manifest *Manifest, err error) { + ver, latest, err := m.lock.Acquire() + if err != nil { + return nil, err + } + var version int64 + defer func() { + if err != nil { + if err2 := m.lock.Release(-1, false); err2 != nil { + err = err2 + } + } else { + err = m.lock.Release(version, true) + } + }() + var base *Manifest + if latest { + base, err = m.rw.Read(constant.LatestManifestVersion) + if err != nil { + return nil, err + } + base.version++ + } else { + base, err = m.rw.Read(ver) + if err != nil { + return nil, err + } + maxVersion, err := m.rw.MaxVersion() + if err != nil { + return nil, err + } + base.version = maxVersion + 1 + } + + for _, op := range m.ops { + op.commit(base) + } + version = base.version + + err = m.rw.Write(base) + if err != nil { + return nil, err + } + return base, nil +} + +func NewManifestCommit(lock lock.LockManager, rw ManifestReaderWriter) ManifestCommit { + return ManifestCommit{nil, lock, rw} +} diff --git a/internal/storagev2/storage/manifest/commit_op.go b/internal/storagev2/storage/manifest/commit_op.go new file mode 100644 index 0000000000000..e5117460199f8 --- /dev/null +++ b/internal/storagev2/storage/manifest/commit_op.go @@ -0,0 +1,68 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// ManifestCommitOp is one mutation applied to a manifest during a
// ManifestCommit.Commit.
type ManifestCommitOp interface {
	commit(manifest *Manifest) error
}

// AddScalarFragmentOp appends a scalar-column fragment to the manifest.
type AddScalarFragmentOp struct {
	ScalarFragment fragment.Fragment
}

func (op AddScalarFragmentOp) commit(manifest *Manifest) error {
	// Fragment ids are taken from the manifest version being committed.
	op.ScalarFragment.SetFragmentId(manifest.Version())
	manifest.AddScalarFragment(op.ScalarFragment)
	return nil
}

// AddVectorFragmentOp appends a vector-column fragment to the manifest.
type AddVectorFragmentOp struct {
	VectorFragment fragment.Fragment
}

func (op AddVectorFragmentOp) commit(manifest *Manifest) error {
	op.VectorFragment.SetFragmentId(manifest.Version())
	manifest.AddVectorFragment(op.VectorFragment)
	return nil
}

// AddDeleteFragmentOp appends a delete fragment to the manifest.
type AddDeleteFragmentOp struct {
	DeleteFragment fragment.Fragment
}

func (op AddDeleteFragmentOp) commit(manifest *Manifest) error {
	op.DeleteFragment.SetFragmentId(manifest.Version())
	manifest.AddDeleteFragment(op.DeleteFragment)
	return nil
}

// AddBlobOp adds a named blob; when Replace is false and a blob with the
// same name already exists, the op fails with ErrBlobAlreadyExist.
type AddBlobOp struct {
	Replace bool
	Blob    blob.Blob
}

func (op AddBlobOp) commit(manifest *Manifest) error {
	if !op.Replace && manifest.HasBlob(op.Blob.Name) {
		return errors.ErrBlobAlreadyExist
	}
	manifest.AddBlob(op.Blob)
	return nil
}
// Manifest describes one committed version of a space: its schema, the
// scalar/vector/delete fragments, and any named blobs.
type Manifest struct {
	schema *schema.Schema
	// NOTE(review): exported while its vector/delete siblings are not —
	// presumably unintentional; confirm external usage before renaming.
	ScalarFragments fragment.FragmentVector
	vectorFragments fragment.FragmentVector
	deleteFragments fragment.FragmentVector
	blobs           []blob.Blob
	version         int64
}

// NewManifest returns an empty manifest (version 0) over the given schema.
func NewManifest(schema *schema.Schema) *Manifest {
	return &Manifest{
		schema: schema,
	}
}

// Init returns an empty manifest with a default (field-less) arrow schema.
func Init() *Manifest {
	return &Manifest{
		schema: schema.NewSchema(arrow.NewSchema(nil, nil), schema.DefaultSchemaOptions()),
	}
}

// Copy returns a shallow copy: the fragment vectors and blob slice share
// their backing arrays with the receiver.
func (m *Manifest) Copy() *Manifest {
	copied := *m
	return &copied
}

// GetSchema returns the manifest's schema.
func (m *Manifest) GetSchema() *schema.Schema {
	return m.schema
}

// AddScalarFragment appends a scalar fragment.
func (m *Manifest) AddScalarFragment(fragment fragment.Fragment) {
	m.ScalarFragments = append(m.ScalarFragments, fragment)
}

// AddVectorFragment appends a vector fragment.
func (m *Manifest) AddVectorFragment(fragment fragment.Fragment) {
	m.vectorFragments = append(m.vectorFragments, fragment)
}

// AddDeleteFragment appends a delete fragment.
func (m *Manifest) AddDeleteFragment(fragment fragment.Fragment) {
	m.deleteFragments = append(m.deleteFragments, fragment)
}
// ToProtobuf serializes the manifest — version, all three fragment vectors,
// blobs, and the schema — into its storagev2pb wire form. It fails only if
// the schema cannot be converted.
func (m *Manifest) ToProtobuf() (*storagev2pb.Manifest, error) {
	manifest := &storagev2pb.Manifest{}
	manifest.Version = m.version
	for _, vectorFragment := range m.vectorFragments {
		manifest.VectorFragments = append(manifest.VectorFragments, vectorFragment.ToProtobuf())
	}
	for _, scalarFragment := range m.ScalarFragments {
		manifest.ScalarFragments = append(manifest.ScalarFragments, scalarFragment.ToProtobuf())
	}
	for _, deleteFragment := range m.deleteFragments {
		manifest.DeleteFragments = append(manifest.DeleteFragments, deleteFragment.ToProtobuf())
	}

	for _, blob := range m.blobs {
		manifest.Blobs = append(manifest.Blobs, blob.ToProtobuf())
	}

	schemaProto, err := m.schema.ToProtobuf()
	if err != nil {
		return nil, err
	}
	manifest.Schema = schemaProto

	return manifest, nil
}
blob.FromProtobuf(b)) + } + + m.version = manifest.Version + return nil +} + +func WriteManifestFile(manifest *Manifest, output file.File) error { + protoManifest, err := manifest.ToProtobuf() + if err != nil { + return err + } + + bytes, err := proto.Marshal(protoManifest) + if err != nil { + return fmt.Errorf("write manifest file: %w", err) + } + write, err := output.Write(bytes) + if err != nil { + return fmt.Errorf("write manifest file: %w", err) + } + if write != len(bytes) { + return fmt.Errorf("failed to write whole file, expect: %v, actual: %v", len(bytes), write) + } + if err = output.Close(); err != nil { + return err + } + return nil +} + +func (m *Manifest) HasBlob(name string) bool { + for _, b := range m.blobs { + if b.Name == name { + return true + } + } + + return false +} + +func (m *Manifest) AddBlob(blob blob.Blob) { + m.blobs = append(m.blobs, blob) +} + +func (m *Manifest) RemoveBlobIfExist(name string) { + idx := -1 + for i, b := range m.blobs { + if b.Name == name { + idx = i + break + } + } + + m.blobs = append(m.blobs[0:idx], m.blobs[idx+1:]...) 
+} + +func (m *Manifest) GetBlob(name string) (blob.Blob, bool) { + for _, b := range m.blobs { + if b.Name == name { + return b, true + } + } + + return blob.Blob{}, false +} + +func ParseFromFile(f fs.Fs, path string) (*Manifest, error) { + manifest := Init() + manifestProto := &storagev2pb.Manifest{} + + buf, err := f.ReadFile(path) + if err != nil { + return nil, err + } + err = proto.Unmarshal(buf, manifestProto) + if err != nil { + log.Error("Failed to unmarshal manifest proto", log.String("err", err.Error())) + return nil, fmt.Errorf("parse from file: %w", err) + } + err = manifest.FromProtobuf(manifestProto) + if err != nil { + return nil, err + } + + return manifest, nil +} + +// TODO REMOVE BELOW CODE + +type DataFile struct { + path string + cols []string +} + +func (d *DataFile) Path() string { + return d.path +} + +func NewDataFile(path string) *DataFile { + return &DataFile{path: path} +} diff --git a/internal/storagev2/storage/manifest/reader_writer.go b/internal/storagev2/storage/manifest/reader_writer.go new file mode 100644 index 0000000000000..c7defe701a581 --- /dev/null +++ b/internal/storagev2/storage/manifest/reader_writer.go @@ -0,0 +1,119 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package manifest + +import ( + "fmt" + "path/filepath" + + "github.com/cockroachdb/errors" + + "github.com/milvus-io/milvus/internal/storagev2/common/constant" + "github.com/milvus-io/milvus/internal/storagev2/common/log" + "github.com/milvus-io/milvus/internal/storagev2/common/utils" + "github.com/milvus-io/milvus/internal/storagev2/io/fs" +) + +var ErrManifestNotFound = errors.New("manifest not found") + +type ManifestReaderWriter struct { + fs fs.Fs + root string +} + +func findAllManifest(fs fs.Fs, path string) ([]fs.FileEntry, error) { + files, err := fs.List(path) + log.Debug("list all manifest:", log.Any("files", files)) + if err != nil { + return nil, err + } + return files, nil +} + +func (rw ManifestReaderWriter) Read(version int64) (*Manifest, error) { + manifests, err := findAllManifest(rw.fs, utils.GetManifestDir(rw.root)) + if err != nil { + return nil, err + } + + var maxVersionManifest string + var maxVersion int64 = -1 + for _, m := range manifests { + ver := utils.ParseVersionFromFileName(filepath.Base(m.Path)) + if ver == -1 { + continue + } + + if version != constant.LatestManifestVersion { + if ver == version { + return ParseFromFile(rw.fs, m.Path) + } + } else if ver > maxVersion { + maxVersion = ver + maxVersionManifest = m.Path + } + } + + if maxVersion != -1 { + return ParseFromFile(rw.fs, maxVersionManifest) + } + return nil, ErrManifestNotFound +} + +func (rw ManifestReaderWriter) MaxVersion() (int64, error) { + manifests, err := findAllManifest(rw.fs, utils.GetManifestDir(rw.root)) + if err != nil { + return -1, err + } + var max int64 = -1 + for _, m := range manifests { + ver := utils.ParseVersionFromFileName(filepath.Base(m.Path)) + if ver == -1 { + continue + } + + if ver > max { + max = ver + } + } + + if max == -1 { + return -1, ErrManifestNotFound + } + return max, nil +} + +func (rw ManifestReaderWriter) Write(m *Manifest) error { + tmpManifestFilePath := utils.GetManifestTmpFilePath(rw.root, m.Version()) + manifestFilePath := 
utils.GetManifestFilePath(rw.root, m.Version()) + log.Debug("path", log.String("tmpManifestFilePath", tmpManifestFilePath), log.String("manifestFilePath", manifestFilePath)) + output, err := rw.fs.OpenFile(tmpManifestFilePath) + if err != nil { + return fmt.Errorf("open file error: %w", err) + } + if err = WriteManifestFile(m, output); err != nil { + return err + } + err = rw.fs.Rename(tmpManifestFilePath, manifestFilePath) + if err != nil { + return fmt.Errorf("rename file error: %w", err) + } + log.Debug("save manifest file success", log.String("path", manifestFilePath)) + return nil +} + +func NewManifestReaderWriter(fs fs.Fs, root string) ManifestReaderWriter { + return ManifestReaderWriter{fs, root} +} diff --git a/internal/storagev2/storage/options/options.go b/internal/storagev2/storage/options/options.go new file mode 100644 index 0000000000000..f7fa2de9f5b5e --- /dev/null +++ b/internal/storagev2/storage/options/options.go @@ -0,0 +1,144 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package options + +import ( + "math" + + "github.com/milvus-io/milvus/internal/storagev2/common/constant" + "github.com/milvus-io/milvus/internal/storagev2/filter" + "github.com/milvus-io/milvus/internal/storagev2/storage/lock" + "github.com/milvus-io/milvus/internal/storagev2/storage/schema" +) + +type Options struct { + Schema *schema.Schema // optional + Version int64 // optional + LockManager lock.LockManager // optional, no lock manager as default +} + +type SpaceOptionsBuilder struct { + options Options +} + +func (b *SpaceOptionsBuilder) SetSchema(schema *schema.Schema) *SpaceOptionsBuilder { + b.options.Schema = schema + return b +} + +func (b *SpaceOptionsBuilder) SetVersion(version int64) *SpaceOptionsBuilder { + b.options.Version = version + return b +} + +func (b *SpaceOptionsBuilder) SetLockManager(lockManager lock.LockManager) *SpaceOptionsBuilder { + b.options.LockManager = lockManager + return b +} + +func (b *SpaceOptionsBuilder) Reset() { + b.options = Options{LockManager: &lock.EmptyLockManager{}} +} + +func (b *SpaceOptionsBuilder) Build() Options { return b.options } + +func NewSpaceOptionBuilder() *SpaceOptionsBuilder { + return &SpaceOptionsBuilder{ + options: Options{ + Version: constant.LatestManifestVersion, + LockManager: &lock.EmptyLockManager{}, + }, + } +} + +func DefaultOptions() *Options { + return &Options{} +} + +type WriteOptions struct { + MaxRecordPerFile int64 +} + +var DefaultWriteOptions = WriteOptions{ + MaxRecordPerFile: 1024, +} + +func NewWriteOption() *WriteOptions { + return &WriteOptions{ + MaxRecordPerFile: 1024, + } +} + +type FsType int8 + +const ( + InMemory FsType = iota + LocalFS + S3 +) + +type SpaceOptions struct { + Fs FsType + VectorColumns []string +} + +// TODO: Change to FilterSet type +type FilterSet []filter.Filter + +var version int64 = math.MaxInt64 + +type ReadOptions struct { + // Filters map[string]filter.Filter + Filters map[string]filter.Filter + FiltersV2 FilterSet + Columns []string + 
ManifestVersion int64 + version int64 +} + +func NewReadOptions() *ReadOptions { + return &ReadOptions{ + Filters: make(map[string]filter.Filter), + FiltersV2: make(FilterSet, 0), + Columns: make([]string, 0), + ManifestVersion: constant.LatestManifestVersion, + version: math.MaxInt64, + } +} + +func (o *ReadOptions) AddFilter(filter filter.Filter) { + o.Filters[filter.GetColumnName()] = filter + o.FiltersV2 = append(o.FiltersV2, filter) +} + +func (o *ReadOptions) AddColumn(column string) { + o.Columns = append(o.Columns, column) +} + +func (o *ReadOptions) SetColumns(columns []string) { + o.Columns = columns +} + +func (o *ReadOptions) SetVersion(version int64) { + o.version = version +} + +func (o *ReadOptions) GetVersion() int64 { + return o.version +} + +func (o *ReadOptions) OutputColumns() []string { + return o.Columns +} diff --git a/internal/storagev2/storage/schema/schema.go b/internal/storagev2/storage/schema/schema.go new file mode 100644 index 0000000000000..afb9e4ee944c7 --- /dev/null +++ b/internal/storagev2/storage/schema/schema.go @@ -0,0 +1,150 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package schema + +import ( + "github.com/apache/arrow/go/v12/arrow" + + "github.com/milvus-io/milvus/internal/storagev2/common/constant" + "github.com/milvus-io/milvus/internal/storagev2/common/utils" + "github.com/milvus-io/milvus/pkg/proto/storagev2pb" +) + +// Schema is a wrapper of arrow schema +type Schema struct { + schema *arrow.Schema + scalarSchema *arrow.Schema + vectorSchema *arrow.Schema + deleteSchema *arrow.Schema + + options *SchemaOptions +} + +func (s *Schema) Schema() *arrow.Schema { + return s.schema +} + +func (s *Schema) Options() *SchemaOptions { + return s.options +} + +func NewSchema(schema *arrow.Schema, options *SchemaOptions) *Schema { + return &Schema{ + schema: schema, + options: options, + } +} + +func (s *Schema) Validate() error { + err := s.options.Validate(s.schema) + if err != nil { + return err + } + err = s.BuildScalarSchema() + if err != nil { + return err + } + err = s.BuildVectorSchema() + if err != nil { + return err + } + err = s.BuildDeleteSchema() + if err != nil { + return err + } + return nil +} + +func (s *Schema) ScalarSchema() *arrow.Schema { + return s.scalarSchema +} + +func (s *Schema) VectorSchema() *arrow.Schema { + return s.vectorSchema +} + +func (s *Schema) DeleteSchema() *arrow.Schema { + return s.deleteSchema +} + +func (s *Schema) FromProtobuf(schema *storagev2pb.Schema) error { + schemaType, err := utils.FromProtobufSchema(schema.ArrowSchema) + if err != nil { + return err + } + + s.schema = schemaType + s.options.FromProtobuf(schema.GetSchemaOptions()) + s.BuildScalarSchema() + s.BuildVectorSchema() + s.BuildDeleteSchema() + return nil +} + +func (s *Schema) ToProtobuf() (*storagev2pb.Schema, error) { + schema := &storagev2pb.Schema{} + arrowSchema, err := utils.ToProtobufSchema(s.schema) + if err != nil { + return nil, err + } + schema.ArrowSchema = arrowSchema + schema.SchemaOptions = s.options.ToProtobuf() + return schema, nil +} + +func (s *Schema) BuildScalarSchema() error { + fields := 
make([]arrow.Field, 0, len(s.schema.Fields())) + for _, field := range s.schema.Fields() { + if field.Name == s.options.VectorColumn { + continue + } + fields = append(fields, field) + } + offsetFiled := arrow.Field{Name: constant.OffsetFieldName, Type: arrow.DataType(&arrow.Int64Type{})} + fields = append(fields, offsetFiled) + s.scalarSchema = arrow.NewSchema(fields, nil) + + return nil +} + +func (s *Schema) BuildVectorSchema() error { + fields := make([]arrow.Field, 0, len(s.schema.Fields())) + for _, field := range s.schema.Fields() { + if field.Name == s.options.VectorColumn || + field.Name == s.options.PrimaryColumn || + field.Name == s.options.VersionColumn { + fields = append(fields, field) + } + } + s.vectorSchema = arrow.NewSchema(fields, nil) + + return nil +} + +func (s *Schema) BuildDeleteSchema() error { + pkColumn, ok := s.schema.FieldsByName(s.options.PrimaryColumn) + if !ok { + return ErrPrimaryColumnNotFound + } + versionField, ok := s.schema.FieldsByName(s.options.VersionColumn) + if !ok { + return ErrVersionColumnNotFound + } + fields := make([]arrow.Field, 0, 2) + fields = append(fields, pkColumn[0]) + fields = append(fields, versionField[0]) + s.deleteSchema = arrow.NewSchema(fields, nil) + return nil +} diff --git a/internal/storagev2/storage/schema/schema_option.go b/internal/storagev2/storage/schema/schema_option.go new file mode 100644 index 0000000000000..6294048a6fa7a --- /dev/null +++ b/internal/storagev2/storage/schema/schema_option.go @@ -0,0 +1,97 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package schema + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/cockroachdb/errors" + + "github.com/milvus-io/milvus/pkg/proto/storagev2pb" +) + +var ( + ErrPrimaryColumnNotFound = errors.New("primary column not found") + ErrPrimaryColumnType = errors.New("primary column is not int64 or string") + ErrPrimaryColumnEmpty = errors.New("primary column is empty") + ErrVersionColumnNotFound = errors.New("version column not found") + ErrVersionColumnType = errors.New("version column is not int64") + ErrVectorColumnNotFound = errors.New("vector column not found") + ErrVectorColumnType = errors.New("vector column is not fixed size binary or fixed size list") + ErrVectorColumnEmpty = errors.New("vector column is empty") +) + +type SchemaOptions struct { + PrimaryColumn string + VersionColumn string + VectorColumn string +} + +func DefaultSchemaOptions() *SchemaOptions { + return &SchemaOptions{ + PrimaryColumn: "", + VersionColumn: "", + VectorColumn: "", + } +} + +func (o *SchemaOptions) ToProtobuf() *storagev2pb.SchemaOptions { + options := &storagev2pb.SchemaOptions{} + options.PrimaryColumn = o.PrimaryColumn + options.VersionColumn = o.VersionColumn + options.VectorColumn = o.VectorColumn + return options +} + +func (o *SchemaOptions) FromProtobuf(options *storagev2pb.SchemaOptions) { + o.PrimaryColumn = options.PrimaryColumn + o.VersionColumn = options.VersionColumn + o.VectorColumn = options.VectorColumn +} + +func (o *SchemaOptions) Validate(schema *arrow.Schema) error { + if o.PrimaryColumn != "" { + primaryField, ok := schema.FieldsByName(o.PrimaryColumn) + if !ok { + return ErrPrimaryColumnNotFound + } else if primaryField[0].Type.ID() != arrow.STRING && primaryField[0].Type.ID() != arrow.INT64 { + return ErrPrimaryColumnType + } + } else { + return ErrPrimaryColumnEmpty + } + if o.VersionColumn != "" { + versionField, ok := 
schema.FieldsByName(o.VersionColumn) + if !ok { + return ErrVersionColumnNotFound + } else if versionField[0].Type.ID() != arrow.INT64 { + return ErrVersionColumnType + } + } + if o.VectorColumn != "" { + vectorField, b := schema.FieldsByName(o.VectorColumn) + if !b { + return ErrVectorColumnNotFound + } else if vectorField[0].Type.ID() != arrow.FIXED_SIZE_BINARY && vectorField[0].Type.ID() != arrow.FIXED_SIZE_LIST { + return ErrVectorColumnType + } + } else { + return ErrVectorColumnEmpty + } + return nil +} + +func (o *SchemaOptions) HasVersionColumn() bool { + return o.VersionColumn != "" +} diff --git a/internal/storagev2/storage/schema/schema_test.go b/internal/storagev2/storage/schema/schema_test.go new file mode 100644 index 0000000000000..0967782add993 --- /dev/null +++ b/internal/storagev2/storage/schema/schema_test.go @@ -0,0 +1,53 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package schema + +import ( + "testing" + + "github.com/apache/arrow/go/v12/arrow" + "github.com/stretchr/testify/assert" +) + +// Test Schema.Schema +func TestBuildSchema(t *testing.T) { + pkField := arrow.Field{ + Name: "pk_field", + Type: arrow.DataType(&arrow.Int64Type{}), + Nullable: false, + } + vsField := arrow.Field{ + Name: "vs_field", + Type: arrow.DataType(&arrow.Int64Type{}), + Nullable: false, + } + vecField := arrow.Field{ + Name: "vec_field", + Type: arrow.DataType(&arrow.FixedSizeBinaryType{ByteWidth: 16}), + Nullable: false, + } + fields := []arrow.Field{pkField, vsField, vecField} + + as := arrow.NewSchema(fields, nil) + schemaOptions := &SchemaOptions{ + PrimaryColumn: "pk_field", + VersionColumn: "vs_field", + VectorColumn: "vec_field", + } + + sc := NewSchema(as, schemaOptions) + err := sc.Validate() + assert.NoError(t, err) +} diff --git a/internal/storagev2/storage/space.go b/internal/storagev2/storage/space.go new file mode 100644 index 0000000000000..2c8234fa55f11 --- /dev/null +++ b/internal/storagev2/storage/space.go @@ -0,0 +1,220 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package storage + +import ( + "math" + + "github.com/apache/arrow/go/v12/arrow/array" + + "github.com/milvus-io/milvus/internal/storagev2/common/errors" + "github.com/milvus-io/milvus/internal/storagev2/common/log" + "github.com/milvus-io/milvus/internal/storagev2/common/utils" + "github.com/milvus-io/milvus/internal/storagev2/file/blob" + "github.com/milvus-io/milvus/internal/storagev2/file/fragment" + "github.com/milvus-io/milvus/internal/storagev2/filter" + "github.com/milvus-io/milvus/internal/storagev2/io/fs" + "github.com/milvus-io/milvus/internal/storagev2/reader/recordreader" + "github.com/milvus-io/milvus/internal/storagev2/storage/lock" + "github.com/milvus-io/milvus/internal/storagev2/storage/manifest" + "github.com/milvus-io/milvus/internal/storagev2/storage/options" + "github.com/milvus-io/milvus/internal/storagev2/storage/transaction" +) + +type Space struct { + path string + fs fs.Fs + deleteFragments fragment.DeleteFragmentVector + manifest *manifest.Manifest + lockManager lock.LockManager +} + +func (s *Space) init() error { + for _, f := range s.manifest.GetDeleteFragments() { + deleteFragment := fragment.Make(s.fs, s.manifest.GetSchema(), f) + s.deleteFragments = append(s.deleteFragments, deleteFragment) + } + return nil +} + +func NewSpace(f fs.Fs, path string, m *manifest.Manifest, lockManager lock.LockManager) *Space { + deleteFragments := fragment.DeleteFragmentVector{} + return &Space{ + fs: f, + path: path, + manifest: m, + deleteFragments: deleteFragments, + lockManager: lockManager, + } +} + +func (s *Space) NewTransaction() transaction.Transaction { + return transaction.NewConcurrentWriteTransaction(s) +} + +func (s *Space) Write(reader array.RecordReader, options *options.WriteOptions) error { + return transaction.NewConcurrentWriteTransaction(s).Write(reader, options).Commit() +} + +func (s *Space) Delete(reader array.RecordReader) error { + return transaction.NewConcurrentWriteTransaction(s).Delete(reader).Commit() +} + +// Open 
opened a space or create if the space does not exist. +// If space does not exist. schema should not be nullptr, or an error will be returned. +// If space exists and version is specified, it will restore to the state at this version, +// or it will choose the latest version. +func Open(uri string, opt options.Options) (*Space, error) { + var f fs.Fs + var m *manifest.Manifest + var path string + f, err := fs.BuildFileSystem(uri) + if err != nil { + return nil, err + } + + path = f.Path() + log.Debug("open space", log.String("path", path)) + + log.Debug(utils.GetManifestDir(path)) + // create if not exist + if err = f.CreateDir(utils.GetManifestDir(path)); err != nil { + return nil, err + } + if err = f.CreateDir(utils.GetScalarDataDir(path)); err != nil { + return nil, err + } + if err = f.CreateDir(utils.GetVectorDataDir(path)); err != nil { + return nil, err + } + if err = f.CreateDir(utils.GetBlobDir(path)); err != nil { + return nil, err + } + if err = f.CreateDir(utils.GetDeleteDataDir(path)); err != nil { + return nil, err + } + + rw := manifest.NewManifestReaderWriter(f, path) + m, err = rw.Read(opt.Version) + if err != nil { + // create the first manifest file + if err == manifest.ErrManifestNotFound { + if opt.Schema == nil { + log.Error("schema is nil") + return nil, errors.ErrSchemaIsNil + } + if err = opt.Schema.Validate(); err != nil { + return nil, err + } + m = manifest.NewManifest(opt.Schema) + m.SetVersion(0) // TODO: check if this is necessary + if err = rw.Write(m); err != nil { + return nil, err + } + } else { + return nil, err + } + } + space := NewSpace(f, path, m, opt.LockManager) + return space, nil +} + +func (s *Space) readManifest(version int64) error { + rw := manifest.NewManifestReaderWriter(s.fs, s.path) + manifest, err := rw.Read(version) + if err != nil { + return err + } + s.manifest = manifest + return nil +} + +func (s *Space) Read(readOptions *options.ReadOptions) (array.RecordReader, error) { + if s.manifest == nil || 
readOptions.ManifestVersion != s.manifest.Version() { + if err := s.readManifest(readOptions.ManifestVersion); err != nil { + return nil, err + } + } + if s.manifest.GetSchema().Options().HasVersionColumn() { + f := filter.NewConstantFilter(filter.LessThanOrEqual, s.manifest.GetSchema().Options().VersionColumn, int64(math.MaxInt64)) + readOptions.AddFilter(f) + readOptions.AddColumn(s.manifest.GetSchema().Options().VersionColumn) + } + log.Debug("read", log.Any("readOption", readOptions)) + + return recordreader.MakeRecordReader(s.manifest, s.manifest.GetSchema(), s.fs, s.deleteFragments, readOptions), nil +} + +func (s *Space) WriteBlob(content []byte, name string, replace bool) error { + return transaction.NewConcurrentWriteTransaction(s).WriteBlob(content, name, replace).Commit() +} + +func (s *Space) ReadBlob(name string, output []byte) (int, error) { + blob, ok := s.manifest.GetBlob(name) + if !ok { + return -1, errors.ErrBlobNotExist + } + + f, err := s.fs.OpenFile(blob.File) + if err != nil { + return -1, err + } + + return f.Read(output) +} + +func (s *Space) GetBlobByteSize(name string) (int64, error) { + blob, ok := s.manifest.GetBlob(name) + if !ok { + return -1, errors.ErrBlobNotExist + } + return blob.Size, nil +} + +func (s *Space) GetCurrentVersion() int64 { + return s.manifest.Version() +} + +func (s *Space) ScanDelete() (array.RecordReader, error) { + return recordreader.MakeScanDeleteReader(s.manifest, s.fs), nil +} + +func (s *Space) Path() string { + return s.path +} + +func (s *Space) Fs() fs.Fs { + return s.fs +} + +func (s *Space) Manifest() *manifest.Manifest { + return s.manifest +} + +func (s *Space) SetManifest(manifest *manifest.Manifest) { + s.manifest = manifest +} + +func (s *Space) LockManager() lock.LockManager { + return s.lockManager +} + +func (s *Space) SetLockManager(lockManager lock.LockManager) { + s.lockManager = lockManager +} + +func (s *Space) StatisticsBlobs() []blob.Blob { + return s.manifest.GetBlobs() +} diff --git 
a/internal/storagev2/storage/transaction/transaction.go b/internal/storagev2/storage/transaction/transaction.go new file mode 100644 index 0000000000000..261f8e7ec5d20 --- /dev/null +++ b/internal/storagev2/storage/transaction/transaction.go @@ -0,0 +1,327 @@ +// Copyright 2023 Zilliz +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package transaction + +import ( + "fmt" + + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + "github.com/apache/arrow/go/v12/arrow/memory" + + "github.com/milvus-io/milvus/internal/storagev2/common/errors" + "github.com/milvus-io/milvus/internal/storagev2/common/log" + "github.com/milvus-io/milvus/internal/storagev2/common/utils" + "github.com/milvus-io/milvus/internal/storagev2/file/blob" + "github.com/milvus-io/milvus/internal/storagev2/file/fragment" + "github.com/milvus-io/milvus/internal/storagev2/io/format" + "github.com/milvus-io/milvus/internal/storagev2/io/format/parquet" + "github.com/milvus-io/milvus/internal/storagev2/io/fs" + "github.com/milvus-io/milvus/internal/storagev2/storage/lock" + "github.com/milvus-io/milvus/internal/storagev2/storage/manifest" + "github.com/milvus-io/milvus/internal/storagev2/storage/options" +) + +type SpaceMeta interface { + Path() string + Fs() fs.Fs + Manifest() *manifest.Manifest + LockManager() lock.LockManager + SetManifest(manifest *manifest.Manifest) +} + +type Transaction interface { + Write(reader array.RecordReader, options 
*options.WriteOptions) Transaction + Delete(reader array.RecordReader) Transaction + WriteBlob(content []byte, name string, replace bool) Transaction + Commit() error +} + +type ConcurrentWriteTransaction struct { + operations []Operation + commit manifest.ManifestCommit + space SpaceMeta +} + +func (t *ConcurrentWriteTransaction) Write(reader array.RecordReader, options *options.WriteOptions) Transaction { + operation := &WriteOperation{ + reader: reader, + options: options, + space: t.space, + transaction: t, + } + t.operations = append(t.operations, operation) + return t +} + +func (t *ConcurrentWriteTransaction) Delete(reader array.RecordReader) Transaction { + operation := &DeleteOperation{ + reader: reader, + space: t.space, + transaction: t, + } + t.operations = append(t.operations, operation) + return t +} + +func (t *ConcurrentWriteTransaction) WriteBlob(content []byte, name string, replace bool) Transaction { + operation := &WriteBlobOperation{ + content: content, + name: name, + replace: replace, + space: t.space, + transaction: t, + } + t.operations = append(t.operations, operation) + return t +} + +func (t *ConcurrentWriteTransaction) Commit() error { + for _, op := range t.operations { + op.Execute() + } + nxtManifest, err := t.commit.Commit() + if err != nil { + return err + } + t.space.SetManifest(nxtManifest) + return nil +} + +func NewConcurrentWriteTransaction(space SpaceMeta) *ConcurrentWriteTransaction { + return &ConcurrentWriteTransaction{ + operations: make([]Operation, 0), + commit: manifest.NewManifestCommit(space.LockManager(), manifest.NewManifestReaderWriter(space.Fs(), space.Path())), + space: space, + } +} + +type Operation interface { + Execute() error +} + +type WriteOperation struct { + reader array.RecordReader + options *options.WriteOptions + space SpaceMeta + transaction *ConcurrentWriteTransaction +} + +func (w *WriteOperation) Execute() error { + if !w.space.Manifest().GetSchema().Schema().Equal(w.reader.Schema()) { + return 
errors.ErrSchemaNotMatch + } + + scalarSchema, vectorSchema := w.space.Manifest().GetSchema().ScalarSchema(), w.space.Manifest().GetSchema().VectorSchema() + var ( + scalarWriter format.Writer + vectorWriter format.Writer + ) + scalarFragment := fragment.NewFragment() + vectorFragment := fragment.NewFragment() + + isEmpty := true + for w.reader.Next() { + rec := w.reader.Record() + + if rec.NumRows() == 0 { + continue + } + + var err error + scalarWriter, err = w.write(scalarSchema, rec, scalarWriter, &scalarFragment, w.options, true) + if err != nil { + return err + } + vectorWriter, err = w.write(vectorSchema, rec, vectorWriter, &vectorFragment, w.options, false) + if err != nil { + return err + } + isEmpty = false + } + + if scalarWriter != nil { + if err := scalarWriter.Close(); err != nil { + return err + } + } + if vectorWriter != nil { + if err := vectorWriter.Close(); err != nil { + return err + } + } + + if isEmpty { + return nil + } + + op1 := manifest.AddScalarFragmentOp{ScalarFragment: scalarFragment} + op2 := manifest.AddVectorFragmentOp{VectorFragment: vectorFragment} + w.transaction.commit.AddOp(op1, op2) + return nil +} + +func (w *WriteOperation) write( + schema *arrow.Schema, + rec arrow.Record, + writer format.Writer, + fragment *fragment.Fragment, + opt *options.WriteOptions, + isScalar bool, +) (format.Writer, error) { + var columns []arrow.Array + cols := rec.Columns() + for k := range cols { + _, has := schema.FieldsByName(rec.ColumnName(k)) + if has { + columns = append(columns, cols[k]) + } + } + + var rootPath string + if isScalar { + // add offset column for scalar + offsetValues := make([]int64, rec.NumRows()) + for i := 0; i < int(rec.NumRows()); i++ { + offsetValues[i] = int64(i) + } + builder := array.NewInt64Builder(memory.DefaultAllocator) + builder.AppendValues(offsetValues, nil) + offsetColumn := builder.NewArray() + columns = append(columns, offsetColumn) + rootPath = utils.GetScalarDataDir(w.space.Path()) + } else { + rootPath = 
utils.GetVectorDataDir(w.space.Path()) + } + + var err error + + record := array.NewRecord(schema, columns, rec.NumRows()) + + if writer == nil { + filePath := utils.GetNewParquetFilePath(rootPath) + writer, err = parquet.NewFileWriter(schema, w.space.Fs(), filePath) + if err != nil { + return nil, err + } + fragment.AddFile(filePath) + } + + err = writer.Write(record) + if err != nil { + return nil, err + } + + if writer.Count() >= opt.MaxRecordPerFile { + log.Debug("close writer", log.Any("count", writer.Count())) + err = writer.Close() + if err != nil { + return nil, err + } + writer = nil + } + + return writer, nil +} + +type DeleteOperation struct { + reader array.RecordReader + space SpaceMeta + transaction *ConcurrentWriteTransaction +} + +func (o *DeleteOperation) Execute() error { + schema := o.space.Manifest().GetSchema().DeleteSchema() + fragment := fragment.NewFragment() + var ( + err error + writer format.Writer + deleteFile string + ) + + for o.reader.Next() { + rec := o.reader.Record() + if rec.NumRows() == 0 { + continue + } + + if writer == nil { + deleteFile = utils.GetNewParquetFilePath(utils.GetDeleteDataDir(o.space.Path())) + writer, err = parquet.NewFileWriter(schema, o.space.Fs(), deleteFile) + if err != nil { + return err + } + fragment.AddFile(deleteFile) + } + + if err = writer.Write(rec); err != nil { + return err + } + } + + if writer != nil { + if err = writer.Close(); err != nil { + return err + } + + op := manifest.AddDeleteFragmentOp{DeleteFragment: fragment} + o.transaction.commit.AddOp(op) + } + return nil +} + +type WriteBlobOperation struct { + content []byte + name string + replace bool + space SpaceMeta + transaction *ConcurrentWriteTransaction +} + +func (o *WriteBlobOperation) Execute() error { + if !o.replace && o.space.Manifest().HasBlob(o.name) { + return errors.ErrBlobAlreadyExist + } + + blobFile := utils.GetBlobFilePath(o.space.Path()) + f, err := o.space.Fs().OpenFile(blobFile) + if err != nil { + return err + } + + n, 
err := f.Write(o.content) + if err != nil { + return err + } + + if n != len(o.content) { + return fmt.Errorf("blob not written completely, written %d but expect %d", n, len(o.content)) + } + + if err = f.Close(); err != nil { + return err + } + + op := manifest.AddBlobOp{ + Replace: o.replace, + Blob: blob.Blob{ + Name: o.name, + Size: int64(len(o.content)), + File: blobFile, + }, + } + o.transaction.commit.AddOp(op) + return nil +} diff --git a/pkg/proto/storagev2.proto b/pkg/proto/storagev2.proto new file mode 100644 index 0000000000000..638bdda8e6171 --- /dev/null +++ b/pkg/proto/storagev2.proto @@ -0,0 +1,131 @@ +syntax = "proto3"; + +package milvus.proto.storagev2; + +option go_package = "github.com/milvus-io/milvus/pkg/proto/storagev2pb"; + +enum LogicType { + NA = 0; + BOOL = 1; + UINT8 = 2; + INT8 = 3; + UINT16 = 4; + INT16 = 5; + UINT32 = 6; + INT32 = 7; + UINT64 = 8; + INT64 = 9; + HALF_FLOAT = 10; + FLOAT = 11; + DOUBLE = 12; + STRING = 13; + BINARY = 14; + FIXED_SIZE_BINARY = 15; + // DATE32 = 16; + // DATE64 = 17; + // TIMESTAMP = 18; + // TIME32 = 19; + // TIME64 = 20; + // INTERVAL_MONTHS = 21; + // INTERVAL_DAY_TIME = 22; + // DECIMAL128 = 23; + // option allow_alias = true; + // DECIMAL = 23; // DECIMAL==DECIMAL128 + // DECIMAL256 = 24; + LIST = 25; + STRUCT = 26; + // SPARSE_UNION = 27; + // DENSE_UNION = 28; + DICTIONARY = 29; + MAP = 30; + // EXTENSION = 31; + FIXED_SIZE_LIST = 32; + // DURATION = 33; + // LARGE_STRING = 34; + // LARGE_BINARY = 35; + // LARGE_LIST = 36; + // INTERVAL_MONTH_DAY_NANO = 37; + // RUN_END_ENCODED = 38; + MAX_ID = 39; +} + +enum Endianness { + Little = 0; + Big = 1; +} + +message FixedSizeBinaryType { int32 byte_width = 1; } + +message FixedSizeListType { int32 list_size = 1; } + +message DictionaryType { + DataType index_type = 1; + DataType value_type = 2; + bool ordered = 3; +} + +message MapType { bool keys_sorted = 1; } + +message DataType { + oneof type_related_values { + FixedSizeBinaryType 
fixed_size_binary_type = 1; + FixedSizeListType fixed_size_list_type = 2; + DictionaryType dictionary_type = 3; + MapType map_type = 4; + } + LogicType logic_type = 100; + repeated Field children = 101; +} + +message KeyValueMetadata { + repeated string keys = 1; + repeated string values = 2; +} + +message Field { + string name = 1; + bool nullable = 2; + DataType data_type = 3; + KeyValueMetadata metadata = 4; +} + +message SchemaOptions { + string primary_column = 1; + string version_column = 2; + string vector_column = 3; +} + +message ArrowSchema { + repeated Field fields = 1; + Endianness endianness = 2; + KeyValueMetadata metadata = 3; +} + +message Schema { + ArrowSchema arrow_schema = 1; + SchemaOptions schema_options = 2; +} + + +message Options { string uri = 1; } + +message Manifest { + int64 version = 1; + Options options = 2; + Schema schema = 3; + repeated Fragment scalar_fragments = 4; + repeated Fragment vector_fragments = 5; + repeated Fragment delete_fragments = 6; + repeated Blob blobs = 7; +} + +message Fragment { + int64 id = 1; + repeated string files = 2; +} + +message Blob { + string name = 1; + int64 size = 2; + string file = 3; +} diff --git a/pkg/proto/storagev2pb/storagev2.pb.go b/pkg/proto/storagev2pb/storagev2.pb.go new file mode 100644 index 0000000000000..7852e3d073f27 --- /dev/null +++ b/pkg/proto/storagev2pb/storagev2.pb.go @@ -0,0 +1,1519 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.33.0 +// protoc v3.21.4 +// source: storagev2.proto + +package storagev2pb + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type LogicType int32 + +const ( + LogicType_NA LogicType = 0 + LogicType_BOOL LogicType = 1 + LogicType_UINT8 LogicType = 2 + LogicType_INT8 LogicType = 3 + LogicType_UINT16 LogicType = 4 + LogicType_INT16 LogicType = 5 + LogicType_UINT32 LogicType = 6 + LogicType_INT32 LogicType = 7 + LogicType_UINT64 LogicType = 8 + LogicType_INT64 LogicType = 9 + LogicType_HALF_FLOAT LogicType = 10 + LogicType_FLOAT LogicType = 11 + LogicType_DOUBLE LogicType = 12 + LogicType_STRING LogicType = 13 + LogicType_BINARY LogicType = 14 + LogicType_FIXED_SIZE_BINARY LogicType = 15 + // DATE32 = 16; + // DATE64 = 17; + // TIMESTAMP = 18; + // TIME32 = 19; + // TIME64 = 20; + // INTERVAL_MONTHS = 21; + // INTERVAL_DAY_TIME = 22; + // DECIMAL128 = 23; + // option allow_alias = true; + // DECIMAL = 23; // DECIMAL==DECIMAL128 + // DECIMAL256 = 24; + LogicType_LIST LogicType = 25 + LogicType_STRUCT LogicType = 26 + // SPARSE_UNION = 27; + // DENSE_UNION = 28; + LogicType_DICTIONARY LogicType = 29 + LogicType_MAP LogicType = 30 + // EXTENSION = 31; + LogicType_FIXED_SIZE_LIST LogicType = 32 + // DURATION = 33; + // LARGE_STRING = 34; + // LARGE_BINARY = 35; + // LARGE_LIST = 36; + // INTERVAL_MONTH_DAY_NANO = 37; + // RUN_END_ENCODED = 38; + LogicType_MAX_ID LogicType = 39 +) + +// Enum value maps for LogicType. 
+var ( + LogicType_name = map[int32]string{ + 0: "NA", + 1: "BOOL", + 2: "UINT8", + 3: "INT8", + 4: "UINT16", + 5: "INT16", + 6: "UINT32", + 7: "INT32", + 8: "UINT64", + 9: "INT64", + 10: "HALF_FLOAT", + 11: "FLOAT", + 12: "DOUBLE", + 13: "STRING", + 14: "BINARY", + 15: "FIXED_SIZE_BINARY", + 25: "LIST", + 26: "STRUCT", + 29: "DICTIONARY", + 30: "MAP", + 32: "FIXED_SIZE_LIST", + 39: "MAX_ID", + } + LogicType_value = map[string]int32{ + "NA": 0, + "BOOL": 1, + "UINT8": 2, + "INT8": 3, + "UINT16": 4, + "INT16": 5, + "UINT32": 6, + "INT32": 7, + "UINT64": 8, + "INT64": 9, + "HALF_FLOAT": 10, + "FLOAT": 11, + "DOUBLE": 12, + "STRING": 13, + "BINARY": 14, + "FIXED_SIZE_BINARY": 15, + "LIST": 25, + "STRUCT": 26, + "DICTIONARY": 29, + "MAP": 30, + "FIXED_SIZE_LIST": 32, + "MAX_ID": 39, + } +) + +func (x LogicType) Enum() *LogicType { + p := new(LogicType) + *p = x + return p +} + +func (x LogicType) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (LogicType) Descriptor() protoreflect.EnumDescriptor { + return file_storagev2_proto_enumTypes[0].Descriptor() +} + +func (LogicType) Type() protoreflect.EnumType { + return &file_storagev2_proto_enumTypes[0] +} + +func (x LogicType) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use LogicType.Descriptor instead. +func (LogicType) EnumDescriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{0} +} + +type Endianness int32 + +const ( + Endianness_Little Endianness = 0 + Endianness_Big Endianness = 1 +) + +// Enum value maps for Endianness. 
+var ( + Endianness_name = map[int32]string{ + 0: "Little", + 1: "Big", + } + Endianness_value = map[string]int32{ + "Little": 0, + "Big": 1, + } +) + +func (x Endianness) Enum() *Endianness { + p := new(Endianness) + *p = x + return p +} + +func (x Endianness) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (Endianness) Descriptor() protoreflect.EnumDescriptor { + return file_storagev2_proto_enumTypes[1].Descriptor() +} + +func (Endianness) Type() protoreflect.EnumType { + return &file_storagev2_proto_enumTypes[1] +} + +func (x Endianness) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use Endianness.Descriptor instead. +func (Endianness) EnumDescriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{1} +} + +type FixedSizeBinaryType struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ByteWidth int32 `protobuf:"varint,1,opt,name=byte_width,json=byteWidth,proto3" json:"byte_width,omitempty"` +} + +func (x *FixedSizeBinaryType) Reset() { + *x = FixedSizeBinaryType{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *FixedSizeBinaryType) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FixedSizeBinaryType) ProtoMessage() {} + +func (x *FixedSizeBinaryType) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FixedSizeBinaryType.ProtoReflect.Descriptor instead. 
+func (*FixedSizeBinaryType) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{0} +} + +func (x *FixedSizeBinaryType) GetByteWidth() int32 { + if x != nil { + return x.ByteWidth + } + return 0 +} + +type FixedSizeListType struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ListSize int32 `protobuf:"varint,1,opt,name=list_size,json=listSize,proto3" json:"list_size,omitempty"` +} + +func (x *FixedSizeListType) Reset() { + *x = FixedSizeListType{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *FixedSizeListType) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FixedSizeListType) ProtoMessage() {} + +func (x *FixedSizeListType) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FixedSizeListType.ProtoReflect.Descriptor instead. 
+func (*FixedSizeListType) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{1} +} + +func (x *FixedSizeListType) GetListSize() int32 { + if x != nil { + return x.ListSize + } + return 0 +} + +type DictionaryType struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + IndexType *DataType `protobuf:"bytes,1,opt,name=index_type,json=indexType,proto3" json:"index_type,omitempty"` + ValueType *DataType `protobuf:"bytes,2,opt,name=value_type,json=valueType,proto3" json:"value_type,omitempty"` + Ordered bool `protobuf:"varint,3,opt,name=ordered,proto3" json:"ordered,omitempty"` +} + +func (x *DictionaryType) Reset() { + *x = DictionaryType{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DictionaryType) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DictionaryType) ProtoMessage() {} + +func (x *DictionaryType) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DictionaryType.ProtoReflect.Descriptor instead. 
+func (*DictionaryType) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{2} +} + +func (x *DictionaryType) GetIndexType() *DataType { + if x != nil { + return x.IndexType + } + return nil +} + +func (x *DictionaryType) GetValueType() *DataType { + if x != nil { + return x.ValueType + } + return nil +} + +func (x *DictionaryType) GetOrdered() bool { + if x != nil { + return x.Ordered + } + return false +} + +type MapType struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + KeysSorted bool `protobuf:"varint,1,opt,name=keys_sorted,json=keysSorted,proto3" json:"keys_sorted,omitempty"` +} + +func (x *MapType) Reset() { + *x = MapType{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *MapType) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*MapType) ProtoMessage() {} + +func (x *MapType) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use MapType.ProtoReflect.Descriptor instead. 
+func (*MapType) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{3} +} + +func (x *MapType) GetKeysSorted() bool { + if x != nil { + return x.KeysSorted + } + return false +} + +type DataType struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Types that are assignable to TypeRelatedValues: + // + // *DataType_FixedSizeBinaryType + // *DataType_FixedSizeListType + // *DataType_DictionaryType + // *DataType_MapType + TypeRelatedValues isDataType_TypeRelatedValues `protobuf_oneof:"type_related_values"` + LogicType LogicType `protobuf:"varint,100,opt,name=logic_type,json=logicType,proto3,enum=milvus.proto.storagev2.LogicType" json:"logic_type,omitempty"` + Children []*Field `protobuf:"bytes,101,rep,name=children,proto3" json:"children,omitempty"` +} + +func (x *DataType) Reset() { + *x = DataType{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DataType) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DataType) ProtoMessage() {} + +func (x *DataType) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DataType.ProtoReflect.Descriptor instead. 
+func (*DataType) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{4} +} + +func (m *DataType) GetTypeRelatedValues() isDataType_TypeRelatedValues { + if m != nil { + return m.TypeRelatedValues + } + return nil +} + +func (x *DataType) GetFixedSizeBinaryType() *FixedSizeBinaryType { + if x, ok := x.GetTypeRelatedValues().(*DataType_FixedSizeBinaryType); ok { + return x.FixedSizeBinaryType + } + return nil +} + +func (x *DataType) GetFixedSizeListType() *FixedSizeListType { + if x, ok := x.GetTypeRelatedValues().(*DataType_FixedSizeListType); ok { + return x.FixedSizeListType + } + return nil +} + +func (x *DataType) GetDictionaryType() *DictionaryType { + if x, ok := x.GetTypeRelatedValues().(*DataType_DictionaryType); ok { + return x.DictionaryType + } + return nil +} + +func (x *DataType) GetMapType() *MapType { + if x, ok := x.GetTypeRelatedValues().(*DataType_MapType); ok { + return x.MapType + } + return nil +} + +func (x *DataType) GetLogicType() LogicType { + if x != nil { + return x.LogicType + } + return LogicType_NA +} + +func (x *DataType) GetChildren() []*Field { + if x != nil { + return x.Children + } + return nil +} + +type isDataType_TypeRelatedValues interface { + isDataType_TypeRelatedValues() +} + +type DataType_FixedSizeBinaryType struct { + FixedSizeBinaryType *FixedSizeBinaryType `protobuf:"bytes,1,opt,name=fixed_size_binary_type,json=fixedSizeBinaryType,proto3,oneof"` +} + +type DataType_FixedSizeListType struct { + FixedSizeListType *FixedSizeListType `protobuf:"bytes,2,opt,name=fixed_size_list_type,json=fixedSizeListType,proto3,oneof"` +} + +type DataType_DictionaryType struct { + DictionaryType *DictionaryType `protobuf:"bytes,3,opt,name=dictionary_type,json=dictionaryType,proto3,oneof"` +} + +type DataType_MapType struct { + MapType *MapType `protobuf:"bytes,4,opt,name=map_type,json=mapType,proto3,oneof"` +} + +func (*DataType_FixedSizeBinaryType) isDataType_TypeRelatedValues() {} + +func 
(*DataType_FixedSizeListType) isDataType_TypeRelatedValues() {} + +func (*DataType_DictionaryType) isDataType_TypeRelatedValues() {} + +func (*DataType_MapType) isDataType_TypeRelatedValues() {} + +type KeyValueMetadata struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Keys []string `protobuf:"bytes,1,rep,name=keys,proto3" json:"keys,omitempty"` + Values []string `protobuf:"bytes,2,rep,name=values,proto3" json:"values,omitempty"` +} + +func (x *KeyValueMetadata) Reset() { + *x = KeyValueMetadata{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *KeyValueMetadata) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*KeyValueMetadata) ProtoMessage() {} + +func (x *KeyValueMetadata) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[5] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use KeyValueMetadata.ProtoReflect.Descriptor instead. 
+func (*KeyValueMetadata) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{5} +} + +func (x *KeyValueMetadata) GetKeys() []string { + if x != nil { + return x.Keys + } + return nil +} + +func (x *KeyValueMetadata) GetValues() []string { + if x != nil { + return x.Values + } + return nil +} + +type Field struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + Nullable bool `protobuf:"varint,2,opt,name=nullable,proto3" json:"nullable,omitempty"` + DataType *DataType `protobuf:"bytes,3,opt,name=data_type,json=dataType,proto3" json:"data_type,omitempty"` + Metadata *KeyValueMetadata `protobuf:"bytes,4,opt,name=metadata,proto3" json:"metadata,omitempty"` +} + +func (x *Field) Reset() { + *x = Field{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Field) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Field) ProtoMessage() {} + +func (x *Field) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[6] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Field.ProtoReflect.Descriptor instead. 
+func (*Field) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{6} +} + +func (x *Field) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *Field) GetNullable() bool { + if x != nil { + return x.Nullable + } + return false +} + +func (x *Field) GetDataType() *DataType { + if x != nil { + return x.DataType + } + return nil +} + +func (x *Field) GetMetadata() *KeyValueMetadata { + if x != nil { + return x.Metadata + } + return nil +} + +type SchemaOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + PrimaryColumn string `protobuf:"bytes,1,opt,name=primary_column,json=primaryColumn,proto3" json:"primary_column,omitempty"` + VersionColumn string `protobuf:"bytes,2,opt,name=version_column,json=versionColumn,proto3" json:"version_column,omitempty"` + VectorColumn string `protobuf:"bytes,3,opt,name=vector_column,json=vectorColumn,proto3" json:"vector_column,omitempty"` +} + +func (x *SchemaOptions) Reset() { + *x = SchemaOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SchemaOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SchemaOptions) ProtoMessage() {} + +func (x *SchemaOptions) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SchemaOptions.ProtoReflect.Descriptor instead. 
+func (*SchemaOptions) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{7} +} + +func (x *SchemaOptions) GetPrimaryColumn() string { + if x != nil { + return x.PrimaryColumn + } + return "" +} + +func (x *SchemaOptions) GetVersionColumn() string { + if x != nil { + return x.VersionColumn + } + return "" +} + +func (x *SchemaOptions) GetVectorColumn() string { + if x != nil { + return x.VectorColumn + } + return "" +} + +type ArrowSchema struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Fields []*Field `protobuf:"bytes,1,rep,name=fields,proto3" json:"fields,omitempty"` + Endianness Endianness `protobuf:"varint,2,opt,name=endianness,proto3,enum=milvus.proto.storagev2.Endianness" json:"endianness,omitempty"` + Metadata *KeyValueMetadata `protobuf:"bytes,3,opt,name=metadata,proto3" json:"metadata,omitempty"` +} + +func (x *ArrowSchema) Reset() { + *x = ArrowSchema{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ArrowSchema) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ArrowSchema) ProtoMessage() {} + +func (x *ArrowSchema) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[8] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ArrowSchema.ProtoReflect.Descriptor instead. 
+func (*ArrowSchema) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{8} +} + +func (x *ArrowSchema) GetFields() []*Field { + if x != nil { + return x.Fields + } + return nil +} + +func (x *ArrowSchema) GetEndianness() Endianness { + if x != nil { + return x.Endianness + } + return Endianness_Little +} + +func (x *ArrowSchema) GetMetadata() *KeyValueMetadata { + if x != nil { + return x.Metadata + } + return nil +} + +type Schema struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ArrowSchema *ArrowSchema `protobuf:"bytes,1,opt,name=arrow_schema,json=arrowSchema,proto3" json:"arrow_schema,omitempty"` + SchemaOptions *SchemaOptions `protobuf:"bytes,2,opt,name=schema_options,json=schemaOptions,proto3" json:"schema_options,omitempty"` +} + +func (x *Schema) Reset() { + *x = Schema{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Schema) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Schema) ProtoMessage() {} + +func (x *Schema) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[9] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Schema.ProtoReflect.Descriptor instead. 
+func (*Schema) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{9} +} + +func (x *Schema) GetArrowSchema() *ArrowSchema { + if x != nil { + return x.ArrowSchema + } + return nil +} + +func (x *Schema) GetSchemaOptions() *SchemaOptions { + if x != nil { + return x.SchemaOptions + } + return nil +} + +type Options struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Uri string `protobuf:"bytes,1,opt,name=uri,proto3" json:"uri,omitempty"` +} + +func (x *Options) Reset() { + *x = Options{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Options) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Options) ProtoMessage() {} + +func (x *Options) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[10] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Options.ProtoReflect.Descriptor instead. 
+func (*Options) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{10} +} + +func (x *Options) GetUri() string { + if x != nil { + return x.Uri + } + return "" +} + +type Manifest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Version int64 `protobuf:"varint,1,opt,name=version,proto3" json:"version,omitempty"` + Options *Options `protobuf:"bytes,2,opt,name=options,proto3" json:"options,omitempty"` + Schema *Schema `protobuf:"bytes,3,opt,name=schema,proto3" json:"schema,omitempty"` + ScalarFragments []*Fragment `protobuf:"bytes,4,rep,name=scalar_fragments,json=scalarFragments,proto3" json:"scalar_fragments,omitempty"` + VectorFragments []*Fragment `protobuf:"bytes,5,rep,name=vector_fragments,json=vectorFragments,proto3" json:"vector_fragments,omitempty"` + DeleteFragments []*Fragment `protobuf:"bytes,6,rep,name=delete_fragments,json=deleteFragments,proto3" json:"delete_fragments,omitempty"` + Blobs []*Blob `protobuf:"bytes,7,rep,name=blobs,proto3" json:"blobs,omitempty"` +} + +func (x *Manifest) Reset() { + *x = Manifest{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Manifest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Manifest) ProtoMessage() {} + +func (x *Manifest) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[11] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Manifest.ProtoReflect.Descriptor instead. 
+func (*Manifest) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{11} +} + +func (x *Manifest) GetVersion() int64 { + if x != nil { + return x.Version + } + return 0 +} + +func (x *Manifest) GetOptions() *Options { + if x != nil { + return x.Options + } + return nil +} + +func (x *Manifest) GetSchema() *Schema { + if x != nil { + return x.Schema + } + return nil +} + +func (x *Manifest) GetScalarFragments() []*Fragment { + if x != nil { + return x.ScalarFragments + } + return nil +} + +func (x *Manifest) GetVectorFragments() []*Fragment { + if x != nil { + return x.VectorFragments + } + return nil +} + +func (x *Manifest) GetDeleteFragments() []*Fragment { + if x != nil { + return x.DeleteFragments + } + return nil +} + +func (x *Manifest) GetBlobs() []*Blob { + if x != nil { + return x.Blobs + } + return nil +} + +type Fragment struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Id int64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` + Files []string `protobuf:"bytes,2,rep,name=files,proto3" json:"files,omitempty"` +} + +func (x *Fragment) Reset() { + *x = Fragment{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Fragment) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Fragment) ProtoMessage() {} + +func (x *Fragment) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[12] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Fragment.ProtoReflect.Descriptor instead. 
+func (*Fragment) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{12} +} + +func (x *Fragment) GetId() int64 { + if x != nil { + return x.Id + } + return 0 +} + +func (x *Fragment) GetFiles() []string { + if x != nil { + return x.Files + } + return nil +} + +type Blob struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + Size int64 `protobuf:"varint,2,opt,name=size,proto3" json:"size,omitempty"` + File string `protobuf:"bytes,3,opt,name=file,proto3" json:"file,omitempty"` +} + +func (x *Blob) Reset() { + *x = Blob{} + if protoimpl.UnsafeEnabled { + mi := &file_storagev2_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Blob) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Blob) ProtoMessage() {} + +func (x *Blob) ProtoReflect() protoreflect.Message { + mi := &file_storagev2_proto_msgTypes[13] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Blob.ProtoReflect.Descriptor instead. 
+func (*Blob) Descriptor() ([]byte, []int) { + return file_storagev2_proto_rawDescGZIP(), []int{13} +} + +func (x *Blob) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *Blob) GetSize() int64 { + if x != nil { + return x.Size + } + return 0 +} + +func (x *Blob) GetFile() string { + if x != nil { + return x.File + } + return "" +} + +var File_storagev2_proto protoreflect.FileDescriptor + +var file_storagev2_proto_rawDesc = []byte{ + 0x0a, 0x0f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x12, 0x16, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x22, 0x34, 0x0a, 0x13, 0x46, 0x69, 0x78, + 0x65, 0x64, 0x53, 0x69, 0x7a, 0x65, 0x42, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x1d, 0x0a, 0x0a, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x62, 0x79, 0x74, 0x65, 0x57, 0x69, 0x64, 0x74, 0x68, 0x22, + 0x30, 0x0a, 0x11, 0x46, 0x69, 0x78, 0x65, 0x64, 0x53, 0x69, 0x7a, 0x65, 0x4c, 0x69, 0x73, 0x74, + 0x54, 0x79, 0x70, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x6c, 0x69, 0x73, 0x74, 0x5f, 0x73, 0x69, 0x7a, + 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x6c, 0x69, 0x73, 0x74, 0x53, 0x69, 0x7a, + 0x65, 0x22, 0xac, 0x01, 0x0a, 0x0e, 0x44, 0x69, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x72, 0x79, + 0x54, 0x79, 0x70, 0x65, 0x12, 0x3f, 0x0a, 0x0a, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x5f, 0x74, 0x79, + 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, + 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, + 0x32, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x69, 0x6e, 0x64, 0x65, + 0x78, 0x54, 0x79, 0x70, 0x65, 0x12, 0x3f, 0x0a, 0x0a, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x5f, 0x74, + 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 
0x20, 0x2e, 0x6d, 0x69, 0x6c, 0x76, + 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, + 0x76, 0x32, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x72, 0x64, 0x65, 0x72, 0x65, + 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x6f, 0x72, 0x64, 0x65, 0x72, 0x65, 0x64, + 0x22, 0x2a, 0x0a, 0x07, 0x4d, 0x61, 0x70, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x6b, + 0x65, 0x79, 0x73, 0x5f, 0x73, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x0a, 0x6b, 0x65, 0x79, 0x73, 0x53, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x22, 0xf1, 0x03, 0x0a, + 0x08, 0x44, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x62, 0x0a, 0x16, 0x66, 0x69, 0x78, + 0x65, 0x64, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x5f, 0x62, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x5f, 0x74, + 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2b, 0x2e, 0x6d, 0x69, 0x6c, 0x76, + 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, + 0x76, 0x32, 0x2e, 0x46, 0x69, 0x78, 0x65, 0x64, 0x53, 0x69, 0x7a, 0x65, 0x42, 0x69, 0x6e, 0x61, + 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x13, 0x66, 0x69, 0x78, 0x65, 0x64, 0x53, + 0x69, 0x7a, 0x65, 0x42, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x5c, 0x0a, + 0x14, 0x66, 0x69, 0x78, 0x65, 0x64, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x5f, 0x6c, 0x69, 0x73, 0x74, + 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x29, 0x2e, 0x6d, 0x69, + 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, + 0x67, 0x65, 0x76, 0x32, 0x2e, 0x46, 0x69, 0x78, 0x65, 0x64, 0x53, 0x69, 0x7a, 0x65, 0x4c, 0x69, + 0x73, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x11, 0x66, 0x69, 0x78, 0x65, 0x64, 0x53, + 0x69, 0x7a, 0x65, 0x4c, 0x69, 0x73, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x51, 0x0a, 0x0f, 0x64, + 
0x69, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x72, 0x79, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x44, 0x69, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x0e, + 0x64, 0x69, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x3c, + 0x0a, 0x08, 0x6d, 0x61, 0x70, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x1f, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x4d, 0x61, 0x70, 0x54, 0x79, 0x70, + 0x65, 0x48, 0x00, 0x52, 0x07, 0x6d, 0x61, 0x70, 0x54, 0x79, 0x70, 0x65, 0x12, 0x40, 0x0a, 0x0a, + 0x6c, 0x6f, 0x67, 0x69, 0x63, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x64, 0x20, 0x01, 0x28, 0x0e, + 0x32, 0x21, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x4c, 0x6f, 0x67, 0x69, 0x63, 0x54, + 0x79, 0x70, 0x65, 0x52, 0x09, 0x6c, 0x6f, 0x67, 0x69, 0x63, 0x54, 0x79, 0x70, 0x65, 0x12, 0x39, + 0x0a, 0x08, 0x63, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x18, 0x65, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x1d, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x52, + 0x08, 0x63, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x42, 0x15, 0x0a, 0x13, 0x74, 0x79, 0x70, + 0x65, 0x5f, 0x72, 0x65, 0x6c, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, + 0x22, 0x3e, 0x0a, 0x10, 0x4b, 0x65, 0x79, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x4d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x09, 0x52, 0x04, 0x6b, 0x65, 
0x79, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, + 0x22, 0xbc, 0x01, 0x0a, 0x05, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, + 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1a, + 0x0a, 0x08, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x08, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x3d, 0x0a, 0x09, 0x64, 0x61, + 0x74, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x20, 0x2e, + 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, + 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, + 0x08, 0x64, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x44, 0x0a, 0x08, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x6d, 0x69, + 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, + 0x67, 0x65, 0x76, 0x32, 0x2e, 0x4b, 0x65, 0x79, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x4d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, + 0x82, 0x01, 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x70, 0x72, 0x69, 0x6d, 0x61, 0x72, 0x79, 0x5f, 0x63, 0x6f, 0x6c, + 0x75, 0x6d, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x70, 0x72, 0x69, 0x6d, 0x61, + 0x72, 0x79, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, 0x25, 0x0a, 0x0e, 0x76, 0x65, 0x72, 0x73, + 0x69, 0x6f, 0x6e, 0x5f, 0x63, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x0d, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, + 0x23, 0x0a, 0x0d, 0x76, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6f, 
0x6c, 0x75, 0x6d, 0x6e, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x76, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, 0x6f, + 0x6c, 0x75, 0x6d, 0x6e, 0x22, 0xce, 0x01, 0x0a, 0x0b, 0x41, 0x72, 0x72, 0x6f, 0x77, 0x53, 0x63, + 0x68, 0x65, 0x6d, 0x61, 0x12, 0x35, 0x0a, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x01, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x46, 0x69, + 0x65, 0x6c, 0x64, 0x52, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x42, 0x0a, 0x0a, 0x65, + 0x6e, 0x64, 0x69, 0x61, 0x6e, 0x6e, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x22, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x45, 0x6e, 0x64, 0x69, 0x61, 0x6e, 0x6e, + 0x65, 0x73, 0x73, 0x52, 0x0a, 0x65, 0x6e, 0x64, 0x69, 0x61, 0x6e, 0x6e, 0x65, 0x73, 0x73, 0x12, + 0x44, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x28, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x4b, 0x65, 0x79, 0x56, 0x61, + 0x6c, 0x75, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x9e, 0x01, 0x0a, 0x06, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, + 0x12, 0x46, 0x0a, 0x0c, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, + 0x41, 0x72, 0x72, 0x6f, 0x77, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x52, 0x0b, 0x61, 0x72, 0x72, + 0x6f, 0x77, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x4c, 0x0a, 0x0e, 0x73, 0x63, 0x68, 0x65, + 0x6d, 0x61, 
0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x25, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x0d, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x1b, 0x0a, 0x07, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x75, 0x72, 0x69, 0x22, 0xb2, 0x03, 0x0a, 0x08, 0x4d, 0x61, 0x6e, 0x69, 0x66, 0x65, 0x73, 0x74, + 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x39, 0x0a, 0x07, 0x6f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x69, + 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, + 0x67, 0x65, 0x76, 0x32, 0x2e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x36, 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x53, + 0x63, 0x68, 0x65, 0x6d, 0x61, 0x52, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x4b, 0x0a, + 0x10, 0x73, 0x63, 0x61, 0x6c, 0x61, 0x72, 0x5f, 0x66, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, + 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, + 0x2e, 0x46, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x0f, 0x73, 0x63, 0x61, 0x6c, 0x61, + 0x72, 0x46, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 
0x74, 0x73, 0x12, 0x4b, 0x0a, 0x10, 0x76, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x66, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x05, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x46, 0x72, + 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x0f, 0x76, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x46, 0x72, + 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x4b, 0x0a, 0x10, 0x64, 0x65, 0x6c, 0x65, 0x74, + 0x65, 0x5f, 0x66, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x20, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x46, 0x72, 0x61, 0x67, 0x6d, + 0x65, 0x6e, 0x74, 0x52, 0x0f, 0x64, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x46, 0x72, 0x61, 0x67, 0x6d, + 0x65, 0x6e, 0x74, 0x73, 0x12, 0x32, 0x0a, 0x05, 0x62, 0x6c, 0x6f, 0x62, 0x73, 0x18, 0x07, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x76, 0x32, 0x2e, 0x42, 0x6c, 0x6f, + 0x62, 0x52, 0x05, 0x62, 0x6c, 0x6f, 0x62, 0x73, 0x22, 0x30, 0x0a, 0x08, 0x46, 0x72, 0x61, 0x67, + 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x02, 0x20, + 0x03, 0x28, 0x09, 0x52, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x22, 0x42, 0x0a, 0x04, 0x42, 0x6c, + 0x6f, 0x62, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x03, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x66, 0x69, + 0x6c, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x66, 0x69, 0x6c, 0x65, 
0x2a, 0x9d, + 0x02, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x63, 0x54, 0x79, 0x70, 0x65, 0x12, 0x06, 0x0a, 0x02, + 0x4e, 0x41, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x4f, 0x4f, 0x4c, 0x10, 0x01, 0x12, 0x09, + 0x0a, 0x05, 0x55, 0x49, 0x4e, 0x54, 0x38, 0x10, 0x02, 0x12, 0x08, 0x0a, 0x04, 0x49, 0x4e, 0x54, + 0x38, 0x10, 0x03, 0x12, 0x0a, 0x0a, 0x06, 0x55, 0x49, 0x4e, 0x54, 0x31, 0x36, 0x10, 0x04, 0x12, + 0x09, 0x0a, 0x05, 0x49, 0x4e, 0x54, 0x31, 0x36, 0x10, 0x05, 0x12, 0x0a, 0x0a, 0x06, 0x55, 0x49, + 0x4e, 0x54, 0x33, 0x32, 0x10, 0x06, 0x12, 0x09, 0x0a, 0x05, 0x49, 0x4e, 0x54, 0x33, 0x32, 0x10, + 0x07, 0x12, 0x0a, 0x0a, 0x06, 0x55, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x08, 0x12, 0x09, 0x0a, + 0x05, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x09, 0x12, 0x0e, 0x0a, 0x0a, 0x48, 0x41, 0x4c, 0x46, + 0x5f, 0x46, 0x4c, 0x4f, 0x41, 0x54, 0x10, 0x0a, 0x12, 0x09, 0x0a, 0x05, 0x46, 0x4c, 0x4f, 0x41, + 0x54, 0x10, 0x0b, 0x12, 0x0a, 0x0a, 0x06, 0x44, 0x4f, 0x55, 0x42, 0x4c, 0x45, 0x10, 0x0c, 0x12, + 0x0a, 0x0a, 0x06, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x0d, 0x12, 0x0a, 0x0a, 0x06, 0x42, + 0x49, 0x4e, 0x41, 0x52, 0x59, 0x10, 0x0e, 0x12, 0x15, 0x0a, 0x11, 0x46, 0x49, 0x58, 0x45, 0x44, + 0x5f, 0x53, 0x49, 0x5a, 0x45, 0x5f, 0x42, 0x49, 0x4e, 0x41, 0x52, 0x59, 0x10, 0x0f, 0x12, 0x08, + 0x0a, 0x04, 0x4c, 0x49, 0x53, 0x54, 0x10, 0x19, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x54, 0x52, 0x55, + 0x43, 0x54, 0x10, 0x1a, 0x12, 0x0e, 0x0a, 0x0a, 0x44, 0x49, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x41, + 0x52, 0x59, 0x10, 0x1d, 0x12, 0x07, 0x0a, 0x03, 0x4d, 0x41, 0x50, 0x10, 0x1e, 0x12, 0x13, 0x0a, + 0x0f, 0x46, 0x49, 0x58, 0x45, 0x44, 0x5f, 0x53, 0x49, 0x5a, 0x45, 0x5f, 0x4c, 0x49, 0x53, 0x54, + 0x10, 0x20, 0x12, 0x0a, 0x0a, 0x06, 0x4d, 0x41, 0x58, 0x5f, 0x49, 0x44, 0x10, 0x27, 0x2a, 0x21, + 0x0a, 0x0a, 0x45, 0x6e, 0x64, 0x69, 0x61, 0x6e, 0x6e, 0x65, 0x73, 0x73, 0x12, 0x0a, 0x0a, 0x06, + 0x4c, 0x69, 0x74, 0x74, 0x6c, 0x65, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x42, 0x69, 0x67, 0x10, + 0x01, 0x42, 0x33, 0x5a, 
0x31, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, + 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2d, 0x69, 0x6f, 0x2f, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, + 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x73, 0x74, 0x6f, 0x72, 0x61, + 0x67, 0x65, 0x76, 0x32, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_storagev2_proto_rawDescOnce sync.Once + file_storagev2_proto_rawDescData = file_storagev2_proto_rawDesc +) + +func file_storagev2_proto_rawDescGZIP() []byte { + file_storagev2_proto_rawDescOnce.Do(func() { + file_storagev2_proto_rawDescData = protoimpl.X.CompressGZIP(file_storagev2_proto_rawDescData) + }) + return file_storagev2_proto_rawDescData +} + +var file_storagev2_proto_enumTypes = make([]protoimpl.EnumInfo, 2) +var file_storagev2_proto_msgTypes = make([]protoimpl.MessageInfo, 14) +var file_storagev2_proto_goTypes = []interface{}{ + (LogicType)(0), // 0: milvus.proto.storagev2.LogicType + (Endianness)(0), // 1: milvus.proto.storagev2.Endianness + (*FixedSizeBinaryType)(nil), // 2: milvus.proto.storagev2.FixedSizeBinaryType + (*FixedSizeListType)(nil), // 3: milvus.proto.storagev2.FixedSizeListType + (*DictionaryType)(nil), // 4: milvus.proto.storagev2.DictionaryType + (*MapType)(nil), // 5: milvus.proto.storagev2.MapType + (*DataType)(nil), // 6: milvus.proto.storagev2.DataType + (*KeyValueMetadata)(nil), // 7: milvus.proto.storagev2.KeyValueMetadata + (*Field)(nil), // 8: milvus.proto.storagev2.Field + (*SchemaOptions)(nil), // 9: milvus.proto.storagev2.SchemaOptions + (*ArrowSchema)(nil), // 10: milvus.proto.storagev2.ArrowSchema + (*Schema)(nil), // 11: milvus.proto.storagev2.Schema + (*Options)(nil), // 12: milvus.proto.storagev2.Options + (*Manifest)(nil), // 13: milvus.proto.storagev2.Manifest + (*Fragment)(nil), // 14: milvus.proto.storagev2.Fragment + (*Blob)(nil), // 15: milvus.proto.storagev2.Blob +} +var file_storagev2_proto_depIdxs = []int32{ + 6, // 0: 
milvus.proto.storagev2.DictionaryType.index_type:type_name -> milvus.proto.storagev2.DataType + 6, // 1: milvus.proto.storagev2.DictionaryType.value_type:type_name -> milvus.proto.storagev2.DataType + 2, // 2: milvus.proto.storagev2.DataType.fixed_size_binary_type:type_name -> milvus.proto.storagev2.FixedSizeBinaryType + 3, // 3: milvus.proto.storagev2.DataType.fixed_size_list_type:type_name -> milvus.proto.storagev2.FixedSizeListType + 4, // 4: milvus.proto.storagev2.DataType.dictionary_type:type_name -> milvus.proto.storagev2.DictionaryType + 5, // 5: milvus.proto.storagev2.DataType.map_type:type_name -> milvus.proto.storagev2.MapType + 0, // 6: milvus.proto.storagev2.DataType.logic_type:type_name -> milvus.proto.storagev2.LogicType + 8, // 7: milvus.proto.storagev2.DataType.children:type_name -> milvus.proto.storagev2.Field + 6, // 8: milvus.proto.storagev2.Field.data_type:type_name -> milvus.proto.storagev2.DataType + 7, // 9: milvus.proto.storagev2.Field.metadata:type_name -> milvus.proto.storagev2.KeyValueMetadata + 8, // 10: milvus.proto.storagev2.ArrowSchema.fields:type_name -> milvus.proto.storagev2.Field + 1, // 11: milvus.proto.storagev2.ArrowSchema.endianness:type_name -> milvus.proto.storagev2.Endianness + 7, // 12: milvus.proto.storagev2.ArrowSchema.metadata:type_name -> milvus.proto.storagev2.KeyValueMetadata + 10, // 13: milvus.proto.storagev2.Schema.arrow_schema:type_name -> milvus.proto.storagev2.ArrowSchema + 9, // 14: milvus.proto.storagev2.Schema.schema_options:type_name -> milvus.proto.storagev2.SchemaOptions + 12, // 15: milvus.proto.storagev2.Manifest.options:type_name -> milvus.proto.storagev2.Options + 11, // 16: milvus.proto.storagev2.Manifest.schema:type_name -> milvus.proto.storagev2.Schema + 14, // 17: milvus.proto.storagev2.Manifest.scalar_fragments:type_name -> milvus.proto.storagev2.Fragment + 14, // 18: milvus.proto.storagev2.Manifest.vector_fragments:type_name -> milvus.proto.storagev2.Fragment + 14, // 19: 
milvus.proto.storagev2.Manifest.delete_fragments:type_name -> milvus.proto.storagev2.Fragment + 15, // 20: milvus.proto.storagev2.Manifest.blobs:type_name -> milvus.proto.storagev2.Blob + 21, // [21:21] is the sub-list for method output_type + 21, // [21:21] is the sub-list for method input_type + 21, // [21:21] is the sub-list for extension type_name + 21, // [21:21] is the sub-list for extension extendee + 0, // [0:21] is the sub-list for field type_name +} + +func init() { file_storagev2_proto_init() } +func file_storagev2_proto_init() { + if File_storagev2_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_storagev2_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*FixedSizeBinaryType); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*FixedSizeListType); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DictionaryType); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*MapType); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DataType); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { + switch v := 
v.(*KeyValueMetadata); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Field); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*SchemaOptions); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ArrowSchema); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Schema); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Options); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Manifest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Fragment); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_storagev2_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { + switch v := 
v.(*Blob); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_storagev2_proto_msgTypes[4].OneofWrappers = []interface{}{ + (*DataType_FixedSizeBinaryType)(nil), + (*DataType_FixedSizeListType)(nil), + (*DataType_DictionaryType)(nil), + (*DataType_MapType)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_storagev2_proto_rawDesc, + NumEnums: 2, + NumMessages: 14, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_storagev2_proto_goTypes, + DependencyIndexes: file_storagev2_proto_depIdxs, + EnumInfos: file_storagev2_proto_enumTypes, + MessageInfos: file_storagev2_proto_msgTypes, + }.Build() + File_storagev2_proto = out.File + file_storagev2_proto_rawDesc = nil + file_storagev2_proto_goTypes = nil + file_storagev2_proto_depIdxs = nil +} diff --git a/scripts/core_build.sh b/scripts/core_build.sh index 7169ce05c7da6..fecdc3acdf2c5 100755 --- a/scripts/core_build.sh +++ b/scripts/core_build.sh @@ -101,6 +101,7 @@ USE_ASAN="OFF" USE_DYNAMIC_SIMD="ON" USE_OPENDAL="OFF" INDEX_ENGINE="KNOWHERE" +ENABLE_AZURE_FS="OFF" : "${ENABLE_GCP_NATIVE:="OFF"}" while getopts "p:d:t:s:f:n:i:y:a:x:o:ulrcghzmebZ" arg; do @@ -257,7 +258,8 @@ ${CMAKE_EXTRA_ARGS} \ -DCPU_ARCH=${CPU_ARCH} \ -DUSE_OPENDAL=${USE_OPENDAL} \ -DINDEX_ENGINE=${INDEX_ENGINE} \ --DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} " +-DENABLE_GCP_NATIVE=${ENABLE_GCP_NATIVE} \ +-DENABLE_AZURE_FS=${ENABLE_AZURE_FS} " if [ -z "$BUILD_WITHOUT_AZURE" ]; then CMAKE_CMD=${CMAKE_CMD}"-DAZURE_BUILD_DIR=${AZURE_BUILD_DIR} \ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} " diff --git a/scripts/generate_proto.sh b/scripts/generate_proto.sh index c8851fac705ab..0263fd1796f81 100755 --- a/scripts/generate_proto.sh +++ b/scripts/generate_proto.sh @@ -62,10 +62,12 @@ mkdir -p ./planpb mkdir -p ./workerpb mkdir -p ./messagespb mkdir -p 
./streamingpb +mkdir -p ./storagev2pb mkdir -p $ROOT_DIR/cmd/tools/migration/legacy/legacypb protoc_opt="${PROTOC_BIN} --proto_path=${API_PROTO_DIR} --proto_path=." +${protoc_opt} --go_out=paths=source_relative:./storagev2pb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./storagev2pb storagev2.proto || { echo 'generate storagev2.proto failed'; exit 1; } ${protoc_opt} --go_out=paths=source_relative:./etcdpb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./etcdpb etcd_meta.proto || { echo 'generate etcd_meta.proto failed'; exit 1; } ${protoc_opt} --go_out=paths=source_relative:./indexcgopb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./indexcgopb index_cgo_msg.proto || { echo 'generate index_cgo_msg failed '; exit 1; } ${protoc_opt} --go_out=paths=source_relative:./cgopb --go-grpc_out=require_unimplemented_servers=false,paths=source_relative:./cgopb cgo_msg.proto || { echo 'generate cgo_msg failed '; exit 1; }