Skip to content

Commit

Permalink
Variant arrow extension type
Browse files Browse the repository at this point in the history
  • Loading branch information
neilechao committed Feb 25, 2025
1 parent 3e9c4f7 commit 5096d74
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 2 deletions.
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ set(ARROW_SRCS
extension/bool8.cc
extension/json.cc
extension/uuid.cc
extension/variant.cc
pretty_print.cc
record_batch.cc
result.cc
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/extension/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

set(CANONICAL_EXTENSION_TESTS bool8_test.cc json_test.cc uuid_test.cc)
set(CANONICAL_EXTENSION_TESTS bool8_test.cc json_test.cc uuid_test.cc variant_test.cc)

if(ARROW_JSON)
list(APPEND CANONICAL_EXTENSION_TESTS fixed_shape_tensor_test.cc opaque_test.cc)
Expand Down
60 changes: 60 additions & 0 deletions cpp/src/arrow/extension/variant.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/extension/variant.h"

#include <string>

#include "arrow/extension_type.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/logging.h"

namespace arrow::extension {

// Tells whether `type` is one of the two binary type ids checked here:
// Type::BINARY or Type::LARGE_BINARY.
bool isBinary(Type::type type) {
  if (type == Type::BINARY) {
    return true;
  }
  return type == Type::LARGE_BINARY;
}

// Reports whether `storage_type` may back a VariantExtensionType. The only
// requirement enforced at present is that it is a struct type with exactly
// three fields.
bool VariantExtensionType::IsSupportedStorageType(
    std::shared_ptr<DataType> storage_type) {
  // Anything that is not a struct is rejected outright.
  if (storage_type->id() != Type::STRUCT) {
    return false;
  }

  // TODO(neilechao) assertions for binary types, and non-nullable first field for
  // metadata
  const auto field_count = storage_type->num_fields();
  return field_count == 3;
}

// Validating factory: yields a new VariantExtensionType wrapping `storage_type`
// when IsSupportedStorageType() accepts it, and Status::Invalid (with the
// offending type's string form appended) otherwise.
Result<std::shared_ptr<DataType>> VariantExtensionType::Make(
    std::shared_ptr<DataType> storage_type) {
  if (IsSupportedStorageType(storage_type)) {
    return std::make_shared<VariantExtensionType>(std::move(storage_type));
  }

  return Status::Invalid(
      "Invalid storage type for VariantExtensionType, must be struct with binary "
      "metadata, value, and typed_value fields: ",
      storage_type->ToString());
}

// Convenience factory around VariantExtensionType::Make(). The Result is
// unwrapped with ValueOrDie(), so a rejected storage type is not reported back
// to the caller as a Status.
std::shared_ptr<DataType> variant(std::shared_ptr<DataType> storage_type) {
  auto maybe_type = VariantExtensionType::Make(std::move(storage_type));
  return std::move(maybe_type).ValueOrDie();
}

} // namespace arrow::extension
58 changes: 58 additions & 0 deletions cpp/src/arrow/extension/variant.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <stdexcept>
#include <string>

#include "arrow/extension_type.h"
#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"

namespace arrow::extension {

/// \brief Arrow extension type for Variant values, layered over a caller-supplied
/// storage type.
///
/// Instances are normally obtained through Make(), which rejects storage types
/// that IsSupportedStorageType() does not accept.
class ARROW_EXPORT VariantExtensionType : public ExtensionType {
public:
/// \brief Construct directly from `storage_type`.
///
/// No validation is performed here; the type is forwarded to the ExtensionType
/// base and also kept in this class's own storage_type_ member.
explicit VariantExtensionType(const std::shared_ptr<DataType>& storage_type)
: ExtensionType(storage_type), storage_type_(storage_type) {}

/// \brief Name reported for this extension type.
// NOTE(review): "variant.json" resembles the JSON extension's naming -- confirm
// this is the intended name for the variant type.
std::string extension_name() const override { return "variant.json"; }

/// \brief Base-class override; only the declaration is visible in this view.
bool ExtensionEquals(const ExtensionType& other) const override;

/// \brief Base-class override taking a storage type and the serialized form;
/// only the declaration is visible in this view.
Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized_data) const override;

/// \brief Base-class override; only the declaration is visible in this view.
std::string Serialize() const override;

/// \brief Base-class override; only the declaration is visible in this view.
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;

/// \brief Validating factory: returns a VariantExtensionType for `storage_type`,
/// or an error Result when IsSupportedStorageType() rejects it.
static Result<std::shared_ptr<DataType>> Make(std::shared_ptr<DataType> storage_type);

/// \brief Whether `storage_type` is acceptable as storage for this extension type.
static bool IsSupportedStorageType(std::shared_ptr<DataType> storage_type);

private:
// Copy of the storage type handed to the constructor.
std::shared_ptr<DataType> storage_type_;
};

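/// \brief Return a VariantExtensionType instance wrapping `storage_type`.
///
/// Built through VariantExtensionType::Make() followed by ValueOrDie(), so an
/// unsupported storage type is treated as a fatal error rather than returned as
/// a Status.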
ARROW_EXPORT std::shared_ptr<DataType> variant(std::shared_ptr<DataType> storage_type);

} // namespace arrow::extension
34 changes: 34 additions & 0 deletions cpp/src/arrow/extension/variant_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/extension/variant.h"

#include "arrow/array/validate.h"
#include "arrow/ipc/test_common.h"
#include "arrow/record_batch.h"
#include "arrow/testing/gtest_util.h"
#include "parquet/exception.h"

namespace arrow {

using arrow::ipc::test::RoundtripBatch;

// Fixture for VariantExtensionType tests; it adds nothing on top of
// ::testing::Test.
class TestVariantExtensionType : public ::testing::Test {};

// Placeholder: the body is ASSERT_TRUE(false), so this test fails
// unconditionally until a real round-trip check replaces it.
TEST_F(TestVariantExtensionType, VariantRoundtrip) { ASSERT_TRUE(false); }

} // namespace arrow
2 changes: 1 addition & 1 deletion cpp/src/parquet/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ class PARQUET_EXPORT Float16LogicalType : public LogicalType {
Float16LogicalType() = default;
};

/// \brief Allowed for physical type BYTE_ARRAY.
/// \brief Allowed for group nodes only.
class PARQUET_EXPORT VariantLogicalType : public LogicalType {
public:
static std::shared_ptr<const LogicalType> Make();
Expand Down

0 comments on commit 5096d74

Please sign in to comment.