Skip to content

Commit

Permalink
Add C++ tests
Browse files Browse the repository at this point in the history
  • Loading branch information
EnricoMi committed Jan 31, 2025
1 parent 1cd49b8 commit f564f29
Showing 1 changed file with 127 additions and 3 deletions.
130 changes: 127 additions & 3 deletions cpp/src/arrow/dataset/file_parquet_encryption_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
// under the License.

#include <string_view>
#include <arrow/array/builder_binary.h>
#include <arrow/array/builder_nested.h>
#include <arrow/array/builder_primitive.h>
#include <arrow/util/logging.h>
#include <boost/container/container_fwd.hpp>

#include "gtest/gtest.h"

Expand Down Expand Up @@ -43,7 +48,7 @@ constexpr std::string_view kFooterKeyMasterKeyId = "footer_key";
// Footer key name handed to the EncryptionConfiguration constructor.
constexpr std::string_view kFooterKeyName = "footer_key";
// Column master key and its identifier. The identifier is the left-hand side of the
// "<key id>: <column>" column_keys mapping.
// NOTE(review): presumably both are registered with the test KMS -- not visible here.
constexpr std::string_view kColumnMasterKey = "1234567890123450";
constexpr std::string_view kColumnMasterKeyId = "col_key";
// Fixed column_keys mapping string ("<key id>: <column>").
constexpr std::string_view kColumnKeyMapping = "col_key: a";
// Name of the encrypted column; also the default returned by ColumnKey().
constexpr std::string_view kColumnName = "a";
// NOTE(review): presumably the base directory of the written dataset -- confirm at use site.
constexpr std::string_view kBaseDir = "";

using arrow::internal::checked_pointer_cast;
Expand Down Expand Up @@ -90,7 +95,9 @@ class DatasetEncryptionTestBase : public ::testing::Test {
auto encryption_config =
std::make_shared<parquet::encryption::EncryptionConfiguration>(
std::string(kFooterKeyName));
encryption_config->column_keys = kColumnKeyMapping;
std::stringstream column_key;
column_key << kColumnMasterKeyId << ": " << ColumnKey();
encryption_config->column_keys = column_key.str();
auto parquet_encryption_config = std::make_shared<ParquetEncryptionConfig>();
// Directly assign shared_ptr objects to ParquetEncryptionConfig members
parquet_encryption_config->crypto_factory = crypto_factory_;
Expand Down Expand Up @@ -118,6 +125,7 @@ class DatasetEncryptionTestBase : public ::testing::Test {
}

virtual void PrepareTableAndPartitioning() = 0;
// Column name that is paired with kColumnMasterKeyId when the encryption
// configuration's column_keys string is built. Defaults to kColumnName; fixtures
// override it to encrypt a different (e.g. nested) column.
virtual std::string_view ColumnKey() { return kColumnName; }

void TestScanDataset() {
// Create decryption properties.
Expand Down Expand Up @@ -179,7 +187,7 @@ class DatasetEncryptionTest : public DatasetEncryptionTestBase {
// The dataset is partitioned using a Hive partitioning scheme.
void PrepareTableAndPartitioning() override {
// Prepare table data.
auto table_schema = schema({field("a", int64()), field("c", int64()),
auto table_schema = schema({field(std::string(kColumnName), int64()), field("c", int64()),
field("e", int64()), field("part", utf8())});
table_ = TableFromJSON(table_schema, {R"([
[ 0, 9, 1, "a" ],
Expand Down Expand Up @@ -240,6 +248,122 @@ TEST_F(DatasetEncryptionTest, ReadSingleFile) {
ASSERT_EQ(checked_pointer_cast<Int64Array>(table->column(2)->chunk(0))->GetView(0), 1);
}

// Base fixture for encryption tests over a single nested column named "a".
//
// Derived fixtures populate `column_type_` and `column_data_` in their constructor.
// The test parameter (a std::string) is returned from ColumnKey(), so each
// instantiation selects which column name is put into the encryption configuration.
class NestedFieldsEncryptionTest
    : public DatasetEncryptionTestBase,
      public ::testing::WithParamInterface<std::string> {
 public:
  NestedFieldsEncryptionTest() : rand_gen(0) {}

  // Builds a one-column table from the type and data prepared by the derived fixture.
  // The partitioning is a DirectoryPartitioning over an empty schema.
  void PrepareTableAndPartitioning() override {
    // Copy the type instead of moving it, so column_type_ stays valid after this call.
    auto table_schema = schema({field("a", column_type_)});
    table_ = arrow::Table::Make(table_schema, {column_data_});
    partitioning_ = std::make_shared<dataset::DirectoryPartitioning>(arrow::schema({}));
  }

  // The column key is the test parameter. The returned view refers to the parameter
  // object owned by GoogleTest.
  std::string_view ColumnKey() override { return GetParam(); }

 protected:
  // Arrow type of column "a"; set by the derived fixture.
  std::shared_ptr<DataType> column_type_;
  // Values of column "a"; set by the derived fixture.
  std::shared_ptr<Array> column_data_;
  // Random array generator constructed with seed 0.
  arrow::random::RandomArrayGenerator rand_gen;
};

// Fixture whose column has type list<int32>.
//
// The column holds three lists: [1, 2, 3], [4, 5] and [6].
class ListFieldEncryptionTest : public NestedFieldsEncryptionTest {
 public:
  explicit ListFieldEncryptionTest() : NestedFieldsEncryptionTest() {
    arrow::MemoryPool* pool = arrow::default_memory_pool();
    auto value_builder = std::make_shared<arrow::Int32Builder>(pool);
    arrow::ListBuilder list_builder(pool, value_builder);

    // Each list_builder.Append() opens a new list; the value_builder appends that
    // follow become its elements.
    ARROW_CHECK_OK(list_builder.Append());
    ARROW_CHECK_OK(value_builder->Append(1));
    ARROW_CHECK_OK(value_builder->Append(2));
    ARROW_CHECK_OK(value_builder->Append(3));
    ARROW_CHECK_OK(list_builder.Append());
    ARROW_CHECK_OK(value_builder->Append(4));
    ARROW_CHECK_OK(value_builder->Append(5));
    ARROW_CHECK_OK(list_builder.Append());
    ARROW_CHECK_OK(value_builder->Append(6));

    std::shared_ptr<arrow::Array> list_array;
    // Check the status: an ignored failure here would leave column_data_ null and
    // only surface later as an unrelated crash.
    ARROW_CHECK_OK(list_builder.Finish(&list_array));

    column_type_ = list(int32());
    column_data_ = list_array;
  }
};

class MapFieldEncryptionTest : public NestedFieldsEncryptionTest {
public:
explicit MapFieldEncryptionTest() : NestedFieldsEncryptionTest() {
arrow::MemoryPool* pool = arrow::default_memory_pool();
auto map_type = map(utf8(), int32());
auto key_builder = std::make_shared<arrow::StringBuilder>(pool);
auto item_builder = std::make_shared<arrow::Int32Builder>(pool);
auto map_builder = std::make_shared<arrow::MapBuilder>(pool, key_builder, item_builder, map_type);
ARROW_CHECK_OK(map_builder->Append());
ARROW_CHECK_OK(key_builder->Append("one"));
ARROW_CHECK_OK(item_builder->Append(1));
ARROW_CHECK_OK(map_builder->Append());
ARROW_CHECK_OK(key_builder->Append("two"));
ARROW_CHECK_OK(item_builder->Append(2));
ARROW_CHECK_OK(map_builder->Append());
ARROW_CHECK_OK(key_builder->Append("three"));
ARROW_CHECK_OK(item_builder->Append(3));

std::shared_ptr<arrow::Array> map_array;
ARROW_CHECK_OK(map_builder->Finish(&map_array));

column_type_ = map_type;
column_data_ = map_array;
}
};

// Fixture whose column has type struct<f1: int32, f2: utf8>.
//
// The column holds three structs: {1, "one"}, {2, "two"} and {3, "three"}.
class StructFieldEncryptionTest : public NestedFieldsEncryptionTest {
 public:
  explicit StructFieldEncryptionTest() : NestedFieldsEncryptionTest() {
    arrow::MemoryPool* pool = arrow::default_memory_pool();
    auto struct_type = struct_({field("f1", int32()), field("f2", utf8())});
    auto f1_builder = std::make_shared<arrow::Int32Builder>(pool);
    auto f2_builder = std::make_shared<arrow::StringBuilder>(pool);
    std::vector<std::shared_ptr<ArrayBuilder>> value_builders = {f1_builder, f2_builder};
    // struct_type is read again below for column_type_, so it must not be moved from.
    auto struct_builder =
        std::make_shared<arrow::StructBuilder>(struct_type, pool, value_builders);

    // Each struct_builder->Append() adds one struct slot; the field builders then
    // supply that slot's f1 and f2 values.
    ARROW_CHECK_OK(struct_builder->Append());
    ARROW_CHECK_OK(f1_builder->Append(1));
    ARROW_CHECK_OK(f2_builder->Append("one"));
    ARROW_CHECK_OK(struct_builder->Append());
    ARROW_CHECK_OK(f1_builder->Append(2));
    ARROW_CHECK_OK(f2_builder->Append("two"));
    ARROW_CHECK_OK(struct_builder->Append());
    ARROW_CHECK_OK(f1_builder->Append(3));
    ARROW_CHECK_OK(f2_builder->Append("three"));

    std::shared_ptr<arrow::Array> struct_array;
    ARROW_CHECK_OK(struct_builder->Finish(&struct_array));

    column_type_ = struct_type;
    column_data_ = struct_array;
  }
};

// Test writing and reading encrypted nested fields
// Each parameter value is returned by the fixture's ColumnKey() and is thereby written
// into the encryption configuration's column_keys mapping for that test run.
// NOTE(review): the dotted values look like paths to nested child columns -- confirm
// which of them the writer is expected to accept.
INSTANTIATE_TEST_SUITE_P(List, ListFieldEncryptionTest, ::testing::Values("a", "a.list.element"));
INSTANTIATE_TEST_SUITE_P(Map, MapFieldEncryptionTest, ::testing::Values("a", "a.key", "a.value", "a.key_value.key", "a.key_value.value"));
INSTANTIATE_TEST_SUITE_P(Struct, StructFieldEncryptionTest, ::testing::Values("a", "a.f1", "a.f2"));

// Runs the shared dataset scan check once per column key instantiated for the
// list fixture.
TEST_P(ListFieldEncryptionTest, ColumnKeys) { this->TestScanDataset(); }

// Runs the shared dataset scan check once per column key instantiated for the
// map fixture.
TEST_P(MapFieldEncryptionTest, ColumnKeys) { this->TestScanDataset(); }

// Runs the shared dataset scan check once per column key instantiated for the
// struct fixture.
TEST_P(StructFieldEncryptionTest, ColumnKeys) { this->TestScanDataset(); }

// GH-39444: This test covers the case where parquet dataset scanner crashes when
// processing encrypted datasets over 2^15 rows in multi-threaded mode.
class LargeRowEncryptionTest : public DatasetEncryptionTestBase {
Expand Down

0 comments on commit f564f29

Please sign in to comment.