From af82dccaba1f392e1ce3141494fff6e703193b6c Mon Sep 17 00:00:00 2001 From: Julien Phalip Date: Thu, 29 Dec 2022 11:12:52 -0600 Subject: [PATCH] Carry Hive's table and column comments over to the corresponding BQ descriptions --- .../bigquery/connector/BigQueryMetaHook.java | 5 +- .../utils/bq/BigQuerySchemaConverter.java | 8 +- .../connector/IntegrationTestsBase.java | 27 +++- ...dAndExternalHiveTableIntegrationTests.java | 12 +- .../connector/PartitionIntegrationTests.java | 15 ++- .../hive/bigquery/connector/TestUtils.java | 120 ++++++------------ 6 files changed, 85 insertions(+), 102 deletions(-) diff --git a/connector/src/main/java/com/google/cloud/hive/bigquery/connector/BigQueryMetaHook.java b/connector/src/main/java/com/google/cloud/hive/bigquery/connector/BigQueryMetaHook.java index 895d29c1..1a079ba6 100644 --- a/connector/src/main/java/com/google/cloud/hive/bigquery/connector/BigQueryMetaHook.java +++ b/connector/src/main/java/com/google/cloud/hive/bigquery/connector/BigQueryMetaHook.java @@ -235,7 +235,10 @@ public void preCreateTable(Table table) throws MetaException { } StandardTableDefinition tableDefinition = tableDefBuilder.build(); - createTableInfo = TableInfo.newBuilder(opts.getTableId(), tableDefinition).build(); + createTableInfo = + TableInfo.newBuilder(opts.getTableId(), tableDefinition) + .setDescription(table.getParameters().get("comment")) + .build(); } table diff --git a/connector/src/main/java/com/google/cloud/hive/bigquery/connector/utils/bq/BigQuerySchemaConverter.java b/connector/src/main/java/com/google/cloud/hive/bigquery/connector/utils/bq/BigQuerySchemaConverter.java index f9469b3c..405664ae 100644 --- a/connector/src/main/java/com/google/cloud/hive/bigquery/connector/utils/bq/BigQuerySchemaConverter.java +++ b/connector/src/main/java/com/google/cloud/hive/bigquery/connector/utils/bq/BigQuerySchemaConverter.java @@ -64,12 +64,12 @@ public static Schema toBigQuerySchema(StorageDescriptor sd) { List bigQueryFields = new ArrayList<>(); for (FieldSchema hiveField : sd.getCols()) { TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType()); - bigQueryFields.add(buildBigQueryField(hiveField.getName(), typeInfo)); + bigQueryFields.add(buildBigQueryField(hiveField.getName(), typeInfo, hiveField.getComment())); } return Schema.of(bigQueryFields); } - private static Field buildBigQueryField(String fieldName, TypeInfo typeInfo) { + private static Field buildBigQueryField(String fieldName, TypeInfo typeInfo, String comment) { Field.Builder bigQueryFieldBuilder; Field.Mode mode = null; @@ -101,7 +101,8 @@ private static Field buildBigQueryField(String fieldName, TypeInfo typeInfo) { List subFieldNames = ((StructTypeInfo) typeInfo).getAllStructFieldNames(); List bigQuerySubFields = new ArrayList<>(); for (int i = 0; i < subFieldNames.size(); i++) { - Field bigQuerySubField = buildBigQueryField(subFieldNames.get(i), subFieldTypeInfos.get(i)); + Field bigQuerySubField = + buildBigQueryField(subFieldNames.get(i), subFieldTypeInfos.get(i), null); bigQuerySubFields.add(bigQuerySubField); } bigQueryFieldBuilder = @@ -112,6 +113,7 @@ private static Field buildBigQueryField(String fieldName, TypeInfo typeInfo) { } bigQueryFieldBuilder.setMode(mode); + bigQueryFieldBuilder.setDescription(comment); return bigQueryFieldBuilder.build(); } diff --git a/connector/src/test/java/com/google/cloud/hive/bigquery/connector/IntegrationTestsBase.java b/connector/src/test/java/com/google/cloud/hive/bigquery/connector/IntegrationTestsBase.java index 3a93f052..fdb1eed0 100644 --- a/connector/src/test/java/com/google/cloud/hive/bigquery/connector/IntegrationTestsBase.java +++ b/connector/src/test/java/com/google/cloud/hive/bigquery/connector/IntegrationTestsBase.java @@ -110,24 +110,25 @@ public String renderQueryTemplate(String queryTemplate) { } public void createHiveTable( - String tableName, String hiveDDL, boolean isExternal, String hiveProps) { + String tableName, String hiveDDL, boolean isExternal, String properties, String comment) { runHiveScript( String.join( "\n", "CREATE " + (isExternal ? "EXTERNAL" : "") + " TABLE " + tableName + " (", hiveDDL, ")", + comment != null ? "COMMENT \"" + comment + "\"" : "", "STORED BY" + " 'com.google.cloud.hive.bigquery.connector.BigQueryStorageHandler'", "TBLPROPERTIES (", " 'bq.project'='${project}',", " 'bq.dataset'='${dataset}',", " 'bq.table'='" + tableName + "'", - (hiveProps != null ? "," + hiveProps : ""), + properties != null ? "," + properties : "", ");")); } public void createExternalTable(String tableName, String hiveDDL) { - createHiveTable(tableName, hiveDDL, true, null); + createHiveTable(tableName, hiveDDL, true, null, null); } public void createExternalTable(String tableName, String hiveDDL, String bqDDL) { @@ -136,15 +137,27 @@ public void createExternalTable(String tableName, String hiveDDL, String bqDDL) } public void createManagedTable(String tableName, String hiveDDL) { - createHiveTable(tableName, hiveDDL, false, null); + createHiveTable(tableName, hiveDDL, false, null, null); } - public void createManagedTableWithProps(String tableName, String hiveDDL, String hiveProps) { - createHiveTable(tableName, hiveDDL, false, hiveProps); + public void createManagedTable( + String tableName, String hiveDDL, String properties, String comment) { + createHiveTable(tableName, hiveDDL, false, properties, comment); } public void createBqTable(String tableName, String bqDDL) { - runBqQuery("CREATE OR REPLACE TABLE ${dataset}." + tableName + " (" + bqDDL + ")"); + createBqTable(tableName, bqDDL, null); + } + + public void createBqTable(String tableName, String bqDDL, String description) { + runBqQuery( + String.join( + "\n", + "CREATE OR REPLACE TABLE ${dataset}." + tableName, + "(", + bqDDL, + ")", + description != null ? "OPTIONS ( description=\"" + description + "\" )" : "")); } public TableResult runBqQuery(String queryTemplate) { diff --git a/connector/src/test/java/com/google/cloud/hive/bigquery/connector/ManagedAndExternalHiveTableIntegrationTests.java b/connector/src/test/java/com/google/cloud/hive/bigquery/connector/ManagedAndExternalHiveTableIntegrationTests.java index 658adab4..04a41204 100644 --- a/connector/src/test/java/com/google/cloud/hive/bigquery/connector/ManagedAndExternalHiveTableIntegrationTests.java +++ b/connector/src/test/java/com/google/cloud/hive/bigquery/connector/ManagedAndExternalHiveTableIntegrationTests.java @@ -32,13 +32,15 @@ public void testCreateManagedTable() { dropBqTableIfExists(dataset, MANAGED_TEST_TABLE_NAME); assertFalse(bQTableExists(dataset, MANAGED_TEST_TABLE_NAME)); // Create the managed table using Hive - createManagedTable(MANAGED_TEST_TABLE_NAME, HIVE_MANAGED_TEST_TABLE_DDL); + createManagedTable( + MANAGED_TEST_TABLE_NAME, HIVE_ALL_TYPES_TABLE_DDL, null, "A table with lots of types"); // Create another BQ table with the same schema - createBqTable(ALL_TYPES_TABLE_NAME, BIGQUERY_ALL_TYPES_TABLE_DDL); + createBqTable(ALL_TYPES_TABLE_NAME, BIGQUERY_ALL_TYPES_TABLE_DDL, "A table with lots of types"); // Make sure that the managed table was created in BQ // and that the two schemas are the same TableInfo managedTableInfo = getTableInfo(dataset, MANAGED_TEST_TABLE_NAME); TableInfo allTypesTableInfo = getTableInfo(dataset, ALL_TYPES_TABLE_NAME); + assertEquals(managedTableInfo.getDescription(), allTypesTableInfo.getDescription()); assertEquals( managedTableInfo.getDefinition().getSchema(), allTypesTableInfo.getDefinition().getSchema()); @@ -51,12 +53,12 @@ public void testCreateManagedTable() { public void testCreateManagedTableAlreadyExists() { initHive(); // Create the table in BigQuery - createBqTable(MANAGED_TEST_TABLE_NAME, BIGQUERY_MANAGED_TEST_TABLE_DDL); + createBqTable(MANAGED_TEST_TABLE_NAME, BIGQUERY_ALL_TYPES_TABLE_DDL); // Try to create the managed table using Hive Throwable exception = assertThrows( IllegalArgumentException.class, - () -> createManagedTable(MANAGED_TEST_TABLE_NAME, HIVE_MANAGED_TEST_TABLE_DDL)); + () -> createManagedTable(MANAGED_TEST_TABLE_NAME, HIVE_ALL_TYPES_TABLE_DDL)); assertTrue(exception.getMessage().contains("BigQuery table already exists")); } @@ -69,7 +71,7 @@ public void testDropManagedTable() { dropBqTableIfExists(dataset, MANAGED_TEST_TABLE_NAME); assertFalse(bQTableExists(dataset, MANAGED_TEST_TABLE_NAME)); // Create the managed table using Hive - createManagedTable(MANAGED_TEST_TABLE_NAME, HIVE_MANAGED_TEST_TABLE_DDL); + createManagedTable(MANAGED_TEST_TABLE_NAME, HIVE_ALL_TYPES_TABLE_DDL); // Check that the table was created in BigQuery assertTrue(bQTableExists(dataset, MANAGED_TEST_TABLE_NAME)); // Drop the managed table using hive diff --git a/connector/src/test/java/com/google/cloud/hive/bigquery/connector/PartitionIntegrationTests.java b/connector/src/test/java/com/google/cloud/hive/bigquery/connector/PartitionIntegrationTests.java index 50c07834..354a9651 100644 --- a/connector/src/test/java/com/google/cloud/hive/bigquery/connector/PartitionIntegrationTests.java +++ b/connector/src/test/java/com/google/cloud/hive/bigquery/connector/PartitionIntegrationTests.java @@ -34,10 +34,11 @@ public void testFieldTimePartition() { // Make sure the BQ table doesn't exist dropBqTableIfExists(dataset, FIELD_TIME_PARTITIONED_TABLE_NAME); // Create the table using Hive - createManagedTableWithProps( + createManagedTable( FIELD_TIME_PARTITIONED_TABLE_NAME, HIVE_FIELD_TIME_PARTITIONED_TABLE_DDL, - HIVE_FIELD_TIME_PARTITIONED_TABLE_PROPS); + HIVE_FIELD_TIME_PARTITIONED_TABLE_PROPS, + null); // Verify that the BQ table has the right partition & clustering options StandardTableDefinition tableDef = getTableInfo(dataset, FIELD_TIME_PARTITIONED_TABLE_NAME).getDefinition(); @@ -55,10 +56,11 @@ public void testCreateIngestionTimePartition() { // Make sure the BQ table doesn't exist dropBqTableIfExists(dataset, INGESTION_TIME_PARTITIONED_TABLE_NAME); // Create the table using Hive - createManagedTableWithProps( + createManagedTable( INGESTION_TIME_PARTITIONED_TABLE_NAME, HIVE_INGESTION_TIME_PARTITIONED_DDL, - HIVE_INGESTION_TIME_PARTITIONED_PROPS); + HIVE_INGESTION_TIME_PARTITIONED_PROPS, + null); // Retrieve the table metadata from BigQuery StandardTableDefinition tableDef = getTableInfo(dataset, INGESTION_TIME_PARTITIONED_TABLE_NAME).getDefinition(); @@ -86,10 +88,11 @@ public void testQueryIngestionTimePartition() { // Make sure the BQ table doesn't exist dropBqTableIfExists(dataset, INGESTION_TIME_PARTITIONED_TABLE_NAME); // Create the table using Hive - createManagedTableWithProps( + createManagedTable( INGESTION_TIME_PARTITIONED_TABLE_NAME, HIVE_INGESTION_TIME_PARTITIONED_DDL, - HIVE_INGESTION_TIME_PARTITIONED_PROPS); + HIVE_INGESTION_TIME_PARTITIONED_PROPS, + null); runHiveScript( String.format( "SELECT * from %s WHERE `_PARTITIONTIME` > TIMESTAMP'2018-09-05 00:10:04.19'", diff --git a/connector/src/test/java/com/google/cloud/hive/bigquery/connector/TestUtils.java b/connector/src/test/java/com/google/cloud/hive/bigquery/connector/TestUtils.java index 4e52f1f8..957fa689 100644 --- a/connector/src/test/java/com/google/cloud/hive/bigquery/connector/TestUtils.java +++ b/connector/src/test/java/com/google/cloud/hive/bigquery/connector/TestUtils.java @@ -61,24 +61,28 @@ public class TestUtils { public static String BIGQUERY_ALL_TYPES_TABLE_DDL = String.join( "\n", - "tiny_int_val INT64,", - "small_int_val INT64,", - "int_val INT64,", - "big_int_val INT64,", - "bl BOOL,", - "fixed_char STRING,", - "var_char STRING,", - "str STRING,", - "day DATE,", - "ts TIMESTAMP,", - "bin BYTES,", - "fl FLOAT64,", - "dbl FLOAT64,", - "nums STRUCT,", - "int_arr ARRAY,", - "int_struct_arr ARRAY>,", - "float_struct STRUCT,", - "mp ARRAY>>>"); + "tiny_int_val INT64 OPTIONS (description = 'A description for a TINYINT'),", + "small_int_val INT64 OPTIONS (description = 'A description for a SMALLINT'),", + "int_val INT64 OPTIONS (description = 'A description for a INT'),", + "big_int_val INT64 OPTIONS (description = 'A description for a BIGINT'),", + "bl BOOL OPTIONS (description = 'A description for a BOOLEAN'),", + "fixed_char STRING OPTIONS (description = 'A description for a CHAR'),", + "var_char STRING OPTIONS (description = 'A description for a VARCHAR'),", + "str STRING OPTIONS (description = 'A description for a STRING'),", + "day DATE OPTIONS (description = 'A description for a DATE'),", + "ts TIMESTAMP OPTIONS (description = 'A description for a TIMESTAMP'),", + "bin BYTES OPTIONS (description = 'A description for a BINARY'),", + "fl FLOAT64 OPTIONS (description = 'A description for a FLOAT'),", + "dbl FLOAT64 OPTIONS (description = 'A description for a DOUBLE'),", + "nums STRUCT OPTIONS (description" + + " = 'A description for a STRUCT'),", + "int_arr ARRAY OPTIONS (description = 'A description for a ARRAY-BIGINT'),", + "int_struct_arr ARRAY> OPTIONS (description = 'A description for a" + + " ARRAY-STRUCT'),", + "float_struct STRUCT OPTIONS (description = 'A description for a" + + " STRUCT-FLOAT'),", + "mp ARRAY>>> OPTIONS" + + " (description = 'A description for a MAP')"); public static String BIGQUERY_BIGLAKE_TABLE_CREATE_QUERY = String.join( @@ -90,27 +94,6 @@ public class TestUtils { "uris = ['gs://" + getBigLakeBucket() + "/test.csv']", ")"); - public static String BIGQUERY_MANAGED_TEST_TABLE_DDL = - String.join( - "\n", - "tiny_int_val INT64,", - "small_int_val INT64,", - "int_val INT64,", - "big_int_val INT64,", - "bl BOOL,", - "fixed_char STRING,", - "var_char STRING,", - "str STRING,", - "day DATE,", - "ts TIMESTAMP,", - "bin BYTES,", - "fl FLOAT64,", - "dbl FLOAT64,", - "nums STRUCT,", - "int_arr ARRAY,", - "int_struct_arr ARRAY>,", - "mp ARRAY>>>"); - public static String HIVE_TEST_TABLE_DDL = String.join("\n", "number BIGINT,", "text STRING"); public static String HIVE_TEST_VIEW_DDL = String.join("\n", "number BIGINT,", "text STRING"); @@ -124,48 +107,25 @@ public class TestUtils { public static String HIVE_ALL_TYPES_TABLE_DDL = String.join( "\n", - "tiny_int_val TINYINT,", - "small_int_val SMALLINT,", - "int_val INT,", - "big_int_val BIGINT,", - "bl BOOLEAN,", - "fixed_char CHAR(10),", - "var_char VARCHAR(10),", - "str STRING,", - "day DATE,", - "ts TIMESTAMP,", - "bin BINARY,", - "fl FLOAT,", - "dbl DOUBLE,", - "nums STRUCT,", - "int_arr ARRAY,", - "int_struct_arr ARRAY>,", - "float_struct STRUCT,", - "mp MAP>"); - - public static String HIVE_MANAGED_TEST_TABLE_DDL = - String.join( - "\n", - "tiny_int_val TINYINT,", - "small_int_val SMALLINT,", - "int_val INT,", - "big_int_val BIGINT,", - "bl BOOLEAN,", - "fixed_char CHAR(10),", - "var_char VARCHAR(10),", - "str STRING,", - "day DATE,", - "ts TIMESTAMP,", - "bin BINARY,", - "fl FLOAT,", - "dbl DOUBLE,", + "tiny_int_val TINYINT COMMENT 'A description for a TINYINT',", + "small_int_val SMALLINT COMMENT 'A description for a SMALLINT',", + "int_val INT COMMENT 'A description for a INT',", + "big_int_val BIGINT COMMENT 'A description for a BIGINT',", + "bl BOOLEAN COMMENT 'A description for a BOOLEAN',", + "fixed_char CHAR(10) COMMENT 'A description for a CHAR',", + "var_char VARCHAR(10) COMMENT 'A description for a VARCHAR',", + "str STRING COMMENT 'A description for a STRING',", + "day DATE COMMENT 'A description for a DATE',", + "ts TIMESTAMP COMMENT 'A description for a TIMESTAMP',", + "bin BINARY COMMENT 'A description for a BINARY',", + "fl FLOAT COMMENT 'A description for a FLOAT',", + "dbl DOUBLE COMMENT 'A description for a DOUBLE',", "nums STRUCT,", - "int_arr ARRAY,", - "int_struct_arr ARRAY>,", - "float_struct STRUCT,", - "mp MAP>"); + + " DECIMAL(38,9), big_pi: DECIMAL(38,9)> COMMENT 'A description for a STRUCT',", + "int_arr ARRAY COMMENT 'A description for a ARRAY-BIGINT',", + "int_struct_arr ARRAY> COMMENT 'A description for a ARRAY-STRUCT',", + "float_struct STRUCT COMMENT 'A description for a STRUCT-FLOAT',", + "mp MAP> COMMENT 'A description for a MAP'"); public static String HIVE_FIELD_TIME_PARTITIONED_TABLE_DDL = String.join("\n", "int_val BIGINT,", "ts TIMESTAMP");