Skip to content

Commit

Permalink
Carry Hive's table and column comments over to the corresponding BQ descriptions
Browse files Browse the repository at this point in the history
  • Loading branch information
jphalip committed Dec 29, 2022
1 parent 8d3cc1b commit af82dcc
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 102 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,10 @@ public void preCreateTable(Table table) throws MetaException {
}

StandardTableDefinition tableDefinition = tableDefBuilder.build();
createTableInfo = TableInfo.newBuilder(opts.getTableId(), tableDefinition).build();
createTableInfo =
TableInfo.newBuilder(opts.getTableId(), tableDefinition)
.setDescription(table.getParameters().get("comment"))
.build();
}

table
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,12 @@ public static Schema toBigQuerySchema(StorageDescriptor sd) {
List<Field> bigQueryFields = new ArrayList<>();
for (FieldSchema hiveField : sd.getCols()) {
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType());
bigQueryFields.add(buildBigQueryField(hiveField.getName(), typeInfo));
bigQueryFields.add(buildBigQueryField(hiveField.getName(), typeInfo, hiveField.getComment()));
}
return Schema.of(bigQueryFields);
}

private static Field buildBigQueryField(String fieldName, TypeInfo typeInfo) {
private static Field buildBigQueryField(String fieldName, TypeInfo typeInfo, String comment) {
Field.Builder bigQueryFieldBuilder;

Field.Mode mode = null;
Expand Down Expand Up @@ -101,7 +101,8 @@ private static Field buildBigQueryField(String fieldName, TypeInfo typeInfo) {
List<String> subFieldNames = ((StructTypeInfo) typeInfo).getAllStructFieldNames();
List<Field> bigQuerySubFields = new ArrayList<>();
for (int i = 0; i < subFieldNames.size(); i++) {
Field bigQuerySubField = buildBigQueryField(subFieldNames.get(i), subFieldTypeInfos.get(i));
Field bigQuerySubField =
buildBigQueryField(subFieldNames.get(i), subFieldTypeInfos.get(i), null);
bigQuerySubFields.add(bigQuerySubField);
}
bigQueryFieldBuilder =
Expand All @@ -112,6 +113,7 @@ private static Field buildBigQueryField(String fieldName, TypeInfo typeInfo) {
}

bigQueryFieldBuilder.setMode(mode);
bigQueryFieldBuilder.setDescription(comment);
return bigQueryFieldBuilder.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,24 +110,25 @@ public String renderQueryTemplate(String queryTemplate) {
}

public void createHiveTable(
String tableName, String hiveDDL, boolean isExternal, String hiveProps) {
String tableName, String hiveDDL, boolean isExternal, String properties, String comment) {
runHiveScript(
String.join(
"\n",
"CREATE " + (isExternal ? "EXTERNAL" : "") + " TABLE " + tableName + " (",
hiveDDL,
")",
comment != null ? "COMMENT \"" + comment + "\"" : "",
"STORED BY" + " 'com.google.cloud.hive.bigquery.connector.BigQueryStorageHandler'",
"TBLPROPERTIES (",
" 'bq.project'='${project}',",
" 'bq.dataset'='${dataset}',",
" 'bq.table'='" + tableName + "'",
(hiveProps != null ? "," + hiveProps : ""),
properties != null ? "," + properties : "",
");"));
}

/**
 * Creates an external Hive test table with no extra properties and no table comment.
 *
 * <p>Fix: the block contained both the stale 4-arg and the new 5-arg delegate call
 * (diff-rendering artifact); only the 5-arg call is kept.
 */
public void createExternalTable(String tableName, String hiveDDL) {
  createHiveTable(tableName, hiveDDL, true, null, null);
}

public void createExternalTable(String tableName, String hiveDDL, String bqDDL) {
Expand All @@ -136,15 +137,27 @@ public void createExternalTable(String tableName, String hiveDDL, String bqDDL)
}

/**
 * Creates a managed Hive test table with no extra properties and no table comment.
 *
 * <p>Fix: the block contained both the stale 4-arg and the new 5-arg delegate call
 * (diff-rendering artifact); only the 5-arg call is kept.
 */
public void createManagedTable(String tableName, String hiveDDL) {
  createHiveTable(tableName, hiveDDL, false, null, null);
}

public void createManagedTableWithProps(String tableName, String hiveDDL, String hiveProps) {
createHiveTable(tableName, hiveDDL, false, hiveProps);
public void createManagedTable(
String tableName, String hiveDDL, String properties, String comment) {
createHiveTable(tableName, hiveDDL, false, properties, comment);
}

/**
 * Creates (or replaces) a BigQuery test table with no table description.
 *
 * <p>Fix: the block contained both the stale inline {@code runBqQuery} call and the new
 * delegation (diff-rendering artifact), which would have issued the DDL twice; only the
 * delegation to the 3-arg overload is kept.
 */
public void createBqTable(String tableName, String bqDDL) {
  createBqTable(tableName, bqDDL, null);
}

/**
 * Creates (or replaces) a BigQuery test table in the test dataset, optionally attaching a
 * table-level description via the OPTIONS clause.
 *
 * @param description table description; the OPTIONS clause is omitted entirely when null
 */
public void createBqTable(String tableName, String bqDDL, String description) {
  // Build the same newline-separated DDL that String.join produced, clause by clause.
  String optionsClause =
      description == null ? "" : "OPTIONS ( description=\"" + description + "\" )";
  StringBuilder query = new StringBuilder("CREATE OR REPLACE TABLE ${dataset}.");
  query.append(tableName).append("\n(\n").append(bqDDL).append("\n)\n").append(optionsClause);
  runBqQuery(query.toString());
}

public TableResult runBqQuery(String queryTemplate) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@ public void testCreateManagedTable() {
dropBqTableIfExists(dataset, MANAGED_TEST_TABLE_NAME);
assertFalse(bQTableExists(dataset, MANAGED_TEST_TABLE_NAME));
// Create the managed table using Hive
createManagedTable(MANAGED_TEST_TABLE_NAME, HIVE_MANAGED_TEST_TABLE_DDL);
createManagedTable(
MANAGED_TEST_TABLE_NAME, HIVE_ALL_TYPES_TABLE_DDL, null, "A table with lots of types");
// Create another BQ table with the same schema
createBqTable(ALL_TYPES_TABLE_NAME, BIGQUERY_ALL_TYPES_TABLE_DDL);
createBqTable(ALL_TYPES_TABLE_NAME, BIGQUERY_ALL_TYPES_TABLE_DDL, "A table with lots of types");
// Make sure that the managed table was created in BQ
// and that the two schemas are the same
TableInfo managedTableInfo = getTableInfo(dataset, MANAGED_TEST_TABLE_NAME);
TableInfo allTypesTableInfo = getTableInfo(dataset, ALL_TYPES_TABLE_NAME);
assertEquals(managedTableInfo.getDescription(), allTypesTableInfo.getDescription());
assertEquals(
managedTableInfo.getDefinition().getSchema(),
allTypesTableInfo.getDefinition().getSchema());
Expand All @@ -51,12 +53,12 @@ public void testCreateManagedTable() {
public void testCreateManagedTableAlreadyExists() {
initHive();
// Create the table in BigQuery
createBqTable(MANAGED_TEST_TABLE_NAME, BIGQUERY_MANAGED_TEST_TABLE_DDL);
createBqTable(MANAGED_TEST_TABLE_NAME, BIGQUERY_ALL_TYPES_TABLE_DDL);
// Try to create the managed table using Hive
Throwable exception =
assertThrows(
IllegalArgumentException.class,
() -> createManagedTable(MANAGED_TEST_TABLE_NAME, HIVE_MANAGED_TEST_TABLE_DDL));
() -> createManagedTable(MANAGED_TEST_TABLE_NAME, HIVE_ALL_TYPES_TABLE_DDL));
assertTrue(exception.getMessage().contains("BigQuery table already exists"));
}

Expand All @@ -69,7 +71,7 @@ public void testDropManagedTable() {
dropBqTableIfExists(dataset, MANAGED_TEST_TABLE_NAME);
assertFalse(bQTableExists(dataset, MANAGED_TEST_TABLE_NAME));
// Create the managed table using Hive
createManagedTable(MANAGED_TEST_TABLE_NAME, HIVE_MANAGED_TEST_TABLE_DDL);
createManagedTable(MANAGED_TEST_TABLE_NAME, HIVE_ALL_TYPES_TABLE_DDL);
// Check that the table was created in BigQuery
assertTrue(bQTableExists(dataset, MANAGED_TEST_TABLE_NAME));
// Drop the managed table using hive
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,11 @@ public void testFieldTimePartition() {
// Make sure the BQ table doesn't exist
dropBqTableIfExists(dataset, FIELD_TIME_PARTITIONED_TABLE_NAME);
// Create the table using Hive
createManagedTableWithProps(
createManagedTable(
FIELD_TIME_PARTITIONED_TABLE_NAME,
HIVE_FIELD_TIME_PARTITIONED_TABLE_DDL,
HIVE_FIELD_TIME_PARTITIONED_TABLE_PROPS);
HIVE_FIELD_TIME_PARTITIONED_TABLE_PROPS,
null);
// Verify that the BQ table has the right partition & clustering options
StandardTableDefinition tableDef =
getTableInfo(dataset, FIELD_TIME_PARTITIONED_TABLE_NAME).getDefinition();
Expand All @@ -55,10 +56,11 @@ public void testCreateIngestionTimePartition() {
// Make sure the BQ table doesn't exist
dropBqTableIfExists(dataset, INGESTION_TIME_PARTITIONED_TABLE_NAME);
// Create the table using Hive
createManagedTableWithProps(
createManagedTable(
INGESTION_TIME_PARTITIONED_TABLE_NAME,
HIVE_INGESTION_TIME_PARTITIONED_DDL,
HIVE_INGESTION_TIME_PARTITIONED_PROPS);
HIVE_INGESTION_TIME_PARTITIONED_PROPS,
null);
// Retrieve the table metadata from BigQuery
StandardTableDefinition tableDef =
getTableInfo(dataset, INGESTION_TIME_PARTITIONED_TABLE_NAME).getDefinition();
Expand Down Expand Up @@ -86,10 +88,11 @@ public void testQueryIngestionTimePartition() {
// Make sure the BQ table doesn't exist
dropBqTableIfExists(dataset, INGESTION_TIME_PARTITIONED_TABLE_NAME);
// Create the table using Hive
createManagedTableWithProps(
createManagedTable(
INGESTION_TIME_PARTITIONED_TABLE_NAME,
HIVE_INGESTION_TIME_PARTITIONED_DDL,
HIVE_INGESTION_TIME_PARTITIONED_PROPS);
HIVE_INGESTION_TIME_PARTITIONED_PROPS,
null);
runHiveScript(
String.format(
"SELECT * from %s WHERE `_PARTITIONTIME` > TIMESTAMP'2018-09-05 00:10:04.19'",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,24 +61,28 @@ public class TestUtils {
public static String BIGQUERY_ALL_TYPES_TABLE_DDL =
String.join(
"\n",
"tiny_int_val INT64,",
"small_int_val INT64,",
"int_val INT64,",
"big_int_val INT64,",
"bl BOOL,",
"fixed_char STRING,",
"var_char STRING,",
"str STRING,",
"day DATE,",
"ts TIMESTAMP,",
"bin BYTES,",
"fl FLOAT64,",
"dbl FLOAT64,",
"nums STRUCT<min NUMERIC, max NUMERIC, pi NUMERIC, big_pi NUMERIC>,",
"int_arr ARRAY<int64>,",
"int_struct_arr ARRAY<STRUCT<i INT64>>,",
"float_struct STRUCT<float_field FLOAT64>,",
"mp ARRAY<STRUCT<key STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>");
"tiny_int_val INT64 OPTIONS (description = 'A description for a TINYINT'),",
"small_int_val INT64 OPTIONS (description = 'A description for a SMALLINT'),",
"int_val INT64 OPTIONS (description = 'A description for a INT'),",
"big_int_val INT64 OPTIONS (description = 'A description for a BIGINT'),",
"bl BOOL OPTIONS (description = 'A description for a BOOLEAN'),",
"fixed_char STRING OPTIONS (description = 'A description for a CHAR'),",
"var_char STRING OPTIONS (description = 'A description for a VARCHAR'),",
"str STRING OPTIONS (description = 'A description for a STRING'),",
"day DATE OPTIONS (description = 'A description for a DATE'),",
"ts TIMESTAMP OPTIONS (description = 'A description for a TIMESTAMP'),",
"bin BYTES OPTIONS (description = 'A description for a BINARY'),",
"fl FLOAT64 OPTIONS (description = 'A description for a FLOAT'),",
"dbl FLOAT64 OPTIONS (description = 'A description for a DOUBLE'),",
"nums STRUCT<min NUMERIC, max NUMERIC, pi NUMERIC, big_pi NUMERIC> OPTIONS (description"
+ " = 'A description for a STRUCT'),",
"int_arr ARRAY<int64> OPTIONS (description = 'A description for a ARRAY-BIGINT'),",
"int_struct_arr ARRAY<STRUCT<i INT64>> OPTIONS (description = 'A description for a"
+ " ARRAY-STRUCT'),",
"float_struct STRUCT<float_field FLOAT64> OPTIONS (description = 'A description for a"
+ " STRUCT-FLOAT'),",
"mp ARRAY<STRUCT<key STRING, value ARRAY<STRUCT<key STRING, value INT64>>>> OPTIONS"
+ " (description = 'A description for a MAP')");

public static String BIGQUERY_BIGLAKE_TABLE_CREATE_QUERY =
String.join(
Expand All @@ -90,27 +94,6 @@ public class TestUtils {
"uris = ['gs://" + getBigLakeBucket() + "/test.csv']",
")");

// Column DDL (BigQuery syntax) for the managed-table tests: same columns as
// BIGQUERY_ALL_TYPES_TABLE_DDL but without float_struct and without per-column
// descriptions. NOTE(review): near-duplicate of that constant — consider
// consolidating the two.
public static String BIGQUERY_MANAGED_TEST_TABLE_DDL =
String.join(
"\n",
"tiny_int_val INT64,",
"small_int_val INT64,",
"int_val INT64,",
"big_int_val INT64,",
"bl BOOL,",
"fixed_char STRING,",
"var_char STRING,",
"str STRING,",
"day DATE,",
"ts TIMESTAMP,",
"bin BYTES,",
"fl FLOAT64,",
"dbl FLOAT64,",
"nums STRUCT<min NUMERIC, max NUMERIC, pi NUMERIC, big_pi NUMERIC>,",
"int_arr ARRAY<int64>,",
"int_struct_arr ARRAY<STRUCT<i INT64>>,",
"mp ARRAY<STRUCT<key STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>");

// Minimal two-column Hive DDL (number BIGINT, text STRING) for the basic test table.
public static String HIVE_TEST_TABLE_DDL = String.join("\n", "number BIGINT,", "text STRING");

// Same two-column DDL as HIVE_TEST_TABLE_DDL, used for the Hive test view.
public static String HIVE_TEST_VIEW_DDL = String.join("\n", "number BIGINT,", "text STRING");
Expand All @@ -124,48 +107,25 @@ public class TestUtils {
public static String HIVE_ALL_TYPES_TABLE_DDL =
String.join(
"\n",
"tiny_int_val TINYINT,",
"small_int_val SMALLINT,",
"int_val INT,",
"big_int_val BIGINT,",
"bl BOOLEAN,",
"fixed_char CHAR(10),",
"var_char VARCHAR(10),",
"str STRING,",
"day DATE,",
"ts TIMESTAMP,",
"bin BINARY,",
"fl FLOAT,",
"dbl DOUBLE,",
"nums STRUCT<min: DECIMAL(38,9), max: DECIMAL(38,9), pi:"
+ " DECIMAL(38,9), big_pi: DECIMAL(38,9)>,",
"int_arr ARRAY<BIGINT>,",
"int_struct_arr ARRAY<STRUCT<i: BIGINT>>,",
"float_struct STRUCT<float_field:FLOAT>,",
"mp MAP<STRING,MAP<STRING,INT>>");

public static String HIVE_MANAGED_TEST_TABLE_DDL =
String.join(
"\n",
"tiny_int_val TINYINT,",
"small_int_val SMALLINT,",
"int_val INT,",
"big_int_val BIGINT,",
"bl BOOLEAN,",
"fixed_char CHAR(10),",
"var_char VARCHAR(10),",
"str STRING,",
"day DATE,",
"ts TIMESTAMP,",
"bin BINARY,",
"fl FLOAT,",
"dbl DOUBLE,",
"tiny_int_val TINYINT COMMENT 'A description for a TINYINT',",
"small_int_val SMALLINT COMMENT 'A description for a SMALLINT',",
"int_val INT COMMENT 'A description for a INT',",
"big_int_val BIGINT COMMENT 'A description for a BIGINT',",
"bl BOOLEAN COMMENT 'A description for a BOOLEAN',",
"fixed_char CHAR(10) COMMENT 'A description for a CHAR',",
"var_char VARCHAR(10) COMMENT 'A description for a VARCHAR',",
"str STRING COMMENT 'A description for a STRING',",
"day DATE COMMENT 'A description for a DATE',",
"ts TIMESTAMP COMMENT 'A description for a TIMESTAMP',",
"bin BINARY COMMENT 'A description for a BINARY',",
"fl FLOAT COMMENT 'A description for a FLOAT',",
"dbl DOUBLE COMMENT 'A description for a DOUBLE',",
"nums STRUCT<min: DECIMAL(38,9), max: DECIMAL(38,9), pi:"
+ " DECIMAL(38,9), big_pi: DECIMAL(38,9)>,",
"int_arr ARRAY<BIGINT>,",
"int_struct_arr ARRAY<STRUCT<i: BIGINT>>,",
"float_struct STRUCT<float_field:FLOAT>,",
"mp MAP<STRING,MAP<STRING,INT>>");
+ " DECIMAL(38,9), big_pi: DECIMAL(38,9)> COMMENT 'A description for a STRUCT',",
"int_arr ARRAY<BIGINT> COMMENT 'A description for a ARRAY-BIGINT',",
"int_struct_arr ARRAY<STRUCT<i: BIGINT>> COMMENT 'A description for a ARRAY-STRUCT',",
"float_struct STRUCT<float_field:FLOAT> COMMENT 'A description for a STRUCT-FLOAT',",
"mp MAP<STRING,MAP<STRING,INT>> COMMENT 'A description for a MAP'");

// Two-column Hive DDL (int_val BIGINT, ts TIMESTAMP) for the field-time-partitioned table
// tests; the partitioning itself is presumably configured via the accompanying
// HIVE_FIELD_TIME_PARTITIONED_TABLE_PROPS — confirm against the test that uses it.
public static String HIVE_FIELD_TIME_PARTITIONED_TABLE_DDL =
String.join("\n", "int_val BIGINT,", "ts TIMESTAMP");
Expand Down

0 comments on commit af82dcc

Please sign in to comment.