From dca3e46bf031eff5b655bc6c9c3fae1bdd4085b6 Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Sat, 28 Dec 2024 09:51:02 -0800 Subject: [PATCH 01/12] move format code into independent modules --- pom.xml | 4 + xtable-core/pom.xml | 75 ++---- .../java/org/apache/xtable/GenericTable.java | 87 ------ .../TestConversionTargetFactory.java | 2 + xtable-delta/pom.xml | 172 ++++++++++++ .../xtable/delta/DeltaActionsConverter.java | 0 .../xtable/delta/DeltaConversionSource.java | 0 .../delta/DeltaConversionSourceProvider.java | 0 .../xtable/delta/DeltaConversionTarget.java | 0 .../xtable/delta/DeltaConversionUtils.java | 0 .../xtable/delta/DeltaDataFileExtractor.java | 0 .../delta/DeltaDataFileUpdatesExtractor.java | 0 .../delta/DeltaIncrementalChangesState.java | 0 .../xtable/delta/DeltaPartitionExtractor.java | 0 .../xtable/delta/DeltaSchemaExtractor.java | 0 .../xtable/delta/DeltaStatsExtractor.java | 0 .../xtable/delta/DeltaTableExtractor.java | 0 .../xtable/delta/DeltaValueConverter.java | 0 .../org/apache/xtable/delta/ScalaUtils.java | 0 ...rg.apache.xtable.spi.sync.ConversionTarget | 19 ++ .../delta/ITDeltaConversionTargetSource.java | 1 - .../delta/ITDeltaDeleteVectorConvert.java | 1 - .../delta/TestDeltaActionsConverter.java | 0 .../apache/xtable/delta/TestDeltaHelper.java | 0 .../delta/TestDeltaPartitionExtractor.java | 0 .../delta/TestDeltaSchemaExtractor.java | 0 .../xtable/delta/TestDeltaStatsExtractor.java | 0 .../apache/xtable/delta/TestDeltaSync.java | 0 .../xtable/delta/TestDeltaValueConverter.java | 0 .../xtable/delta}/TestSparkDeltaTable.java | 4 +- .../xtable-hudi-support-extensions/pom.xml | 20 ++ xtable-hudi/pom.xml | 176 +++++++++++++ .../xtable/avro/AvroSchemaConverter.java | 0 .../xtable/hudi/BaseFileUpdatesExtractor.java | 0 ...figurationBasedPartitionSpecExtractor.java | 0 .../xtable/hudi/HudiConversionSource.java | 0 .../hudi/HudiConversionSourceProvider.java | 0 .../xtable/hudi/HudiConversionTarget.java | 0 
.../xtable/hudi/HudiDataFileExtractor.java | 0 .../org/apache/xtable/hudi/HudiFileStats.java | 0 .../xtable/hudi/HudiFileStatsExtractor.java | 0 .../apache/xtable/hudi/HudiInstantUtils.java | 15 +- .../hudi/HudiPartitionValuesExtractor.java | 0 .../org/apache/xtable/hudi/HudiPathUtils.java | 0 .../xtable/hudi/HudiSchemaExtractor.java | 0 .../apache/xtable/hudi/HudiSourceConfig.java | 0 .../HudiSourcePartitionSpecExtractor.java | 0 .../xtable/hudi/HudiTableExtractor.java | 0 .../apache/xtable/hudi/HudiTableManager.java | 0 ...rg.apache.xtable.spi.sync.ConversionTarget | 19 ++ .../xtable/avro/TestAvroSchemaConverter.java | 0 .../org/apache/xtable/hudi/HudiTestUtil.java | 0 .../hudi/ITHudiConversionSourceSource.java | 2 - .../hudi/ITHudiConversionSourceTarget.java | 31 ++- .../xtable/hudi}/TestAbstractHudiTable.java | 21 +- .../hudi/TestBaseFileUpdatesExtractor.java | 33 +-- .../hudi/TestHudiConversionSourceTarget.java | 0 .../hudi/TestHudiFileStatsExtractor.java | 1 - .../xtable/hudi/TestHudiInstantUtils.java | 0 .../TestHudiPartitionValuesExtractor.java | 0 .../xtable/hudi/TestHudiSchemaExtractor.java | 0 .../xtable/hudi/TestHudiTableManager.java | 0 .../xtable/hudi}/TestJavaHudiTable.java | 2 +- .../xtable/hudi}/TestSparkHudiTable.java | 2 +- xtable-iceberg/pom.xml | 172 ++++++++++++ .../xtable/iceberg/IcebergCatalogConfig.java | 0 .../iceberg/IcebergColumnStatsConverter.java | 0 .../iceberg/IcebergConversionSource.java | 0 .../IcebergConversionSourceProvider.java | 0 .../iceberg/IcebergConversionTarget.java | 0 .../iceberg/IcebergDataFileExtractor.java | 0 .../iceberg/IcebergDataFileUpdatesSync.java | 0 .../IcebergPartitionSpecExtractor.java | 0 .../iceberg/IcebergPartitionSpecSync.java | 0 .../IcebergPartitionValueConverter.java | 2 - .../IcebergPartitionValueExtractor.java | 0 .../iceberg/IcebergSchemaExtractor.java | 0 .../xtable/iceberg/IcebergSchemaSync.java | 0 .../xtable/iceberg/IcebergTableManager.java | 0 ...rg.apache.xtable.spi.sync.ConversionTarget | 1 
- .../ITIcebergConversionTargetSource.java | 1 - .../xtable/iceberg/IcebergTestUtils.java | 0 .../apache/xtable/iceberg/StubCatalog.java | 0 .../TestIcebergColumnStatsConverter.java | 0 .../TestIcebergConversionTargetSource.java | 0 .../xtable/iceberg/TestIcebergDataHelper.java | 0 .../TestIcebergPartitionSpecExtractor.java | 0 .../iceberg/TestIcebergPartitionSpecSync.java | 0 .../TestIcebergPartitionValueConverter.java | 0 .../iceberg/TestIcebergSchemaExtractor.java | 0 .../xtable/iceberg/TestIcebergSchemaSync.java | 0 .../xtable/iceberg/TestIcebergSync.java | 5 +- .../xtable/iceberg}/TestIcebergTable.java | 8 +- .../iceberg/TestIcebergTableManager.java | 0 xtable-integration-tests/pom.xml | 247 ++++++++++++++++++ .../apache/xtable/GenericTableFactory.java | 113 ++++++++ .../apache/xtable/ITConversionController.java | 228 ++++++++++------ .../org/apache/xtable/loadtest/LoadTest.java | 2 +- xtable-utilities/pom.xml | 6 + .../org/apache/xtable/utilities/RunSync.java | 3 +- 100 files changed, 1173 insertions(+), 302 deletions(-) create mode 100644 xtable-delta/pom.xml rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java (100%) rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java (100%) rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaConversionSourceProvider.java (100%) rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java (100%) rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaConversionUtils.java (100%) rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java (100%) rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaDataFileUpdatesExtractor.java (100%) rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaIncrementalChangesState.java (100%) rename {xtable-core 
=> xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java (100%) rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java (100%) rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaStatsExtractor.java (100%) rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaTableExtractor.java (100%) rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java (100%) rename {xtable-core => xtable-delta}/src/main/java/org/apache/xtable/delta/ScalaUtils.java (100%) create mode 100644 xtable-delta/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget rename {xtable-core => xtable-delta}/src/test/java/org/apache/xtable/delta/ITDeltaConversionTargetSource.java (99%) rename {xtable-core => xtable-delta}/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java (99%) rename {xtable-core => xtable-delta}/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java (100%) rename {xtable-core => xtable-delta}/src/test/java/org/apache/xtable/delta/TestDeltaHelper.java (100%) rename {xtable-core => xtable-delta}/src/test/java/org/apache/xtable/delta/TestDeltaPartitionExtractor.java (100%) rename {xtable-core => xtable-delta}/src/test/java/org/apache/xtable/delta/TestDeltaSchemaExtractor.java (100%) rename {xtable-core => xtable-delta}/src/test/java/org/apache/xtable/delta/TestDeltaStatsExtractor.java (100%) rename {xtable-core => xtable-delta}/src/test/java/org/apache/xtable/delta/TestDeltaSync.java (100%) rename {xtable-core => xtable-delta}/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java (100%) rename {xtable-core/src/test/java/org/apache/xtable => xtable-delta/src/test/java/org/apache/xtable/delta}/TestSparkDeltaTable.java (99%) create mode 100644 xtable-hudi/pom.xml rename {xtable-core => 
xtable-hudi}/src/main/java/org/apache/xtable/avro/AvroSchemaConverter.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/ConfigurationBasedPartitionSpecExtractor.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiConversionTarget.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiDataFileExtractor.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiFileStats.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java (81%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiPathUtils.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiSourcePartitionSpecExtractor.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java (100%) rename {xtable-core => xtable-hudi}/src/main/java/org/apache/xtable/hudi/HudiTableManager.java (100%) create mode 100644 xtable-hudi/src/main/resources/services/org.apache.xtable.spi.sync.ConversionTarget rename {xtable-core => 
xtable-hudi}/src/test/java/org/apache/xtable/avro/TestAvroSchemaConverter.java (100%) rename {xtable-core => xtable-hudi}/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java (100%) rename {xtable-core => xtable-hudi}/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceSource.java (99%) rename {xtable-core => xtable-hudi}/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceTarget.java (96%) rename {xtable-core/src/test/java/org/apache/xtable => xtable-hudi/src/test/java/org/apache/xtable/hudi}/TestAbstractHudiTable.java (97%) rename {xtable-core => xtable-hudi}/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java (95%) rename {xtable-core => xtable-hudi}/src/test/java/org/apache/xtable/hudi/TestHudiConversionSourceTarget.java (100%) rename {xtable-core => xtable-hudi}/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java (99%) rename {xtable-core => xtable-hudi}/src/test/java/org/apache/xtable/hudi/TestHudiInstantUtils.java (100%) rename {xtable-core => xtable-hudi}/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java (100%) rename {xtable-core => xtable-hudi}/src/test/java/org/apache/xtable/hudi/TestHudiSchemaExtractor.java (100%) rename {xtable-core => xtable-hudi}/src/test/java/org/apache/xtable/hudi/TestHudiTableManager.java (100%) rename {xtable-core/src/test/java/org/apache/xtable => xtable-hudi/src/test/java/org/apache/xtable/hudi}/TestJavaHudiTable.java (99%) rename {xtable-core/src/test/java/org/apache/xtable => xtable-hudi/src/test/java/org/apache/xtable/hudi}/TestSparkHudiTable.java (99%) create mode 100644 xtable-iceberg/pom.xml rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergCatalogConfig.java (100%) rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java (100%) rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java (100%) rename 
{xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergConversionSourceProvider.java (100%) rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java (100%) rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergDataFileExtractor.java (100%) rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergDataFileUpdatesSync.java (100%) rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecExtractor.java (100%) rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecSync.java (100%) rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java (98%) rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueExtractor.java (100%) rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java (100%) rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java (100%) rename {xtable-core => xtable-iceberg}/src/main/java/org/apache/xtable/iceberg/IcebergTableManager.java (100%) rename {xtable-core/src/main/resources/META-INF => xtable-iceberg/src/main/resources}/services/org.apache.xtable.spi.sync.ConversionTarget (95%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionTargetSource.java (99%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/IcebergTestUtils.java (100%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/StubCatalog.java (100%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/TestIcebergColumnStatsConverter.java (100%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionTargetSource.java 
(100%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java (100%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java (100%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecSync.java (100%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java (100%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaExtractor.java (100%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaSync.java (100%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java (99%) rename {xtable-core/src/test/java/org/apache/xtable => xtable-iceberg/src/test/java/org/apache/xtable/iceberg}/TestIcebergTable.java (98%) rename {xtable-core => xtable-iceberg}/src/test/java/org/apache/xtable/iceberg/TestIcebergTableManager.java (100%) create mode 100644 xtable-integration-tests/pom.xml create mode 100644 xtable-integration-tests/src/test/java/org/apache/xtable/GenericTableFactory.java rename {xtable-core => xtable-integration-tests}/src/test/java/org/apache/xtable/ITConversionController.java (84%) rename {xtable-core => xtable-integration-tests}/src/test/java/org/apache/xtable/loadtest/LoadTest.java (99%) diff --git a/pom.xml b/pom.xml index 7a5973428..360f696a2 100644 --- a/pom.xml +++ b/pom.xml @@ -51,6 +51,10 @@ xtable-hudi-support xtable-core xtable-utilities + xtable-delta + xtable-integration-tests + xtable-hudi + xtable-iceberg diff --git a/xtable-core/pom.xml b/xtable-core/pom.xml index f277495e7..723ddeec9 100644 --- a/xtable-core/pom.xml +++ b/xtable-core/pom.xml @@ -56,54 +56,12 @@ guava - - - org.apache.avro - avro - - org.scala-lang scala-library - - - org.apache.hudi - 
hudi-spark${spark.version.prefix}-bundle_${scala.binary.version} - test - - - org.apache.hudi - hudi-common - - - org.apache.hudi - hudi-java-client - - - - - org.apache.iceberg - iceberg-core - - - org.apache.iceberg - iceberg-api - - - - - io.delta - delta-core_${scala.binary.version} - - - io.delta - delta-standalone_${scala.binary.version} - test - - org.apache.hadoop @@ -121,23 +79,6 @@ log4j-1.2-api - - - org.apache.iceberg - iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version} - test - - - org.apache.spark - spark-core_${scala.binary.version} - provided - - - org.apache.spark - spark-sql_${scala.binary.version} - provided - - org.mockito @@ -174,4 +115,20 @@ test + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + diff --git a/xtable-core/src/test/java/org/apache/xtable/GenericTable.java b/xtable-core/src/test/java/org/apache/xtable/GenericTable.java index dce0f21ab..98be8d15b 100644 --- a/xtable-core/src/test/java/org/apache/xtable/GenericTable.java +++ b/xtable-core/src/test/java/org/apache/xtable/GenericTable.java @@ -18,20 +18,10 @@ package org.apache.xtable; -import static org.apache.xtable.model.storage.TableFormat.DELTA; -import static org.apache.xtable.model.storage.TableFormat.HUDI; -import static org.apache.xtable.model.storage.TableFormat.ICEBERG; - -import java.nio.file.Path; import java.util.Arrays; import java.util.List; import java.util.UUID; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.SparkSession; - -import org.apache.hudi.common.model.HoodieTableType; - public interface GenericTable extends AutoCloseable { // A list of values for the level field which serves as a basic field to partition on for tests List LEVEL_VALUES = Arrays.asList("INFO", "WARN", "ERROR"); @@ -66,83 +56,6 @@ default String getDataPath() { String getFilterQuery(); - static GenericTable getInstance( - String tableName, - Path tempDir, - SparkSession sparkSession, - JavaSparkContext jsc, - 
String sourceFormat, - boolean isPartitioned) { - switch (sourceFormat) { - case HUDI: - return TestSparkHudiTable.forStandardSchemaAndPartitioning( - tableName, tempDir, jsc, isPartitioned); - case DELTA: - return TestSparkDeltaTable.forStandardSchemaAndPartitioning( - tableName, tempDir, sparkSession, isPartitioned ? "level" : null); - case ICEBERG: - return TestIcebergTable.forStandardSchemaAndPartitioning( - tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration()); - default: - throw new IllegalArgumentException("Unsupported source format: " + sourceFormat); - } - } - - static GenericTable getInstanceWithAdditionalColumns( - String tableName, - Path tempDir, - SparkSession sparkSession, - JavaSparkContext jsc, - String sourceFormat, - boolean isPartitioned) { - switch (sourceFormat) { - case HUDI: - return TestSparkHudiTable.forSchemaWithAdditionalColumnsAndPartitioning( - tableName, tempDir, jsc, isPartitioned); - case DELTA: - return TestSparkDeltaTable.forSchemaWithAdditionalColumnsAndPartitioning( - tableName, tempDir, sparkSession, isPartitioned ? "level" : null); - case ICEBERG: - return TestIcebergTable.forSchemaWithAdditionalColumnsAndPartitioning( - tableName, isPartitioned ? 
"level" : null, tempDir, jsc.hadoopConfiguration()); - default: - throw new IllegalArgumentException("Unsupported source format: " + sourceFormat); - } - } - - static GenericTable getInstanceWithCustomPartitionConfig( - String tableName, - Path tempDir, - JavaSparkContext jsc, - String sourceFormat, - String partitionConfig) { - switch (sourceFormat) { - case HUDI: - return TestSparkHudiTable.forStandardSchema( - tableName, tempDir, jsc, partitionConfig, HoodieTableType.COPY_ON_WRITE); - default: - throw new IllegalArgumentException( - String.format( - "Unsupported source format: %s for custom partition config", sourceFormat)); - } - } - - static GenericTable getInstanceWithUUIDColumns( - String tableName, - Path tempDir, - SparkSession sparkSession, - JavaSparkContext jsc, - String sourceFormat, - boolean isPartitioned) { - switch (sourceFormat) { - case ICEBERG: - return TestIcebergTable.forSchemaWithUUIDColumns( - tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration()); - default: - throw new IllegalArgumentException("Unsupported source format: " + sourceFormat); - } - } - static String getTableName() { return "test_table_" + UUID.randomUUID().toString().replaceAll("-", "_"); } diff --git a/xtable-core/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java b/xtable-core/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java index 0984b42be..cb5efacbc 100644 --- a/xtable-core/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java +++ b/xtable-core/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java @@ -25,12 +25,14 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.apache.xtable.exception.NotSupportedException; import org.apache.xtable.model.storage.TableFormat; import org.apache.xtable.spi.sync.ConversionTarget; 
+@Disabled("TODO - figure out what to do with these cases") public class TestConversionTargetFactory { @Test diff --git a/xtable-delta/pom.xml b/xtable-delta/pom.xml new file mode 100644 index 000000000..886f7db8b --- /dev/null +++ b/xtable-delta/pom.xml @@ -0,0 +1,172 @@ + + + + 4.0.0 + + org.apache.xtable + xtable + 0.2.0-SNAPSHOT + + + xtable-delta_${scala.binary.version} + XTable Project Delta + + + + org.apache.xtable + xtable-api + ${project.version} + + + org.apache.xtable + xtable-core_${scala.binary.version} + ${project.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.module + jackson-module-scala_${scala.binary.version} + + + com.google.guava + guava + + + + + org.apache.avro + avro + + + + + org.scala-lang + scala-library + + + + + io.delta + delta-core_${scala.binary.version} + + + io.delta + delta-standalone_${scala.binary.version} + test + + + + + org.apache.hadoop + hadoop-common + provided + + + + + org.apache.logging.log4j + log4j-api + + + org.apache.logging.log4j + log4j-1.2-api + + + + + org.apache.spark + spark-core_${scala.binary.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + provided + + + + + org.mockito + mockito-core + test + + + + + org.junit.jupiter + junit-jupiter-api + test + + + org.junit.jupiter + junit-jupiter-params + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + + + org.apache.logging.log4j + log4j-core + test + + + org.apache.logging.log4j + log4j-slf4j2-impl + test + + + + org.apache.xtable + xtable-core_${scala.binary.version} + ${project.version} + tests + test-jar + test + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + + diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java similarity index 100% rename from 
xtable-core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSourceProvider.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionSourceProvider.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionSourceProvider.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionSourceProvider.java diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionUtils.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionUtils.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaConversionUtils.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaConversionUtils.java diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java rename to 
xtable-delta/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileUpdatesExtractor.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaDataFileUpdatesExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaDataFileUpdatesExtractor.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaDataFileUpdatesExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaIncrementalChangesState.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaIncrementalChangesState.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaIncrementalChangesState.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaIncrementalChangesState.java diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaStatsExtractor.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaStatsExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaStatsExtractor.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaStatsExtractor.java diff --git 
a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaTableExtractor.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaTableExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaTableExtractor.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaTableExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java b/xtable-delta/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/ScalaUtils.java b/xtable-delta/src/main/java/org/apache/xtable/delta/ScalaUtils.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/delta/ScalaUtils.java rename to xtable-delta/src/main/java/org/apache/xtable/delta/ScalaUtils.java diff --git a/xtable-delta/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget b/xtable-delta/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget new file mode 100644 index 000000000..cea6bd3ed --- /dev/null +++ b/xtable-delta/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget @@ -0,0 +1,19 @@ +########################################################################## +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +########################################################################## + +org.apache.xtable.delta.DeltaConversionTarget diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaConversionTargetSource.java b/xtable-delta/src/test/java/org/apache/xtable/delta/ITDeltaConversionTargetSource.java similarity index 99% rename from xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaConversionTargetSource.java rename to xtable-delta/src/test/java/org/apache/xtable/delta/ITDeltaConversionTargetSource.java index ca8bc3fa0..34e48f4bd 100644 --- a/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaConversionTargetSource.java +++ b/xtable-delta/src/test/java/org/apache/xtable/delta/ITDeltaConversionTargetSource.java @@ -52,7 +52,6 @@ import org.junit.jupiter.params.provider.MethodSource; import org.apache.xtable.GenericTable; -import org.apache.xtable.TestSparkDeltaTable; import org.apache.xtable.ValidationTestHelper; import org.apache.xtable.conversion.SourceTable; import org.apache.xtable.model.CommitsBacklog; diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java b/xtable-delta/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java similarity index 99% rename from xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java rename to xtable-delta/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java index ed02893e3..eb37a8d16 100644 --- a/xtable-core/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java +++ 
b/xtable-delta/src/test/java/org/apache/xtable/delta/ITDeltaDeleteVectorConvert.java @@ -42,7 +42,6 @@ import scala.Option; import org.apache.xtable.GenericTable; -import org.apache.xtable.TestSparkDeltaTable; import org.apache.xtable.ValidationTestHelper; import org.apache.xtable.conversion.SourceTable; import org.apache.xtable.model.CommitsBacklog; diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaActionsConverter.java diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaHelper.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaHelper.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaHelper.java rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaHelper.java diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaPartitionExtractor.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaPartitionExtractor.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaPartitionExtractor.java rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaPartitionExtractor.java diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaSchemaExtractor.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaSchemaExtractor.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaSchemaExtractor.java rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaSchemaExtractor.java diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaStatsExtractor.java 
b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaStatsExtractor.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaStatsExtractor.java rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaStatsExtractor.java diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaSync.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaSync.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaSync.java rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaSync.java diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java diff --git a/xtable-core/src/test/java/org/apache/xtable/TestSparkDeltaTable.java b/xtable-delta/src/test/java/org/apache/xtable/delta/TestSparkDeltaTable.java similarity index 99% rename from xtable-core/src/test/java/org/apache/xtable/TestSparkDeltaTable.java rename to xtable-delta/src/test/java/org/apache/xtable/delta/TestSparkDeltaTable.java index ee5b1ccdd..38057874f 100644 --- a/xtable-core/src/test/java/org/apache/xtable/TestSparkDeltaTable.java +++ b/xtable-delta/src/test/java/org/apache/xtable/delta/TestSparkDeltaTable.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.xtable; +package org.apache.xtable.delta; import java.io.Closeable; import java.io.IOException; @@ -44,7 +44,7 @@ import io.delta.tables.DeltaTable; -import org.apache.xtable.delta.TestDeltaHelper; +import org.apache.xtable.GenericTable; @Getter public class TestSparkDeltaTable implements GenericTable, Closeable { diff --git a/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml b/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml index fba3fe0a9..775484d39 100644 --- a/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml +++ b/xtable-hudi-support/xtable-hudi-support-extensions/pom.xml @@ -42,6 +42,12 @@ ${project.version} + + org.apache.xtable + xtable-hudi + ${project.version} + + org.slf4j @@ -180,6 +186,20 @@ log4j-slf4j2-impl test + + + org.apache.xtable + xtable-delta_${scala.binary.version} + ${project.version} + test + + + + org.apache.xtable + xtable-iceberg + ${project.version} + test + diff --git a/xtable-hudi/pom.xml b/xtable-hudi/pom.xml new file mode 100644 index 000000000..89629bddf --- /dev/null +++ b/xtable-hudi/pom.xml @@ -0,0 +1,176 @@ + + + + 4.0.0 + + org.apache.xtable + xtable + 0.2.0-SNAPSHOT + + + xtable-hudi + XTable Project Hudi + + + + org.apache.xtable + xtable-api + ${project.version} + + + org.apache.xtable + xtable-core_${scala.binary.version} + ${project.version} + + + org.apache.xtable + xtable-hudi-support-utils + ${project.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.module + jackson-module-scala_${scala.binary.version} + + + com.google.guava + guava + + + + + org.apache.avro + avro + + + + + org.apache.hudi + hudi-spark${spark.version.prefix}-bundle_${scala.binary.version} + test + + + org.apache.hudi + hudi-common + + + org.apache.hudi + hudi-java-client + + + + + org.apache.hadoop + hadoop-common + provided + + + + + org.apache.logging.log4j + log4j-api + + + org.apache.logging.log4j + 
log4j-1.2-api + + + + + org.apache.spark + spark-core_${scala.binary.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + provided + + + + + org.mockito + mockito-core + test + + + + + org.junit.jupiter + junit-jupiter-api + test + + + org.junit.jupiter + junit-jupiter-params + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + + + org.apache.logging.log4j + log4j-core + test + + + org.apache.logging.log4j + log4j-slf4j2-impl + test + + + + org.apache.xtable + xtable-core_${scala.binary.version} + ${project.version} + tests + test-jar + test + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + + + diff --git a/xtable-core/src/main/java/org/apache/xtable/avro/AvroSchemaConverter.java b/xtable-hudi/src/main/java/org/apache/xtable/avro/AvroSchemaConverter.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/avro/AvroSchemaConverter.java rename to xtable-hudi/src/main/java/org/apache/xtable/avro/AvroSchemaConverter.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/ConfigurationBasedPartitionSpecExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/ConfigurationBasedPartitionSpecExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/ConfigurationBasedPartitionSpecExtractor.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/ConfigurationBasedPartitionSpecExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java 
b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiConversionSourceProvider.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionTarget.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiConversionTarget.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionTarget.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiConversionTarget.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiDataFileExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiDataFileExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiDataFileExtractor.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiDataFileExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStats.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiFileStats.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStats.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiFileStats.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java 
rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java similarity index 81% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java index 85cb19c07..4b2968dc5 100644 --- a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java +++ b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java @@ -18,10 +18,6 @@ package org.apache.xtable.hudi; -import static org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator.MILLIS_INSTANT_TIMESTAMP_FORMAT_LENGTH; -import static org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator.SECS_INSTANT_ID_LENGTH; -import static org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator.SECS_INSTANT_TIMESTAMP_FORMAT; - import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneId; @@ -41,7 +37,7 @@ class HudiInstantUtils { // https://bugs.openjdk.java.net/browse/JDK-8031085. 
hence have to do appendValue() private static final DateTimeFormatter MILLIS_INSTANT_TIME_FORMATTER = new DateTimeFormatterBuilder() - .appendPattern(SECS_INSTANT_TIMESTAMP_FORMAT) + .appendPattern(HoodieInstantTimeGenerator.SECS_INSTANT_TIMESTAMP_FORMAT) .appendValue(ChronoField.MILLI_OF_SECOND, 3) .toFormatter() .withZone(ZONE_ID); @@ -59,8 +55,11 @@ static Instant parseFromInstantTime(String timestamp) { String timestampInMillis = timestamp; if (isSecondGranularity(timestamp)) { timestampInMillis = timestamp + "999"; - } else if (timestamp.length() > MILLIS_INSTANT_TIMESTAMP_FORMAT_LENGTH) { - timestampInMillis = timestamp.substring(0, MILLIS_INSTANT_TIMESTAMP_FORMAT_LENGTH); + } else if (timestamp.length() + > HoodieInstantTimeGenerator.MILLIS_INSTANT_TIMESTAMP_FORMAT_LENGTH) { + timestampInMillis = + timestamp.substring( + 0, HoodieInstantTimeGenerator.MILLIS_INSTANT_TIMESTAMP_FORMAT_LENGTH); } LocalDateTime dt = LocalDateTime.parse(timestampInMillis, MILLIS_INSTANT_TIME_FORMATTER); @@ -76,6 +75,6 @@ static String convertInstantToCommit(Instant instant) { } private static boolean isSecondGranularity(String instant) { - return instant.length() == SECS_INSTANT_ID_LENGTH; + return instant.length() == HoodieInstantTimeGenerator.SECS_INSTANT_ID_LENGTH; } } diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiPartitionValuesExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiPathUtils.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiPathUtils.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiPathUtils.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiPathUtils.java 
diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiSourceConfig.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiSourcePartitionSpecExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiSourcePartitionSpecExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiSourcePartitionSpecExtractor.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiSourcePartitionSpecExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiTableExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableManager.java b/xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiTableManager.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableManager.java rename to xtable-hudi/src/main/java/org/apache/xtable/hudi/HudiTableManager.java diff --git a/xtable-hudi/src/main/resources/services/org.apache.xtable.spi.sync.ConversionTarget b/xtable-hudi/src/main/resources/services/org.apache.xtable.spi.sync.ConversionTarget new file mode 
100644 index 000000000..2bea153b1 --- /dev/null +++ b/xtable-hudi/src/main/resources/services/org.apache.xtable.spi.sync.ConversionTarget @@ -0,0 +1,19 @@ +########################################################################## +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+########################################################################## + +org.apache.xtable.hudi.HudiConversionTarget diff --git a/xtable-core/src/test/java/org/apache/xtable/avro/TestAvroSchemaConverter.java b/xtable-hudi/src/test/java/org/apache/xtable/avro/TestAvroSchemaConverter.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/avro/TestAvroSchemaConverter.java rename to xtable-hudi/src/test/java/org/apache/xtable/avro/TestAvroSchemaConverter.java diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceSource.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceSource.java similarity index 99% rename from xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceSource.java rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceSource.java index 3debf9043..b5d05385e 100644 --- a/xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceSource.java +++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceSource.java @@ -62,8 +62,6 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.xtable.GenericTable; -import org.apache.xtable.TestJavaHudiTable; -import org.apache.xtable.TestSparkHudiTable; import org.apache.xtable.ValidationTestHelper; import org.apache.xtable.model.CommitsBacklog; import org.apache.xtable.model.InstantsForIncrementalSync; diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceTarget.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceTarget.java similarity index 96% rename from 
xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceTarget.java rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceTarget.java index 128855672..e63cab855 100644 --- a/xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceTarget.java +++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceTarget.java @@ -21,7 +21,6 @@ import static org.apache.xtable.hudi.HudiTestUtil.createWriteStatus; import static org.apache.xtable.hudi.HudiTestUtil.getHoodieWriteConfig; import static org.apache.xtable.hudi.HudiTestUtil.initTableAndGetMetaClient; -import static org.junit.jupiter.api.Assertions.assertEquals; import java.nio.file.Path; import java.time.Duration; @@ -368,7 +367,7 @@ CONTEXT, getHoodieWriteConfig(metaClient).getMetadataConfig(), tableBasePath, tr assertColStats(hoodieBackedTableMetadata, partitionPath, fileName4); } // the first commit to the timeline should be archived - assertEquals( + Assertions.assertEquals( 2, metaClient.getArchivedTimeline().reload().filterCompletedInstants().countInstants()); } @@ -428,7 +427,7 @@ private void assertSchema(HoodieTableMetaClient metaClient, boolean includeMetaF .requiredString(OTHER_FIELD_NAME) .endRecord(); } - assertEquals(expected, actual); + Assertions.assertEquals(expected, actual); } private void assertFileGroupCorrectness( @@ -446,15 +445,15 @@ private void assertFileGroupCorrectness( .getAllFileGroups(partitionPath) .sorted(Comparator.comparing(HoodieFileGroup::getFileGroupId)) .collect(Collectors.toList()); - assertEquals(fileIdAndPath.size(), fileGroups.size()); + Assertions.assertEquals(fileIdAndPath.size(), fileGroups.size()); for (int i = 0; i < fileIdAndPath.size(); i++) { HoodieFileGroup fileGroup = fileGroups.get(i); String expectedFileId = fileIdAndPath.get(i).getLeft(); String expectedFilePath = fileIdAndPath.get(i).getRight(); - assertEquals(expectedFileId, fileGroup.getFileGroupId().getFileId()); - 
assertEquals(partitionPath, fileGroup.getPartitionPath()); + Assertions.assertEquals(expectedFileId, fileGroup.getFileGroupId().getFileId()); + Assertions.assertEquals(partitionPath, fileGroup.getPartitionPath()); HoodieBaseFile baseFile = fileGroup.getAllBaseFiles().findFirst().get(); - assertEquals( + Assertions.assertEquals( metaClient.getBasePathV2().toString() + "/" + expectedFilePath, baseFile.getPath()); } fsView.close(); @@ -518,16 +517,16 @@ private void assertColStatsForField( Map, HoodieMetadataColumnStats> fieldColStats = hoodieBackedTableMetadata.getColumnStats( Collections.singletonList(Pair.of(partitionPath, fileName)), fieldName); - assertEquals(1, fieldColStats.size()); + Assertions.assertEquals(1, fieldColStats.size()); HoodieMetadataColumnStats columnStats = fieldColStats.get(Pair.of(partitionPath, fileName)); - assertEquals(fieldName, columnStats.getColumnName()); - assertEquals(fileName, columnStats.getFileName()); - assertEquals(new StringWrapper(minValue), columnStats.getMinValue()); - assertEquals(new StringWrapper(maxValue), columnStats.getMaxValue()); - assertEquals(valueCount, columnStats.getValueCount()); - assertEquals(nullCount, columnStats.getNullCount()); - assertEquals(totalSize, columnStats.getTotalSize()); - assertEquals(-1, columnStats.getTotalUncompressedSize()); + Assertions.assertEquals(fieldName, columnStats.getColumnName()); + Assertions.assertEquals(fileName, columnStats.getFileName()); + Assertions.assertEquals(new StringWrapper(minValue), columnStats.getMinValue()); + Assertions.assertEquals(new StringWrapper(maxValue), columnStats.getMaxValue()); + Assertions.assertEquals(valueCount, columnStats.getValueCount()); + Assertions.assertEquals(nullCount, columnStats.getNullCount()); + Assertions.assertEquals(totalSize, columnStats.getTotalSize()); + Assertions.assertEquals(-1, columnStats.getTotalUncompressedSize()); } private InternalDataFile getTestFile(String partitionPath, String fileName) { diff --git 
a/xtable-core/src/test/java/org/apache/xtable/TestAbstractHudiTable.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestAbstractHudiTable.java similarity index 97% rename from xtable-core/src/test/java/org/apache/xtable/TestAbstractHudiTable.java rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestAbstractHudiTable.java index 3e9a133a2..0d134df65 100644 --- a/xtable-core/src/test/java/org/apache/xtable/TestAbstractHudiTable.java +++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestAbstractHudiTable.java @@ -16,9 +16,8 @@ * limitations under the License. */ -package org.apache.xtable; +package org.apache.xtable.hudi; -import static org.apache.hudi.keygen.constant.KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME; import static org.apache.xtable.hudi.HudiTestUtil.getHoodieWriteConfig; import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -104,6 +103,8 @@ import com.google.common.base.Preconditions; +import org.apache.xtable.GenericTable; + public abstract class TestAbstractHudiTable implements GenericTable, String> { @@ -160,17 +161,19 @@ public abstract class TestAbstractHudiTable String[] partitionFieldConfigs = partitionConfig.split(","); if (partitionFieldConfigs.length == 1 && !partitionFieldConfigs[0].contains(".")) { typedProperties.put( - PARTITIONPATH_FIELD_NAME.key(), partitionFieldConfigs[0].split(":")[0]); + KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), + partitionFieldConfigs[0].split(":")[0]); if (partitionFieldConfigs[0].contains(".")) { // nested field this.keyGenerator = new CustomKeyGenerator(typedProperties); } else if (partitionFieldConfigs[0].contains("SIMPLE")) { // top level field this.keyGenerator = new SimpleKeyGenerator(typedProperties); } else { // top level timestamp field - typedProperties.put(PARTITIONPATH_FIELD_NAME.key(), partitionConfig); + typedProperties.put( + KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), partitionConfig); 
this.keyGenerator = new TimestampBasedKeyGenerator(typedProperties); } } else { - typedProperties.put(PARTITIONPATH_FIELD_NAME.key(), partitionConfig); + typedProperties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), partitionConfig); this.keyGenerator = new CustomKeyGenerator(typedProperties); } this.partitionFieldNames = @@ -431,7 +434,9 @@ protected HoodieWriteConfig generateWriteConfig(Schema schema, TypedProperties k // enable col stats only on un-partitioned data due to bug in Hudi // https://issues.apache.org/jira/browse/HUDI-6954 .withMetadataIndexColumnStats( - !keyGenProperties.getString(PARTITIONPATH_FIELD_NAME.key(), "").isEmpty()) + !keyGenProperties + .getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "") + .isEmpty()) .withColumnStatsIndexForColumns(getColumnsFromSchema(schema)) .build(); Properties lockProperties = new Properties(); @@ -645,7 +650,7 @@ private GenericRecord generateGenericRecord( value = System.currentTimeMillis(); } else if (fieldName.equals("level")) { // a simple string field to be used for basic partitioning if required - value = LEVEL_VALUES.get(RANDOM.nextInt(LEVEL_VALUES.size())); + value = GenericTable.LEVEL_VALUES.get(RANDOM.nextInt(GenericTable.LEVEL_VALUES.size())); } else if (fieldName.equals("severity")) { // a bounded integer field to be used for partition testing value = RANDOM.nextBoolean() ? 
null : RANDOM.nextInt(3); @@ -778,7 +783,7 @@ public void upsertRows(List> records) { @Override public List> insertRecordsForSpecialPartition(int numRecords) { - return insertRecords(numRecords, SPECIAL_PARTITION_VALUE, true); + return insertRecords(numRecords, GenericTable.SPECIAL_PARTITION_VALUE, true); } @Override diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java similarity index 95% rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java index 8f3b3f7e1..64364a2a8 100644 --- a/xtable-core/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java +++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestBaseFileUpdatesExtractor.java @@ -18,10 +18,6 @@ package org.apache.xtable.hudi; -import static org.apache.xtable.hudi.HudiTestUtil.createWriteStatus; -import static org.apache.xtable.hudi.HudiTestUtil.getHoodieWriteConfig; -import static org.apache.xtable.hudi.HudiTestUtil.initTableAndGetMetaClient; -import static org.apache.xtable.testutil.ColumnStatMapUtil.getColumnStats; import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.IOException; @@ -98,13 +94,15 @@ void convertDiff() { String fileName2 = "file2.parquet"; InternalDataFile addedFile2 = createFile( - String.format("%s/%s/%s", tableBasePath, partitionPath2, fileName2), getColumnStats()); + String.format("%s/%s/%s", tableBasePath, partitionPath2, fileName2), + ColumnStatMapUtil.getColumnStats()); // remove files 3 files from two different partitions String fileName3 = "file3.parquet"; InternalDataFile removedFile1 = createFile( - String.format("%s/%s/%s", tableBasePath, partitionPath1, fileName3), getColumnStats()); + String.format("%s/%s/%s", tableBasePath, partitionPath1, fileName3), + 
ColumnStatMapUtil.getColumnStats()); // create file that matches hudi format to mimic that a file create by hudi is now being removed // by another system String fileIdForFile4 = "d1cf0980-445c-4c74-bdeb-b7e5d18779f5-0"; @@ -168,13 +166,15 @@ void extractSnapshotChanges_emptyTargetTable() throws IOException { String fileName2 = "file2.parquet"; InternalDataFile addedFile2 = createFile( - String.format("%s/%s/%s", tableBasePath, partitionPath1, fileName2), getColumnStats()); + String.format("%s/%s/%s", tableBasePath, partitionPath1, fileName2), + ColumnStatMapUtil.getColumnStats()); // create file in a second partition String partitionPath2 = "partition2"; String fileName3 = "file3.parquet"; InternalDataFile addedFile3 = createFile( - String.format("%s/%s/%s", tableBasePath, partitionPath2, fileName3), getColumnStats()); + String.format("%s/%s/%s", tableBasePath, partitionPath2, fileName3), + ColumnStatMapUtil.getColumnStats()); BaseFileUpdatesExtractor extractor = BaseFileUpdatesExtractor.of(CONTEXT, new CachingPath(tableBasePath)); @@ -217,8 +217,8 @@ void extractSnapshotChanges_emptyTargetTable() throws IOException { void extractSnapshotChanges_existingPartitionedTargetTable() { String tableBasePath = tempDir.resolve(UUID.randomUUID().toString()).toString(); HoodieTableMetaClient setupMetaClient = - initTableAndGetMetaClient(tableBasePath, "partition_field"); - HoodieWriteConfig writeConfig = getHoodieWriteConfig(setupMetaClient); + HudiTestUtil.initTableAndGetMetaClient(tableBasePath, "partition_field"); + HoodieWriteConfig writeConfig = HudiTestUtil.getHoodieWriteConfig(setupMetaClient); String partitionPath1 = "partition1"; String partitionPath2 = "partition2"; @@ -264,7 +264,7 @@ void extractSnapshotChanges_existingPartitionedTargetTable() { InternalDataFile addedFile2 = createFile( String.format("%s/%s/%s", tableBasePath, partitionPath3, newFileName2), - getColumnStats()); + ColumnStatMapUtil.getColumnStats()); // InternalDataFile for one of the existing 
files in partition2 InternalDataFile existingFile = createFile( @@ -316,8 +316,9 @@ void extractSnapshotChanges_existingPartitionedTargetTable() { @Test void extractSnapshotChanges_existingNonPartitionedTargetTable() { String tableBasePath = tempDir.resolve(UUID.randomUUID().toString()).toString(); - HoodieTableMetaClient setupMetaClient = initTableAndGetMetaClient(tableBasePath, ""); - HoodieWriteConfig writeConfig = getHoodieWriteConfig(setupMetaClient); + HoodieTableMetaClient setupMetaClient = + HudiTestUtil.initTableAndGetMetaClient(tableBasePath, ""); + HoodieWriteConfig writeConfig = HudiTestUtil.getHoodieWriteConfig(setupMetaClient); // initialize the table with 2 files String existingFileName1 = "existing_file_1.parquet"; @@ -350,7 +351,9 @@ void extractSnapshotChanges_existingNonPartitionedTargetTable() { // create a snapshot with a new file added along with one of the existing files String newFileName1 = "new_file_1.parquet"; InternalDataFile addedFile1 = - createFile(String.format("%s/%s", tableBasePath, newFileName1), getColumnStats()); + createFile( + String.format("%s/%s", tableBasePath, newFileName1), + ColumnStatMapUtil.getColumnStats()); // InternalDataFile for one of the existing files in partition2 InternalDataFile existingFile = createFile( @@ -402,7 +405,7 @@ private WriteStatus getExpectedWriteStatus( String fileName, String partitionPath, Map> recordStats) { - return createWriteStatus( + return HudiTestUtil.createWriteStatus( fileName, partitionPath, COMMIT_TIME, RECORD_COUNT, FILE_SIZE, recordStats); } diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiConversionSourceTarget.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiConversionSourceTarget.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiConversionSourceTarget.java rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiConversionSourceTarget.java diff --git 
a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java similarity index 99% rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java index 82149c8b6..d6b613092 100644 --- a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java +++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java @@ -65,7 +65,6 @@ import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.xtable.GenericTable; -import org.apache.xtable.TestJavaHudiTable; import org.apache.xtable.model.schema.InternalField; import org.apache.xtable.model.schema.InternalSchema; import org.apache.xtable.model.schema.InternalType; diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiInstantUtils.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiInstantUtils.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiInstantUtils.java rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiInstantUtils.java diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiPartitionValuesExtractor.java diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiSchemaExtractor.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiSchemaExtractor.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiSchemaExtractor.java rename to 
xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiSchemaExtractor.java diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiTableManager.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiTableManager.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiTableManager.java rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiTableManager.java diff --git a/xtable-core/src/test/java/org/apache/xtable/TestJavaHudiTable.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestJavaHudiTable.java similarity index 99% rename from xtable-core/src/test/java/org/apache/xtable/TestJavaHudiTable.java rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestJavaHudiTable.java index ce3b25bda..6a5f8a63d 100644 --- a/xtable-core/src/test/java/org/apache/xtable/TestJavaHudiTable.java +++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestJavaHudiTable.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.xtable; +package org.apache.xtable.hudi; import java.io.IOException; import java.io.UncheckedIOException; diff --git a/xtable-core/src/test/java/org/apache/xtable/TestSparkHudiTable.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestSparkHudiTable.java similarity index 99% rename from xtable-core/src/test/java/org/apache/xtable/TestSparkHudiTable.java rename to xtable-hudi/src/test/java/org/apache/xtable/hudi/TestSparkHudiTable.java index 79316f5d9..5f62cc631 100644 --- a/xtable-core/src/test/java/org/apache/xtable/TestSparkHudiTable.java +++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestSparkHudiTable.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.xtable; +package org.apache.xtable.hudi; import java.nio.file.Path; import java.time.Instant; diff --git a/xtable-iceberg/pom.xml b/xtable-iceberg/pom.xml new file mode 100644 index 000000000..0e34b6f71 --- /dev/null +++ b/xtable-iceberg/pom.xml @@ -0,0 +1,172 @@ + + + + 4.0.0 + + org.apache.xtable + xtable + 0.2.0-SNAPSHOT + + + xtable-iceberg + XTable Project Iceberg + + + + org.apache.xtable + xtable-api + ${project.version} + + + org.apache.xtable + xtable-core_${scala.binary.version} + ${project.version} + + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.module + jackson-module-scala_${scala.binary.version} + + + com.google.guava + guava + + + + + org.apache.avro + avro + + + + + org.apache.iceberg + iceberg-core + + + org.apache.iceberg + iceberg-api + + + + + org.apache.hadoop + hadoop-common + provided + + + + + org.apache.logging.log4j + log4j-api + + + org.apache.logging.log4j + log4j-1.2-api + + + + + org.apache.iceberg + iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version} + test + + + org.apache.spark + spark-core_${scala.binary.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + provided + + + + + org.mockito + mockito-core + test + + + + + org.junit.jupiter + junit-jupiter-api + test + + + org.junit.jupiter + junit-jupiter-params + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + + + org.apache.logging.log4j + log4j-core + test + + + org.apache.logging.log4j + log4j-slf4j2-impl + test + + + + org.apache.xtable + xtable-core_${scala.binary.version} + ${project.version} + tests + test-jar + test + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + + + diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergCatalogConfig.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergCatalogConfig.java similarity index 100% rename from 
xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergCatalogConfig.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergCatalogConfig.java diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSourceProvider.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergConversionSourceProvider.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSourceProvider.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergConversionSourceProvider.java diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergDataFileExtractor.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergDataFileExtractor.java similarity index 100% rename from 
xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergDataFileExtractor.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergDataFileExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergDataFileUpdatesSync.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergDataFileUpdatesSync.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergDataFileUpdatesSync.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergDataFileUpdatesSync.java diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecExtractor.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecExtractor.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecSync.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecSync.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecSync.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionSpecSync.java diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java similarity index 98% rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java index a6abd2a91..738f19b07 100644 --- a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java +++ 
b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java @@ -42,7 +42,6 @@ import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Types; -import org.apache.xtable.avro.AvroSchemaConverter; import org.apache.xtable.exception.NotSupportedException; import org.apache.xtable.model.InternalTable; import org.apache.xtable.model.schema.InternalField; @@ -58,7 +57,6 @@ public class IcebergPartitionValueConverter { private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); private static final IcebergPartitionValueConverter INSTANCE = new IcebergPartitionValueConverter(); - private static final AvroSchemaConverter SCHEMA_CONVERTER = AvroSchemaConverter.getInstance(); private static final String DOT = "."; private static final String DOT_REPLACEMENT = "_x2E"; private static final String YEAR = "year"; diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueExtractor.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueExtractor.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java rename to 
xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergTableManager.java b/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergTableManager.java similarity index 100% rename from xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergTableManager.java rename to xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergTableManager.java diff --git a/xtable-core/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget b/xtable-iceberg/src/main/resources/services/org.apache.xtable.spi.sync.ConversionTarget similarity index 95% rename from xtable-core/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget rename to xtable-iceberg/src/main/resources/services/org.apache.xtable.spi.sync.ConversionTarget index a00b41e08..f71a3991c 100644 --- a/xtable-core/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget +++ b/xtable-iceberg/src/main/resources/services/org.apache.xtable.spi.sync.ConversionTarget @@ -17,5 +17,4 @@ ########################################################################## org.apache.xtable.hudi.HudiConversionTarget -org.apache.xtable.delta.DeltaConversionTarget org.apache.xtable.iceberg.IcebergConversionTarget diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionTargetSource.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionTargetSource.java similarity index 99% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionTargetSource.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionTargetSource.java index acd886887..ba66ddb36 100644 --- a/xtable-core/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionTargetSource.java +++ b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/ITIcebergConversionTargetSource.java @@ -47,7 +47,6 @@ 
import org.apache.iceberg.Snapshot; import org.apache.iceberg.data.Record; -import org.apache.xtable.TestIcebergTable; import org.apache.xtable.conversion.SourceTable; import org.apache.xtable.model.CommitsBacklog; import org.apache.xtable.model.InstantsForIncrementalSync; diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/IcebergTestUtils.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/IcebergTestUtils.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/IcebergTestUtils.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/IcebergTestUtils.java diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/StubCatalog.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/StubCatalog.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/StubCatalog.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/StubCatalog.java diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergColumnStatsConverter.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergColumnStatsConverter.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergColumnStatsConverter.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergColumnStatsConverter.java diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionTargetSource.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionTargetSource.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionTargetSource.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionTargetSource.java diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java 
b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecSync.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecSync.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecSync.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecSync.java diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaExtractor.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaExtractor.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaExtractor.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaExtractor.java diff --git 
a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaSync.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaSync.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaSync.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSchemaSync.java diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java similarity index 99% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java index bd36dde91..4913979c5 100644 --- a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java +++ b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java @@ -82,7 +82,6 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Sets; -import org.apache.xtable.ITConversionController; import org.apache.xtable.conversion.TargetTable; import org.apache.xtable.model.InternalSnapshot; import org.apache.xtable.model.InternalTable; @@ -103,8 +102,8 @@ import org.apache.xtable.spi.sync.TableFormatSync; /** - * Validates that the metadata for the table is properly created/updated. {@link - * ITConversionController} validates that the table and its data can be properly read. + * Validates that the metadata for the table is properly created/updated. ITConversionController + * validates that the table and its data can be properly read. 
*/ public class TestIcebergSync { private static final Random RANDOM = new Random(); diff --git a/xtable-core/src/test/java/org/apache/xtable/TestIcebergTable.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergTable.java similarity index 98% rename from xtable-core/src/test/java/org/apache/xtable/TestIcebergTable.java rename to xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergTable.java index 0c8336fef..05c9c5d91 100644 --- a/xtable-core/src/test/java/org/apache/xtable/TestIcebergTable.java +++ b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergTable.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.xtable; +package org.apache.xtable.iceberg; import static org.apache.iceberg.SnapshotSummary.TOTAL_RECORDS_PROP; import static org.junit.jupiter.api.Assertions.*; @@ -65,7 +65,7 @@ import com.google.common.base.Preconditions; -import org.apache.xtable.iceberg.TestIcebergDataHelper; +import org.apache.xtable.GenericTable; @Getter public class TestIcebergTable implements GenericTable { @@ -172,7 +172,7 @@ public List insertRecordsForPartition(int numRows, String partitionValue @Override public List insertRecordsForSpecialPartition(int numRows) { - return insertRecordsForPartition(numRows, SPECIAL_PARTITION_VALUE); + return insertRecordsForPartition(numRows, GenericTable.SPECIAL_PARTITION_VALUE); } @Override @@ -241,7 +241,7 @@ public void deletePartition(String partitionValue) { @Override public void deleteSpecialPartition() { - deletePartition(SPECIAL_PARTITION_VALUE); + deletePartition(GenericTable.SPECIAL_PARTITION_VALUE); } @Override diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergTableManager.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergTableManager.java similarity index 100% rename from xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergTableManager.java rename to 
xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergTableManager.java diff --git a/xtable-integration-tests/pom.xml b/xtable-integration-tests/pom.xml new file mode 100644 index 000000000..3b1d66afa --- /dev/null +++ b/xtable-integration-tests/pom.xml @@ -0,0 +1,247 @@ + + + + 4.0.0 + + org.apache.xtable + xtable + 0.2.0-SNAPSHOT + + + xtable-integration-tests + XTable Project Integration Test Suite + + + + org.apache.xtable + xtable-api + ${project.version} + + + org.apache.xtable + xtable-core_${scala.binary.version} + ${project.version} + + + org.apache.xtable + xtable-delta_${scala.binary.version} + ${project.version} + + + org.apache.xtable + xtable-iceberg + ${project.version} + + + org.apache.xtable + xtable-hudi + ${project.version} + + + org.apache.xtable + xtable-hudi-support-utils + ${project.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.module + jackson-module-scala_${scala.binary.version} + + + com.google.guava + guava + + + + + org.apache.avro + avro + + + + + org.scala-lang + scala-library + + + + + org.apache.hudi + hudi-spark${spark.version.prefix}-bundle_${scala.binary.version} + test + + + org.apache.hudi + hudi-common + + + org.apache.hudi + hudi-java-client + + + + + org.apache.iceberg + iceberg-core + + + org.apache.iceberg + iceberg-api + + + + + io.delta + delta-core_${scala.binary.version} + + + io.delta + delta-standalone_${scala.binary.version} + test + + + + + org.apache.hadoop + hadoop-common + provided + + + + + org.apache.logging.log4j + log4j-api + + + org.apache.logging.log4j + log4j-1.2-api + + + + + org.apache.iceberg + iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version} + test + + + org.apache.spark + spark-core_${scala.binary.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + provided + + + + + org.mockito + mockito-core + test + + + + + org.junit.jupiter + junit-jupiter-api + 
test + + + org.junit.jupiter + junit-jupiter-params + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + + + org.apache.logging.log4j + log4j-core + test + + + org.apache.logging.log4j + log4j-slf4j2-impl + test + + + + org.apache.xtable + xtable-core_${scala.binary.version} + ${project.version} + tests + test-jar + test + + + + org.apache.xtable + xtable-delta_${scala.binary.version} + ${project.version} + tests + test-jar + test + + + + org.apache.xtable + xtable-hudi + ${project.version} + tests + test-jar + test + + + + org.apache.xtable + xtable-iceberg + ${project.version} + tests + test-jar + test + + + + + + + org.apache.maven.plugins + maven-deploy-plugin + ${maven-deploy-plugin.version} + + + true + + + + + diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/GenericTableFactory.java b/xtable-integration-tests/src/test/java/org/apache/xtable/GenericTableFactory.java new file mode 100644 index 000000000..fd62578ac --- /dev/null +++ b/xtable-integration-tests/src/test/java/org/apache/xtable/GenericTableFactory.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.xtable; + +import static org.apache.xtable.model.storage.TableFormat.DELTA; +import static org.apache.xtable.model.storage.TableFormat.HUDI; +import static org.apache.xtable.model.storage.TableFormat.ICEBERG; + +import java.nio.file.Path; + +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; + +import org.apache.hudi.common.model.HoodieTableType; + +import org.apache.xtable.delta.TestSparkDeltaTable; +import org.apache.xtable.hudi.TestSparkHudiTable; +import org.apache.xtable.iceberg.TestIcebergTable; + +public class GenericTableFactory { + static GenericTable getInstance( + String tableName, + Path tempDir, + SparkSession sparkSession, + JavaSparkContext jsc, + String sourceFormat, + boolean isPartitioned) { + switch (sourceFormat) { + case HUDI: + return TestSparkHudiTable.forStandardSchemaAndPartitioning( + tableName, tempDir, jsc, isPartitioned); + case DELTA: + return TestSparkDeltaTable.forStandardSchemaAndPartitioning( + tableName, tempDir, sparkSession, isPartitioned ? "level" : null); + case ICEBERG: + return TestIcebergTable.forStandardSchemaAndPartitioning( + tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration()); + default: + throw new IllegalArgumentException("Unsupported source format: " + sourceFormat); + } + } + + static GenericTable getInstanceWithAdditionalColumns( + String tableName, + Path tempDir, + SparkSession sparkSession, + JavaSparkContext jsc, + String sourceFormat, + boolean isPartitioned) { + switch (sourceFormat) { + case HUDI: + return TestSparkHudiTable.forSchemaWithAdditionalColumnsAndPartitioning( + tableName, tempDir, jsc, isPartitioned); + case DELTA: + return TestSparkDeltaTable.forSchemaWithAdditionalColumnsAndPartitioning( + tableName, tempDir, sparkSession, isPartitioned ? "level" : null); + case ICEBERG: + return TestIcebergTable.forSchemaWithAdditionalColumnsAndPartitioning( + tableName, isPartitioned ? 
"level" : null, tempDir, jsc.hadoopConfiguration()); + default: + throw new IllegalArgumentException("Unsupported source format: " + sourceFormat); + } + } + + static GenericTable getInstanceWithCustomPartitionConfig( + String tableName, + Path tempDir, + JavaSparkContext jsc, + String sourceFormat, + String partitionConfig) { + switch (sourceFormat) { + case HUDI: + return TestSparkHudiTable.forStandardSchema( + tableName, tempDir, jsc, partitionConfig, HoodieTableType.COPY_ON_WRITE); + default: + throw new IllegalArgumentException( + String.format( + "Unsupported source format: %s for custom partition config", sourceFormat)); + } + } + + static GenericTable getInstanceWithUUIDColumns( + String tableName, + Path tempDir, + SparkSession sparkSession, + JavaSparkContext jsc, + String sourceFormat, + boolean isPartitioned) { + switch (sourceFormat) { + case ICEBERG: + return TestIcebergTable.forSchemaWithUUIDColumns( + tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration()); + default: + throw new IllegalArgumentException("Unsupported source format: " + sourceFormat); + } + } +} diff --git a/xtable-core/src/test/java/org/apache/xtable/ITConversionController.java b/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java similarity index 84% rename from xtable-core/src/test/java/org/apache/xtable/ITConversionController.java rename to xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java index 3d539766a..479eea7ed 100644 --- a/xtable-core/src/test/java/org/apache/xtable/ITConversionController.java +++ b/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java @@ -18,14 +18,7 @@ package org.apache.xtable; -import static org.apache.xtable.GenericTable.getTableName; -import static org.apache.xtable.hudi.HudiSourceConfig.PARTITION_FIELD_SPEC_CONFIG; import static org.apache.xtable.hudi.HudiTestUtil.PartitionConfig; -import static 
org.apache.xtable.model.storage.TableFormat.DELTA; -import static org.apache.xtable.model.storage.TableFormat.HUDI; -import static org.apache.xtable.model.storage.TableFormat.ICEBERG; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import java.net.URI; import java.nio.ByteBuffer; @@ -98,7 +91,10 @@ import org.apache.xtable.conversion.TargetTable; import org.apache.xtable.delta.DeltaConversionSourceProvider; import org.apache.xtable.hudi.HudiConversionSourceProvider; +import org.apache.xtable.hudi.HudiSourceConfig; import org.apache.xtable.hudi.HudiTestUtil; +import org.apache.xtable.hudi.TestJavaHudiTable; +import org.apache.xtable.hudi.TestSparkHudiTable; import org.apache.xtable.iceberg.IcebergConversionSourceProvider; import org.apache.xtable.model.storage.TableFormat; import org.apache.xtable.model.sync.SyncMode; @@ -140,7 +136,8 @@ private static Stream testCasesWithPartitioningAndSyncModes() { private static Stream generateTestParametersForFormatsSyncModesAndPartitioning() { List arguments = new ArrayList<>(); - for (String sourceTableFormat : Arrays.asList(HUDI, DELTA, ICEBERG)) { + for (String sourceTableFormat : + Arrays.asList(TableFormat.HUDI, TableFormat.DELTA, TableFormat.ICEBERG)) { for (SyncMode syncMode : SyncMode.values()) { for (boolean isPartitioned : new boolean[] {true, false}) { arguments.add(Arguments.of(sourceTableFormat, syncMode, isPartitioned)); @@ -156,8 +153,9 @@ private static Stream generateTestParametersForUUID() { for (boolean isPartitioned : new boolean[] {true, false}) { // TODO: Add Hudi UUID support later (https://github.com/apache/incubator-xtable/issues/543) // Current spark parquet reader can not handle fix-size byte array with UUID logic type - List targetTableFormats = Arrays.asList(DELTA); - arguments.add(Arguments.of(ICEBERG, targetTableFormats, syncMode, isPartitioned)); + List targetTableFormats = Arrays.asList(TableFormat.DELTA); + 
arguments.add( + Arguments.of(TableFormat.ICEBERG, targetTableFormats, syncMode, isPartitioned)); } } return arguments.stream(); @@ -168,17 +166,17 @@ private static Stream testCasesWithSyncModes() { } private ConversionSourceProvider getConversionSourceProvider(String sourceTableFormat) { - if (sourceTableFormat.equalsIgnoreCase(HUDI)) { + if (sourceTableFormat.equalsIgnoreCase(TableFormat.HUDI)) { ConversionSourceProvider hudiConversionSourceProvider = new HudiConversionSourceProvider(); hudiConversionSourceProvider.init(jsc.hadoopConfiguration()); return hudiConversionSourceProvider; - } else if (sourceTableFormat.equalsIgnoreCase(DELTA)) { + } else if (sourceTableFormat.equalsIgnoreCase(TableFormat.DELTA)) { ConversionSourceProvider deltaConversionSourceProvider = new DeltaConversionSourceProvider(); deltaConversionSourceProvider.init(jsc.hadoopConfiguration()); return deltaConversionSourceProvider; - } else if (sourceTableFormat.equalsIgnoreCase(ICEBERG)) { + } else if (sourceTableFormat.equalsIgnoreCase(TableFormat.ICEBERG)) { ConversionSourceProvider icebergConversionSourceProvider = new IcebergConversionSourceProvider(); icebergConversionSourceProvider.init(jsc.hadoopConfiguration()); @@ -202,7 +200,7 @@ private ConversionSourceProvider getConversionSourceProvider(String sourceTab @MethodSource("generateTestParametersForFormatsSyncModesAndPartitioning") public void testVariousOperations( String sourceTableFormat, SyncMode syncMode, boolean isPartitioned) { - String tableName = getTableName(); + String tableName = GenericTable.getTableName(); ConversionController conversionController = new ConversionController(jsc.hadoopConfiguration()); List targetTableFormats = getOtherFormats(sourceTableFormat); String partitionConfig = null; @@ -213,7 +211,7 @@ public void testVariousOperations( getConversionSourceProvider(sourceTableFormat); List insertRecords; try (GenericTable table = - GenericTable.getInstance( + GenericTableFactory.getInstance( tableName, tempDir, 
sparkSession, jsc, sourceTableFormat, isPartitioned)) { insertRecords = table.insertRows(100); @@ -243,7 +241,7 @@ public void testVariousOperations( } try (GenericTable tableWithUpdatedSchema = - GenericTable.getInstanceWithAdditionalColumns( + GenericTableFactory.getInstanceWithAdditionalColumns( tableName, tempDir, sparkSession, jsc, sourceTableFormat, isPartitioned)) { ConversionConfig conversionConfig = getTableSyncConfig( @@ -291,7 +289,7 @@ public void testVariousOperationsWithUUID( List targetTableFormats, SyncMode syncMode, boolean isPartitioned) { - String tableName = getTableName(); + String tableName = GenericTable.getTableName(); ConversionController conversionController = new ConversionController(jsc.hadoopConfiguration()); String partitionConfig = null; if (isPartitioned) { @@ -301,7 +299,7 @@ public void testVariousOperationsWithUUID( getConversionSourceProvider(sourceTableFormat); List insertRecords; try (GenericTable table = - GenericTable.getInstanceWithUUIDColumns( + GenericTableFactory.getInstanceWithUUIDColumns( tableName, tempDir, sparkSession, jsc, sourceTableFormat, isPartitioned)) { insertRecords = table.insertRows(100); @@ -334,9 +332,10 @@ public void testVariousOperationsWithUUID( @MethodSource("testCasesWithPartitioningAndSyncModes") public void testConcurrentInsertWritesInSource( SyncMode syncMode, PartitionConfig partitionConfig) { - String tableName = getTableName(); - ConversionSourceProvider conversionSourceProvider = getConversionSourceProvider(HUDI); - List targetTableFormats = getOtherFormats(HUDI); + String tableName = GenericTable.getTableName(); + ConversionSourceProvider conversionSourceProvider = + getConversionSourceProvider(TableFormat.HUDI); + List targetTableFormats = getOtherFormats(TableFormat.HUDI); try (TestJavaHudiTable table = TestJavaHudiTable.forStandardSchema( tableName, tempDir, partitionConfig.getHudiConfig(), HoodieTableType.COPY_ON_WRITE)) { @@ -351,7 +350,7 @@ public void 
testConcurrentInsertWritesInSource( ConversionConfig conversionConfig = getTableSyncConfig( - HUDI, + TableFormat.HUDI, syncMode, tableName, table, @@ -362,10 +361,10 @@ public void testConcurrentInsertWritesInSource( new ConversionController(jsc.hadoopConfiguration()); conversionController.sync(conversionConfig, conversionSourceProvider); - checkDatasetEquivalence(HUDI, table, targetTableFormats, 50); + checkDatasetEquivalence(TableFormat.HUDI, table, targetTableFormats, 50); table.insertRecordsWithCommitAlreadyStarted(insertsForCommit1, commitInstant1, true); conversionController.sync(conversionConfig, conversionSourceProvider); - checkDatasetEquivalence(HUDI, table, targetTableFormats, 100); + checkDatasetEquivalence(TableFormat.HUDI, table, targetTableFormats, 100); } } @@ -374,9 +373,10 @@ public void testConcurrentInsertWritesInSource( public void testConcurrentInsertsAndTableServiceWrites( SyncMode syncMode, PartitionConfig partitionConfig) { HoodieTableType tableType = HoodieTableType.MERGE_ON_READ; - ConversionSourceProvider conversionSourceProvider = getConversionSourceProvider(HUDI); - List targetTableFormats = getOtherFormats(HUDI); - String tableName = getTableName(); + ConversionSourceProvider conversionSourceProvider = + getConversionSourceProvider(TableFormat.HUDI); + List targetTableFormats = getOtherFormats(TableFormat.HUDI); + String tableName = GenericTable.getTableName(); try (TestSparkHudiTable table = TestSparkHudiTable.forStandardSchema( tableName, tempDir, jsc, partitionConfig.getHudiConfig(), tableType)) { @@ -384,7 +384,7 @@ public void testConcurrentInsertsAndTableServiceWrites( ConversionConfig conversionConfig = getTableSyncConfig( - HUDI, + TableFormat.HUDI, syncMode, tableName, table, @@ -394,7 +394,7 @@ public void testConcurrentInsertsAndTableServiceWrites( ConversionController conversionController = new ConversionController(jsc.hadoopConfiguration()); conversionController.sync(conversionConfig, conversionSourceProvider); - 
checkDatasetEquivalence(HUDI, table, targetTableFormats, 50); + checkDatasetEquivalence(TableFormat.HUDI, table, targetTableFormats, 50); table.deleteRecords(insertedRecords1.subList(0, 20), true); // At this point table should have 30 records but only after compaction. @@ -406,26 +406,37 @@ public void testConcurrentInsertsAndTableServiceWrites( Collections.singletonMap("hoodie.datasource.query.type", "read_optimized"); // Because compaction is not completed yet and read optimized query, there are 100 records. checkDatasetEquivalence( - HUDI, table, sourceHudiOptions, targetTableFormats, Collections.emptyMap(), 100); + TableFormat.HUDI, + table, + sourceHudiOptions, + targetTableFormats, + Collections.emptyMap(), + 100); table.insertRecords(50, true); conversionController.sync(conversionConfig, conversionSourceProvider); // Because compaction is not completed yet and read optimized query, there are 150 records. checkDatasetEquivalence( - HUDI, table, sourceHudiOptions, targetTableFormats, Collections.emptyMap(), 150); + TableFormat.HUDI, + table, + sourceHudiOptions, + targetTableFormats, + Collections.emptyMap(), + 150); table.completeScheduledCompaction(scheduledCompactionInstant); conversionController.sync(conversionConfig, conversionSourceProvider); - checkDatasetEquivalence(HUDI, table, targetTableFormats, 130); + checkDatasetEquivalence(TableFormat.HUDI, table, targetTableFormats, 130); } } @ParameterizedTest - @ValueSource(strings = {HUDI, DELTA, ICEBERG}) + @ValueSource(strings = {TableFormat.HUDI, TableFormat.DELTA, TableFormat.ICEBERG}) public void testTimeTravelQueries(String sourceTableFormat) throws Exception { - String tableName = getTableName(); + String tableName = GenericTable.getTableName(); try (GenericTable table = - GenericTable.getInstance(tableName, tempDir, sparkSession, jsc, sourceTableFormat, false)) { + GenericTableFactory.getInstance( + tableName, tempDir, sparkSession, jsc, sourceTableFormat, false)) { table.insertRows(50); List 
targetTableFormats = getOtherFormats(sourceTableFormat); ConversionConfig conversionConfig = @@ -500,32 +511,44 @@ private static Stream provideArgsForPartitionTesting() { return Stream.of( Arguments.of( buildArgsForPartition( - HUDI, Arrays.asList(ICEBERG, DELTA), "level:SIMPLE", "level:VALUE", levelFilter)), + TableFormat.HUDI, + Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA), + "level:SIMPLE", + "level:VALUE", + levelFilter)), Arguments.of( buildArgsForPartition( - DELTA, Arrays.asList(ICEBERG, HUDI), null, "level:VALUE", levelFilter)), + TableFormat.DELTA, + Arrays.asList(TableFormat.ICEBERG, TableFormat.HUDI), + null, + "level:VALUE", + levelFilter)), Arguments.of( buildArgsForPartition( - ICEBERG, Arrays.asList(DELTA, HUDI), null, "level:VALUE", levelFilter)), + TableFormat.ICEBERG, + Arrays.asList(TableFormat.DELTA, TableFormat.HUDI), + null, + "level:VALUE", + levelFilter)), Arguments.of( // Delta Lake does not currently support nested partition columns buildArgsForPartition( - HUDI, - Arrays.asList(ICEBERG), + TableFormat.HUDI, + Arrays.asList(TableFormat.ICEBERG), "nested_record.level:SIMPLE", "nested_record.level:VALUE", nestedLevelFilter)), Arguments.of( buildArgsForPartition( - HUDI, - Arrays.asList(ICEBERG, DELTA), + TableFormat.HUDI, + Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA), "severity:SIMPLE", "severity:VALUE", severityFilter)), Arguments.of( buildArgsForPartition( - HUDI, - Arrays.asList(ICEBERG, DELTA), + TableFormat.HUDI, + Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA), "timestamp_micros_nullable_field:TIMESTAMP,level:SIMPLE", "timestamp_micros_nullable_field:DAY:yyyy/MM/dd,level:VALUE", timestampAndLevelFilter))); @@ -534,7 +557,7 @@ private static Stream provideArgsForPartitionTesting() { @ParameterizedTest @MethodSource("provideArgsForPartitionTesting") public void testPartitionedData(TableFormatPartitionDataHolder tableFormatPartitionDataHolder) { - String tableName = getTableName(); + String tableName = 
GenericTable.getTableName(); String sourceTableFormat = tableFormatPartitionDataHolder.getSourceTableFormat(); List targetTableFormats = tableFormatPartitionDataHolder.getTargetTableFormats(); Optional hudiPartitionConfig = tableFormatPartitionDataHolder.getHudiSourceConfig(); @@ -545,11 +568,12 @@ public void testPartitionedData(TableFormatPartitionDataHolder tableFormatPartit GenericTable table; if (hudiPartitionConfig.isPresent()) { table = - GenericTable.getInstanceWithCustomPartitionConfig( + GenericTableFactory.getInstanceWithCustomPartitionConfig( tableName, tempDir, jsc, sourceTableFormat, hudiPartitionConfig.get()); } else { table = - GenericTable.getInstance(tableName, tempDir, sparkSession, jsc, sourceTableFormat, true); + GenericTableFactory.getInstance( + tableName, tempDir, sparkSession, jsc, sourceTableFormat, true); } try (GenericTable tableToClose = table) { ConversionConfig conversionConfig = @@ -577,8 +601,9 @@ public void testPartitionedData(TableFormatPartitionDataHolder tableFormatPartit @ParameterizedTest @EnumSource(value = SyncMode.class) public void testSyncWithSingleFormat(SyncMode syncMode) { - String tableName = getTableName(); - ConversionSourceProvider conversionSourceProvider = getConversionSourceProvider(HUDI); + String tableName = GenericTable.getTableName(); + ConversionSourceProvider conversionSourceProvider = + getConversionSourceProvider(TableFormat.HUDI); try (TestJavaHudiTable table = TestJavaHudiTable.forStandardSchema( tableName, tempDir, null, HoodieTableType.COPY_ON_WRITE)) { @@ -586,42 +611,66 @@ public void testSyncWithSingleFormat(SyncMode syncMode) { ConversionConfig conversionConfigIceberg = getTableSyncConfig( - HUDI, syncMode, tableName, table, ImmutableList.of(ICEBERG), null, null); + TableFormat.HUDI, + syncMode, + tableName, + table, + ImmutableList.of(TableFormat.ICEBERG), + null, + null); ConversionConfig conversionConfigDelta = - getTableSyncConfig(HUDI, syncMode, tableName, table, ImmutableList.of(DELTA), 
null, null); + getTableSyncConfig( + TableFormat.HUDI, + syncMode, + tableName, + table, + ImmutableList.of(TableFormat.DELTA), + null, + null); ConversionController conversionController = new ConversionController(jsc.hadoopConfiguration()); conversionController.sync(conversionConfigIceberg, conversionSourceProvider); - checkDatasetEquivalence(HUDI, table, Collections.singletonList(ICEBERG), 100); + checkDatasetEquivalence( + TableFormat.HUDI, table, Collections.singletonList(TableFormat.ICEBERG), 100); conversionController.sync(conversionConfigDelta, conversionSourceProvider); - checkDatasetEquivalence(HUDI, table, Collections.singletonList(DELTA), 100); + checkDatasetEquivalence( + TableFormat.HUDI, table, Collections.singletonList(TableFormat.DELTA), 100); table.insertRecords(100, true); conversionController.sync(conversionConfigIceberg, conversionSourceProvider); - checkDatasetEquivalence(HUDI, table, Collections.singletonList(ICEBERG), 200); + checkDatasetEquivalence( + TableFormat.HUDI, table, Collections.singletonList(TableFormat.ICEBERG), 200); conversionController.sync(conversionConfigDelta, conversionSourceProvider); - checkDatasetEquivalence(HUDI, table, Collections.singletonList(DELTA), 200); + checkDatasetEquivalence( + TableFormat.HUDI, table, Collections.singletonList(TableFormat.DELTA), 200); } } @Test public void testOutOfSyncIncrementalSyncs() { - String tableName = getTableName(); - ConversionSourceProvider conversionSourceProvider = getConversionSourceProvider(HUDI); + String tableName = GenericTable.getTableName(); + ConversionSourceProvider conversionSourceProvider = + getConversionSourceProvider(TableFormat.HUDI); try (TestJavaHudiTable table = TestJavaHudiTable.forStandardSchema( tableName, tempDir, null, HoodieTableType.COPY_ON_WRITE)) { ConversionConfig singleTableConfig = getTableSyncConfig( - HUDI, SyncMode.INCREMENTAL, tableName, table, ImmutableList.of(ICEBERG), null, null); + TableFormat.HUDI, + SyncMode.INCREMENTAL, + tableName, + 
table, + ImmutableList.of(TableFormat.ICEBERG), + null, + null); ConversionConfig dualTableConfig = getTableSyncConfig( - HUDI, + TableFormat.HUDI, SyncMode.INCREMENTAL, tableName, table, - Arrays.asList(ICEBERG, DELTA), + Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA), null, null); @@ -630,12 +679,14 @@ public void testOutOfSyncIncrementalSyncs() { new ConversionController(jsc.hadoopConfiguration()); // sync iceberg only conversionController.sync(singleTableConfig, conversionSourceProvider); - checkDatasetEquivalence(HUDI, table, Collections.singletonList(ICEBERG), 50); + checkDatasetEquivalence( + TableFormat.HUDI, table, Collections.singletonList(TableFormat.ICEBERG), 50); // insert more records table.insertRecords(50, true); // iceberg will be an incremental sync and delta will need to bootstrap with snapshot sync conversionController.sync(dualTableConfig, conversionSourceProvider); - checkDatasetEquivalence(HUDI, table, Arrays.asList(ICEBERG, DELTA), 100); + checkDatasetEquivalence( + TableFormat.HUDI, table, Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA), 100); // insert more records table.insertRecords(50, true); @@ -643,20 +694,23 @@ public void testOutOfSyncIncrementalSyncs() { table.insertRecords(50, true); // incremental sync for two commits for iceberg only conversionController.sync(singleTableConfig, conversionSourceProvider); - checkDatasetEquivalence(HUDI, table, Collections.singletonList(ICEBERG), 200); + checkDatasetEquivalence( + TableFormat.HUDI, table, Collections.singletonList(TableFormat.ICEBERG), 200); // insert more records table.insertRecords(50, true); // incremental sync for one commit for iceberg and three commits for delta conversionController.sync(dualTableConfig, conversionSourceProvider); - checkDatasetEquivalence(HUDI, table, Arrays.asList(ICEBERG, DELTA), 250); + checkDatasetEquivalence( + TableFormat.HUDI, table, Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA), 250); } } @Test public void 
testIcebergCorruptedSnapshotRecovery() throws Exception { - String tableName = getTableName(); - ConversionSourceProvider conversionSourceProvider = getConversionSourceProvider(HUDI); + String tableName = GenericTable.getTableName(); + ConversionSourceProvider conversionSourceProvider = + getConversionSourceProvider(TableFormat.HUDI); try (TestJavaHudiTable table = TestJavaHudiTable.forStandardSchema( tableName, tempDir, null, HoodieTableType.COPY_ON_WRITE)) { @@ -665,11 +719,11 @@ public void testIcebergCorruptedSnapshotRecovery() throws Exception { new ConversionController(jsc.hadoopConfiguration()); ConversionConfig conversionConfig = getTableSyncConfig( - HUDI, + TableFormat.HUDI, SyncMode.INCREMENTAL, tableName, table, - Collections.singletonList(ICEBERG), + Collections.singletonList(TableFormat.ICEBERG), null, null); conversionController.sync(conversionConfig, conversionSourceProvider); @@ -687,24 +741,26 @@ public void testIcebergCorruptedSnapshotRecovery() throws Exception { Paths.get(URI.create(icebergTable.snapshot(previousSnapshotId).manifestListLocation()))); table.insertRows(10); conversionController.sync(conversionConfig, conversionSourceProvider); - checkDatasetEquivalence(HUDI, table, Collections.singletonList(ICEBERG), 50); + checkDatasetEquivalence( + TableFormat.HUDI, table, Collections.singletonList(TableFormat.ICEBERG), 50); } } @Test public void testMetadataRetention() throws Exception { - String tableName = getTableName(); - ConversionSourceProvider conversionSourceProvider = getConversionSourceProvider(HUDI); + String tableName = GenericTable.getTableName(); + ConversionSourceProvider conversionSourceProvider = + getConversionSourceProvider(TableFormat.HUDI); try (TestJavaHudiTable table = TestJavaHudiTable.forStandardSchema( tableName, tempDir, null, HoodieTableType.COPY_ON_WRITE)) { ConversionConfig conversionConfig = getTableSyncConfig( - HUDI, + TableFormat.HUDI, SyncMode.INCREMENTAL, tableName, table, - Arrays.asList(ICEBERG, DELTA), + 
Arrays.asList(TableFormat.ICEBERG, TableFormat.DELTA), null, Duration.ofHours(0)); // force cleanup ConversionController conversionController = @@ -728,7 +784,7 @@ public void testMetadataRetention() throws Exception { sparkSession .read() .format("hudi") - .options(getTimeTravelOption(HUDI, instantAfterFirstCommit)) + .options(getTimeTravelOption(TableFormat.HUDI, instantAfterFirstCommit)) .load(table.getBasePath()) .collectAsList(); Assertions.assertEquals(10, rows.size()); @@ -746,13 +802,13 @@ public void testMetadataRetention() throws Exception { private Map getTimeTravelOption(String tableFormat, Instant time) { Map options = new HashMap<>(); switch (tableFormat) { - case HUDI: + case TableFormat.HUDI: options.put("as.of.instant", DATE_FORMAT.format(time)); break; - case ICEBERG: + case TableFormat.ICEBERG: options.put("as-of-timestamp", String.valueOf(time.toEpochMilli())); break; - case DELTA: + case TableFormat.DELTA: options.put("timestampAsOf", DATE_FORMAT.format(time)); break; default: @@ -832,7 +888,7 @@ private void checkDatasetEquivalence( targetFormat -> { Map finalTargetOptions = targetOptions.getOrDefault(targetFormat, Collections.emptyMap()); - if (targetFormat.equals(HUDI)) { + if (targetFormat.equals(TableFormat.HUDI)) { finalTargetOptions = new HashMap<>(finalTargetOptions); finalTargetOptions.put(HoodieMetadataConfig.ENABLE.key(), "true"); finalTargetOptions.put( @@ -853,7 +909,7 @@ private void checkDatasetEquivalence( (format, targetRows) -> { List dataset2Rows = targetRows.selectExpr(selectColumnsArr).toJSON().collectAsList(); - assertEquals( + Assertions.assertEquals( dataset1Rows.size(), dataset2Rows.size(), String.format( @@ -861,16 +917,16 @@ private void checkDatasetEquivalence( sourceFormat, format)); // sanity check the count to ensure test is set up properly if (expectedCount != null) { - assertEquals(expectedCount, dataset1Rows.size()); + Assertions.assertEquals(expectedCount, dataset1Rows.size()); } else { // if count is not 
known ahead of time, ensure datasets are non-empty - assertFalse(dataset1Rows.isEmpty()); + Assertions.assertFalse(dataset1Rows.isEmpty()); } if (containsUUIDFields(dataset1Rows) && containsUUIDFields(dataset2Rows)) { compareDatasetWithUUID(dataset1Rows, dataset2Rows); } else { - assertEquals( + Assertions.assertEquals( dataset1Rows, dataset2Rows, String.format( @@ -907,7 +963,7 @@ private void compareDatasetWithUUID(List dataset1Rows, List data ByteBuffer bb = ByteBuffer.wrap(bytes); UUID uuid2 = new UUID(bb.getLong(), bb.getLong()); String uuidStr2 = uuid2.toString(); - assertEquals( + Assertions.assertEquals( uuidStr1, uuidStr2, String.format( @@ -917,7 +973,7 @@ private void compareDatasetWithUUID(List dataset1Rows, List data // check other fields ((ObjectNode) node1).remove("uuid_field"); ((ObjectNode) node2).remove("uuid_field"); - assertEquals( + Assertions.assertEquals( node1.toString(), node2.toString(), String.format( @@ -927,7 +983,7 @@ private void compareDatasetWithUUID(List dataset1Rows, List data throw new RuntimeException(e); } } else { - assertEquals( + Assertions.assertEquals( row1, row2, String.format( @@ -995,7 +1051,7 @@ private static ConversionConfig getTableSyncConfig( Duration metadataRetention) { Properties sourceProperties = new Properties(); if (partitionConfig != null) { - sourceProperties.put(PARTITION_FIELD_SPEC_CONFIG, partitionConfig); + sourceProperties.put(HudiSourceConfig.PARTITION_FIELD_SPEC_CONFIG, partitionConfig); } SourceTable sourceTable = SourceTable.builder() diff --git a/xtable-core/src/test/java/org/apache/xtable/loadtest/LoadTest.java b/xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java similarity index 99% rename from xtable-core/src/test/java/org/apache/xtable/loadtest/LoadTest.java rename to xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java index 341b2cb02..63c83bda7 100644 --- a/xtable-core/src/test/java/org/apache/xtable/loadtest/LoadTest.java +++ 
b/xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java @@ -38,13 +38,13 @@ import org.apache.hudi.config.HoodieArchivalConfig; import org.apache.xtable.GenericTable; -import org.apache.xtable.TestJavaHudiTable; import org.apache.xtable.conversion.ConversionConfig; import org.apache.xtable.conversion.ConversionController; import org.apache.xtable.conversion.ConversionSourceProvider; import org.apache.xtable.conversion.SourceTable; import org.apache.xtable.conversion.TargetTable; import org.apache.xtable.hudi.HudiConversionSourceProvider; +import org.apache.xtable.hudi.TestJavaHudiTable; import org.apache.xtable.model.storage.TableFormat; import org.apache.xtable.model.sync.SyncMode; diff --git a/xtable-utilities/pom.xml b/xtable-utilities/pom.xml index 8191af3c0..27aed333f 100644 --- a/xtable-utilities/pom.xml +++ b/xtable-utilities/pom.xml @@ -35,6 +35,12 @@ ${project.version} + + org.apache.xtable + xtable-iceberg + ${project.version} + + commons-cli diff --git a/xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java b/xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java index c84753de5..32f496669 100644 --- a/xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java +++ b/xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java @@ -51,7 +51,6 @@ import org.apache.xtable.conversion.ConversionSourceProvider; import org.apache.xtable.conversion.SourceTable; import org.apache.xtable.conversion.TargetTable; -import org.apache.xtable.hudi.HudiSourceConfig; import org.apache.xtable.iceberg.IcebergCatalogConfig; import org.apache.xtable.model.storage.TableFormat; import org.apache.xtable.model.sync.SyncMode; @@ -153,7 +152,7 @@ public static void main(String[] args) throws IOException { Properties sourceProperties = new Properties(); if (table.getPartitionSpec() != null) { sourceProperties.put( - HudiSourceConfig.PARTITION_FIELD_SPEC_CONFIG, table.getPartitionSpec()); + 
"xtable.hudi.source.partition_field_spec_config", table.getPartitionSpec()); } SourceTable sourceTable = SourceTable.builder() From f080876d668ed482975d80303070f95ef7d510db Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Mon, 6 Jan 2025 13:47:54 -0600 Subject: [PATCH 02/12] move conversion factory test, fix services file for iceberg --- .../services/org.apache.xtable.spi.sync.ConversionTarget | 1 - .../apache/xtable/conversion/TestConversionTargetFactory.java | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) rename xtable-iceberg/src/main/resources/{ => META-INF}/services/org.apache.xtable.spi.sync.ConversionTarget (95%) rename {xtable-core => xtable-integration-tests}/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java (96%) diff --git a/xtable-iceberg/src/main/resources/services/org.apache.xtable.spi.sync.ConversionTarget b/xtable-iceberg/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget similarity index 95% rename from xtable-iceberg/src/main/resources/services/org.apache.xtable.spi.sync.ConversionTarget rename to xtable-iceberg/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget index f71a3991c..c876b47ac 100644 --- a/xtable-iceberg/src/main/resources/services/org.apache.xtable.spi.sync.ConversionTarget +++ b/xtable-iceberg/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget @@ -16,5 +16,4 @@ # limitations under the License. 
########################################################################## -org.apache.xtable.hudi.HudiConversionTarget org.apache.xtable.iceberg.IcebergConversionTarget diff --git a/xtable-core/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java b/xtable-integration-tests/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java similarity index 96% rename from xtable-core/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java rename to xtable-integration-tests/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java index cb5efacbc..99cef9991 100644 --- a/xtable-core/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java +++ b/xtable-integration-tests/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java @@ -32,7 +32,9 @@ import org.apache.xtable.model.storage.TableFormat; import org.apache.xtable.spi.sync.ConversionTarget; -@Disabled("TODO - figure out what to do with these cases") +/** + * This test is not in xtable-core because we want to test that the service loader can detect the implementations from the other modules. 
+ */ public class TestConversionTargetFactory { @Test From 8c55ecd2857d6e4daed982a664465213a6acb542 Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Mon, 6 Jan 2025 14:19:15 -0600 Subject: [PATCH 03/12] fix location of resources, minor build updates for ordering --- .github/workflows/mvn-ci-build.yml | 2 +- pom.xml | 7 + ...rg.apache.xtable.spi.sync.ConversionTarget | 0 .../test/resources/schemas/basic_schema.avsc | 197 ++++++++++++++++++ .../xtable/iceberg/TestIcebergDataHelper.java | 2 +- .../test/resources/schemas/catalog_sales.json | 40 ++++ 6 files changed, 246 insertions(+), 2 deletions(-) rename xtable-hudi/src/main/resources/{ => META-INF}/services/org.apache.xtable.spi.sync.ConversionTarget (100%) create mode 100644 xtable-hudi/src/test/resources/schemas/basic_schema.avsc create mode 100644 xtable-iceberg/src/test/resources/schemas/catalog_sales.json diff --git a/.github/workflows/mvn-ci-build.yml b/.github/workflows/mvn-ci-build.yml index c9337e81a..5320647cc 100644 --- a/.github/workflows/mvn-ci-build.yml +++ b/.github/workflows/mvn-ci-build.yml @@ -44,4 +44,4 @@ jobs: cache: maven - name: Build all module with Maven - run: mvn clean install -ntp -B + run: mvn clean install -ntp -B -T 2C diff --git a/pom.xml b/pom.xml index 360f696a2..adf958b02 100644 --- a/pom.xml +++ b/pom.xml @@ -651,6 +651,13 @@ org.apache.maven.plugins maven-surefire-plugin + + + + test + + + ${skipUTs} true diff --git a/xtable-hudi/src/main/resources/services/org.apache.xtable.spi.sync.ConversionTarget b/xtable-hudi/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget similarity index 100% rename from xtable-hudi/src/main/resources/services/org.apache.xtable.spi.sync.ConversionTarget rename to xtable-hudi/src/main/resources/META-INF/services/org.apache.xtable.spi.sync.ConversionTarget diff --git a/xtable-hudi/src/test/resources/schemas/basic_schema.avsc b/xtable-hudi/src/test/resources/schemas/basic_schema.avsc new file mode 100644 index 
000000000..613094c12 --- /dev/null +++ b/xtable-hudi/src/test/resources/schemas/basic_schema.avsc @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +{ + "type": "record", + "name": "Sample", + "namespace": "test", + "fields": [ + { + "name": "key", + "type": "string" + }, + { + "name": "ts", + "type": "long" + }, + { + "name": "level", + "type": "string" + }, + { + "name": "severity", + "type": ["null", "int"], + "default": null + }, + { + "name": "double_field", + "type": "double", + "default": 0.0 + }, + { + "name": "float_field", + "type": "float", + "default": 0.0 + }, + { + "name": "int_field", + "type": "int", + "default": 0 + }, + { + "name": "long_field", + "type": "long", + "default": 0 + }, + { + "name": "boolean_field", + "type": "boolean", + "default": false + }, + { + "name": "string_field", + "type": { + "type": "string", + "avro.java.string": "String" + }, + "default": "" + }, + { + "name": "bytes_field", + "type": "bytes", + "default": "" + }, + { + "name": "decimal_field", + "type": { + "type": "fixed", + "name": "decimal_field_testing", + "size": 10, + "logicalType": "decimal", + "precision": 20, + "scale": 2 + }, + "default": "\u0000" + }, + { + "name": "nested_record", + 
"type": [ + "null", + { + "type": "record", + "name": "Nested", + "namespace": "test", + "fields": [ + { + "name": "nested_int", + "type": "int", + "default": 0 + }, + { + "name": "level", + "type": "string" + } + ] + } + ], + "default": null + }, + { + "name":"nullable_map_field", + "type":["null", { + "type":"map", + "values": "Nested" + }], + "default": null + }, + { + "name": "array_field", + "type": { + "type": "array", + "items": "Nested" + }, + "default": [] + }, + { + "name": "enum_field", + "type": { + "type": "enum", + "name": "SampleEnum", + "symbols": [ + "FIRST", + "SECOND" + ] + }, + "default": "FIRST" + }, + { + "name": "date_nullable_field", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + } + ], + "default": null + }, + { + "name": "timestamp_millis_nullable_field", + "type": [ + "null", + { + "type": "long", + "logicalType": "timestamp-millis" + } + ], + "default": null + }, + { + "name": "timestamp_micros_nullable_field", + "type": [ + "null", + { + "type": "long", + "logicalType": "timestamp-micros" + } + ], + "default": null + }, + { + "name": "timestamp_local_millis_nullable_field", + "type": [ + "null", + { + "type": "long", + "logicalType": "local-timestamp-millis" + } + ], + "default": null + }, + { + "name": "timestamp_local_micros_nullable_field", + "type": [ + "null", + { + "type": "long", + "logicalType": "local-timestamp-micros" + } + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java index d90ba169f..247b2a0fd 100644 --- a/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java +++ b/xtable-iceberg/src/test/java/org/apache/xtable/iceberg/TestIcebergDataHelper.java @@ -126,7 +126,7 @@ public class TestIcebergDataHelper { String recordKeyField; List partitionFieldNames; - public static enum SchemaType { + 
public enum SchemaType { BASIC, COMMON, COMMON_WITH_ADDITIONAL_COLUMNS, diff --git a/xtable-iceberg/src/test/resources/schemas/catalog_sales.json b/xtable-iceberg/src/test/resources/schemas/catalog_sales.json new file mode 100644 index 000000000..1e4e59b05 --- /dev/null +++ b/xtable-iceberg/src/test/resources/schemas/catalog_sales.json @@ -0,0 +1,40 @@ +{ + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "cs_sold_time_sk", + "required" : false, + "type" : "int" + }, { + "id" : 2, + "name" : "cs_ship_date_sk", + "required" : false, + "type" : "int" + }, { + "id" : 3, + "name" : "cs_order_number", + "required" : false, + "type" : "long" + }, { + "id" : 4, + "name" : "cs_quantity", + "required" : false, + "type" : "int" + }, { + "id" : 5, + "name" : "cs_wholesale_cost", + "required" : false, + "type" : "decimal(7, 2)" + }, { + "id" : 6, + "name" : "cs_list_price", + "required" : false, + "type" : "decimal(7, 2)" + }, { + "id" : 7, + "name" : "cs_sold_date_sk", + "required" : true, + "type" : "int" + } ] +} \ No newline at end of file From 53d2f1a9347300ff613627879c4f9a0346efa23b Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Mon, 6 Jan 2025 14:24:39 -0600 Subject: [PATCH 04/12] mark core,api dependencies as provided in table format modules, move resource files for tests --- .../test/resources/schemas/basic_schema.avsc | 197 ------------------ .../test/resources/schemas/catalog_sales.json | 40 ---- xtable-delta/pom.xml | 2 + xtable-hudi/pom.xml | 2 + xtable-iceberg/pom.xml | 2 + .../partition_specs/catalog_sales.json | 0 6 files changed, 6 insertions(+), 237 deletions(-) delete mode 100644 xtable-core/src/test/resources/schemas/basic_schema.avsc delete mode 100644 xtable-core/src/test/resources/schemas/catalog_sales.json rename {xtable-core => xtable-iceberg}/src/test/resources/partition_specs/catalog_sales.json (100%) diff --git a/xtable-core/src/test/resources/schemas/basic_schema.avsc 
b/xtable-core/src/test/resources/schemas/basic_schema.avsc deleted file mode 100644 index 613094c12..000000000 --- a/xtable-core/src/test/resources/schemas/basic_schema.avsc +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -{ - "type": "record", - "name": "Sample", - "namespace": "test", - "fields": [ - { - "name": "key", - "type": "string" - }, - { - "name": "ts", - "type": "long" - }, - { - "name": "level", - "type": "string" - }, - { - "name": "severity", - "type": ["null", "int"], - "default": null - }, - { - "name": "double_field", - "type": "double", - "default": 0.0 - }, - { - "name": "float_field", - "type": "float", - "default": 0.0 - }, - { - "name": "int_field", - "type": "int", - "default": 0 - }, - { - "name": "long_field", - "type": "long", - "default": 0 - }, - { - "name": "boolean_field", - "type": "boolean", - "default": false - }, - { - "name": "string_field", - "type": { - "type": "string", - "avro.java.string": "String" - }, - "default": "" - }, - { - "name": "bytes_field", - "type": "bytes", - "default": "" - }, - { - "name": "decimal_field", - "type": { - "type": "fixed", - "name": "decimal_field_testing", - "size": 10, - "logicalType": "decimal", - "precision": 20, - "scale": 2 - }, - "default": "\u0000" - }, - { - "name": "nested_record", - "type": [ - "null", - { - "type": "record", - "name": "Nested", - "namespace": "test", - "fields": [ - { - "name": "nested_int", - "type": "int", - "default": 0 - }, - { - "name": "level", - "type": "string" - } - ] - } - ], - "default": null - }, - { - "name":"nullable_map_field", - "type":["null", { - "type":"map", - "values": "Nested" - }], - "default": null - }, - { - "name": "array_field", - "type": { - "type": "array", - "items": "Nested" - }, - "default": [] - }, - { - "name": "enum_field", - "type": { - "type": "enum", - "name": "SampleEnum", - "symbols": [ - "FIRST", - "SECOND" - ] - }, - "default": "FIRST" - }, - { - "name": "date_nullable_field", - "type": [ - "null", - { - "type": "int", - "logicalType": "date" - } - ], - "default": null - }, - { - "name": "timestamp_millis_nullable_field", - "type": [ - "null", - { - "type": "long", - "logicalType": "timestamp-millis" - } - ], - "default": null - }, - { - 
"name": "timestamp_micros_nullable_field", - "type": [ - "null", - { - "type": "long", - "logicalType": "timestamp-micros" - } - ], - "default": null - }, - { - "name": "timestamp_local_millis_nullable_field", - "type": [ - "null", - { - "type": "long", - "logicalType": "local-timestamp-millis" - } - ], - "default": null - }, - { - "name": "timestamp_local_micros_nullable_field", - "type": [ - "null", - { - "type": "long", - "logicalType": "local-timestamp-micros" - } - ], - "default": null - } - ] -} \ No newline at end of file diff --git a/xtable-core/src/test/resources/schemas/catalog_sales.json b/xtable-core/src/test/resources/schemas/catalog_sales.json deleted file mode 100644 index 1e4e59b05..000000000 --- a/xtable-core/src/test/resources/schemas/catalog_sales.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "cs_sold_time_sk", - "required" : false, - "type" : "int" - }, { - "id" : 2, - "name" : "cs_ship_date_sk", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "cs_order_number", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "cs_quantity", - "required" : false, - "type" : "int" - }, { - "id" : 5, - "name" : "cs_wholesale_cost", - "required" : false, - "type" : "decimal(7, 2)" - }, { - "id" : 6, - "name" : "cs_list_price", - "required" : false, - "type" : "decimal(7, 2)" - }, { - "id" : 7, - "name" : "cs_sold_date_sk", - "required" : true, - "type" : "int" - } ] -} \ No newline at end of file diff --git a/xtable-delta/pom.xml b/xtable-delta/pom.xml index 886f7db8b..ba2b15524 100644 --- a/xtable-delta/pom.xml +++ b/xtable-delta/pom.xml @@ -33,11 +33,13 @@ org.apache.xtable xtable-api ${project.version} + provided org.apache.xtable xtable-core_${scala.binary.version} ${project.version} + provided com.fasterxml.jackson.core diff --git a/xtable-hudi/pom.xml b/xtable-hudi/pom.xml index 89629bddf..02233c871 100644 --- a/xtable-hudi/pom.xml +++ 
b/xtable-hudi/pom.xml @@ -33,11 +33,13 @@ org.apache.xtable xtable-api ${project.version} + provided org.apache.xtable xtable-core_${scala.binary.version} ${project.version} + provided org.apache.xtable diff --git a/xtable-iceberg/pom.xml b/xtable-iceberg/pom.xml index 0e34b6f71..59a2dbe88 100644 --- a/xtable-iceberg/pom.xml +++ b/xtable-iceberg/pom.xml @@ -33,11 +33,13 @@ org.apache.xtable xtable-api ${project.version} + provided org.apache.xtable xtable-core_${scala.binary.version} ${project.version} + provided diff --git a/xtable-core/src/test/resources/partition_specs/catalog_sales.json b/xtable-iceberg/src/test/resources/partition_specs/catalog_sales.json similarity index 100% rename from xtable-core/src/test/resources/partition_specs/catalog_sales.json rename to xtable-iceberg/src/test/resources/partition_specs/catalog_sales.json From a3610a7609bac36ef6e325dc65c9051a4a71e4c7 Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Mon, 6 Jan 2025 14:56:37 -0600 Subject: [PATCH 05/12] spotless --- .../apache/xtable/conversion/TestConversionTargetFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java b/xtable-integration-tests/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java index 99cef9991..8891fefb9 100644 --- a/xtable-integration-tests/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java +++ b/xtable-integration-tests/src/test/java/org/apache/xtable/conversion/TestConversionTargetFactory.java @@ -25,7 +25,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.hadoop.conf.Configuration; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.apache.xtable.exception.NotSupportedException; @@ -33,7 +32,8 @@ import org.apache.xtable.spi.sync.ConversionTarget; /** - * This test is not in xtable-core because we want to test that the 
service loader can detect the implementations from the other modules. + * This test is not in xtable-core because we want to test that the service loader can detect the + * implementations from the other modules. */ public class TestConversionTargetFactory { From 7975e44c5f10658f2a2ad4e5180ed743dc50db3f Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Mon, 6 Jan 2025 20:50:21 -0600 Subject: [PATCH 06/12] try to prevent npe in test --- .../src/test/java/org/apache/xtable/hudi/HudiTestUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java index c701a1d54..0a4ee282b 100644 --- a/xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java +++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java @@ -67,7 +67,7 @@ public static HoodieWriteConfig getHoodieWriteConfig(HoodieTableMetaClient metaC return getHoodieWriteConfig(metaClient, null); } - static HoodieWriteConfig getHoodieWriteConfig(HoodieTableMetaClient metaClient, Schema schema) { + synchronized static HoodieWriteConfig getHoodieWriteConfig(HoodieTableMetaClient metaClient, Schema schema) { Properties properties = new Properties(); properties.setProperty(HoodieMetadataConfig.AUTO_INITIALIZE.key(), "false"); return HoodieWriteConfig.newBuilder() From a10c49ce2ca5cffa5a24875e5cc911bcf872894b Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Mon, 6 Jan 2025 21:03:04 -0600 Subject: [PATCH 07/12] fix ordering --- .../src/test/java/org/apache/xtable/hudi/HudiTestUtil.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java index 0a4ee282b..b7bccff24 100644 --- a/xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java +++ 
b/xtable-hudi/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java @@ -67,7 +67,8 @@ public static HoodieWriteConfig getHoodieWriteConfig(HoodieTableMetaClient metaC return getHoodieWriteConfig(metaClient, null); } - synchronized static HoodieWriteConfig getHoodieWriteConfig(HoodieTableMetaClient metaClient, Schema schema) { + static synchronized HoodieWriteConfig getHoodieWriteConfig( + HoodieTableMetaClient metaClient, Schema schema) { Properties properties = new Properties(); properties.setProperty(HoodieMetadataConfig.AUTO_INITIALIZE.key(), "false"); return HoodieWriteConfig.newBuilder() From 6d7b0dcd4d00352e784c04ec125c4d26c957b131 Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Wed, 8 Jan 2025 10:45:21 -0600 Subject: [PATCH 08/12] remove unused dependencies, start building bundles --- pom.xml | 11 ++ xtable-core/pom.xml | 37 +----- xtable-delta/pom.xml | 86 ++++++++++++-- xtable-hudi/pom.xml | 111 ++++++++++++++---- .../hudi/TestHudiFileStatsExtractor.java | 2 - xtable-iceberg/pom.xml | 101 ++++++++++------ xtable-integration-tests/pom.xml | 109 ++++++----------- 7 files changed, 289 insertions(+), 168 deletions(-) diff --git a/pom.xml b/pom.xml index 321434905..806f37682 100644 --- a/pom.xml +++ b/pom.xml @@ -154,6 +154,11 @@ parquet-avro ${parquet.version} + + org.apache.parquet + parquet-column + ${parquet.version} + @@ -322,6 +327,12 @@ ${spark.version} provided + + org.apache.spark + spark-catalyst_${scala.binary.version} + ${spark.version} + provided + commons-cli diff --git a/xtable-core/pom.xml b/xtable-core/pom.xml index 723ddeec9..d06100dd4 100644 --- a/xtable-core/pom.xml +++ b/xtable-core/pom.xml @@ -34,33 +34,6 @@ xtable-api ${project.version} - - org.apache.xtable - xtable-hudi-support-utils - ${project.version} - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - com.fasterxml.jackson.module - jackson-module-scala_${scala.binary.version} - - - com.google.guava - guava - - - - - 
org.scala-lang - scala-library - @@ -74,10 +47,6 @@ org.apache.logging.log4j log4j-api - - org.apache.logging.log4j - log4j-1.2-api - @@ -114,6 +83,12 @@ log4j-slf4j2-impl test + + + com.google.guava + guava + test + diff --git a/xtable-delta/pom.xml b/xtable-delta/pom.xml index ba2b15524..0ec5dab08 100644 --- a/xtable-delta/pom.xml +++ b/xtable-delta/pom.xml @@ -50,18 +50,18 @@ jackson-databind - com.fasterxml.jackson.module - jackson-module-scala_${scala.binary.version} + com.fasterxml.jackson.core + jackson-annotations + com.google.guava guava - - org.apache.avro - avro + org.apache.commons + commons-lang3 @@ -93,10 +93,6 @@ org.apache.logging.log4j log4j-api - - org.apache.logging.log4j - log4j-1.2-api - @@ -104,6 +100,12 @@ spark-core_${scala.binary.version} provided + + org.apache.spark + spark-catalyst_${scala.binary.version} + ${spark.version} + provided + org.apache.spark spark-sql_${scala.binary.version} @@ -169,6 +171,72 @@ + + org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + + + + + + + LICENSE + NOTICE + NOTICE.txt + + + + META-INF/LICENSE + target/classes/META-INF/LICENSE + + + META-INF/NOTICE + target/classes/META-INF/NOTICE + + + + + com.fasterxml.jackson.core:jackson-core + com.fasterxml.jackson.core:jackson-databind + com.fasterxml.jackson.core:jackson-annotations + org.scala-lang:scala-library + io.delta:delta-core_${scala.binary.version} + io.delta:delta-storage + org.apache.commons:commons-lang3 + com.google.guava:guava + + + + + com.fasterxml.jackson. + org.apache.xtable.shade.com.fasterxml.jackson. + + + io.delta. + org.apache.xtable.shade.io.delta. + + + org.apache.spark.sql.delta. + org.apache.xtable.shade.org.apache.spark.sql.delta. + + + org.apache.commons. + org.apache.xtable.shade.org.apache.commons. + + + com.google.guava. + org.apache.xtable.shade.com.google.guava. 
+ + + + + + diff --git a/xtable-hudi/pom.xml b/xtable-hudi/pom.xml index 02233c871..ebc5d3e62 100644 --- a/xtable-hudi/pom.xml +++ b/xtable-hudi/pom.xml @@ -46,18 +46,7 @@ xtable-hudi-support-utils ${project.version} - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - com.fasterxml.jackson.module - jackson-module-scala_${scala.binary.version} - + com.google.guava guava @@ -69,19 +58,27 @@ avro + + + org.apache.parquet + parquet-column + + org.apache.hudi hudi-spark${spark.version.prefix}-bundle_${scala.binary.version} test - - org.apache.hudi - hudi-common - org.apache.hudi hudi-java-client + + + org.apache.hbase + hbase-server + + @@ -96,12 +93,8 @@ org.apache.logging.log4j log4j-api - - org.apache.logging.log4j - log4j-1.2-api - - + org.apache.spark spark-core_${scala.binary.version} @@ -149,6 +142,12 @@ test + + org.apache.commons + commons-lang3 + test + + org.apache.xtable xtable-core_${scala.binary.version} @@ -172,6 +171,76 @@ + + org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + + + + + + + LICENSE + NOTICE + NOTICE.txt + + + + META-INF/LICENSE + target/classes/META-INF/LICENSE + + + META-INF/NOTICE + target/classes/META-INF/NOTICE + + + + + org.apache.xtable:xtable-hudi-support-utils + com.fasterxml.jackson.core:jackson-databind + com.fasterxml.jackson.datatype:jackson-datatype-jsr310 + com.fasterxml.jackson.core:jackson-core + org.apache.parquet:parquet-column + org.apache.parquet:parquet-common + org.apache.parquet:parquet-encoding + org.apache.hudi:hudi-java-client + org.apache.hudi:hudi-client-common + org.apache.hudi:hudi-common + org.apache.avro:avro + com.google.guava:guava + + + + + org.apache.parquet. + org.apache.xtable.shade.org.apache.parquet. + + + org.apache.hudi. + org.apache.xtable.shade.org.apache.hudi. + + + org.apache.avro. + org.apache.xtable.shade.org.apache.avro. + + + com.google.guava. + org.apache.xtable.shade.com.google.guava. + + + com.fasterxml.jackson. 
+ org.apache.xtable.shade.com.fasterxml.jackson. + + + + + + diff --git a/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java index 4958a8399..5b8ce32b1 100644 --- a/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java +++ b/xtable-hudi/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java @@ -49,7 +49,6 @@ import org.apache.parquet.avro.AvroParquetWriter; import org.apache.parquet.hadoop.ParquetWriter; import org.apache.parquet.hadoop.util.HadoopOutputFile; -import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -447,7 +446,6 @@ private GenericRecord createRecord( return record; } - @NotNull private GenericData.Record getNestedRecord(Integer nestedIntValue) { GenericData.Record nested = new GenericData.Record(NESTED_SCHEMA); nested.put("nested_int", nestedIntValue); diff --git a/xtable-iceberg/pom.xml b/xtable-iceberg/pom.xml index 59a2dbe88..d741e1e08 100644 --- a/xtable-iceberg/pom.xml +++ b/xtable-iceberg/pom.xml @@ -42,23 +42,6 @@ provided - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - com.fasterxml.jackson.module - jackson-module-scala_${scala.binary.version} - - - com.google.guava - guava - - org.apache.avro @@ -75,38 +58,35 @@ iceberg-api - - - org.apache.hadoop - hadoop-common - provided - - org.apache.logging.log4j log4j-api + + - org.apache.logging.log4j - log4j-1.2-api + com.google.guava + guava + test - + + org.apache.hadoop + hadoop-common + provided + + + org.apache.iceberg iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version} test - org.apache.spark - spark-core_${scala.binary.version} - provided - - - org.apache.spark - spark-sql_${scala.binary.version} - provided + org.apache.hadoop + hadoop-client-runtime + test @@ -168,6 +148,57 @@ + + 
org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + + + + + + + LICENSE + NOTICE + NOTICE.txt + + + + META-INF/LICENSE + target/classes/META-INF/LICENSE + + + META-INF/NOTICE + target/classes/META-INF/NOTICE + + + + + org.apache.iceberg:iceberg-core + org.apache.iceberg:iceberg-api + org.apache.iceberg:iceberg-common + org.apache.iceberg:iceberg-bundled-guava + org.apache.avro:avro + + + + + org.apache.iceberg. + org.apache.xtable.shade.org.apache.iceberg. + + + org.apache.avro. + org.apache.xtable.shade.org.apache.avro. + + + + + + diff --git a/xtable-integration-tests/pom.xml b/xtable-integration-tests/pom.xml index 3b1d66afa..cf960b2e6 100644 --- a/xtable-integration-tests/pom.xml +++ b/xtable-integration-tests/pom.xml @@ -29,63 +29,21 @@ XTable Project Integration Test Suite - - org.apache.xtable - xtable-api - ${project.version} - - - org.apache.xtable - xtable-core_${scala.binary.version} - ${project.version} - - - org.apache.xtable - xtable-delta_${scala.binary.version} - ${project.version} - - - org.apache.xtable - xtable-iceberg - ${project.version} - - - org.apache.xtable - xtable-hudi - ${project.version} - - - org.apache.xtable - xtable-hudi-support-utils - ${project.version} - com.fasterxml.jackson.core jackson-core + test com.fasterxml.jackson.core jackson-databind + test - - com.fasterxml.jackson.module - jackson-module-scala_${scala.binary.version} - + com.google.guava guava - - - - - org.apache.avro - avro - - - - - org.scala-lang - scala-library + test @@ -94,33 +52,15 @@ hudi-spark${spark.version.prefix}-bundle_${scala.binary.version} test - - org.apache.hudi - hudi-common - - - org.apache.hudi - hudi-java-client - - org.apache.iceberg iceberg-core + test org.apache.iceberg iceberg-api - - - - - io.delta - delta-core_${scala.binary.version} - - - io.delta - delta-standalone_${scala.binary.version} test @@ -150,18 +90,47 @@ org.apache.spark spark-core_${scala.binary.version} - provided + test org.apache.spark 
spark-sql_${scala.binary.version} - provided + test + + + org.apache.spark + spark-catalyst_${scala.binary.version} + test - - org.mockito - mockito-core + org.apache.xtable + xtable-api + ${project.version} + test + + + org.apache.xtable + xtable-core_${scala.binary.version} + ${project.version} + test + + + org.apache.xtable + xtable-delta_${scala.binary.version} + ${project.version} + test + + + org.apache.xtable + xtable-iceberg + ${project.version} + test + + + org.apache.xtable + xtable-hudi + ${project.version} test From 06ff4e7bf88499adfa0eab7375c16f53a5bb4c00 Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Mon, 13 Jan 2025 16:32:42 -0600 Subject: [PATCH 09/12] add no-op hbase metrics, include more dependencies in hudi jar --- pom.xml | 5 + xtable-hudi/pom.xml | 42 +++++- .../NoOpMetricsRegionServerSourceFactory.java | 129 ++++++++++++++++++ ...ionserver.MetricsRegionServerSourceFactory | 18 +++ ...ionserver.MetricsRegionServerSourceFactory | 18 +++ xtable-integration-tests/pom.xml | 15 ++ .../apache/xtable/ITConversionController.java | 4 +- .../org/apache/xtable/loadtest/LoadTest.java | 2 +- 8 files changed, 224 insertions(+), 9 deletions(-) create mode 100644 xtable-hudi/src/main/java/org/apache/xtable/hbase/NoOpMetricsRegionServerSourceFactory.java create mode 100644 xtable-hudi/src/main/resources/META-INF/services/org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory create mode 100644 xtable-hudi/src/main/resources/META-INF/services/org.apache.xtable.shade.org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory diff --git a/pom.xml b/pom.xml index 806f37682..eef7afd13 100644 --- a/pom.xml +++ b/pom.xml @@ -159,6 +159,11 @@ parquet-column ${parquet.version} + + org.apache.parquet + parquet-hadoop + ${parquet.version} + diff --git a/xtable-hudi/pom.xml b/xtable-hudi/pom.xml index ebc5d3e62..c5d807374 100644 --- a/xtable-hudi/pom.xml +++ b/xtable-hudi/pom.xml @@ -63,6 +63,14 @@ org.apache.parquet parquet-column + + 
org.apache.parquet + parquet-avro + + + org.apache.parquet + parquet-hadoop + @@ -73,12 +81,6 @@ org.apache.hudi hudi-java-client - - - org.apache.hbase - hbase-server - - @@ -181,6 +183,7 @@ shade + true @@ -205,14 +208,30 @@ com.fasterxml.jackson.core:jackson-databind com.fasterxml.jackson.datatype:jackson-datatype-jsr310 com.fasterxml.jackson.core:jackson-core + com.fasterxml.jackson.core:jackson-annotations org.apache.parquet:parquet-column + org.apache.parquet:parquet-avro org.apache.parquet:parquet-common org.apache.parquet:parquet-encoding + org.apache.parquet:parquet-hadoop + org.apache.parquet:parquet-format-structures org.apache.hudi:hudi-java-client org.apache.hudi:hudi-client-common org.apache.hudi:hudi-common org.apache.avro:avro com.google.guava:guava + + org.apache.hbase:hbase-client + org.apache.hbase:hbase-common + org.apache.hbase:hbase-hadoop-compat + org.apache.hbase:hbase-metrics + org.apache.hbase:hbase-metrics-api + org.apache.hbase:hbase-protocol + org.apache.hbase:hbase-protocol-shaded + org.apache.hbase:hbase-server + org.apache.hbase.thirdparty:hbase-shaded-miscellaneous + org.apache.hbase.thirdparty:hbase-shaded-netty + org.apache.hbase.thirdparty:hbase-shaded-protobuf @@ -223,6 +242,17 @@ org.apache.hudi. org.apache.xtable.shade.org.apache.hudi. + + org.apache.hudi.io.storage.HoodieHBaseKVComparator + + + + org.apache.hadoop.hbase. + org.apache.xtable.shade.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.CellComparator + org.apache.hadoop.hbase.CellComparatorImpl + org.apache.avro. 
diff --git a/xtable-hudi/src/main/java/org/apache/xtable/hbase/NoOpMetricsRegionServerSourceFactory.java b/xtable-hudi/src/main/java/org/apache/xtable/hbase/NoOpMetricsRegionServerSourceFactory.java new file mode 100644 index 000000000..b8419ccbf --- /dev/null +++ b/xtable-hudi/src/main/java/org/apache/xtable/hbase/NoOpMetricsRegionServerSourceFactory.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.xtable.hbase; + +import org.apache.hadoop.hbase.io.MetricsIOSource; +import org.apache.hadoop.hbase.io.MetricsIOWrapper; +import org.apache.hadoop.hbase.regionserver.MetricsHeapMemoryManagerSource; +import org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource; +import org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory; +import org.apache.hadoop.hbase.regionserver.MetricsRegionServerWrapper; +import org.apache.hadoop.hbase.regionserver.MetricsRegionSource; +import org.apache.hadoop.hbase.regionserver.MetricsRegionWrapper; +import org.apache.hadoop.hbase.regionserver.MetricsTableAggregateSource; +import org.apache.hadoop.hbase.regionserver.MetricsTableSource; +import org.apache.hadoop.hbase.regionserver.MetricsTableWrapperAggregate; +import org.apache.hadoop.hbase.regionserver.MetricsUserAggregateSource; +import org.apache.hadoop.hbase.regionserver.MetricsUserSource; + +/** Provides a No-Op metrics implementation for the HFile required by Hudi. 
*/ +public class NoOpMetricsRegionServerSourceFactory implements MetricsRegionServerSourceFactory { + @Override + public MetricsRegionServerSource createServer(MetricsRegionServerWrapper regionServerWrapper) { + return null; + } + + @Override + public MetricsRegionSource createRegion(MetricsRegionWrapper wrapper) { + return null; + } + + @Override + public MetricsUserSource createUser(String shortUserName) { + return null; + } + + @Override + public MetricsUserAggregateSource getUserAggregate() { + return null; + } + + @Override + public MetricsTableSource createTable(String table, MetricsTableWrapperAggregate wrapper) { + return null; + } + + @Override + public MetricsTableAggregateSource getTableAggregate() { + return null; + } + + @Override + public MetricsHeapMemoryManagerSource getHeapMemoryManager() { + return null; + } + + @Override + public MetricsIOSource createIO(MetricsIOWrapper wrapper) { + return new NoOpMetricsIOSource(); + } + + private static class NoOpMetricsIOSource implements MetricsIOSource { + + @Override + public void updateFsReadTime(long t) {} + + @Override + public void updateFsPReadTime(long t) {} + + @Override + public void updateFsWriteTime(long t) {} + + @Override + public void init() {} + + @Override + public void setGauge(String gaugeName, long value) {} + + @Override + public void incGauge(String gaugeName, long delta) {} + + @Override + public void decGauge(String gaugeName, long delta) {} + + @Override + public void removeMetric(String key) {} + + @Override + public void incCounters(String counterName, long delta) {} + + @Override + public void updateHistogram(String name, long value) {} + + @Override + public String getMetricsContext() { + return ""; + } + + @Override + public String getMetricsDescription() { + return ""; + } + + @Override + public String getMetricsJmxContext() { + return ""; + } + + @Override + public String getMetricsName() { + return ""; + } + } +} diff --git 
a/xtable-hudi/src/main/resources/META-INF/services/org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory b/xtable-hudi/src/main/resources/META-INF/services/org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory new file mode 100644 index 000000000..dab50e041 --- /dev/null +++ b/xtable-hudi/src/main/resources/META-INF/services/org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +org.apache.xtable.hbase.NoOpMetricsRegionServerSourceFactory diff --git a/xtable-hudi/src/main/resources/META-INF/services/org.apache.xtable.shade.org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory b/xtable-hudi/src/main/resources/META-INF/services/org.apache.xtable.shade.org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory new file mode 100644 index 000000000..dab50e041 --- /dev/null +++ b/xtable-hudi/src/main/resources/META-INF/services/org.apache.xtable.shade.org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +org.apache.xtable.hbase.NoOpMetricsRegionServerSourceFactory diff --git a/xtable-integration-tests/pom.xml b/xtable-integration-tests/pom.xml index cf960b2e6..eb11cf1b4 100644 --- a/xtable-integration-tests/pom.xml +++ b/xtable-integration-tests/pom.xml @@ -52,6 +52,18 @@ hudi-spark${spark.version.prefix}-bundle_${scala.binary.version} test + + org.apache.hudi + hudi-java-client + + + org.apache.hbase + hbase-server + + + test + + org.apache.iceberg @@ -119,18 +131,21 @@ org.apache.xtable xtable-delta_${scala.binary.version} ${project.version} + bundled test org.apache.xtable xtable-iceberg ${project.version} + bundled test org.apache.xtable xtable-hudi ${project.version} + bundled test diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java b/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java index 479eea7ed..3922b22ed 100644 --- a/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java +++ b/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java @@ -70,9 +70,7 @@ import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; -import 
org.apache.hudi.common.table.timeline.HoodieInstant; -import org.apache.iceberg.Snapshot; import org.apache.iceberg.Table; import org.apache.iceberg.hadoop.HadoopTables; @@ -98,6 +96,8 @@ import org.apache.xtable.iceberg.IcebergConversionSourceProvider; import org.apache.xtable.model.storage.TableFormat; import org.apache.xtable.model.sync.SyncMode; +import org.apache.xtable.shade.org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.xtable.shade.org.apache.iceberg.Snapshot; public class ITConversionController { @TempDir public static Path tempDir; diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java b/xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java index 63c83bda7..b376d2c5a 100644 --- a/xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java +++ b/xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java @@ -34,7 +34,6 @@ import org.junit.jupiter.api.io.TempDir; import org.apache.hudi.common.model.HoodieTableType; -import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.config.HoodieArchivalConfig; import org.apache.xtable.GenericTable; @@ -47,6 +46,7 @@ import org.apache.xtable.hudi.TestJavaHudiTable; import org.apache.xtable.model.storage.TableFormat; import org.apache.xtable.model.sync.SyncMode; +import org.apache.xtable.shade.org.apache.hudi.common.table.timeline.HoodieInstant; /** * Tests that can be run manually to simulate lots of commits/partitions/files/etc. 
to understand From e2f50b704f4f13c0dce73c0076cbad0f553291cd Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Mon, 13 Jan 2025 20:14:09 -0600 Subject: [PATCH 10/12] add jackson-scala for delta dependency --- xtable-delta/pom.xml | 9 +++++---- xtable-hudi/pom.xml | 19 ++++++++++--------- xtable-integration-tests/pom.xml | 4 ++++ .../apache/xtable/ITConversionController.java | 2 +- .../org/apache/xtable/loadtest/LoadTest.java | 2 +- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/xtable-delta/pom.xml b/xtable-delta/pom.xml index 0ec5dab08..9704519e5 100644 --- a/xtable-delta/pom.xml +++ b/xtable-delta/pom.xml @@ -53,6 +53,10 @@ com.fasterxml.jackson.core jackson-annotations + + com.fasterxml.jackson.module + jackson-module-scala_${scala.binary.version} + com.google.guava @@ -204,6 +208,7 @@ com.fasterxml.jackson.core:jackson-core com.fasterxml.jackson.core:jackson-databind com.fasterxml.jackson.core:jackson-annotations + com.fasterxml.jackson.module:jackson-module-scala_${scala.binary.version} org.scala-lang:scala-library io.delta:delta-core_${scala.binary.version} io.delta:delta-storage @@ -220,10 +225,6 @@ io.delta. org.apache.xtable.shade.io.delta. - - org.apache.spark.sql.delta. - org.apache.xtable.shade.org.apache.spark.sql.delta. - org.apache.commons. org.apache.xtable.shade.org.apache.commons. diff --git a/xtable-hudi/pom.xml b/xtable-hudi/pom.xml index c5d807374..e142d9862 100644 --- a/xtable-hudi/pom.xml +++ b/xtable-hudi/pom.xml @@ -232,6 +232,7 @@ org.apache.hbase.thirdparty:hbase-shaded-miscellaneous org.apache.hbase.thirdparty:hbase-shaded-netty org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.htrace:htrace-core4 @@ -239,24 +240,24 @@ org.apache.parquet. org.apache.xtable.shade.org.apache.parquet. - - org.apache.hudi. - org.apache.xtable.shade.org.apache.hudi. - - org.apache.hudi.io.storage.HoodieHBaseKVComparator - - org.apache.hadoop.hbase. org.apache.xtable.shade.org.apache.hadoop.hbase. 
+ org.apache.hadoop.hbase.KeyValue$KeyComparator org.apache.hadoop.hbase.CellComparator org.apache.hadoop.hbase.CellComparatorImpl + - org.apache.avro. - org.apache.xtable.shade.org.apache.avro. + org.apache.hadoop.hbase. + org.apache.hudi.org.apache.hadoop.hbase. + + org.apache.hadoop.hbase.KeyValue$KeyComparator + org.apache.hadoop.hbase.CellComparator + org.apache.hadoop.hbase.CellComparatorImpl + com.google.guava. diff --git a/xtable-integration-tests/pom.xml b/xtable-integration-tests/pom.xml index eb11cf1b4..941687f73 100644 --- a/xtable-integration-tests/pom.xml +++ b/xtable-integration-tests/pom.xml @@ -60,6 +60,10 @@ org.apache.hbase hbase-server + + org.apache.hbase + hbase-client + test diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java b/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java index 3922b22ed..a8cbd451b 100644 --- a/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java +++ b/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java @@ -70,6 +70,7 @@ import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.iceberg.Table; import org.apache.iceberg.hadoop.HadoopTables; @@ -96,7 +97,6 @@ import org.apache.xtable.iceberg.IcebergConversionSourceProvider; import org.apache.xtable.model.storage.TableFormat; import org.apache.xtable.model.sync.SyncMode; -import org.apache.xtable.shade.org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.xtable.shade.org.apache.iceberg.Snapshot; public class ITConversionController { diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java b/xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java index b376d2c5a..63c83bda7 100644 
--- a/xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java +++ b/xtable-integration-tests/src/test/java/org/apache/xtable/loadtest/LoadTest.java @@ -34,6 +34,7 @@ import org.junit.jupiter.api.io.TempDir; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.config.HoodieArchivalConfig; import org.apache.xtable.GenericTable; @@ -46,7 +47,6 @@ import org.apache.xtable.hudi.TestJavaHudiTable; import org.apache.xtable.model.storage.TableFormat; import org.apache.xtable.model.sync.SyncMode; -import org.apache.xtable.shade.org.apache.hudi.common.table.timeline.HoodieInstant; /** * Tests that can be run manually to simulate lots of commits/partitions/files/etc. to understand From ad345848eb132429e899b13235a845bd1ebdb04a Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Wed, 15 Jan 2025 10:25:19 -0600 Subject: [PATCH 11/12] setup test framework for validating bundles --- pom.xml | 5 + .../xtable/iceberg/IcebergCatalogConfig.java | 0 xtable-hudi/pom.xml | 19 +- xtable-iceberg/pom.xml | 6 + xtable-integration-tests/pom.xml | 32 +- .../apache/xtable/ConversionTestingBase.java | 279 ++++++++++++++++++ .../org/apache/xtable/ITBundleValidation.java | 172 +++++++++++ .../apache/xtable/ITConversionController.java | 244 +-------------- xtable-utilities/pom.xml | 15 +- .../org/apache/xtable/utilities/RunSync.java | 16 +- 10 files changed, 518 insertions(+), 270 deletions(-) rename {xtable-iceberg => xtable-core}/src/main/java/org/apache/xtable/iceberg/IcebergCatalogConfig.java (100%) create mode 100644 xtable-integration-tests/src/test/java/org/apache/xtable/ConversionTestingBase.java create mode 100644 xtable-integration-tests/src/test/java/org/apache/xtable/ITBundleValidation.java diff --git a/pom.xml b/pom.xml index eef7afd13..9efd0b942 100644 --- a/pom.xml +++ b/pom.xml @@ -703,6 +703,11 @@ false -Xmx1024m 120 + + ${maven.multiModuleProjectDirectory} + 
${project.version} + ${scala.binary.version} + diff --git a/xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergCatalogConfig.java b/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergCatalogConfig.java similarity index 100% rename from xtable-iceberg/src/main/java/org/apache/xtable/iceberg/IcebergCatalogConfig.java rename to xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergCatalogConfig.java diff --git a/xtable-hudi/pom.xml b/xtable-hudi/pom.xml index e142d9862..a7a1f8f71 100644 --- a/xtable-hudi/pom.xml +++ b/xtable-hudi/pom.xml @@ -82,6 +82,12 @@ org.apache.hudi hudi-java-client + + + com.esotericsoftware + kryo + runtime + @@ -224,18 +230,23 @@ org.apache.hbase:hbase-client org.apache.hbase:hbase-common org.apache.hbase:hbase-hadoop-compat - org.apache.hbase:hbase-metrics org.apache.hbase:hbase-metrics-api - org.apache.hbase:hbase-protocol org.apache.hbase:hbase-protocol-shaded org.apache.hbase:hbase-server org.apache.hbase.thirdparty:hbase-shaded-miscellaneous org.apache.hbase.thirdparty:hbase-shaded-netty org.apache.hbase.thirdparty:hbase-shaded-protobuf org.apache.htrace:htrace-core4 + com.esotericsoftware:kryo + org.openjdk.jol:jol-core + org.lz4:lz4-java + + com.esotericsoftware + org.apache.xtable.shade.com.esotericsoftware + org.apache.parquet. org.apache.xtable.shade.org.apache.parquet. @@ -267,6 +278,10 @@ com.fasterxml.jackson. org.apache.xtable.shade.com.fasterxml.jackson. + + net.jpountz. + org.apache.xtable.shade.net.jpountz. + diff --git a/xtable-iceberg/pom.xml b/xtable-iceberg/pom.xml index d741e1e08..65614e610 100644 --- a/xtable-iceberg/pom.xml +++ b/xtable-iceberg/pom.xml @@ -183,9 +183,11 @@ org.apache.iceberg:iceberg-common org.apache.iceberg:iceberg-bundled-guava org.apache.avro:avro + com.github.ben-manes.caffeine:caffeine + org.apache.iceberg. org.apache.xtable.shade.org.apache.iceberg. @@ -194,6 +196,10 @@ org.apache.avro. org.apache.xtable.shade.org.apache.avro. + + com.github.ben-manes. 
+ org.apache.xtable.shade.com.github.ben-manes. + diff --git a/xtable-integration-tests/pom.xml b/xtable-integration-tests/pom.xml index 941687f73..12ca77cad 100644 --- a/xtable-integration-tests/pom.xml +++ b/xtable-integration-tests/pom.xml @@ -91,10 +91,12 @@ org.apache.logging.log4j log4j-api + test org.apache.logging.log4j log4j-1.2-api + test @@ -106,17 +108,17 @@ org.apache.spark spark-core_${scala.binary.version} - test + runtime org.apache.spark spark-sql_${scala.binary.version} - test + runtime org.apache.spark spark-catalyst_${scala.binary.version} - test + runtime @@ -135,21 +137,18 @@ org.apache.xtable xtable-delta_${scala.binary.version} ${project.version} - bundled test org.apache.xtable xtable-iceberg ${project.version} - bundled test org.apache.xtable xtable-hudi ${project.version} - bundled test @@ -217,6 +216,14 @@ test-jar test + + + + org.apache.xtable + xtable-utilities_${scala.binary.version} + ${project.version} + test + @@ -230,6 +237,19 @@ true + + + org.apache.maven.plugins + maven-shade-plugin + + + bundle-spark-test-jar + + spark-testing-bundle_${scala.binary.version} + + + + diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/ConversionTestingBase.java b/xtable-integration-tests/src/test/java/org/apache/xtable/ConversionTestingBase.java new file mode 100644 index 000000000..cf3fe54b5 --- /dev/null +++ b/xtable-integration-tests/src/test/java/org/apache/xtable/ConversionTestingBase.java @@ -0,0 +1,279 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.xtable;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Path;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.Arrays;
+import java.util.Base64;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.UUID;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.io.TempDir;
+
+import org.apache.hudi.client.HoodieReadClient;
+import org.apache.hudi.common.config.HoodieMetadataConfig;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+
+import org.apache.xtable.hudi.HudiTestUtil;
+import org.apache.xtable.model.storage.TableFormat;
+
+/**
+ * Shared fixture for the conversion integration tests. It owns the temporary directory, the Spark
+ * session/context used to read tables back, and the helpers that assert a source table and its
+ * synced targets return the same rows when read through Spark.
+ */
+class ConversionTestingBase {
+  @TempDir public static Path tempDir;
+  protected static final DateTimeFormatter DATE_FORMAT =
+      DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS").withZone(ZoneId.of("UTC"));
+  protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+  protected static final ObjectMapper YAML_MAPPER = new ObjectMapper(new YAMLFactory());
+
+  /** Name of the column that needs decoding before the two sides can be compared. */
+  private static final String UUID_FIELD = "uuid_field";
+  /** The column name as it appears as a key in a serialized JSON row. */
+  private static final String UUID_FIELD_JSON_KEY = "\"" + UUID_FIELD + "\"";
+
+  protected static JavaSparkContext jsc;
+  protected static SparkSession sparkSession;
+
+  /** Creates the Spark session and context shared by every test in the class. */
+  @BeforeAll
+  public static void setupOnce() {
+    SparkConf sparkConf = HudiTestUtil.getSparkConf(tempDir);
+    sparkSession =
+        SparkSession.builder().config(HoodieReadClient.addHoodieSupport(sparkConf)).getOrCreate();
+    sparkSession
+        .sparkContext()
+        .hadoopConfiguration()
+        .set("parquet.avro.write-old-list-structure", "false");
+    jsc = JavaSparkContext.fromSparkContext(sparkSession.sparkContext());
+  }
+
+  /** Closes the Spark context and session if they were created. */
+  @AfterAll
+  public static void teardown() {
+    if (jsc != null) {
+      jsc.close();
+    }
+    if (sparkSession != null) {
+      sparkSession.close();
+    }
+  }
+
+  /**
+   * Lists every known table format except the given one.
+   *
+   * @param sourceTableFormat the format to exclude
+   * @return all values of {@link TableFormat#values()} that differ from {@code sourceTableFormat}
+   */
+  protected static List<String> getOtherFormats(String sourceTableFormat) {
+    return Arrays.stream(TableFormat.values())
+        .filter(format -> !format.equals(sourceTableFormat))
+        .collect(Collectors.toList());
+  }
+
+  /**
+   * Compares source and targets after applying {@code filter}, with no reader options and no
+   * expected row count (the filtered source must simply be non-empty).
+   */
+  protected void checkDatasetEquivalenceWithFilter(
+      String sourceFormat,
+      GenericTable<?, ?> sourceTable,
+      List<String> targetFormats,
+      String filter) {
+    checkDatasetEquivalence(
+        sourceFormat,
+        sourceTable,
+        Collections.emptyMap(),
+        targetFormats,
+        Collections.emptyMap(),
+        null,
+        filter);
+  }
+
+  /** Compares source and targets with no reader options and no row filter. */
+  protected void checkDatasetEquivalence(
+      String sourceFormat,
+      GenericTable<?, ?> sourceTable,
+      List<String> targetFormats,
+      Integer expectedCount) {
+    checkDatasetEquivalence(
+        sourceFormat,
+        sourceTable,
+        Collections.emptyMap(),
+        targetFormats,
+        Collections.emptyMap(),
+        expectedCount,
+        "1 = 1");
+  }
+
+  /** Compares source and targets with the given reader options and no row filter. */
+  protected void checkDatasetEquivalence(
+      String sourceFormat,
+      GenericTable<?, ?> sourceTable,
+      Map<String, String> sourceOptions,
+      List<String> targetFormats,
+      Map<String, Map<String, String>> targetOptions,
+      Integer expectedCount) {
+    checkDatasetEquivalence(
+        sourceFormat,
+        sourceTable,
+        sourceOptions,
+        targetFormats,
+        targetOptions,
+        expectedCount,
+        "1 = 1");
+  }
+
+  /**
+   * Reads the source table and each target format through Spark and asserts that they contain the
+   * same rows, compared as JSON strings over the table's selected columns in order-by order.
+   *
+   * @param sourceFormat format name of the source table; lower-cased to get the Spark reader name
+   * @param sourceTable table under test; supplies the paths, order-by column and select columns
+   * @param sourceOptions Spark reader options for the source
+   * @param targetFormats formats to read back and compare against the source
+   * @param targetOptions Spark reader options per target format; formats without an entry get none
+   * @param expectedCount expected number of source rows, or {@code null} to only require that the
+   *     source is non-empty
+   * @param filterCondition Spark SQL filter applied to both sides
+   */
+  protected void checkDatasetEquivalence(
+      String sourceFormat,
+      GenericTable<?, ?> sourceTable,
+      Map<String, String> sourceOptions,
+      List<String> targetFormats,
+      Map<String, Map<String, String>> targetOptions,
+      Integer expectedCount,
+      String filterCondition) {
+    // Locale.ROOT keeps the reader name stable regardless of the JVM's default locale.
+    Dataset<Row> sourceRows =
+        sparkSession
+            .read()
+            .options(sourceOptions)
+            .format(sourceFormat.toLowerCase(Locale.ROOT))
+            .load(sourceTable.getBasePath())
+            .orderBy(sourceTable.getOrderByColumn())
+            .filter(filterCondition);
+    Map<String, Dataset<Row>> targetRowsByFormat =
+        targetFormats.stream()
+            .collect(
+                Collectors.toMap(
+                    Function.identity(),
+                    targetFormat -> {
+                      Map<String, String> finalTargetOptions =
+                          targetOptions.getOrDefault(targetFormat, Collections.emptyMap());
+                      if (targetFormat.equals(TableFormat.HUDI)) {
+                        // copy before adding: the caller's map may be immutable
+                        finalTargetOptions = new HashMap<>(finalTargetOptions);
+                        finalTargetOptions.put(HoodieMetadataConfig.ENABLE.key(), "true");
+                        finalTargetOptions.put(
+                            "hoodie.datasource.read.extract.partition.values.from.path", "true");
+                      }
+                      return sparkSession
+                          .read()
+                          .options(finalTargetOptions)
+                          .format(targetFormat.toLowerCase(Locale.ROOT))
+                          .load(sourceTable.getDataPath())
+                          .orderBy(sourceTable.getOrderByColumn())
+                          .filter(filterCondition);
+                    }));
+
+    String[] selectColumnsArr = sourceTable.getColumnsToSelect().toArray(new String[] {});
+    List<String> dataset1Rows = sourceRows.selectExpr(selectColumnsArr).toJSON().collectAsList();
+    targetRowsByFormat.forEach(
+        (format, targetRows) -> {
+          List<String> dataset2Rows =
+              targetRows.selectExpr(selectColumnsArr).toJSON().collectAsList();
+          Assertions.assertEquals(
+              dataset1Rows.size(),
+              dataset2Rows.size(),
+              String.format(
+                  "Datasets have different row counts when reading from Spark. Source: %s, Target: %s",
+                  sourceFormat, format));
+          // sanity check the count to ensure test is set up properly
+          if (expectedCount != null) {
+            Assertions.assertEquals(expectedCount, dataset1Rows.size());
+          } else {
+            // if count is not known ahead of time, ensure datasets are non-empty
+            Assertions.assertFalse(dataset1Rows.isEmpty());
+          }
+
+          if (containsUUIDFields(dataset1Rows) && containsUUIDFields(dataset2Rows)) {
+            compareDatasetWithUUID(dataset1Rows, dataset2Rows);
+          } else {
+            Assertions.assertEquals(
+                dataset1Rows,
+                dataset2Rows,
+                String.format(
+                    "Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
+                    sourceFormat, format));
+          }
+        });
+  }
+
+  /**
+   * Compares two datasets row by row where dataset1Rows is for Iceberg and dataset2Rows is for
+   * other formats (such as Delta or Hudi).
+   *
+   * <ul>
+   *   <li>When both rows carry a top-level "uuid_field", the UUID string from dataset1 is compared
+   *       with the UUID decoded from the Base64 value in dataset2, and the remaining fields are
+   *       compared with that field removed.
+   *   <li>Otherwise the rows are compared as plain JSON strings.
+   * </ul>
+   *
+   * @param dataset1Rows List of JSON rows representing the dataset in Iceberg format (UUID is
+   *     stored as a string).
+   * @param dataset2Rows List of JSON rows representing the dataset in other formats (UUID might be
+   *     Base64-encoded).
+   */
+  private void compareDatasetWithUUID(List<String> dataset1Rows, List<String> dataset2Rows) {
+    for (int i = 0; i < dataset1Rows.size(); i++) {
+      String row1 = dataset1Rows.get(i);
+      String row2 = dataset2Rows.get(i);
+      // cheap textual pre-check so rows without the key are never parsed
+      if (row1.contains(UUID_FIELD_JSON_KEY) && row2.contains(UUID_FIELD_JSON_KEY)) {
+        try {
+          JsonNode node1 = OBJECT_MAPPER.readTree(row1);
+          JsonNode node2 = OBJECT_MAPPER.readTree(row2);
+          // the text can also match a nested key or a string value; only decode when the
+          // field is really present at the top level of both rows
+          if (node1.hasNonNull(UUID_FIELD) && node2.hasNonNull(UUID_FIELD)) {
+            assertUuidRowsEqual((ObjectNode) node1, (ObjectNode) node2);
+            continue;
+          }
+        } catch (JsonProcessingException e) {
+          throw new RuntimeException(e);
+        }
+      }
+      Assertions.assertEquals(
+          row1,
+          row2,
+          String.format(
+              "Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
+              row1, row2));
+    }
+  }
+
+  /**
+   * Asserts that two parsed rows match: the UUID string in {@code node1} must equal the UUID
+   * decoded from the Base64 value in {@code node2}, and all other fields must be identical. The
+   * uuid field is removed from both nodes as part of the comparison.
+   */
+  private void assertUuidRowsEqual(ObjectNode node1, ObjectNode node2) {
+    // check uuid field
+    String uuidStr1 = node1.get(UUID_FIELD).asText();
+    byte[] bytes = Base64.getDecoder().decode(node2.get(UUID_FIELD).asText());
+    ByteBuffer bb = ByteBuffer.wrap(bytes);
+    UUID uuid2 = new UUID(bb.getLong(), bb.getLong());
+    String uuidStr2 = uuid2.toString();
+    Assertions.assertEquals(
+        uuidStr1,
+        uuidStr2,
+        String.format(
+            "Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
+            uuidStr1, uuidStr2));
+
+    // check other fields
+    node1.remove(UUID_FIELD);
+    node2.remove(UUID_FIELD);
+    Assertions.assertEquals(
+        node1.toString(),
+        node2.toString(),
+        String.format(
+            "Datasets are not equivalent when comparing other fields. Source: %s, Target: %s",
+            node1, node2));
+  }
+
+  /** Returns true when at least one serialized row contains the "uuid_field" key text. */
+  private boolean containsUUIDFields(List<String> rows) {
+    return rows.stream().anyMatch(row -> row.contains(UUID_FIELD_JSON_KEY));
+  }
+}
diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/ITBundleValidation.java b/xtable-integration-tests/src/test/java/org/apache/xtable/ITBundleValidation.java
new file mode 100644
index 000000000..3afcd6981
--- /dev/null
+++ b/xtable-integration-tests/src/test/java/org/apache/xtable/ITBundleValidation.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.xtable;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.InputStreamReader;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.UUID;
+import java.util.stream.Stream;
+
+import lombok.SneakyThrows;
+import lombok.extern.log4j.Log4j2;
+
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import org.apache.xtable.model.storage.TableFormat;
+import org.apache.xtable.utilities.RunSync;
+
+/**
+ * Runs {@link RunSync} in a separate JVM whose classpath contains only the bundled jars, then reads
+ * the tables back through Spark to check that source and target hold the same rows.
+ *
+ * <p>The jar locations are derived from the {@code ROOT_DIR}, {@code PROJECT_VERSION} and {@code
+ * SCALA_VERSION} environment variables, which must be set for the test to run.
+ */
+@Log4j2
+class ITBundleValidation extends ConversionTestingBase {
+  private static final String PROJECT_ROOT = requiredEnv("ROOT_DIR");
+  private static final String PROJECT_VERSION = requiredEnv("PROJECT_VERSION");
+  private static final String SCALA_VERSION = requiredEnv("SCALA_VERSION");
+  private static final String UTILITIES_JAR_PATH =
+      String.format(
+          "%s/xtable-utilities/target/xtable-utilities_%s-%s-bundled.jar",
+          PROJECT_ROOT, SCALA_VERSION, PROJECT_VERSION);
+  private static final String ICEBERG_JAR_PATH =
+      String.format(
+          "%s/xtable-iceberg/target/xtable-iceberg-%s-bundled.jar", PROJECT_ROOT, PROJECT_VERSION);
+  private static final String HUDI_JAR_PATH =
+      String.format(
+          "%s/xtable-hudi/target/xtable-hudi-%s-bundled.jar", PROJECT_ROOT, PROJECT_VERSION);
+  private static final String DELTA_JAR_PATH =
+      String.format(
+          "%s/xtable-delta/target/xtable-delta_%s-%s-bundled.jar",
+          PROJECT_ROOT, SCALA_VERSION, PROJECT_VERSION);
+  private static final String SPARK_BUNDLE_PATH =
+      String.format(
+          "%s/xtable-integration-tests/target/spark-testing-bundle_%s.jar",
+          PROJECT_ROOT, SCALA_VERSION);
+
+  /**
+   * Reads an environment variable that the jar paths depend on.
+   *
+   * @throws IllegalStateException if the variable is not set, so the failure names the missing
+   *     variable instead of surfacing later as a jar path that starts with "null"
+   */
+  private static String requiredEnv(String name) {
+    String value = System.getenv(name);
+    if (value == null) {
+      throw new IllegalStateException(
+          "Environment variable " + name + " must be set to locate the bundled jars");
+    }
+    return value;
+  }
+
+  /** Produces every ordered (source, target) pair of distinct formats. */
+  private static Stream<Arguments> generateTestParametersForFormats() {
+    List<Arguments> arguments = new ArrayList<>();
+    List<String> formats = Arrays.asList(TableFormat.HUDI, TableFormat.DELTA, TableFormat.ICEBERG);
+    for (String sourceTableFormat : formats) {
+      for (String targetTableFormat : formats) {
+        if (!sourceTableFormat.equals(targetTableFormat)) {
+          arguments.add(Arguments.of(sourceTableFormat, targetTableFormat));
+        }
+      }
+    }
+    return arguments.stream();
+  }
+
+  /*
+   * This test has the following steps at a high level.
+   * 1. Insert a few records.
+   * 2. Insert more records and upsert some of the earlier ones.
+   * 3. Delete a few records.
+   * After each step the RunSync command is run and the datasets are compared.
+   */
+  @ParameterizedTest
+  @MethodSource("generateTestParametersForFormats")
+  public void testConversionWithBundles(String sourceTableFormat, String targetTableFormat) {
+    String tableName = GenericTable.getTableName();
+    List<String> targetTableFormats = Collections.singletonList(targetTableFormat);
+    String partitionConfig = "level:VALUE";
+    List<?> insertRecords;
+    try (GenericTable table =
+        GenericTableFactory.getInstance(
+            tableName, tempDir, sparkSession, jsc, sourceTableFormat, true)) {
+      String configPath =
+          writeConfig(sourceTableFormat, targetTableFormats, table, tableName, partitionConfig);
+      insertRecords = table.insertRows(100);
+
+      executeRunSync(configPath, sourceTableFormat, targetTableFormat);
+      checkDatasetEquivalence(sourceTableFormat, table, targetTableFormats, 100);
+
+      // make multiple commits and then sync
+      table.insertRows(100);
+      table.upsertRows(insertRecords.subList(0, 20));
+      executeRunSync(configPath, sourceTableFormat, targetTableFormat);
+      checkDatasetEquivalence(sourceTableFormat, table, targetTableFormats, 200);
+
+      table.deleteRows(insertRecords.subList(30, 50));
+      executeRunSync(configPath, sourceTableFormat, targetTableFormat);
+      checkDatasetEquivalence(sourceTableFormat, table, targetTableFormats, 180);
+      checkDatasetEquivalenceWithFilter(
+          sourceTableFormat, table, targetTableFormats, table.getFilterQuery());
+    }
+  }
+
+  /**
+   * Writes a RunSync dataset config describing {@code table} as a YAML file under the temp
+   * directory.
+   *
+   * @return the path of the written config file
+   */
+  @SneakyThrows
+  private String writeConfig(
+      String sourceFormat,
+      List<String> targetFormats,
+      GenericTable table,
+      String tableName,
+      String partitionSpec) {
+    RunSync.DatasetConfig.Table tableConfig =
+        new RunSync.DatasetConfig.Table(
+            table.getBasePath(), table.getDataPath(), tableName, partitionSpec, null);
+    RunSync.DatasetConfig datasetConfig =
+        new RunSync.DatasetConfig(
+            sourceFormat, targetFormats, Collections.singletonList(tableConfig));
+    // random suffix so each call gets its own file in the shared temp directory
+    Path configPath = tempDir.resolve("config_" + UUID.randomUUID());
+    YAML_MAPPER.writeValue(configPath.toFile(), datasetConfig);
+    return configPath.toString();
+  }
+
+  /**
+   * Launches RunSync in a child JVM with the utilities jar plus the jars for the two formats on
+   * the classpath, forwards the child's output to the log, and asserts that it exits with 0.
+   */
+  @SneakyThrows
+  private void executeRunSync(
+      String configPath, String sourceTableFormat, String targetTableFormat) {
+    // File.pathSeparator rather than ":" so the classpath is valid on every platform
+    String classPath =
+        String.join(
+            File.pathSeparator,
+            UTILITIES_JAR_PATH,
+            getJarsForFormat(sourceTableFormat),
+            getJarsForFormat(targetTableFormat));
+    Process process =
+        new ProcessBuilder()
+            .command(
+                "java", "-cp", classPath, RunSync.class.getName(), "--datasetConfig", configPath)
+            .redirectErrorStream(true)
+            .start();
+    try {
+      // stderr is merged into stdout above, so draining this one stream keeps the child from
+      // blocking on a full pipe
+      try (BufferedReader reader =
+          new BufferedReader(new InputStreamReader(process.getInputStream()))) {
+        String line;
+        while ((line = reader.readLine()) != null) {
+          log.info("System log {}", line);
+        }
+      }
+      assertEquals(0, process.waitFor(), "RunSync exited with a non-zero status");
+    } finally {
+      // do not leave the child JVM behind if reading its output or waiting failed
+      if (process.isAlive()) {
+        process.destroyForcibly();
+      }
+    }
+  }
+
+  /**
+   * Returns the classpath entries needed for a format, joined with the platform path separator.
+   *
+   * @throws UnsupportedOperationException if the format is not HUDI, ICEBERG or DELTA
+   */
+  private String getJarsForFormat(String format) {
+    switch (format) {
+      case TableFormat.HUDI:
+        return HUDI_JAR_PATH;
+      case TableFormat.ICEBERG:
+        return ICEBERG_JAR_PATH;
+      case TableFormat.DELTA:
+        return String.join(File.pathSeparator, DELTA_JAR_PATH, SPARK_BUNDLE_PATH);
+      default:
+        throw new UnsupportedOperationException("Unsupported format: " + format);
+    }
+  }
+}
diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java b/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java
index a8cbd451b..1654dde42 100644
--- a/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java
+++ b/xtable-integration-tests/src/test/java/org/apache/xtable/ITConversionController.java
@@ -21,25 +21,19 @@ import static
org.apache.xtable.hudi.HudiTestUtil.PartitionConfig; import java.net.URI; -import java.nio.ByteBuffer; import java.nio.file.Files; -import java.nio.file.Path; import java.nio.file.Paths; import java.time.Duration; import java.time.Instant; -import java.time.ZoneId; -import java.time.format.DateTimeFormatter; import java.time.temporal.ChronoUnit; import java.util.ArrayList; import java.util.Arrays; -import java.util.Base64; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Properties; -import java.util.UUID; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -49,38 +43,26 @@ import lombok.Builder; import lombok.Value; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; -import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.EnumSource; import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; -import org.apache.hudi.client.HoodieReadClient; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.iceberg.Snapshot; import org.apache.iceberg.Table; import org.apache.iceberg.hadoop.HadoopTables; import org.apache.spark.sql.delta.DeltaLog; -import com.fasterxml.jackson.core.JsonProcessingException; 
-import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableList; import org.apache.xtable.conversion.ConversionConfig; @@ -91,44 +73,13 @@ import org.apache.xtable.delta.DeltaConversionSourceProvider; import org.apache.xtable.hudi.HudiConversionSourceProvider; import org.apache.xtable.hudi.HudiSourceConfig; -import org.apache.xtable.hudi.HudiTestUtil; import org.apache.xtable.hudi.TestJavaHudiTable; import org.apache.xtable.hudi.TestSparkHudiTable; import org.apache.xtable.iceberg.IcebergConversionSourceProvider; import org.apache.xtable.model.storage.TableFormat; import org.apache.xtable.model.sync.SyncMode; -import org.apache.xtable.shade.org.apache.iceberg.Snapshot; -public class ITConversionController { - @TempDir public static Path tempDir; - private static final DateTimeFormatter DATE_FORMAT = - DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS").withZone(ZoneId.of("UTC")); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - private static JavaSparkContext jsc; - private static SparkSession sparkSession; - - @BeforeAll - public static void setupOnce() { - SparkConf sparkConf = HudiTestUtil.getSparkConf(tempDir); - sparkSession = - SparkSession.builder().config(HoodieReadClient.addHoodieSupport(sparkConf)).getOrCreate(); - sparkSession - .sparkContext() - .hadoopConfiguration() - .set("parquet.avro.write-old-list-structure", "false"); - jsc = JavaSparkContext.fromSparkContext(sparkSession.sparkContext()); - } - - @AfterAll - public static void teardown() { - if (jsc != null) { - jsc.close(); - } - if (sparkSession != null) { - sparkSession.close(); - } - } +public class ITConversionController extends ConversionTestingBase { private static Stream testCasesWithPartitioningAndSyncModes() { return addBasicPartitionCases(testCasesWithSyncModes()); @@ -493,12 +444,6 @@ public void 
testTimeTravelQueries(String sourceTableFormat) throws Exception { } } - private static List getOtherFormats(String sourceTableFormat) { - return Arrays.stream(TableFormat.values()) - .filter(format -> !format.equals(sourceTableFormat)) - .collect(Collectors.toList()); - } - private static Stream provideArgsForPartitionTesting() { String timestampFilter = String.format( @@ -817,191 +762,6 @@ private Map getTimeTravelOption(String tableFormat, Instant time return options; } - private void checkDatasetEquivalenceWithFilter( - String sourceFormat, - GenericTable sourceTable, - List targetFormats, - String filter) { - checkDatasetEquivalence( - sourceFormat, - sourceTable, - Collections.emptyMap(), - targetFormats, - Collections.emptyMap(), - null, - filter); - } - - private void checkDatasetEquivalence( - String sourceFormat, - GenericTable sourceTable, - List targetFormats, - Integer expectedCount) { - checkDatasetEquivalence( - sourceFormat, - sourceTable, - Collections.emptyMap(), - targetFormats, - Collections.emptyMap(), - expectedCount, - "1 = 1"); - } - - private void checkDatasetEquivalence( - String sourceFormat, - GenericTable sourceTable, - Map sourceOptions, - List targetFormats, - Map> targetOptions, - Integer expectedCount) { - checkDatasetEquivalence( - sourceFormat, - sourceTable, - sourceOptions, - targetFormats, - targetOptions, - expectedCount, - "1 = 1"); - } - - private void checkDatasetEquivalence( - String sourceFormat, - GenericTable sourceTable, - Map sourceOptions, - List targetFormats, - Map> targetOptions, - Integer expectedCount, - String filterCondition) { - Dataset sourceRows = - sparkSession - .read() - .options(sourceOptions) - .format(sourceFormat.toLowerCase()) - .load(sourceTable.getBasePath()) - .orderBy(sourceTable.getOrderByColumn()) - .filter(filterCondition); - Map> targetRowsByFormat = - targetFormats.stream() - .collect( - Collectors.toMap( - Function.identity(), - targetFormat -> { - Map finalTargetOptions = - 
targetOptions.getOrDefault(targetFormat, Collections.emptyMap()); - if (targetFormat.equals(TableFormat.HUDI)) { - finalTargetOptions = new HashMap<>(finalTargetOptions); - finalTargetOptions.put(HoodieMetadataConfig.ENABLE.key(), "true"); - finalTargetOptions.put( - "hoodie.datasource.read.extract.partition.values.from.path", "true"); - } - return sparkSession - .read() - .options(finalTargetOptions) - .format(targetFormat.toLowerCase()) - .load(sourceTable.getDataPath()) - .orderBy(sourceTable.getOrderByColumn()) - .filter(filterCondition); - })); - - String[] selectColumnsArr = sourceTable.getColumnsToSelect().toArray(new String[] {}); - List dataset1Rows = sourceRows.selectExpr(selectColumnsArr).toJSON().collectAsList(); - targetRowsByFormat.forEach( - (format, targetRows) -> { - List dataset2Rows = - targetRows.selectExpr(selectColumnsArr).toJSON().collectAsList(); - Assertions.assertEquals( - dataset1Rows.size(), - dataset2Rows.size(), - String.format( - "Datasets have different row counts when reading from Spark. Source: %s, Target: %s", - sourceFormat, format)); - // sanity check the count to ensure test is set up properly - if (expectedCount != null) { - Assertions.assertEquals(expectedCount, dataset1Rows.size()); - } else { - // if count is not known ahead of time, ensure datasets are non-empty - Assertions.assertFalse(dataset1Rows.isEmpty()); - } - - if (containsUUIDFields(dataset1Rows) && containsUUIDFields(dataset2Rows)) { - compareDatasetWithUUID(dataset1Rows, dataset2Rows); - } else { - Assertions.assertEquals( - dataset1Rows, - dataset2Rows, - String.format( - "Datasets are not equivalent when reading from Spark. Source: %s, Target: %s", - sourceFormat, format)); - } - }); - } - - /** - * Compares two datasets where dataset1Rows is for Iceberg and dataset2Rows is for other formats - * (such as Delta or Hudi). 
- For the "uuid_field", if present, the UUID from dataset1 (Iceberg) - * is compared with the Base64-encoded UUID from dataset2 (other formats), after decoding. - For - * all other fields, the values are compared directly. - If neither row contains the "uuid_field", - * the rows are compared as plain JSON strings. - * - * @param dataset1Rows List of JSON rows representing the dataset in Iceberg format (UUID is - * stored as a string). - * @param dataset2Rows List of JSON rows representing the dataset in other formats (UUID might be - * Base64-encoded). - */ - private void compareDatasetWithUUID(List dataset1Rows, List dataset2Rows) { - for (int i = 0; i < dataset1Rows.size(); i++) { - String row1 = dataset1Rows.get(i); - String row2 = dataset2Rows.get(i); - if (row1.contains("uuid_field") && row2.contains("uuid_field")) { - try { - JsonNode node1 = OBJECT_MAPPER.readTree(row1); - JsonNode node2 = OBJECT_MAPPER.readTree(row2); - - // check uuid field - String uuidStr1 = node1.get("uuid_field").asText(); - byte[] bytes = Base64.getDecoder().decode(node2.get("uuid_field").asText()); - ByteBuffer bb = ByteBuffer.wrap(bytes); - UUID uuid2 = new UUID(bb.getLong(), bb.getLong()); - String uuidStr2 = uuid2.toString(); - Assertions.assertEquals( - uuidStr1, - uuidStr2, - String.format( - "Datasets are not equivalent when reading from Spark. Source: %s, Target: %s", - uuidStr1, uuidStr2)); - - // check other fields - ((ObjectNode) node1).remove("uuid_field"); - ((ObjectNode) node2).remove("uuid_field"); - Assertions.assertEquals( - node1.toString(), - node2.toString(), - String.format( - "Datasets are not equivalent when comparing other fields. Source: %s, Target: %s", - node1, node2)); - } catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - } else { - Assertions.assertEquals( - row1, - row2, - String.format( - "Datasets are not equivalent when reading from Spark. 
Source: %s, Target: %s", - row1, row2)); - } - } - } - - private boolean containsUUIDFields(List rows) { - for (String row : rows) { - if (row.contains("\"uuid_field\"")) { - return true; - } - } - return false; - } - private static Stream addBasicPartitionCases(Stream arguments) { // add unpartitioned and partitioned cases return arguments.flatMap( diff --git a/xtable-utilities/pom.xml b/xtable-utilities/pom.xml index 2865782cb..37e472254 100644 --- a/xtable-utilities/pom.xml +++ b/xtable-utilities/pom.xml @@ -35,12 +35,6 @@ ${project.version} - - org.apache.xtable - xtable-iceberg - ${project.version} - - commons-cli @@ -78,12 +72,12 @@ org.apache.spark spark-core_${scala.binary.version} - runtime + provided org.apache.spark spark-sql_${scala.binary.version} - runtime + provided @@ -93,11 +87,6 @@ compile - - org.apache.parquet - parquet-avro - - org.apache.hadoop diff --git a/xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java b/xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java index 32f496669..ea28c4c72 100644 --- a/xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java +++ b/xtable-utilities/src/main/java/org/apache/xtable/utilities/RunSync.java @@ -29,6 +29,8 @@ import java.util.stream.Collectors; import lombok.Data; +import lombok.Value; +import lombok.extern.jackson.Jacksonized; import lombok.extern.log4j.Log4j2; import org.apache.commons.cli.CommandLine; @@ -42,7 +44,6 @@ import com.fasterxml.jackson.annotation.JsonMerge; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import com.google.common.annotations.VisibleForTesting; @@ -114,11 +115,10 @@ public static void main(String[] args) throws IOException { return; } - DatasetConfig datasetConfig = new DatasetConfig(); + DatasetConfig datasetConfig; try (InputStream inputStream = 
Files.newInputStream(Paths.get(cmd.getOptionValue(DATASET_CONFIG_OPTION)))) { - ObjectReader objectReader = YAML_MAPPER.readerForUpdating(datasetConfig); - objectReader.readValue(inputStream); + datasetConfig = YAML_MAPPER.readValue(inputStream, DatasetConfig.class); } byte[] customConfig = getCustomConfigurations(cmd, HADOOP_CONFIG_PATH); @@ -170,7 +170,7 @@ public static void main(String[] args) throws IOException { tableFormat -> TargetTable.builder() .name(table.getTableName()) - .basePath(table.getTableBasePath()) + .basePath(table.getTableDataPath()) .namespace( table.getNamespace() == null ? null @@ -241,7 +241,8 @@ static IcebergCatalogConfig loadIcebergCatalogConfig(byte[] customConfigs) throw : YAML_MAPPER.readValue(customConfigs, IcebergCatalogConfig.class); } - @Data + @Jacksonized + @Value public static class DatasetConfig { /** @@ -257,7 +258,8 @@ public static class DatasetConfig { /** Configuration of the dataset to sync, path, table name, etc. */ List datasets; - @Data + @Jacksonized + @Value public static class Table { /** * The base path of the table to sync. 
Any authentication configuration needed by HDFS client From 3403b51dd95f09646a7b9d4d320a756d21fa7473 Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Mon, 20 Jan 2025 17:33:16 -0600 Subject: [PATCH 12/12] add workaround for hudi/avro compatibility --- xtable-hudi/pom.xml | 1 + .../org/apache/avro/data/TimeConversions.java | 151 ++++++++++++++++++ xtable-integration-tests/pom.xml | 6 +- .../org/apache/xtable/ITBundleValidation.java | 4 +- xtable-utilities/pom.xml | 8 + 5 files changed, 167 insertions(+), 3 deletions(-) create mode 100644 xtable-hudi/src/main/java/org/apache/avro/data/TimeConversions.java diff --git a/xtable-hudi/pom.xml b/xtable-hudi/pom.xml index a7a1f8f71..258e52e7b 100644 --- a/xtable-hudi/pom.xml +++ b/xtable-hudi/pom.xml @@ -221,6 +221,7 @@ org.apache.parquet:parquet-encoding org.apache.parquet:parquet-hadoop org.apache.parquet:parquet-format-structures + org.apache.parquet:parquet-jackson org.apache.hudi:hudi-java-client org.apache.hudi:hudi-client-common org.apache.hudi:hudi-common diff --git a/xtable-hudi/src/main/java/org/apache/avro/data/TimeConversions.java b/xtable-hudi/src/main/java/org/apache/avro/data/TimeConversions.java new file mode 100644 index 000000000..066b117b4 --- /dev/null +++ b/xtable-hudi/src/main/java/org/apache/avro/data/TimeConversions.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.data; + +import org.apache.avro.Conversion; +import org.apache.avro.LogicalType; +import org.apache.avro.Schema; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.Days; +import org.joda.time.LocalDate; +import org.joda.time.LocalTime; + +/** + * Provides compatibility across various versions of avro used in the Hudi generated jars. Without + * this, there will be unresolved dependencies at runtime. + */ +@SuppressWarnings("unused") +public class TimeConversions { + public static class DateConversion extends Conversion { + private static final LocalDate EPOCH_DATE = new LocalDate(1970, 1, 1); + + @Override + public Class getConvertedType() { + return LocalDate.class; + } + + @Override + public String getLogicalTypeName() { + return "date"; + } + + @Override + public LocalDate fromInt(Integer daysFromEpoch, Schema schema, LogicalType type) { + return EPOCH_DATE.plusDays(daysFromEpoch); + } + + @Override + public Integer toInt(LocalDate date, Schema schema, LogicalType type) { + return Days.daysBetween(EPOCH_DATE, date).getDays(); + } + } + + public static class TimeConversion extends Conversion { + @Override + public Class getConvertedType() { + return LocalTime.class; + } + + @Override + public String getLogicalTypeName() { + return "time-millis"; + } + + @Override + public LocalTime fromInt(Integer millisFromMidnight, Schema schema, LogicalType type) { + return LocalTime.fromMillisOfDay(millisFromMidnight); + } + + @Override + public Integer toInt(LocalTime time, Schema schema, 
LogicalType type) { + return time.millisOfDay().get(); + } + } + + public static class TimeMicrosConversion extends Conversion { + @Override + public Class getConvertedType() { + return LocalTime.class; + } + + @Override + public String getLogicalTypeName() { + return "time-micros"; + } + + @Override + public LocalTime fromLong(Long microsFromMidnight, Schema schema, LogicalType type) { + return LocalTime.fromMillisOfDay(microsFromMidnight / 1000); + } + } + + public static class LossyTimeMicrosConversion extends TimeMicrosConversion { + @Override + public Long toLong(LocalTime time, Schema schema, LogicalType type) { + return 1000 * (long) time.millisOfDay().get(); + } + } + + public static class TimestampConversion extends Conversion { + @Override + public Class getConvertedType() { + return DateTime.class; + } + + @Override + public String getLogicalTypeName() { + return "timestamp-millis"; + } + + @Override + public DateTime fromLong(Long millisFromEpoch, Schema schema, LogicalType type) { + return new DateTime(millisFromEpoch, DateTimeZone.UTC); + } + + @Override + public Long toLong(DateTime timestamp, Schema schema, LogicalType type) { + return timestamp.getMillis(); + } + } + + public static class TimestampMicrosConversion extends Conversion { + @Override + public Class getConvertedType() { + return DateTime.class; + } + + @Override + public String getLogicalTypeName() { + return "timestamp-micros"; + } + + @Override + public DateTime fromLong(Long microsFromEpoch, Schema schema, LogicalType type) { + return new DateTime(microsFromEpoch / 1000, DateTimeZone.UTC); + } + } + + public static class LossyTimestampMicrosConversion extends TimestampMicrosConversion { + @Override + public Long toLong(DateTime timestamp, Schema schema, LogicalType type) { + return 1000 * timestamp.getMillis(); + } + } +} diff --git a/xtable-integration-tests/pom.xml b/xtable-integration-tests/pom.xml index 12ca77cad..ea05ab5bb 100644 --- a/xtable-integration-tests/pom.xml +++ 
b/xtable-integration-tests/pom.xml @@ -243,9 +243,13 @@ maven-shade-plugin - bundle-spark-test-jar + package + + shade + spark-testing-bundle_${scala.binary.version} + false diff --git a/xtable-integration-tests/src/test/java/org/apache/xtable/ITBundleValidation.java b/xtable-integration-tests/src/test/java/org/apache/xtable/ITBundleValidation.java index 3afcd6981..0d666ecc3 100644 --- a/xtable-integration-tests/src/test/java/org/apache/xtable/ITBundleValidation.java +++ b/xtable-integration-tests/src/test/java/org/apache/xtable/ITBundleValidation.java @@ -81,7 +81,7 @@ private static Stream generateTestParametersForFormats() { * 1. Insert few records. * 2. Upsert few records. * 3. Delete few records. - * After each step the RunSync command is run. + * After each step the RunSync command is run as a separate process to validate proper dependencies are included in the bundles. */ @ParameterizedTest @MethodSource("generateTestParametersForFormats") @@ -151,7 +151,7 @@ private void executeRunSync( new BufferedReader(new InputStreamReader(process.getInputStream()))) { String line; while ((line = reader.readLine()) != null) { - log.info("System log {}", line); + log.info("Process log {}", line); } } assertEquals(0, process.waitFor()); diff --git a/xtable-utilities/pom.xml b/xtable-utilities/pom.xml index 37e472254..f3b763df2 100644 --- a/xtable-utilities/pom.xml +++ b/xtable-utilities/pom.xml @@ -81,6 +81,14 @@ + + org.apache.hadoop + hadoop-client-api + + + org.apache.hadoop + hadoop-client-runtime + org.apache.hadoop hadoop-common