[DRAFT] Restructure Repo for Table Format specific modules #618

Draft · wants to merge 14 commits into base: main
2 changes: 1 addition & 1 deletion .github/workflows/mvn-ci-build.yml
@@ -44,4 +44,4 @@ jobs:
cache: maven

- name: Build all module with Maven
run: mvn clean install -ntp -B
run: mvn clean install -ntp -B -T 2C
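
Note: the added -T 2C flag switches the CI build to Maven's parallel mode with two threads per available CPU core, which should help absorb the extra modules introduced below.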
25 changes: 25 additions & 0 deletions pom.xml
@@ -51,6 +51,10 @@
<module>xtable-hudi-support</module>
<module>xtable-core</module>
<module>xtable-utilities</module>
<module>xtable-delta</module>
<module>xtable-integration-tests</module>
<module>xtable-hudi</module>
<module>xtable-iceberg</module>
</modules>
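
The four new modules split the Delta, Hudi, and Iceberg specific code and the cross-format integration tests out of xtable-core, which is why xtable-core/pom.xml further down drops its direct Hudi, Iceberg, Delta, and Spark dependencies.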

<properties>
@@ -150,6 +154,16 @@
<artifactId>parquet-avro</artifactId>
<version>${parquet.version}</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
<version>${parquet.version}</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
<version>${parquet.version}</version>
</dependency>

<!-- Logging -->
<dependency>
@@ -318,6 +332,12 @@
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>commons-cli</groupId>
@@ -683,6 +703,11 @@
<trimStackTrace>false</trimStackTrace>
<argLine>-Xmx1024m</argLine>
<forkedProcessExitTimeoutInSeconds>120</forkedProcessExitTimeoutInSeconds>
<environmentVariables>
<ROOT_DIR>${maven.multiModuleProjectDirectory}</ROOT_DIR>
<PROJECT_VERSION>${project.version}</PROJECT_VERSION>
<SCALA_VERSION>${scala.binary.version}</SCALA_VERSION>
</environmentVariables>
</configuration>
</plugin>
<plugin>
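
The surefire environmentVariables block exposes the repository root, the project version, and the Scala binary version to forked test JVMs. A minimal sketch of how a test utility might read them, assuming only the variable names added above; the helper class and the jar naming are illustrative, not part of this PR:

import java.nio.file.Path;
import java.nio.file.Paths;

// Illustrative helper (not from this PR): resolves a module's built jar from the
// environment variables that surefire now exports to forked test JVMs.
public final class BuildEnv {

  public static Path moduleJar(String module) {
    String rootDir = System.getenv("ROOT_DIR"); // ${maven.multiModuleProjectDirectory}
    String version = System.getenv("PROJECT_VERSION"); // ${project.version}
    String scala = System.getenv("SCALA_VERSION"); // ${scala.binary.version}
    // Assumed artifact layout; the real module jar names may differ.
    return Paths.get(rootDir, module, "target", module + "_" + scala + "-" + version + ".jar");
  }

  private BuildEnv() {}
}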
112 changes: 22 additions & 90 deletions xtable-core/pom.xml
@@ -34,75 +34,6 @@
<artifactId>xtable-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.xtable</groupId>
<artifactId>xtable-hudi-support-utils</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>

<!-- Avro -->
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</dependency>

<!-- Scala dependencies -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
</dependency>

<!-- Hudi dependencies -->
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-java-client</artifactId>
</dependency>

<!-- Iceberg dependencies -->
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-api</artifactId>
</dependency>

<!-- Delta dependencies -->
<dependency>
<groupId>io.delta</groupId>
<artifactId>delta-core_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>io.delta</groupId>
<artifactId>delta-standalone_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>

<!-- Hadoop dependencies -->
<dependency>
@@ -116,27 +47,6 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</dependency>

<!-- Spark/Iceberg/Hudi dependencies for reading/writing tables -->
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<scope>provided</scope>
</dependency>

<!-- Mockito -->
<dependency>
@@ -173,5 +83,27 @@
<artifactId>log4j-slf4j2-impl</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
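
The new maven-jar-plugin execution attaches xtable-core's test classes as a test-jar; presumably this lets the new per-format modules and xtable-integration-tests reuse shared fixtures such as GenericTable instead of duplicating them.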
87 changes: 0 additions & 87 deletions xtable-core/src/test/java/org/apache/xtable/GenericTable.java
@@ -18,20 +18,10 @@

package org.apache.xtable;

import static org.apache.xtable.model.storage.TableFormat.DELTA;
import static org.apache.xtable.model.storage.TableFormat.HUDI;
import static org.apache.xtable.model.storage.TableFormat.ICEBERG;

import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.UUID;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

import org.apache.hudi.common.model.HoodieTableType;

public interface GenericTable<T, Q> extends AutoCloseable {
// A list of values for the level field which serves as a basic field to partition on for tests
List<String> LEVEL_VALUES = Arrays.asList("INFO", "WARN", "ERROR");
@@ -66,83 +56,6 @@ default String getDataPath() {

String getFilterQuery();

static GenericTable getInstance(
String tableName,
Path tempDir,
SparkSession sparkSession,
JavaSparkContext jsc,
String sourceFormat,
boolean isPartitioned) {
switch (sourceFormat) {
case HUDI:
return TestSparkHudiTable.forStandardSchemaAndPartitioning(
tableName, tempDir, jsc, isPartitioned);
case DELTA:
return TestSparkDeltaTable.forStandardSchemaAndPartitioning(
tableName, tempDir, sparkSession, isPartitioned ? "level" : null);
case ICEBERG:
return TestIcebergTable.forStandardSchemaAndPartitioning(
tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
default:
throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
}
}

static GenericTable getInstanceWithAdditionalColumns(
String tableName,
Path tempDir,
SparkSession sparkSession,
JavaSparkContext jsc,
String sourceFormat,
boolean isPartitioned) {
switch (sourceFormat) {
case HUDI:
return TestSparkHudiTable.forSchemaWithAdditionalColumnsAndPartitioning(
tableName, tempDir, jsc, isPartitioned);
case DELTA:
return TestSparkDeltaTable.forSchemaWithAdditionalColumnsAndPartitioning(
tableName, tempDir, sparkSession, isPartitioned ? "level" : null);
case ICEBERG:
return TestIcebergTable.forSchemaWithAdditionalColumnsAndPartitioning(
tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
default:
throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
}
}

static GenericTable getInstanceWithCustomPartitionConfig(
String tableName,
Path tempDir,
JavaSparkContext jsc,
String sourceFormat,
String partitionConfig) {
switch (sourceFormat) {
case HUDI:
return TestSparkHudiTable.forStandardSchema(
tableName, tempDir, jsc, partitionConfig, HoodieTableType.COPY_ON_WRITE);
default:
throw new IllegalArgumentException(
String.format(
"Unsupported source format: %s for custom partition config", sourceFormat));
}
}

static GenericTable getInstanceWithUUIDColumns(
String tableName,
Path tempDir,
SparkSession sparkSession,
JavaSparkContext jsc,
String sourceFormat,
boolean isPartitioned) {
switch (sourceFormat) {
case ICEBERG:
return TestIcebergTable.forSchemaWithUUIDColumns(
tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
default:
throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
}
}

static String getTableName() {
return "test_table_" + UUID.randomUUID().toString().replaceAll("-", "_");
}
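
The removed static factories are what coupled the shared GenericTable test interface to Hudi, Delta, Iceberg, and Spark. A plausible landing spot, consistent with the new module list in the root pom, is a standalone factory in xtable-integration-tests; the class below is a hypothetical sketch that merely relocates the deleted getInstance logic (the name GenericTableFactory is an assumption, not taken from this diff):

package org.apache.xtable;

import static org.apache.xtable.model.storage.TableFormat.DELTA;
import static org.apache.xtable.model.storage.TableFormat.HUDI;
import static org.apache.xtable.model.storage.TableFormat.ICEBERG;

import java.nio.file.Path;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

// Hypothetical relocation of the factory removed from GenericTable above.
public class GenericTableFactory {

  static GenericTable getInstance(
      String tableName,
      Path tempDir,
      SparkSession sparkSession,
      JavaSparkContext jsc,
      String sourceFormat,
      boolean isPartitioned) {
    switch (sourceFormat) {
      case HUDI:
        return TestSparkHudiTable.forStandardSchemaAndPartitioning(
            tableName, tempDir, jsc, isPartitioned);
      case DELTA:
        return TestSparkDeltaTable.forStandardSchemaAndPartitioning(
            tableName, tempDir, sparkSession, isPartitioned ? "level" : null);
      case ICEBERG:
        return TestIcebergTable.forStandardSchemaAndPartitioning(
            tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
      default:
        throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
    }
  }
}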