[DRAFT] Restructure Repo for Table Format specific modules #618

Draft · wants to merge 14 commits into base: main
2 changes: 1 addition & 1 deletion .github/workflows/mvn-ci-build.yml
@@ -44,4 +44,4 @@ jobs:
cache: maven

- name: Build all module with Maven
run: mvn clean install -ntp -B
run: mvn clean install -ntp -B -T 2C
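
Note: the added -T 2C flag switches the CI build to Maven's parallel mode with two threads per available CPU core, which should help absorb the extra modules introduced below.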
25 changes: 25 additions & 0 deletions pom.xml
@@ -51,6 +51,10 @@
<module>xtable-hudi-support</module>
<module>xtable-core</module>
<module>xtable-utilities</module>
<module>xtable-delta</module>
<module>xtable-integration-tests</module>
<module>xtable-hudi</module>
<module>xtable-iceberg</module>
</modules>
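
The four new modules split the Delta, Hudi, and Iceberg specific code and the cross-format integration tests out of xtable-core, which is why xtable-core/pom.xml further down drops its direct Hudi, Iceberg, Delta, and Spark dependencies.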

<properties>
@@ -150,6 +154,16 @@
<artifactId>parquet-avro</artifactId>
<version>${parquet.version}</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
<version>${parquet.version}</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
<version>${parquet.version}</version>
</dependency>

<!-- Logging -->
<dependency>
@@ -318,6 +332,12 @@
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>commons-cli</groupId>
@@ -683,6 +703,11 @@
<trimStackTrace>false</trimStackTrace>
<argLine>-Xmx1024m</argLine>
<forkedProcessExitTimeoutInSeconds>120</forkedProcessExitTimeoutInSeconds>
<environmentVariables>
<ROOT_DIR>${maven.multiModuleProjectDirectory}</ROOT_DIR>
<PROJECT_VERSION>${project.version}</PROJECT_VERSION>
<SCALA_VERSION>${scala.binary.version}</SCALA_VERSION>
</environmentVariables>
</configuration>
</plugin>
<plugin>
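
The surefire environmentVariables block exposes the repository root, the project version, and the Scala binary version to forked test JVMs. A minimal sketch of how a test utility might read them, assuming only the variable names added above; the helper class and the jar naming are illustrative, not part of this PR:

import java.nio.file.Path;
import java.nio.file.Paths;

// Illustrative helper (not from this PR): resolves a module's built jar from the
// environment variables that surefire now exports to forked test JVMs.
public final class BuildEnv {

  public static Path moduleJar(String module) {
    String rootDir = System.getenv("ROOT_DIR"); // ${maven.multiModuleProjectDirectory}
    String version = System.getenv("PROJECT_VERSION"); // ${project.version}
    String scala = System.getenv("SCALA_VERSION"); // ${scala.binary.version}
    // Assumed artifact layout; the real module jar names may differ.
    return Paths.get(rootDir, module, "target", module + "_" + scala + "-" + version + ".jar");
  }

  private BuildEnv() {}
}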
112 changes: 22 additions & 90 deletions xtable-core/pom.xml
@@ -34,75 +34,6 @@
<artifactId>xtable-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.xtable</groupId>
<artifactId>xtable-hudi-support-utils</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>

<!-- Avro -->
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</dependency>

<!-- Scala dependencies -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
</dependency>

<!-- Hudi dependencies -->
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-java-client</artifactId>
</dependency>

<!-- Iceberg dependencies -->
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-api</artifactId>
</dependency>

<!-- Delta dependencies -->
<dependency>
<groupId>io.delta</groupId>
<artifactId>delta-core_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>io.delta</groupId>
<artifactId>delta-standalone_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>

<!-- Hadoop dependencies -->
<dependency>
@@ -116,27 +47,6 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</dependency>

<!-- Spark/Iceberg/Hudi dependencies for reading/writing tables -->
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<scope>provided</scope>
</dependency>

<!-- Mockito -->
<dependency>
@@ -173,5 +83,27 @@
<artifactId>log4j-slf4j2-impl</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
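
The new maven-jar-plugin execution attaches xtable-core's test classes as a test-jar; presumably this lets the new per-format modules and xtable-integration-tests reuse shared fixtures such as GenericTable instead of duplicating them.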
87 changes: 0 additions & 87 deletions xtable-core/src/test/java/org/apache/xtable/GenericTable.java
@@ -18,20 +18,10 @@

package org.apache.xtable;

import static org.apache.xtable.model.storage.TableFormat.DELTA;
import static org.apache.xtable.model.storage.TableFormat.HUDI;
import static org.apache.xtable.model.storage.TableFormat.ICEBERG;

import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.UUID;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

import org.apache.hudi.common.model.HoodieTableType;

public interface GenericTable<T, Q> extends AutoCloseable {
// A list of values for the level field which serves as a basic field to partition on for tests
List<String> LEVEL_VALUES = Arrays.asList("INFO", "WARN", "ERROR");
@@ -66,83 +56,6 @@ default String getDataPath() {

String getFilterQuery();

static GenericTable getInstance(
String tableName,
Path tempDir,
SparkSession sparkSession,
JavaSparkContext jsc,
String sourceFormat,
boolean isPartitioned) {
switch (sourceFormat) {
case HUDI:
return TestSparkHudiTable.forStandardSchemaAndPartitioning(
tableName, tempDir, jsc, isPartitioned);
case DELTA:
return TestSparkDeltaTable.forStandardSchemaAndPartitioning(
tableName, tempDir, sparkSession, isPartitioned ? "level" : null);
case ICEBERG:
return TestIcebergTable.forStandardSchemaAndPartitioning(
tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
default:
throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
}
}

static GenericTable getInstanceWithAdditionalColumns(
String tableName,
Path tempDir,
SparkSession sparkSession,
JavaSparkContext jsc,
String sourceFormat,
boolean isPartitioned) {
switch (sourceFormat) {
case HUDI:
return TestSparkHudiTable.forSchemaWithAdditionalColumnsAndPartitioning(
tableName, tempDir, jsc, isPartitioned);
case DELTA:
return TestSparkDeltaTable.forSchemaWithAdditionalColumnsAndPartitioning(
tableName, tempDir, sparkSession, isPartitioned ? "level" : null);
case ICEBERG:
return TestIcebergTable.forSchemaWithAdditionalColumnsAndPartitioning(
tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
default:
throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
}
}

static GenericTable getInstanceWithCustomPartitionConfig(
String tableName,
Path tempDir,
JavaSparkContext jsc,
String sourceFormat,
String partitionConfig) {
switch (sourceFormat) {
case HUDI:
return TestSparkHudiTable.forStandardSchema(
tableName, tempDir, jsc, partitionConfig, HoodieTableType.COPY_ON_WRITE);
default:
throw new IllegalArgumentException(
String.format(
"Unsupported source format: %s for custom partition config", sourceFormat));
}
}

static GenericTable getInstanceWithUUIDColumns(
String tableName,
Path tempDir,
SparkSession sparkSession,
JavaSparkContext jsc,
String sourceFormat,
boolean isPartitioned) {
switch (sourceFormat) {
case ICEBERG:
return TestIcebergTable.forSchemaWithUUIDColumns(
tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
default:
throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
}
}

static String getTableName() {
return "test_table_" + UUID.randomUUID().toString().replaceAll("-", "_");
}
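
The removed static factories are what coupled the shared GenericTable test interface to Hudi, Delta, Iceberg, and Spark. A plausible landing spot, consistent with the new module list in the root pom, is a standalone factory in xtable-integration-tests; the class below is a hypothetical sketch that merely relocates the deleted getInstance logic (the name GenericTableFactory is an assumption, not taken from this diff):

package org.apache.xtable;

import static org.apache.xtable.model.storage.TableFormat.DELTA;
import static org.apache.xtable.model.storage.TableFormat.HUDI;
import static org.apache.xtable.model.storage.TableFormat.ICEBERG;

import java.nio.file.Path;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

// Hypothetical relocation of the factory removed from GenericTable above.
public class GenericTableFactory {

  static GenericTable getInstance(
      String tableName,
      Path tempDir,
      SparkSession sparkSession,
      JavaSparkContext jsc,
      String sourceFormat,
      boolean isPartitioned) {
    switch (sourceFormat) {
      case HUDI:
        return TestSparkHudiTable.forStandardSchemaAndPartitioning(
            tableName, tempDir, jsc, isPartitioned);
      case DELTA:
        return TestSparkDeltaTable.forStandardSchemaAndPartitioning(
            tableName, tempDir, sparkSession, isPartitioned ? "level" : null);
      case ICEBERG:
        return TestIcebergTable.forStandardSchemaAndPartitioning(
            tableName, isPartitioned ? "level" : null, tempDir, jsc.hadoopConfiguration());
      default:
        throw new IllegalArgumentException("Unsupported source format: " + sourceFormat);
    }
  }
}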