Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build scala 2.13 spark 3.3 #49

Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
current-build changes for local run
ShreeshaS01 committed Nov 21, 2024
commit 3ffd48a2cc33d945206d6ed40aeca4523d003749
12 changes: 12 additions & 0 deletions spark-bigtable_2.12-it/pom.xml
Original file line number Diff line number Diff line change
@@ -53,6 +53,18 @@
<groupId>com.google.cloud.spark.bigtable</groupId>
<artifactId>spark-bigtable_2.12</artifactId>
<version>0.2.1</version> <!-- ${NEXT_VERSION_FLAG} -->
<exclusions>
<!-- Exclude google-cloud-bigtable dependency -->
<exclusion>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-bigtable</artifactId>
</exclusion>
<!-- Exclude io.grpc dependencies -->
<exclusion>
<groupId>io.grpc</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
Original file line number Diff line number Diff line change
@@ -96,8 +96,8 @@ static void stopJavaSparkContext(JavaSparkContext javaSparkContext) {
}

static void setBigtableProperties() throws Exception {
projectId = System.getProperty(PROJECT_ID_PROPERTY_NAME);
instanceId = System.getProperty(INSTANCE_ID_PROPERTY_NAME);
projectId = "my-local-project";//System.getProperty(PROJECT_ID_PROPERTY_NAME);
instanceId = "my-local-instance";//System.getProperty(INSTANCE_ID_PROPERTY_NAME);
if (instanceId == null || projectId == null) {
throw new IllegalArgumentException(
"Bigtable project and instance ID must be provided using "
Original file line number Diff line number Diff line change
@@ -23,12 +23,12 @@
import com.google.cloud.spark.bigtable.fakeserver.FakeServerBuilder;
import com.google.cloud.spark.bigtable.fakeserver.FakeTableAdminService;
import com.google.cloud.spark.bigtable.model.TestRow;
import com.google.cloud.spark.bigtable.repackaged.com.google.bigtable.admin.v2.CreateTableRequest;
import com.google.cloud.spark.bigtable.repackaged.com.google.bigtable.v2.MutateRowsRequest;
import com.google.cloud.spark.bigtable.repackaged.com.google.bigtable.v2.ReadRowsRequest;
import com.google.cloud.spark.bigtable.repackaged.com.google.bigtable.v2.SampleRowKeysRequest;
import com.google.cloud.spark.bigtable.repackaged.io.grpc.Server;
import com.google.cloud.spark.bigtable.repackaged.io.grpc.Status;
import /*com.google.cloud.spark.bigtable.repackaged.*/com.google.bigtable.admin.v2.CreateTableRequest;
import /*com.google.cloud.spark.bigtable.repackaged.*/com.google.bigtable.v2.MutateRowsRequest;
import /*com.google.cloud.spark.bigtable.repackaged.*/com.google.bigtable.v2.ReadRowsRequest;
import /*com.google.cloud.spark.bigtable.repackaged.*/com.google.bigtable.v2.SampleRowKeysRequest;
import /*com.google.cloud.spark.bigtable.repackaged.*/io.grpc.Server;
import /*com.google.cloud.spark.bigtable.repackaged.*/io.grpc.Status;
import com.google.errorprone.annotations.Keep;
import java.util.ArrayList;
import junitparams.JUnitParamsRunner;
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.Is.is;

import com.google.api.gax.core.NoCredentialsProvider;
import com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient;
import com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings;
import com.google.gson.JsonObject;
@@ -37,6 +38,7 @@ public static void initialSetup() throws Exception {
BigtableTableAdminSettings.newBuilder()
.setProjectId(projectId)
.setInstanceId(instanceId)
.setCredentialsProvider(NoCredentialsProvider.create())
.build();
adminClient = BigtableTableAdminClient.create(adminSettings);
}
@@ -87,7 +89,6 @@ public void testOpenLineageEvents() throws Exception {
// event data.
Dataset<Row> outputReadDf = readDataframeFromBigtable(spark, outputCatalog);
assertDataFramesEqual(outputReadDf, outputDf);

List<JsonObject> jsonObjects = parseEventLog(lineageFile);
assertThat(jsonObjects.isEmpty(), is(false));

@@ -114,6 +115,7 @@ private static SparkSession createSparkSessionWithOL() throws IOException {
lineageFile = File.createTempFile("openlineage_test_" + System.nanoTime(), ".log");
lineageFile.deleteOnExit();

System.out.println(lineageFile.getAbsolutePath());
spark =
SparkSession.builder()
.master("local")
@@ -134,6 +136,7 @@ private List<JsonObject> parseEventLog(File file) throws Exception {
eventList = new ArrayList<>();
while (scanner.hasNextLine()) {
String line = scanner.nextLine();
System.out.println(line);
JsonObject event = JsonParser.parseString(line).getAsJsonObject();
if (!event.getAsJsonArray("inputs").isEmpty()
&& !event.getAsJsonArray("outputs").isEmpty()) {
Original file line number Diff line number Diff line change
@@ -23,10 +23,14 @@
import com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings;
import com.google.cloud.spark.bigtable.datasources.BigtableSparkConf;
import com.google.cloud.spark.bigtable.datasources.BigtableSparkConfBuilder;
import com.google.cloud.spark.bigtable.repackaged.com.google.cloud.bigtable.data.v2.models.Row;
import com.google.cloud.spark.bigtable.repackaged.com.google.cloud.bigtable.data.v2.models.RowCell;
import com.google.cloud.spark.bigtable.repackaged.com.google.cloud.bigtable.data.v2.models.RowMutationEntry;
import com.google.cloud.spark.bigtable.repackaged.io.grpc.Status;
//import com.google.cloud.spark.bigtable.repackaged.com.google.cloud.bigtable.data.v2.models.Row;
//import com.google.cloud.spark.bigtable.repackaged.com.google.cloud.bigtable.data.v2.models.RowCell;
//import com.google.cloud.spark.bigtable.repackaged.com.google.cloud.bigtable.data.v2.models.RowMutationEntry;
//import com.google.cloud.spark.bigtable.repackaged.io.grpc.Status;
import /*com.google.cloud.spark.bigtable.repackaged.*/com.google.cloud.bigtable.data.v2.models.Row;
import /*com.google.cloud.spark.bigtable.repackaged.*/com.google.cloud.bigtable.data.v2.models.RowCell;
import /*com.google.cloud.spark.bigtable.repackaged.*/com.google.cloud.bigtable.data.v2.models.RowMutationEntry;
import /*com.google.cloud.spark.bigtable.repackaged.*/io.grpc.Status;
import java.util.ArrayList;
import java.util.List;
import junitparams.JUnitParamsRunner;
@@ -117,6 +121,8 @@ public void writeWithFailingMutationTest() throws Exception {
LOG.info("Original RDD Created.");

BigtableRDD bigtableRDD = new BigtableRDD(spark.sparkContext());
System.out.println(erroneousMutationsRDD.collect());
System.out.println(Status.NOT_FOUND.getCode().toString());
try {
bigtableRDD.writeRDD(erroneousMutationsRDD.rdd(), useTable, createRDDConf());
fail("The connector should have thrown a " + Status.NOT_FOUND + " exception.");
Original file line number Diff line number Diff line change
@@ -20,12 +20,13 @@
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import com.google.api.gax.rpc.NotFoundException;
import com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient;
import com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings;
import com.google.cloud.spark.bigtable.model.Favorites;
import com.google.cloud.spark.bigtable.model.TestAvroRow;
import com.google.cloud.spark.bigtable.model.TestRow;
import com.google.cloud.spark.bigtable.repackaged.com.google.api.gax.rpc.NotFoundException;
//import com.google.cloud.spark.bigtable.repackaged.com.google.api.gax.rpc.NotFoundException;
import java.util.ArrayList;
import junitparams.JUnitParamsRunner;
import junitparams.Parameters;
Original file line number Diff line number Diff line change
@@ -37,8 +37,9 @@ public class ReadWriteLongRunningTest extends AbstractTestBase {
private static BigtableTableAdminClient adminClient;

private static final int batchSize = 1000;
private static final long batchGapMillis = 5 * 60 * 1000; // 5 minutes
private static final long totalDurationMillis = 18 * 3600 * 1000; // 18 hours
private static final long batchGapMillis = 2 * 60 * 1000; // 2 minutes
private static final long totalDurationMillis = 5 * 60 * 1000; // 5 minutes
//private static final long totalDurationMillis = 18 * 3600 * 1000; // 18 hours

private long endTime;

Original file line number Diff line number Diff line change
@@ -45,11 +45,11 @@ public class WriteFuzzTest extends AbstractTestBase {
private static final Logger LOG = LoggerFactory.getLogger(WriteFuzzTest.class);
private static BigtableTableAdminClient adminClient;

private final int minRows = 50000;
private final int maxRows = 250000;
private final int minRows = 500;
private final int maxRows = 2500;
private final int minCols = 18;
private final int maxCols = 25;
private static final long totalDurationMinutes = 90;
private static final long totalDurationMinutes = 5;
private final Stopwatch totalRunTime;

public WriteFuzzTest() {
23 changes: 20 additions & 3 deletions spark-bigtable_2.12/pom.xml
Original file line number Diff line number Diff line change
@@ -41,7 +41,18 @@
<artifactId>grpc-google-cloud-bigtable-admin-v2</artifactId>
<version>${bigtable.java.version}</version>
</dependency>

<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.15.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>2.15.2</version>
</dependency>
<!-- To fix the "NoClassDefFoundError: scala/util/parsing/json/JSON$" error with Spark 3.4. -->
<dependency>
<groupId>org.scala-lang.modules</groupId>
@@ -220,6 +231,11 @@
</transformers>
<artifactSet>
<excludes>
<!-- Exclude Bigtable related dependencies -->
<exclude>com.google.cloud:google-cloud-bigtable</exclude>
<!-- Exclude gRPC related dependencies -->
<exclude>com.google.api.grpc:*</exclude>
<!-- Existing exclusions for slf4j and reload4j -->
<exclude>org.slf4j:slf4j-reload4j</exclude>
<exclude>org.slf4j:slf4j-api</exclude>
<exclude>ch.qos.reload4j:reload4j</exclude>
@@ -230,7 +246,7 @@
<pattern>io.netty</pattern>
<shadedPattern>com.google.cloud.spark.bigtable.repackaged.io.netty</shadedPattern>
</relocation>
<relocation>
<!--<relocation>
<pattern>io.grpc</pattern>
<shadedPattern>com.google.cloud.spark.bigtable.repackaged.io.grpc</shadedPattern>
</relocation>
@@ -239,8 +255,9 @@
<shadedPattern>com.google.cloud.spark.bigtable.repackaged.com.google</shadedPattern>
<excludes>
<exclude>com.google.cloud.spark.bigtable.**</exclude>
<exclude>com.google.cloud.bigtable.**</exclude>
</excludes>
</relocation>
</relocation>-->
<relocation>
<pattern>io.openlineage.spark.shade</pattern>
<shadedPattern>com.google.cloud.spark.bigtable.repackaged.io.openlineage.spark.shade</shadedPattern>