From 7f0a8d2b0a486edd86646c84047c3a881b8660b8 Mon Sep 17 00:00:00 2001 From: Madhavan Date: Thu, 12 Sep 2024 09:18:03 -0400 Subject: [PATCH] Apply suggestions from code review --- RELEASE.md | 4 ++-- .../cdm/cql/statement/TargetUpsertRunDetailsStatement.java | 4 ++-- src/resources/cdm-detailed.properties | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 02c28e4f..74e10506 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,10 +1,10 @@ # Release Notes -## [4.3.10] - 2024-09-11 +## [4.3.10] - 2024-09-12 - Added property `spark.cdm.trackRun.runId` to support a custom unique identifier for the current run. This can be used by wrapper scripts to pass a known `runId` and then use it to query the `cdm_run_info` and `cdm_run_details` tables. ## [4.3.9] - 2024-09-11 - Added new `status` value of `DIFF_CORRECTED` on `cdm_run_details` table to specifically mark partitions that were corrected during the CDM validation run. -- Upgraded Validation job skip partitions with `DIFF_CORRECTED` status on rerun with a previous `runId`. +- Upgraded Validation job to skip partitions with `DIFF_CORRECTED` status on rerun with a previous `runId`. ## [4.3.8] - 2024-09-09 - Upgraded `spark.cdm.trackRun` feature to include `status` on `cdm_run_info` table. Also improved the code to handle rerun of previous run which may have exited before being correctly initialized. diff --git a/src/main/java/com/datastax/cdm/cql/statement/TargetUpsertRunDetailsStatement.java b/src/main/java/com/datastax/cdm/cql/statement/TargetUpsertRunDetailsStatement.java index 0b67078a..ea2b64e2 100644 --- a/src/main/java/com/datastax/cdm/cql/statement/TargetUpsertRunDetailsStatement.java +++ b/src/main/java/com/datastax/cdm/cql/statement/TargetUpsertRunDetailsStatement.java @@ -68,8 +68,8 @@ public TargetUpsertRunDetailsStatement(CqlSession session, String keyspaceTable) // ignore if column already exists logger.trace("Column 'status' already exists in table {}", cdmKsTabInfo); } - this.session.execute("create table if not exists " + cdmKsTabDetails - + " (table_name text, run_id bigint, start_time timestamp, token_min bigint, token_max bigint, status text, primary key ((table_name, run_id), token_min))"); + this.session.execute("CREATE TABLE IF NOT EXISTS " + cdmKsTabDetails + + " (table_name TEXT, run_id BIGINT, start_time TIMESTAMP, token_min BIGINT, token_max BIGINT, status TEXT, PRIMARY KEY ((table_name, run_id), token_min))"); boundInitInfoStatement = bindStatement("INSERT INTO " + cdmKsTabInfo + " (table_name, run_id, run_type, prev_run_id, start_time, status) VALUES (?, ?, ?, ?, dateof(now()), ?)"); diff --git a/src/resources/cdm-detailed.properties b/src/resources/cdm-detailed.properties index c334a7fb..500f579b 100644 --- a/src/resources/cdm-detailed.properties +++ b/src/resources/cdm-detailed.properties @@ -174,7 +174,7 @@ spark.cdm.autocorrect.missing.counter false # reasons including if the job get killed, or some token-ranges fail due to load on # the cluster (origin or target) or any other reasons. # .runId : Default is an auto generated unique long value. When a non-zero value is provided, -# it will be used a custom and unique identifier for the current run. Note the value +# it will be used as a custom and unique identifier for the current run. Note the value # of this id must be numeric and can be any java `long` unique value. This can be used # by wrapper scripts to pass a known `runId` and then use it to query the # `cdm_run_info` and `cdm_run_details` tables.