diff --git a/.github/workflows/Rest.yml b/.github/workflows/Rest.yml index 5f11464..11560e9 100644 --- a/.github/workflows/Rest.yml +++ b/.github/workflows/Rest.yml @@ -39,6 +39,10 @@ jobs: with: vcpkgGitCommitId: 5e5d0e1cd7785623065e77eff011afdeec1a3574 + - name: Setup Ccache + uses: hendrikmuhs/ccache-action@main + continue-on-error: true + - name: Build extension env: GEN: ninja @@ -47,12 +51,16 @@ jobs: make release - name: Start Rest Catalog - working-directory: scripts/ run: | - ./start-rest-catalog.sh + make start-rest-catalog + + - name: Generate data + run: | + make data - - name: Test With rest catalog + - name: Test with rest catalog env: ICEBERG_SERVER_AVAILABLE: 1 + DUCKDB_ICEBERG_HAVE_GENERATED_DATA: 1 run: | - make test_release \ No newline at end of file + make test_release diff --git a/.gitignore b/.gitignore index cb608ef..a9d9ef6 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,7 @@ data/iceberg/generated_* scripts/metastore_db/ scripts/derby.log scripts/test-script-with-path.sql +scripts/data_generators/__pycache__/ +scripts/data_generators/*/__pycache__/ +scripts/data_generators/*/*/*.parquet +data/generated/* diff --git a/Makefile b/Makefile index 8bb7b77..30b7abb 100644 --- a/Makefile +++ b/Makefile @@ -5,18 +5,23 @@ EXT_NAME=iceberg EXT_CONFIG=${PROJ_DIR}extension_config.cmake # We need this for testing -CORE_EXTENSIONS='httpfs' +CORE_EXTENSIONS='parquet;httpfs' # Include the Makefile from extension-ci-tools include extension-ci-tools/makefiles/duckdb_extension.Makefile +start-rest-catalog: install_requirements + ./scripts/start-rest-catalog.sh + +install_requirements: + python3 -m pip install -r scripts/requirements.txt + # Custom makefile targets -data: data_clean - python3 scripts/test_data_generator/generate_iceberg.py 0.001 data/iceberg/generated_spec1_0_001 1 - python3 scripts/test_data_generator/generate_iceberg.py 0.001 data/iceberg/generated_spec2_0_001 2 +data: data_clean start-rest-catalog + python3 
scripts/data_generators/generate_data.py data_large: data data_clean - python3 scripts/test_data_generator/generate_iceberg.py 1 data/iceberg/generated_spec2_1 2 + python3 scripts/data_generators/generate_data.py data_clean: - rm -rf data/iceberg/generated_* + rm -rf data/generated diff --git a/data/iceberg/generated_spec1_0_001/expected_results/last/count.csv b/data/iceberg/generated_spec1_0_001/expected_results/last/count.csv deleted file mode 100644 index 3b35899..0000000 --- a/data/iceberg/generated_spec1_0_001/expected_results/last/count.csv +++ /dev/null @@ -1,2 +0,0 @@ -count -7690 \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/expected_results/last/data/._SUCCESS.crc b/data/iceberg/generated_spec1_0_001/expected_results/last/data/._SUCCESS.crc deleted file mode 100644 index 3b7b044..0000000 Binary files a/data/iceberg/generated_spec1_0_001/expected_results/last/data/._SUCCESS.crc and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/expected_results/last/data/.part-00000-90484277-8f62-41f7-b073-a4c8999e393e-c000.snappy.parquet.crc b/data/iceberg/generated_spec1_0_001/expected_results/last/data/.part-00000-90484277-8f62-41f7-b073-a4c8999e393e-c000.snappy.parquet.crc deleted file mode 100644 index 62fa3b7..0000000 Binary files a/data/iceberg/generated_spec1_0_001/expected_results/last/data/.part-00000-90484277-8f62-41f7-b073-a4c8999e393e-c000.snappy.parquet.crc and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/expected_results/last/data/part-00000-90484277-8f62-41f7-b073-a4c8999e393e-c000.snappy.parquet b/data/iceberg/generated_spec1_0_001/expected_results/last/data/part-00000-90484277-8f62-41f7-b073-a4c8999e393e-c000.snappy.parquet deleted file mode 100644 index 54fb77d..0000000 Binary files a/data/iceberg/generated_spec1_0_001/expected_results/last/data/part-00000-90484277-8f62-41f7-b073-a4c8999e393e-c000.snappy.parquet and /dev/null differ diff --git 
a/data/iceberg/generated_spec1_0_001/expected_results/last/query.sql b/data/iceberg/generated_spec1_0_001/expected_results/last/query.sql deleted file mode 100644 index 6c7f91c..0000000 --- a/data/iceberg/generated_spec1_0_001/expected_results/last/query.sql +++ /dev/null @@ -1,3 +0,0 @@ --- The query executed at this step: -ALTER TABLE iceberg_catalog.pyspark_iceberg_table -ALTER COLUMN schema_evol_added_col_1 TYPE BIGINT; \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-1-bcb5bbb9-a993-41f7-95e1-09e0c2475f4a-00001.parquet b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-1-bcb5bbb9-a993-41f7-95e1-09e0c2475f4a-00001.parquet deleted file mode 100644 index 97ac7e8..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-1-bcb5bbb9-a993-41f7-95e1-09e0c2475f4a-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-14-db28d572-b7de-4568-9b00-8c55c69cb179-00001.parquet b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-14-db28d572-b7de-4568-9b00-8c55c69cb179-00001.parquet deleted file mode 100644 index 9b8b181..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-14-db28d572-b7de-4568-9b00-8c55c69cb179-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-26-f6481588-9ba4-4a7b-b3dd-f188d41fa5b8-00001.parquet b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-26-f6481588-9ba4-4a7b-b3dd-f188d41fa5b8-00001.parquet deleted file mode 100644 index 3755277..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-26-f6481588-9ba4-4a7b-b3dd-f188d41fa5b8-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-36-cf35a788-d8c2-4ded-a9f7-5239797e80b8-00001.parquet 
b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-36-cf35a788-d8c2-4ded-a9f7-5239797e80b8-00001.parquet deleted file mode 100644 index facc504..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-36-cf35a788-d8c2-4ded-a9f7-5239797e80b8-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-5-bd5417f5-f28c-46b1-b1ab-39ee9c191368-00001.parquet b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-5-bd5417f5-f28c-46b1-b1ab-39ee9c191368-00001.parquet deleted file mode 100644 index 5998bc2..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-5-bd5417f5-f28c-46b1-b1ab-39ee9c191368-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-9-6cdc0135-4256-4772-8c3e-3f4803ded842-00001.parquet b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-9-6cdc0135-4256-4772-8c3e-3f4803ded842-00001.parquet deleted file mode 100644 index 8f8a093..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/data/00000-9-6cdc0135-4256-4772-8c3e-3f4803ded842-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/0f120788-1926-4605-a1ab-450f4cf3ccee-m0.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/0f120788-1926-4605-a1ab-450f4cf3ccee-m0.avro deleted file mode 100644 index 90e2897..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/0f120788-1926-4605-a1ab-450f4cf3ccee-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/0f120788-1926-4605-a1ab-450f4cf3ccee-m1.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/0f120788-1926-4605-a1ab-450f4cf3ccee-m1.avro deleted file mode 100644 index 584671d..0000000 Binary files 
a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/0f120788-1926-4605-a1ab-450f4cf3ccee-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/4aa40041-ccc6-4e64-a9ab-366875aafd63-m0.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/4aa40041-ccc6-4e64-a9ab-366875aafd63-m0.avro deleted file mode 100644 index 7dd57c8..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/4aa40041-ccc6-4e64-a9ab-366875aafd63-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/4aa40041-ccc6-4e64-a9ab-366875aafd63-m1.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/4aa40041-ccc6-4e64-a9ab-366875aafd63-m1.avro deleted file mode 100644 index 80355e7..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/4aa40041-ccc6-4e64-a9ab-366875aafd63-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/7723fb1b-ae48-49de-9e77-cd7945667cb9-m0.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/7723fb1b-ae48-49de-9e77-cd7945667cb9-m0.avro deleted file mode 100644 index 54b6edf..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/7723fb1b-ae48-49de-9e77-cd7945667cb9-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/c091e891-ac3a-4429-be9a-e63f1ed63b99-m0.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/c091e891-ac3a-4429-be9a-e63f1ed63b99-m0.avro deleted file mode 100644 index 2974743..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/c091e891-ac3a-4429-be9a-e63f1ed63b99-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/c091e891-ac3a-4429-be9a-e63f1ed63b99-m1.avro 
b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/c091e891-ac3a-4429-be9a-e63f1ed63b99-m1.avro deleted file mode 100644 index 2ecadc1..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/c091e891-ac3a-4429-be9a-e63f1ed63b99-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m0.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m0.avro deleted file mode 100644 index 925a068..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m1.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m1.avro deleted file mode 100644 index 3666460..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m2.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m2.avro deleted file mode 100644 index feb60ea..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/d76c5203-4f0a-46ef-a293-268e0afec64b-m2.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/e3febcc2-7f11-44b9-80af-571fb1c0463a-m0.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/e3febcc2-7f11-44b9-80af-571fb1c0463a-m0.avro deleted file mode 100644 index b812dca..0000000 Binary files 
a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/e3febcc2-7f11-44b9-80af-571fb1c0463a-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro deleted file mode 100644 index 9a7b32f..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro deleted file mode 100644 index 45cc87a..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4407328776463037310-1-c091e891-ac3a-4429-be9a-e63f1ed63b99.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4407328776463037310-1-c091e891-ac3a-4429-be9a-e63f1ed63b99.avro deleted file mode 100644 index c0715d7..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4407328776463037310-1-c091e891-ac3a-4429-be9a-e63f1ed63b99.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro deleted file mode 100644 index e6edc42..0000000 Binary files 
a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro deleted file mode 100644 index be74220..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro deleted file mode 100644 index fde9231..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro deleted file mode 100644 index 63e2d76..0000000 Binary files a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro and /dev/null differ diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json deleted file mode 100644 index ec7f17f..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json +++ 
/dev/null @@ -1,217 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580919873, - "last-column-id" : 15, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - 
"required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 9145725745960929259, - "refs" : { - "main" : { - "snapshot-id" : 9145725745960929259, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" 
: "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - } ], - "metadata-log" : [ ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json deleted file mode 100644 index 8eb6b17..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json +++ /dev/null @@ -1,246 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580920785, - "last-column-id" : 15, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - 
"required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - 
"name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 8671490307245765264, - "refs" : { - "main" : { - "snapshot-id" : 8671490307245765264, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - 
"total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json deleted file mode 100644 index ae141f5..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json +++ /dev/null @@ -1,272 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580921348, - "last-column-id" : 15, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" 
: false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : 
"l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 4543110679664799316, - "refs" : { - "main" : { - "snapshot-id" : 4543110679664799316, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" 
: "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json deleted file mode 100644 index b44942a..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json +++ /dev/null @@ -1,301 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : 
"data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580921764, - "last-column-id" : 15, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - 
"id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 6238750566879819059, - "refs" : { - "main" : { - "snapshot-id" : 6238750566879819059, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : 
"440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 6238750566879819059, - "parent-snapshot-id" : 4543110679664799316, - "timestamp-ms" : 1719580921764, - "summary" : { - "operation" : "overwrite", - 
"spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "2", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "473445", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - }, { - "timestamp-ms" : 1719580921764, - "snapshot-id" : 6238750566879819059 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580921348, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json deleted file mode 100644 index 1aabc15..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json +++ /dev/null @@ -1,324 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - 
"last-updated-ms" : 1719580922113, - "last-column-id" : 15, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : 
false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 2276968461870063565, - "refs" : { - "main" : { - "snapshot-id" : 2276968461870063565, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : 
"0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 6238750566879819059, - "parent-snapshot-id" : 4543110679664799316, - "timestamp-ms" : 1719580921764, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - 
"deleted-data-files" : "2", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "473445", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 2276968461870063565, - "parent-snapshot-id" : 6238750566879819059, - "timestamp-ms" : 1719580922113, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "changed-partition-count" : "0", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - }, { - "timestamp-ms" : 1719580921764, - "snapshot-id" : 6238750566879819059 - }, { - "timestamp-ms" : 1719580922113, - "snapshot-id" : 2276968461870063565 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580921348, - "metadata-file" : 
"data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580921764, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v6.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v6.metadata.json deleted file mode 100644 index 9162f1b..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v6.metadata.json +++ /dev/null @@ -1,353 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580922559, - "last-column-id" : 15, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : 
"l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : 
[ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 1692767036460164714, - "refs" : { - "main" : { - "snapshot-id" : 1692767036460164714, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : 
"append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 6238750566879819059, - "parent-snapshot-id" : 4543110679664799316, - "timestamp-ms" : 1719580921764, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "2", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "473445", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 2276968461870063565, - "parent-snapshot-id" : 6238750566879819059, - "timestamp-ms" : 1719580922113, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "changed-partition-count" : "0", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 
1692767036460164714, - "parent-snapshot-id" : 2276968461870063565, - "timestamp-ms" : 1719580922559, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "399010", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - }, { - "timestamp-ms" : 1719580921764, - "snapshot-id" : 6238750566879819059 - }, { - "timestamp-ms" : 1719580922113, - "snapshot-id" : 2276968461870063565 - }, { - "timestamp-ms" : 1719580922559, - "snapshot-id" : 1692767036460164714 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580921348, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580921764, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580922113, - "metadata-file" : 
"data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v7.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v7.metadata.json deleted file mode 100644 index 744af3d..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v7.metadata.json +++ /dev/null @@ -1,445 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580922734, - "last-column-id" : 16, - "schema" : { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - 
"type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - }, - "current-schema-id" : 1, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, 
- "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 1692767036460164714, - "refs" : { - "main" : { - "snapshot-id" : 1692767036460164714, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - 
"summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : 
"data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 6238750566879819059, - "parent-snapshot-id" : 4543110679664799316, - "timestamp-ms" : 1719580921764, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "2", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "473445", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 2276968461870063565, - "parent-snapshot-id" : 6238750566879819059, - "timestamp-ms" : 1719580922113, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "changed-partition-count" : "0", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 1692767036460164714, - "parent-snapshot-id" : 2276968461870063565, - "timestamp-ms" : 1719580922559, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "399010", - "changed-partition-count" : 
"1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - }, { - "timestamp-ms" : 1719580921764, - "snapshot-id" : 6238750566879819059 - }, { - "timestamp-ms" : 1719580922113, - "snapshot-id" : 2276968461870063565 - }, { - "timestamp-ms" : 1719580922559, - "snapshot-id" : 1692767036460164714 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580921348, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580921764, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580922113, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - }, { - "timestamp-ms" : 1719580922559, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v6.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v8.metadata.json 
b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v8.metadata.json deleted file mode 100644 index ede9cca..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v8.metadata.json +++ /dev/null @@ -1,474 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580923120, - "last-column-id" : 16, - "schema" : { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - 
"name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - }, - "current-schema-id" : 1, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - 
"required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 4407328776463037310, - "refs" : { - "main" : { - "snapshot-id" : 4407328776463037310, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - 
"total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 6238750566879819059, - "parent-snapshot-id" : 4543110679664799316, - "timestamp-ms" : 1719580921764, - 
"summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "2", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "473445", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 2276968461870063565, - "parent-snapshot-id" : 6238750566879819059, - "timestamp-ms" : 1719580922113, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "changed-partition-count" : "0", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 1692767036460164714, - "parent-snapshot-id" : 2276968461870063565, - "timestamp-ms" : 1719580922559, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "399010", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : 
"data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4407328776463037310, - "parent-snapshot-id" : 1692767036460164714, - "timestamp-ms" : 1719580923120, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "400831", - "removed-files-size" : "399010", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "400831", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4407328776463037310-1-c091e891-ac3a-4429-be9a-e63f1ed63b99.avro", - "schema-id" : 1 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - }, { - "timestamp-ms" : 1719580921764, - "snapshot-id" : 6238750566879819059 - }, { - "timestamp-ms" : 1719580922113, - "snapshot-id" : 2276968461870063565 - }, { - "timestamp-ms" : 1719580922559, - "snapshot-id" : 1692767036460164714 - }, { - "timestamp-ms" : 1719580923120, - "snapshot-id" : 4407328776463037310 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580921348, - "metadata-file" : 
"data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580921764, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580922113, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - }, { - "timestamp-ms" : 1719580922559, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v6.metadata.json" - }, { - "timestamp-ms" : 1719580922734, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v7.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v9.metadata.json b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v9.metadata.json deleted file mode 100644 index 6d60244..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v9.metadata.json +++ /dev/null @@ -1,561 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "2e23a4d3-2f64-47ac-aad6-f37df92836a1", - "location" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table", - "last-updated-ms" : 1719580923295, - "last-column-id" : 16, - "schema" : { - "type" : "struct", - "schema-id" : 2, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : 
"decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "long" - } ] - }, - "current-schema-id" : 2, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - 
"required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" 
: "int" - } ] - }, { - "type" : "struct", - "schema-id" : 2, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "long" - } ] - } ], - "partition-spec" : [ ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd" - }, - "current-snapshot-id" : 4407328776463037310, - 
"refs" : { - "main" : { - "snapshot-id" : 4407328776463037310, - "type" : "branch" - } - }, - "snapshots" : [ { - "snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580919873, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440845", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-9145725745960929259-1-7723fb1b-ae48-49de-9e77-cd7945667cb9.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 8671490307245765264, - "parent-snapshot-id" : 9145725745960929259, - "timestamp-ms" : 1719580920785, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6005", - "deleted-records" : "6005", - "added-files-size" : "340114", - "removed-files-size" : "440845", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "340114", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-8671490307245765264-1-4aa40041-ccc6-4e64-a9ab-366875aafd63.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4543110679664799316, - "parent-snapshot-id" : 8671490307245765264, - "timestamp-ms" : 1719580921348, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133331", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "473445", - "total-data-files" : "2", - 
"total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4543110679664799316-1-e3febcc2-7f11-44b9-80af-571fb1c0463a.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 6238750566879819059, - "parent-snapshot-id" : 4543110679664799316, - "timestamp-ms" : 1719580921764, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "2", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "473445", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-6238750566879819059-1-d76c5203-4f0a-46ef-a293-268e0afec64b.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 2276968461870063565, - "parent-snapshot-id" : 6238750566879819059, - "timestamp-ms" : 1719580922113, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "changed-partition-count" : "0", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-2276968461870063565-1-5ee46b42-10e4-401d-8f61-2bd3b5ebb548.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 1692767036460164714, - "parent-snapshot-id" : 2276968461870063565, - "timestamp-ms" : 1719580922559, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "7690", - 
"deleted-records" : "7690", - "added-files-size" : "399010", - "removed-files-size" : "399010", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "399010", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-1692767036460164714-1-0f120788-1926-4605-a1ab-450f4cf3ccee.avro", - "schema-id" : 0 - }, { - "snapshot-id" : 4407328776463037310, - "parent-snapshot-id" : 1692767036460164714, - "timestamp-ms" : 1719580923120, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580917302", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "7690", - "deleted-records" : "7690", - "added-files-size" : "400831", - "removed-files-size" : "399010", - "changed-partition-count" : "1", - "total-records" : "7690", - "total-files-size" : "400831", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/snap-4407328776463037310-1-c091e891-ac3a-4429-be9a-e63f1ed63b99.avro", - "schema-id" : 1 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580919873, - "snapshot-id" : 9145725745960929259 - }, { - "timestamp-ms" : 1719580920785, - "snapshot-id" : 8671490307245765264 - }, { - "timestamp-ms" : 1719580921348, - "snapshot-id" : 4543110679664799316 - }, { - "timestamp-ms" : 1719580921764, - "snapshot-id" : 6238750566879819059 - }, { - "timestamp-ms" : 1719580922113, - "snapshot-id" : 2276968461870063565 - }, { - "timestamp-ms" : 1719580922559, - "snapshot-id" : 1692767036460164714 - }, { - "timestamp-ms" : 1719580923120, - "snapshot-id" : 4407328776463037310 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580919873, - "metadata-file" : 
"data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580920785, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580921348, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580921764, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580922113, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - }, { - "timestamp-ms" : 1719580922559, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v6.metadata.json" - }, { - "timestamp-ms" : 1719580922734, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v7.metadata.json" - }, { - "timestamp-ms" : 1719580923120, - "metadata-file" : "data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v8.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/version-hint.text b/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/version-hint.text deleted file mode 100644 index f11c82a..0000000 --- a/data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/version-hint.text +++ /dev/null @@ -1 +0,0 @@ -9 \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/count.csv b/data/iceberg/generated_spec2_0_001/expected_results/last/count.csv deleted file mode 100644 index 1221195..0000000 --- a/data/iceberg/generated_spec2_0_001/expected_results/last/count.csv +++ /dev/null @@ -1,2 +0,0 @@ -count -6592 \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/data/._SUCCESS.crc 
b/data/iceberg/generated_spec2_0_001/expected_results/last/data/._SUCCESS.crc deleted file mode 100644 index 3b7b044..0000000 Binary files a/data/iceberg/generated_spec2_0_001/expected_results/last/data/._SUCCESS.crc and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/data/.part-00000-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet.crc b/data/iceberg/generated_spec2_0_001/expected_results/last/data/.part-00000-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet.crc deleted file mode 100644 index a0d706d..0000000 Binary files a/data/iceberg/generated_spec2_0_001/expected_results/last/data/.part-00000-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet.crc and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/data/.part-00001-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet.crc b/data/iceberg/generated_spec2_0_001/expected_results/last/data/.part-00001-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet.crc deleted file mode 100644 index 2f43db6..0000000 Binary files a/data/iceberg/generated_spec2_0_001/expected_results/last/data/.part-00001-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet.crc and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/data/part-00000-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet b/data/iceberg/generated_spec2_0_001/expected_results/last/data/part-00000-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet deleted file mode 100644 index 53283bf..0000000 Binary files a/data/iceberg/generated_spec2_0_001/expected_results/last/data/part-00000-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/data/part-00001-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet 
b/data/iceberg/generated_spec2_0_001/expected_results/last/data/part-00001-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet deleted file mode 100644 index 6d6fe6d..0000000 Binary files a/data/iceberg/generated_spec2_0_001/expected_results/last/data/part-00001-5aa13b42-2ef5-483d-a6a0-f5bf9eac87c4-c000.snappy.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/expected_results/last/query.sql b/data/iceberg/generated_spec2_0_001/expected_results/last/query.sql deleted file mode 100644 index 6c7f91c..0000000 --- a/data/iceberg/generated_spec2_0_001/expected_results/last/query.sql +++ /dev/null @@ -1,3 +0,0 @@ --- The query executed at this step: -ALTER TABLE iceberg_catalog.pyspark_iceberg_table -ALTER COLUMN schema_evol_added_col_1 TYPE BIGINT; \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-1-3e88ec3a-0596-440f-9ce6-3debf172be49-00001.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-1-3e88ec3a-0596-440f-9ce6-3debf172be49-00001.parquet deleted file mode 100644 index 6837339..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-1-3e88ec3a-0596-440f-9ce6-3debf172be49-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-12-ac52ac46-8deb-43f9-b745-e7c078928b7a-00001-deletes.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-12-ac52ac46-8deb-43f9-b745-e7c078928b7a-00001-deletes.parquet deleted file mode 100644 index 4d44760..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-12-ac52ac46-8deb-43f9-b745-e7c078928b7a-00001-deletes.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-12-ac52ac46-8deb-43f9-b745-e7c078928b7a-00001.parquet 
b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-12-ac52ac46-8deb-43f9-b745-e7c078928b7a-00001.parquet deleted file mode 100644 index b3c12c7..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-12-ac52ac46-8deb-43f9-b745-e7c078928b7a-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-24-3a7a66b3-bd3a-4417-b6a9-45cb309eddc2-00001.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-24-3a7a66b3-bd3a-4417-b6a9-45cb309eddc2-00001.parquet deleted file mode 100644 index 98c2d49..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-24-3a7a66b3-bd3a-4417-b6a9-45cb309eddc2-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-3-1c142ffe-c3f5-4089-9820-f2a530d50754-00001-deletes.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-3-1c142ffe-c3f5-4089-9820-f2a530d50754-00001-deletes.parquet deleted file mode 100644 index 47329b2..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-3-1c142ffe-c3f5-4089-9820-f2a530d50754-00001-deletes.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-3-1c142ffe-c3f5-4089-9820-f2a530d50754-00001.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-3-1c142ffe-c3f5-4089-9820-f2a530d50754-00001.parquet deleted file mode 100644 index 979ad92..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-3-1c142ffe-c3f5-4089-9820-f2a530d50754-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-46-08e25db5-5199-4416-8916-bfb07212b1fb-00001-deletes.parquet 
b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-46-08e25db5-5199-4416-8916-bfb07212b1fb-00001-deletes.parquet deleted file mode 100644 index 99aafa1..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-46-08e25db5-5199-4416-8916-bfb07212b1fb-00001-deletes.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-46-08e25db5-5199-4416-8916-bfb07212b1fb-00001.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-46-08e25db5-5199-4416-8916-bfb07212b1fb-00001.parquet deleted file mode 100644 index c242949..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-46-08e25db5-5199-4416-8916-bfb07212b1fb-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-7-3be35a72-224f-475b-a0eb-34cea92784b4-00001.parquet b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-7-3be35a72-224f-475b-a0eb-34cea92784b4-00001.parquet deleted file mode 100644 index 285069d..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/data/00000-7-3be35a72-224f-475b-a0eb-34cea92784b4-00001.parquet and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/26871791-3133-4757-9cbc-b356c613c83a-m0.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/26871791-3133-4757-9cbc-b356c613c83a-m0.avro deleted file mode 100644 index a922251..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/26871791-3133-4757-9cbc-b356c613c83a-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/355a32d2-0d4f-4da3-8019-f0b782863350-m0.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/355a32d2-0d4f-4da3-8019-f0b782863350-m0.avro deleted file mode 100644 index f0b73f1..0000000 Binary 
files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/355a32d2-0d4f-4da3-8019-f0b782863350-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/355a32d2-0d4f-4da3-8019-f0b782863350-m1.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/355a32d2-0d4f-4da3-8019-f0b782863350-m1.avro deleted file mode 100644 index d7b9a64..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/355a32d2-0d4f-4da3-8019-f0b782863350-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/7c6f85be-3a33-4e3a-817d-7839fa44ff07-m0.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/7c6f85be-3a33-4e3a-817d-7839fa44ff07-m0.avro deleted file mode 100644 index 19d600f..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/7c6f85be-3a33-4e3a-817d-7839fa44ff07-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/7c6f85be-3a33-4e3a-817d-7839fa44ff07-m1.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/7c6f85be-3a33-4e3a-817d-7839fa44ff07-m1.avro deleted file mode 100644 index c8d664d..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/7c6f85be-3a33-4e3a-817d-7839fa44ff07-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/9ae37730-f1aa-4609-8b39-3f0ded6f78cf-m0.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/9ae37730-f1aa-4609-8b39-3f0ded6f78cf-m0.avro deleted file mode 100644 index 92c4470..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/9ae37730-f1aa-4609-8b39-3f0ded6f78cf-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/b467c132-3bea-404a-ae0f-54ef5a4fbd1f-m0.avro 
b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/b467c132-3bea-404a-ae0f-54ef5a4fbd1f-m0.avro deleted file mode 100644 index e28081d..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/b467c132-3bea-404a-ae0f-54ef5a4fbd1f-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/b467c132-3bea-404a-ae0f-54ef5a4fbd1f-m1.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/b467c132-3bea-404a-ae0f-54ef5a4fbd1f-m1.avro deleted file mode 100644 index 9101316..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/b467c132-3bea-404a-ae0f-54ef5a4fbd1f-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/c958489b-0a9b-4c1a-b254-f7162a3fbd6b-m0.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/c958489b-0a9b-4c1a-b254-f7162a3fbd6b-m0.avro deleted file mode 100644 index 9a6b6d5..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/c958489b-0a9b-4c1a-b254-f7162a3fbd6b-m0.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/c958489b-0a9b-4c1a-b254-f7162a3fbd6b-m1.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/c958489b-0a9b-4c1a-b254-f7162a3fbd6b-m1.avro deleted file mode 100644 index 31716e1..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/c958489b-0a9b-4c1a-b254-f7162a3fbd6b-m1.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro deleted file mode 100644 index fafc511..0000000 Binary files 
a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro deleted file mode 100644 index 3df6c65..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro deleted file mode 100644 index f4da2fc..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4786266686210019019-1-7c6f85be-3a33-4e3a-817d-7839fa44ff07.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4786266686210019019-1-7c6f85be-3a33-4e3a-817d-7839fa44ff07.avro deleted file mode 100644 index 0da1497..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4786266686210019019-1-7c6f85be-3a33-4e3a-817d-7839fa44ff07.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro deleted file mode 100644 index 
8f34f30..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro deleted file mode 100644 index 3e9ee6b..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro deleted file mode 100644 index aab9ece..0000000 Binary files a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro and /dev/null differ diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json deleted file mode 100644 index 3c9acfc..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json +++ /dev/null @@ -1,139 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 1, - "last-updated-ms" : 1719580927570, - "last-column-id" : 15, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, 
- "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 764624380497366583, - "refs" : { - "main" : { - "snapshot-id" : 764624380497366583, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : 
"local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - } ], - "metadata-log" : [ ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json deleted file mode 100644 index bae2e24..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json +++ /dev/null @@ -1,169 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 2, - "last-updated-ms" : 1719580928275, - "last-column-id" : 15, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : 
"l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 4037069315291880534, - "refs" : { - "main" : { - "snapshot-id" : 4037069315291880534, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : 
"data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json deleted file mode 100644 index 7db94f1..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json +++ /dev/null @@ -1,196 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 3, - "last-updated-ms" : 1719580929047, - "last-column-id" : 
15, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 6287117141668015642, - "refs" : { - "main" : { - "snapshot-id" : 
6287117141668015642, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", 
- "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json deleted file mode 100644 index edcbb4b..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json +++ /dev/null @@ -1,226 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 4, - "last-updated-ms" : 1719580929661, - "last-column-id" : 15, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" 
- }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 6585012225877417653, - "refs" : { - "main" : { - "snapshot-id" : 6585012225877417653, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - 
"total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - }, { - "sequence-number" : 4, - "snapshot-id" : 6585012225877417653, - "parent-snapshot-id" : 6287117141668015642, - "timestamp-ms" : 1719580929661, - 
"summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "7690", - "added-files-size" : "410506", - "added-position-deletes" : "7690", - "changed-partition-count" : "1", - "total-records" : "18457", - "total-files-size" : "1099441", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - }, { - "timestamp-ms" : 1719580929661, - "snapshot-id" : 6585012225877417653 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580929047, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json deleted file mode 100644 index 61b94fc..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json +++ /dev/null @@ -1,256 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : 
"data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 5, - "last-updated-ms" : 1719580930402, - "last-column-id" : 15, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - 
"write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 4440319347650982524, - "refs" : { - "main" : { - "snapshot-id" : 4440319347650982524, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : 
"1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - }, { - "sequence-number" : 4, - "snapshot-id" : 6585012225877417653, - "parent-snapshot-id" : 6287117141668015642, - "timestamp-ms" : 1719580929661, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "7690", - "added-files-size" : "410506", - "added-position-deletes" : "7690", - "changed-partition-count" : "1", - "total-records" : "18457", - "total-files-size" : "1099441", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro", - "schema-id" : 0 - }, { - "sequence-number" : 5, - "snapshot-id" : 4440319347650982524, - "parent-snapshot-id" : 6585012225877417653, - "timestamp-ms" : 1719580930402, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6592", - "deleted-records" : "7690", - "added-files-size" : "333848", - "removed-files-size" : "388851", - "changed-partition-count" : "1", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : 
"data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - }, { - "timestamp-ms" : 1719580929661, - "snapshot-id" : 6585012225877417653 - }, { - "timestamp-ms" : 1719580930402, - "snapshot-id" : 4440319347650982524 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580929047, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580929661, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json deleted file mode 100644 index 2f8a8fb..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json +++ /dev/null @@ -1,280 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 6, - "last-updated-ms" : 1719580930749, - "last-column-id" : 15, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : 
false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 3119545726281138740, - "refs" : { - "main" : { - "snapshot-id" : 3119545726281138740, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : 
{ - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : 
"data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - }, { - "sequence-number" : 4, - "snapshot-id" : 6585012225877417653, - "parent-snapshot-id" : 6287117141668015642, - "timestamp-ms" : 1719580929661, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "7690", - "added-files-size" : "410506", - "added-position-deletes" : "7690", - "changed-partition-count" : "1", - "total-records" : "18457", - "total-files-size" : "1099441", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro", - "schema-id" : 0 - }, { - "sequence-number" : 5, - "snapshot-id" : 4440319347650982524, - "parent-snapshot-id" : 6585012225877417653, - "timestamp-ms" : 1719580930402, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6592", - "deleted-records" : "7690", - "added-files-size" : "333848", - "removed-files-size" : "388851", - "changed-partition-count" : "1", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro", - "schema-id" : 0 - }, { - "sequence-number" : 6, - "snapshot-id" : 3119545726281138740, - "parent-snapshot-id" : 4440319347650982524, - "timestamp-ms" : 1719580930749, - "summary" : { - 
"operation" : "delete", - "spark.app.id" : "local-1719580924876", - "changed-partition-count" : "0", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - }, { - "timestamp-ms" : 1719580929661, - "snapshot-id" : 6585012225877417653 - }, { - "timestamp-ms" : 1719580930402, - "snapshot-id" : 4440319347650982524 - }, { - "timestamp-ms" : 1719580930749, - "snapshot-id" : 3119545726281138740 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580929047, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580929661, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580930402, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v7.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v7.metadata.json deleted file mode 100644 index 
7dd8062..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v7.metadata.json +++ /dev/null @@ -1,367 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 6, - "last-updated-ms" : 1719580930997, - "last-column-id" : 16, - "current-schema-id" : 1, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - 
"name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 3119545726281138740, - "refs" : { - "main" : { - "snapshot-id" : 3119545726281138740, - "type" : "branch" 
- } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - 
"total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - }, { - "sequence-number" : 4, - "snapshot-id" : 6585012225877417653, - "parent-snapshot-id" : 6287117141668015642, - "timestamp-ms" : 1719580929661, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "7690", - "added-files-size" : "410506", - "added-position-deletes" : "7690", - "changed-partition-count" : "1", - "total-records" : "18457", - "total-files-size" : "1099441", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro", - "schema-id" : 0 - }, { - "sequence-number" : 5, - "snapshot-id" : 4440319347650982524, - "parent-snapshot-id" : 6585012225877417653, - "timestamp-ms" : 1719580930402, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6592", - "deleted-records" : "7690", - "added-files-size" : "333848", - "removed-files-size" : "388851", - "changed-partition-count" : "1", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro", - "schema-id" : 0 - }, { - "sequence-number" : 6, - "snapshot-id" : 
3119545726281138740, - "parent-snapshot-id" : 4440319347650982524, - "timestamp-ms" : 1719580930749, - "summary" : { - "operation" : "delete", - "spark.app.id" : "local-1719580924876", - "changed-partition-count" : "0", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro", - "schema-id" : 0 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - }, { - "timestamp-ms" : 1719580929661, - "snapshot-id" : 6585012225877417653 - }, { - "timestamp-ms" : 1719580930402, - "snapshot-id" : 4440319347650982524 - }, { - "timestamp-ms" : 1719580930749, - "snapshot-id" : 3119545726281138740 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580929047, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580929661, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580930402, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - }, { - "timestamp-ms" : 1719580930749, - "metadata-file" : 
"data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v8.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v8.metadata.json deleted file mode 100644 index 56d07a9..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v8.metadata.json +++ /dev/null @@ -1,397 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - "last-sequence-number" : 7, - "last-updated-ms" : 1719580931465, - "last-column-id" : 16, - "current-schema-id" : 1, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" 
: 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - 
"default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 4786266686210019019, - "refs" : { - "main" : { - "snapshot-id" : 4786266686210019019, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 
4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - }, { - "sequence-number" : 4, - "snapshot-id" : 6585012225877417653, - "parent-snapshot-id" : 6287117141668015642, - "timestamp-ms" : 1719580929661, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "7690", - "added-files-size" : "410506", - "added-position-deletes" : "7690", - "changed-partition-count" : "1", - "total-records" : "18457", - "total-files-size" : "1099441", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro", - "schema-id" : 0 - }, { - "sequence-number" : 5, - "snapshot-id" : 4440319347650982524, - "parent-snapshot-id" : 6585012225877417653, - "timestamp-ms" : 1719580930402, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6592", - "deleted-records" : "7690", - "added-files-size" : "333848", - "removed-files-size" : "388851", - "changed-partition-count" : "1", - "total-records" : "17359", - "total-files-size" : "1044438", - 
"total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro", - "schema-id" : 0 - }, { - "sequence-number" : 6, - "snapshot-id" : 3119545726281138740, - "parent-snapshot-id" : 4440319347650982524, - "timestamp-ms" : 1719580930749, - "summary" : { - "operation" : "delete", - "spark.app.id" : "local-1719580924876", - "changed-partition-count" : "0", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro", - "schema-id" : 0 - }, { - "sequence-number" : 7, - "snapshot-id" : 4786266686210019019, - "parent-snapshot-id" : 3119545726281138740, - "timestamp-ms" : 1719580931465, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "685", - "added-files-size" : "51653", - "added-position-deletes" : "685", - "changed-partition-count" : "1", - "total-records" : "18044", - "total-files-size" : "1096091", - "total-data-files" : "5", - "total-delete-files" : "3", - "total-position-deletes" : "11452", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4786266686210019019-1-7c6f85be-3a33-4e3a-817d-7839fa44ff07.avro", - "schema-id" : 1 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - 
"timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - }, { - "timestamp-ms" : 1719580929661, - "snapshot-id" : 6585012225877417653 - }, { - "timestamp-ms" : 1719580930402, - "snapshot-id" : 4440319347650982524 - }, { - "timestamp-ms" : 1719580930749, - "snapshot-id" : 3119545726281138740 - }, { - "timestamp-ms" : 1719580931465, - "snapshot-id" : 4786266686210019019 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580929047, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580929661, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580930402, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - }, { - "timestamp-ms" : 1719580930749, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json" - }, { - "timestamp-ms" : 1719580930997, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v7.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json deleted file mode 100644 index 85c01bd..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json +++ /dev/null @@ -1,484 +0,0 @@ -{ - "format-version" : 2, - "table-uuid" : "7c10a28a-8931-4e12-8142-0befc8b0eed7", - "location" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table", - 
"last-sequence-number" : 7, - "last-updated-ms" : 1719580931691, - "last-column-id" : 16, - "current-schema-id" : 2, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - } ] - }, { - "type" : "struct", - "schema-id" : 1, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : 
"l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : "l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "int" - } ] - }, { - "type" : "struct", - "schema-id" : 2, - "fields" : [ { - "id" : 1, - "name" : "l_orderkey_bool", - "required" : false, - "type" : "boolean" - }, { - "id" : 2, - "name" : "l_partkey_int", - "required" : false, - "type" : "int" - }, { - "id" : 3, - "name" : "l_suppkey_long", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "l_extendedprice_float", - "required" : false, - "type" : "float" - }, { - "id" : 5, - "name" : "l_extendedprice_double", - "required" : false, - "type" : "double" - }, { - "id" : 6, - "name" : "l_extendedprice_dec9_2", - "required" : false, - "type" : "decimal(9, 2)" - }, { - "id" : 7, - "name" : "l_extendedprice_dec18_6", - "required" : false, - "type" : "decimal(18, 6)" - }, { - "id" : 8, - "name" : 
"l_extendedprice_dec38_10", - "required" : false, - "type" : "decimal(38, 10)" - }, { - "id" : 9, - "name" : "l_shipdate_date", - "required" : false, - "type" : "date" - }, { - "id" : 10, - "name" : "l_partkey_time", - "required" : false, - "type" : "int" - }, { - "id" : 11, - "name" : "l_commitdate_timestamp", - "required" : false, - "type" : "timestamp" - }, { - "id" : 12, - "name" : "l_commitdate_timestamp_tz", - "required" : false, - "type" : "timestamptz" - }, { - "id" : 13, - "name" : "l_comment_string", - "required" : false, - "type" : "string" - }, { - "id" : 14, - "name" : "uuid", - "required" : false, - "type" : "string" - }, { - "id" : 15, - "name" : "l_comment_blob", - "required" : false, - "type" : "binary" - }, { - "id" : 16, - "name" : "schema_evol_added_col_1", - "required" : false, - "type" : "long" - } ] - } ], - "default-spec-id" : 0, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ ] - } ], - "last-partition-id" : 999, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "peter", - "write.parquet.compression-codec" : "zstd", - "write.update.mode" : "merge-on-read" - }, - "current-snapshot-id" : 4786266686210019019, - "refs" : { - "main" : { - "snapshot-id" : 4786266686210019019, - "type" : "branch" - } - }, - "snapshots" : [ { - "sequence-number" : 1, - "snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580927570, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "6005", - "added-files-size" : "440835", - "changed-partition-count" : "1", - "total-records" : "6005", - "total-files-size" : "440835", - "total-data-files" : "1", - "total-delete-files" : "0", - "total-position-deletes" : "0", - "total-equality-deletes" : "0" - }, - "manifest-list" : 
"data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-764624380497366583-1-26871791-3133-4757-9cbc-b356c613c83a.avro", - "schema-id" : 0 - }, { - "sequence-number" : 2, - "snapshot-id" : 4037069315291880534, - "parent-snapshot-id" : 764624380497366583, - "timestamp-ms" : 1719580928275, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "3077", - "added-files-size" : "114786", - "added-position-deletes" : "3077", - "changed-partition-count" : "1", - "total-records" : "9082", - "total-files-size" : "555621", - "total-data-files" : "2", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4037069315291880534-1-c958489b-0a9b-4c1a-b254-f7162a3fbd6b.avro", - "schema-id" : 0 - }, { - "sequence-number" : 3, - "snapshot-id" : 6287117141668015642, - "parent-snapshot-id" : 4037069315291880534, - "timestamp-ms" : 1719580929047, - "summary" : { - "operation" : "append", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-records" : "1685", - "added-files-size" : "133314", - "changed-partition-count" : "1", - "total-records" : "10767", - "total-files-size" : "688935", - "total-data-files" : "3", - "total-delete-files" : "1", - "total-position-deletes" : "3077", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6287117141668015642-1-9ae37730-f1aa-4609-8b39-3f0ded6f78cf.avro", - "schema-id" : 0 - }, { - "sequence-number" : 4, - "snapshot-id" : 6585012225877417653, - "parent-snapshot-id" : 6287117141668015642, - "timestamp-ms" : 1719580929661, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - 
"added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "7690", - "added-files-size" : "410506", - "added-position-deletes" : "7690", - "changed-partition-count" : "1", - "total-records" : "18457", - "total-files-size" : "1099441", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-6585012225877417653-1-355a32d2-0d4f-4da3-8019-f0b782863350.avro", - "schema-id" : 0 - }, { - "sequence-number" : 5, - "snapshot-id" : 4440319347650982524, - "parent-snapshot-id" : 6585012225877417653, - "timestamp-ms" : 1719580930402, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "deleted-data-files" : "1", - "added-records" : "6592", - "deleted-records" : "7690", - "added-files-size" : "333848", - "removed-files-size" : "388851", - "changed-partition-count" : "1", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4440319347650982524-1-b467c132-3bea-404a-ae0f-54ef5a4fbd1f.avro", - "schema-id" : 0 - }, { - "sequence-number" : 6, - "snapshot-id" : 3119545726281138740, - "parent-snapshot-id" : 4440319347650982524, - "timestamp-ms" : 1719580930749, - "summary" : { - "operation" : "delete", - "spark.app.id" : "local-1719580924876", - "changed-partition-count" : "0", - "total-records" : "17359", - "total-files-size" : "1044438", - "total-data-files" : "4", - "total-delete-files" : "2", - "total-position-deletes" : "10767", - "total-equality-deletes" : "0" - }, - "manifest-list" : 
"data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-3119545726281138740-1-e3f073e1-20d8-4831-b927-86100e4ad98c.avro", - "schema-id" : 0 - }, { - "sequence-number" : 7, - "snapshot-id" : 4786266686210019019, - "parent-snapshot-id" : 3119545726281138740, - "timestamp-ms" : 1719580931465, - "summary" : { - "operation" : "overwrite", - "spark.app.id" : "local-1719580924876", - "added-data-files" : "1", - "added-position-delete-files" : "1", - "added-delete-files" : "1", - "added-records" : "685", - "added-files-size" : "51653", - "added-position-deletes" : "685", - "changed-partition-count" : "1", - "total-records" : "18044", - "total-files-size" : "1096091", - "total-data-files" : "5", - "total-delete-files" : "3", - "total-position-deletes" : "11452", - "total-equality-deletes" : "0" - }, - "manifest-list" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/snap-4786266686210019019-1-7c6f85be-3a33-4e3a-817d-7839fa44ff07.avro", - "schema-id" : 1 - } ], - "statistics" : [ ], - "snapshot-log" : [ { - "timestamp-ms" : 1719580927570, - "snapshot-id" : 764624380497366583 - }, { - "timestamp-ms" : 1719580928275, - "snapshot-id" : 4037069315291880534 - }, { - "timestamp-ms" : 1719580929047, - "snapshot-id" : 6287117141668015642 - }, { - "timestamp-ms" : 1719580929661, - "snapshot-id" : 6585012225877417653 - }, { - "timestamp-ms" : 1719580930402, - "snapshot-id" : 4440319347650982524 - }, { - "timestamp-ms" : 1719580930749, - "snapshot-id" : 3119545726281138740 - }, { - "timestamp-ms" : 1719580931465, - "snapshot-id" : 4786266686210019019 - } ], - "metadata-log" : [ { - "timestamp-ms" : 1719580927570, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v1.metadata.json" - }, { - "timestamp-ms" : 1719580928275, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v2.metadata.json" - }, { - "timestamp-ms" : 1719580929047, - "metadata-file" : 
"data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v3.metadata.json" - }, { - "timestamp-ms" : 1719580929661, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v4.metadata.json" - }, { - "timestamp-ms" : 1719580930402, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v5.metadata.json" - }, { - "timestamp-ms" : 1719580930749, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json" - }, { - "timestamp-ms" : 1719580930997, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v7.metadata.json" - }, { - "timestamp-ms" : 1719580931465, - "metadata-file" : "data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v8.metadata.json" - } ] -} \ No newline at end of file diff --git a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/version-hint.text b/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/version-hint.text deleted file mode 100644 index f11c82a..0000000 --- a/data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/version-hint.text +++ /dev/null @@ -1 +0,0 @@ -9 \ No newline at end of file diff --git a/data/bad_data/bad_iceberg_metadata.json b/data/persistent/bad_data/bad_iceberg_metadata.json similarity index 100% rename from data/bad_data/bad_iceberg_metadata.json rename to data/persistent/bad_data/bad_iceberg_metadata.json diff --git a/data/iceberg/lineitem_iceberg/README.md b/data/persistent/iceberg/lineitem_iceberg/README.md similarity index 100% rename from data/iceberg/lineitem_iceberg/README.md rename to data/persistent/iceberg/lineitem_iceberg/README.md diff --git a/data/iceberg/lineitem_iceberg/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc b/data/persistent/iceberg/lineitem_iceberg/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc similarity index 100% rename from 
data/iceberg/lineitem_iceberg/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc rename to data/persistent/iceberg/lineitem_iceberg/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc diff --git a/data/iceberg/lineitem_iceberg/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc b/data/persistent/iceberg/lineitem_iceberg/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc similarity index 100% rename from data/iceberg/lineitem_iceberg/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc rename to data/persistent/iceberg/lineitem_iceberg/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc diff --git a/data/iceberg/lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet b/data/persistent/iceberg/lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet similarity index 100% rename from data/iceberg/lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet rename to data/persistent/iceberg/lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet diff --git a/data/iceberg/lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet b/data/persistent/iceberg/lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet similarity index 100% rename from data/iceberg/lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet rename to data/persistent/iceberg/lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet diff --git a/data/iceberg/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc b/data/persistent/iceberg/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc rename to 
data/persistent/iceberg/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc diff --git a/data/iceberg/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc b/data/persistent/iceberg/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc rename to data/persistent/iceberg/lineitem_iceberg/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc diff --git a/data/iceberg/lineitem_iceberg/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc b/data/persistent/iceberg/lineitem_iceberg/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc rename to data/persistent/iceberg/lineitem_iceberg/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc diff --git a/data/iceberg/lineitem_iceberg/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc b/data/persistent/iceberg/lineitem_iceberg/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc rename to data/persistent/iceberg/lineitem_iceberg/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc diff --git a/data/iceberg/lineitem_iceberg/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc b/data/persistent/iceberg/lineitem_iceberg/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc rename to 
data/persistent/iceberg/lineitem_iceberg/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc diff --git a/data/iceberg/lineitem_iceberg/metadata/.v1.metadata.json.crc b/data/persistent/iceberg/lineitem_iceberg/metadata/.v1.metadata.json.crc similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/.v1.metadata.json.crc rename to data/persistent/iceberg/lineitem_iceberg/metadata/.v1.metadata.json.crc diff --git a/data/iceberg/lineitem_iceberg/metadata/.v2.metadata.json.crc b/data/persistent/iceberg/lineitem_iceberg/metadata/.v2.metadata.json.crc similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/.v2.metadata.json.crc rename to data/persistent/iceberg/lineitem_iceberg/metadata/.v2.metadata.json.crc diff --git a/data/iceberg/lineitem_iceberg/metadata/.version-hint.text.crc b/data/persistent/iceberg/lineitem_iceberg/metadata/.version-hint.text.crc similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/.version-hint.text.crc rename to data/persistent/iceberg/lineitem_iceberg/metadata/.version-hint.text.crc diff --git a/data/iceberg/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro b/data/persistent/iceberg/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro rename to data/persistent/iceberg/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro diff --git a/data/iceberg/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro b/data/persistent/iceberg/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro rename to data/persistent/iceberg/lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro diff --git 
a/data/iceberg/lineitem_iceberg/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro b/data/persistent/iceberg/lineitem_iceberg/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro rename to data/persistent/iceberg/lineitem_iceberg/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro diff --git a/data/iceberg/lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro b/data/persistent/iceberg/lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro rename to data/persistent/iceberg/lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro diff --git a/data/iceberg/lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro b/data/persistent/iceberg/lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro rename to data/persistent/iceberg/lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro diff --git a/data/iceberg/lineitem_iceberg/metadata/v1.metadata.json b/data/persistent/iceberg/lineitem_iceberg/metadata/v1.metadata.json similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/v1.metadata.json rename to data/persistent/iceberg/lineitem_iceberg/metadata/v1.metadata.json diff --git a/data/iceberg/lineitem_iceberg/metadata/v2.metadata.json b/data/persistent/iceberg/lineitem_iceberg/metadata/v2.metadata.json similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/v2.metadata.json rename to 
data/persistent/iceberg/lineitem_iceberg/metadata/v2.metadata.json diff --git a/data/iceberg/lineitem_iceberg/metadata/version-hint.text b/data/persistent/iceberg/lineitem_iceberg/metadata/version-hint.text similarity index 100% rename from data/iceberg/lineitem_iceberg/metadata/version-hint.text rename to data/persistent/iceberg/lineitem_iceberg/metadata/version-hint.text diff --git a/data/iceberg/lineitem_iceberg_gz/data/.00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet.crc b/data/persistent/iceberg/lineitem_iceberg_gz/data/.00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_gz/data/.00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet.crc rename to data/persistent/iceberg/lineitem_iceberg_gz/data/.00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet.crc diff --git a/data/iceberg/lineitem_iceberg_gz/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet b/data/persistent/iceberg/lineitem_iceberg_gz/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet similarity index 100% rename from data/iceberg/lineitem_iceberg_gz/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet rename to data/persistent/iceberg/lineitem_iceberg_gz/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet diff --git a/data/iceberg/lineitem_iceberg_gz/metadata/.23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro.crc b/data/persistent/iceberg/lineitem_iceberg_gz/metadata/.23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_gz/metadata/.23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro.crc rename to data/persistent/iceberg/lineitem_iceberg_gz/metadata/.23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro.crc diff --git a/data/iceberg/lineitem_iceberg_gz/metadata/.snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro.crc 
b/data/persistent/iceberg/lineitem_iceberg_gz/metadata/.snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_gz/metadata/.snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro.crc rename to data/persistent/iceberg/lineitem_iceberg_gz/metadata/.snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro.crc diff --git a/data/iceberg/lineitem_iceberg_gz/metadata/.v1.gz.metadata.json.crc b/data/persistent/iceberg/lineitem_iceberg_gz/metadata/.v1.gz.metadata.json.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_gz/metadata/.v1.gz.metadata.json.crc rename to data/persistent/iceberg/lineitem_iceberg_gz/metadata/.v1.gz.metadata.json.crc diff --git a/data/iceberg/lineitem_iceberg_gz/metadata/.v2.gz.metadata.json.crc b/data/persistent/iceberg/lineitem_iceberg_gz/metadata/.v2.gz.metadata.json.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_gz/metadata/.v2.gz.metadata.json.crc rename to data/persistent/iceberg/lineitem_iceberg_gz/metadata/.v2.gz.metadata.json.crc diff --git a/data/iceberg/lineitem_iceberg_gz/metadata/.version-hint.text.crc b/data/persistent/iceberg/lineitem_iceberg_gz/metadata/.version-hint.text.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_gz/metadata/.version-hint.text.crc rename to data/persistent/iceberg/lineitem_iceberg_gz/metadata/.version-hint.text.crc diff --git a/data/iceberg/lineitem_iceberg_gz/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro b/data/persistent/iceberg/lineitem_iceberg_gz/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro similarity index 100% rename from data/iceberg/lineitem_iceberg_gz/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro rename to data/persistent/iceberg/lineitem_iceberg_gz/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro diff --git 
a/data/iceberg/lineitem_iceberg_gz/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro b/data/persistent/iceberg/lineitem_iceberg_gz/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro similarity index 100% rename from data/iceberg/lineitem_iceberg_gz/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro rename to data/persistent/iceberg/lineitem_iceberg_gz/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro diff --git a/data/iceberg/lineitem_iceberg_gz/metadata/v1.gz.metadata.json b/data/persistent/iceberg/lineitem_iceberg_gz/metadata/v1.gz.metadata.json similarity index 100% rename from data/iceberg/lineitem_iceberg_gz/metadata/v1.gz.metadata.json rename to data/persistent/iceberg/lineitem_iceberg_gz/metadata/v1.gz.metadata.json diff --git a/data/iceberg/lineitem_iceberg_gz/metadata/v2.gz.metadata.json b/data/persistent/iceberg/lineitem_iceberg_gz/metadata/v2.gz.metadata.json similarity index 100% rename from data/iceberg/lineitem_iceberg_gz/metadata/v2.gz.metadata.json rename to data/persistent/iceberg/lineitem_iceberg_gz/metadata/v2.gz.metadata.json diff --git a/data/iceberg/lineitem_iceberg_gz/metadata/version-hint.text b/data/persistent/iceberg/lineitem_iceberg_gz/metadata/version-hint.text similarity index 100% rename from data/iceberg/lineitem_iceberg_gz/metadata/version-hint.text rename to data/persistent/iceberg/lineitem_iceberg_gz/metadata/version-hint.text diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/data/.00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet.crc b/data/persistent/iceberg/lineitem_iceberg_gz_no_hint/data/.00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_gz_no_hint/data/.00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet.crc rename to 
data/persistent/iceberg/lineitem_iceberg_gz_no_hint/data/.00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet.crc diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet b/data/persistent/iceberg/lineitem_iceberg_gz_no_hint/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet similarity index 100% rename from data/iceberg/lineitem_iceberg_gz_no_hint/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet rename to data/persistent/iceberg/lineitem_iceberg_gz_no_hint/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro.crc b/data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/.23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro.crc rename to data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/.23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro.crc diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro.crc b/data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/.snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro.crc rename to data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/.snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro.crc diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v1.gz.metadata.json.crc b/data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v1.gz.metadata.json.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v1.gz.metadata.json.crc rename to 
data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v1.gz.metadata.json.crc diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v2.gz.metadata.json.crc b/data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v2.gz.metadata.json.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v2.gz.metadata.json.crc rename to data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/.v2.gz.metadata.json.crc diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.version-hint.text.crc b/data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/.version-hint.text.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_gz_no_hint/metadata/.version-hint.text.crc rename to data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/.version-hint.text.crc diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro b/data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro similarity index 100% rename from data/iceberg/lineitem_iceberg_gz_no_hint/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro rename to data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro b/data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro similarity index 100% rename from data/iceberg/lineitem_iceberg_gz_no_hint/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro rename to data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/v1.gz.metadata.json 
b/data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/v1.gz.metadata.json similarity index 100% rename from data/iceberg/lineitem_iceberg_gz_no_hint/metadata/v1.gz.metadata.json rename to data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/v1.gz.metadata.json diff --git a/data/iceberg/lineitem_iceberg_gz_no_hint/metadata/v2.gz.metadata.json b/data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/v2.gz.metadata.json similarity index 100% rename from data/iceberg/lineitem_iceberg_gz_no_hint/metadata/v2.gz.metadata.json rename to data/persistent/iceberg/lineitem_iceberg_gz_no_hint/metadata/v2.gz.metadata.json diff --git a/data/iceberg/lineitem_iceberg_no_hint/README.md b/data/persistent/iceberg/lineitem_iceberg_no_hint/README.md similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/README.md rename to data/persistent/iceberg/lineitem_iceberg_no_hint/README.md diff --git a/data/iceberg/lineitem_iceberg_no_hint/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc b/data/persistent/iceberg/lineitem_iceberg_no_hint/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc rename to data/persistent/iceberg/lineitem_iceberg_no_hint/data/.00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet.crc diff --git a/data/iceberg/lineitem_iceberg_no_hint/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc b/data/persistent/iceberg/lineitem_iceberg_no_hint/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc rename to data/persistent/iceberg/lineitem_iceberg_no_hint/data/.00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet.crc diff --git 
a/data/iceberg/lineitem_iceberg_no_hint/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet b/data/persistent/iceberg/lineitem_iceberg_no_hint/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet rename to data/persistent/iceberg/lineitem_iceberg_no_hint/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet diff --git a/data/iceberg/lineitem_iceberg_no_hint/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet b/data/persistent/iceberg/lineitem_iceberg_no_hint/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet rename to data/persistent/iceberg/lineitem_iceberg_no_hint/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro.crc diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro.crc diff --git 
a/data/iceberg/lineitem_iceberg_no_hint/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro.crc diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro.crc diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro.crc diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.v1.metadata.json.crc b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.v1.metadata.json.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/.v1.metadata.json.crc rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.v1.metadata.json.crc diff --git 
a/data/iceberg/lineitem_iceberg_no_hint/metadata/.v2.metadata.json.crc b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.v2.metadata.json.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/.v2.metadata.json.crc rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.v2.metadata.json.crc diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/.version-hint.text.crc b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.version-hint.text.crc similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/.version-hint.text.crc rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/.version-hint.text.crc diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro rename to 
data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/v1.metadata.json b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/v1.metadata.json similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/v1.metadata.json rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/v1.metadata.json diff --git a/data/iceberg/lineitem_iceberg_no_hint/metadata/v2.metadata.json b/data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/v2.metadata.json similarity index 100% rename from data/iceberg/lineitem_iceberg_no_hint/metadata/v2.metadata.json rename to data/persistent/iceberg/lineitem_iceberg_no_hint/metadata/v2.metadata.json diff --git a/scripts/data_generators/DataGenerationBase.py b/scripts/data_generators/DataGenerationBase.py new file mode 100644 
index 0000000..1d959e0 --- /dev/null +++ b/scripts/data_generators/DataGenerationBase.py @@ -0,0 +1,17 @@ + +class DataGenerationBase: + """Base class for database connections and operations.""" + def GetConnection(self): + """Returns a connection to the database.""" + raise NotImplementedError("Subclasses must implement GetConnection()") + + def SetupData(self): + """Sets up data before table generation.""" + raise NotImplementedError("Subclasses must implement SetupData()") + + def GenerateTables(self, con): + """Creates tables using the provided database connection.""" + raise NotImplementedError("Subclasses must implement GenerateTables()") + + def CloseConnection(self, con): + raise NotImplementedError("Subclasses must implement CloseConnection()") \ No newline at end of file diff --git a/scripts/test_data_generator/README.md b/scripts/data_generators/README.md similarity index 66% rename from scripts/test_data_generator/README.md rename to scripts/data_generators/README.md index dbdce99..0a3869c 100644 --- a/scripts/test_data_generator/README.md +++ b/scripts/data_generators/README.md @@ -25,4 +25,13 @@ Should be portable between DuckDB, Spark and Snowflake - Time not yet working - PySpark does not support UUID - Generate similar data from snowflake's iceberg implementation -- value deletes? \ No newline at end of file +- value deletes? + + +# How it works now + +We have data generators, such as spark-local and spark-rest. Eventually we should have more generators, for example one for DuckDB. + +Each generator has its own directory. Inside it, every sub-directory holds the queries that create and modify one table; the table itself must be created in q00. +If extra setup is needed to generate the data, add it as a {setup.*} file in the same directory as the SQL queries. +A Python script is usually the best choice for this.
\ No newline at end of file diff --git a/scripts/data_generators/generate_data.py b/scripts/data_generators/generate_data.py new file mode 100644 index 0000000..ecd1b6e --- /dev/null +++ b/scripts/data_generators/generate_data.py @@ -0,0 +1,18 @@ +from generate_spark_local.generate_iceberg_spark_local import IcebergSparkLocal +from generate_spark_rest.generate_iceberg_spark_rest import IcebergSparkRest + +# Example usage: +if __name__ == "__main__": + db2 = IcebergSparkRest() + conn2 = db2.GetConnection() + db2.GenerateTables(conn2) + db2.CloseConnection(conn2) + del db2 + del conn2 + db = IcebergSparkLocal() + conn = db.GetConnection() + db.GenerateTables(conn) + db.CloseConnection(conn) + del db + del conn + diff --git a/scripts/data_generators/generate_spark_local/__init__.py b/scripts/data_generators/generate_spark_local/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/data_generators/generate_spark_local/generate_iceberg_spark_local.py b/scripts/data_generators/generate_spark_local/generate_iceberg_spark_local.py new file mode 100755 index 0000000..864527c --- /dev/null +++ b/scripts/data_generators/generate_spark_local/generate_iceberg_spark_local.py @@ -0,0 +1,92 @@ +#!/usr/bin/python3 +import pyspark +import pyspark.sql +import sys +import duckdb +import os +from pyspark import SparkContext +from pathlib import Path +import duckdb +import shutil + + +# import DataGenerationBase + +# from scripts.data_generators.generate_base_parquet import PARQUET_SRC_FILE + +DATA_GENERATION_DIR = f"./data/generated/iceberg/spark-local/" +SCRIPT_DIR = f"./scripts/data_generators/" +INTERMEDIATE_DATA = "./data/generated/intermediates/spark-local/" + +class IcebergSparkLocal(): + def __init__(self): + pass + + ### + ### Configure everyone's favorite apache product + ### + def GetConnection(self): + conf = pyspark.SparkConf() + conf.setMaster('local[*]') + conf.set('spark.sql.catalog.iceberg_catalog', 'org.apache.iceberg.spark.SparkCatalog') + 
conf.set('spark.sql.catalog.iceberg_catalog.type', 'hadoop') + conf.set('spark.sql.catalog.iceberg_catalog.warehouse', DATA_GENERATION_DIR) + conf.set('spark.sql.parquet.outputTimestampType', 'TIMESTAMP_MICROS') + conf.set('spark.driver.memory', '10g') + conf.set('spark.jars', f'{SCRIPT_DIR}/iceberg-spark-runtime-3.5_2.12-1.4.2.jar') + conf.set('spark.sql.extensions', 'org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions') + spark = pyspark.sql.SparkSession.builder.config(conf=conf).getOrCreate() + sc = spark.sparkContext + sc.setLogLevel("ERROR") + return spark + + def GetSQLFiles(self, table_dir): + sql_files = [f for f in os.listdir(table_dir) if f.endswith('.sql')] # Find .sql files + sql_files.sort() # Order matters obviously # Store results + return sql_files + + def GetTableDirs(self): + dir = "./scripts/data_generators/generate_spark_local/" + subdirectories = [d for d in os.listdir(dir) if os.path.isdir(dir + d) and d != "__pycache__"] + return subdirectories + + def GetSetupFile(self, dir): + setup_files = [f for f in os.listdir(dir) if 'setup' in f.lower()] + if len(setup_files) == 0: + return "" + return setup_files[0] + + def GenerateTables(self, con): + # con is spark_session + # first get the sub_directories in the current directory + for table_dir in self.GetTableDirs(): + full_table_dir = f"./scripts/data_generators/generate_spark_local/{table_dir}" + setup_script = self.GetSetupFile(full_table_dir) + + PARQUET_SRC_FILE = f"scripts/data_generators/tmp_data/tmp.parquet" + if setup_script != "": + os.system(f"PARQUET_SRC_FILE='{PARQUET_SRC_FILE}' python3 {full_table_dir}/{os.path.basename(setup_script)}") + con.read.parquet(PARQUET_SRC_FILE).createOrReplaceTempView('parquet_file_view') + + update_files = self.GetSQLFiles(full_table_dir) + + last_file = "" + for path in update_files: + full_file_path = f"{full_table_dir}/{os.path.basename(path)}" + with open(full_file_path, 'r') as file: + file_trimmed = os.path.basename(path)[:-4] + 
last_file = file_trimmed + query = file.read() + # Run spark query + con.sql(query) + + # Create a parquet copy of table + df = con.read.table(f"iceberg_catalog.{table_dir}") + df.write.mode("overwrite").parquet(f"{INTERMEDIATE_DATA}/{table_dir}/{file_trimmed}/data.parquet"); + + if last_file != "": + ### Finally, copy the latest results to a "final" dir for easy test writing + shutil.copytree(f"{INTERMEDIATE_DATA}/{table_dir}/{last_file}/data.parquet", f"{INTERMEDIATE_DATA}/{table_dir}/last/data.parquet", dirs_exist_ok=True) + + def CloseConnection(self, con): + pass diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q00.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q00.sql new file mode 100644 index 0000000..36a4197 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q00.sql @@ -0,0 +1 @@ +CREATE or REPLACE TABLE iceberg_catalog.pyspark_iceberg_table_v1 TBLPROPERTIES ('format-version'='1') AS SELECT * FROM parquet_file_view; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q01.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q01.sql new file mode 100644 index 0000000..7349c82 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q01.sql @@ -0,0 +1,13 @@ +update iceberg_catalog.pyspark_iceberg_table_v1 +set l_orderkey_bool=NULL, + l_partkey_int=NULL, + l_suppkey_long=NULL, + l_extendedprice_float=NULL, + l_extendedprice_double=NULL, + l_shipdate_date=NULL, + l_partkey_time=NULL, + l_commitdate_timestamp=NULL, + l_commitdate_timestamp_tz=NULL, + l_comment_string=NULL, + l_comment_blob=NULL +where l_partkey_int % 2 = 0; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q02.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q02.sql new file mode 100644 index 
0000000..1efc3e5 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q02.sql @@ -0,0 +1,3 @@ +insert into iceberg_catalog.pyspark_iceberg_table_v1 +select * FROM iceberg_catalog.pyspark_iceberg_table_v1 +where l_extendedprice_double < 30000 \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q03.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q03.sql new file mode 100644 index 0000000..708df84 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q03.sql @@ -0,0 +1,2 @@ +update iceberg_catalog.pyspark_iceberg_table_v1 +set l_orderkey_bool = not l_orderkey_bool; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q04.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q04.sql new file mode 100644 index 0000000..4419b3c --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q04.sql @@ -0,0 +1,3 @@ +update iceberg_catalog.pyspark_iceberg_table_v1 +set l_orderkey_bool = false +where l_partkey_int % 4 = 0; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q05.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q05.sql new file mode 100644 index 0000000..3518f1c --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q05.sql @@ -0,0 +1,3 @@ +update iceberg_catalog.pyspark_iceberg_table_v1 +set l_orderkey_bool = false +where l_partkey_int % 5 = 0; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q06.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q06.sql new file mode 100644 index 0000000..7866bfc --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q06.sql 
@@ -0,0 +1,2 @@ +ALTER TABLE iceberg_catalog.pyspark_iceberg_table_v1 + ADD COLUMN schema_evol_added_col_1 INT DEFAULT 42; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q07.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q07.sql new file mode 100644 index 0000000..57ec7c5 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q07.sql @@ -0,0 +1,3 @@ +UPDATE iceberg_catalog.pyspark_iceberg_table_v1 +SET schema_evol_added_col_1 = l_partkey_int +WHERE l_partkey_int % 5 = 0; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q08.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q08.sql new file mode 100644 index 0000000..347c725 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/q08.sql @@ -0,0 +1,2 @@ +ALTER TABLE iceberg_catalog.pyspark_iceberg_table_v1 +ALTER COLUMN schema_evol_added_col_1 TYPE BIGINT; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/setup.py b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/setup.py new file mode 100644 index 0000000..63e3bf4 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v1/setup.py @@ -0,0 +1,28 @@ +import duckdb +import os + +PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE') + +duckdb_con = duckdb.connect() +duckdb_con.execute("call dbgen(sf=0.001)") +duckdb_con.query("""CREATE VIEW test_table as + SELECT + (l_orderkey%2=0) as l_orderkey_bool, + l_partkey::INT32 as l_partkey_int, + l_suppkey::INT64 as l_suppkey_long, + l_extendedprice::FLOAT as l_extendedprice_float, + l_extendedprice::DOUBLE as l_extendedprice_double, + l_extendedprice::DECIMAL(9,2) as l_extendedprice_dec9_2, + l_extendedprice::DECIMAL(18,6) as l_extendedprice_dec18_6, + 
l_extendedprice::DECIMAL(38,10) as l_extendedprice_dec38_10, + l_shipdate::DATE as l_shipdate_date, + l_partkey as l_partkey_time, + l_commitdate::TIMESTAMP as l_commitdate_timestamp, + l_commitdate::TIMESTAMPTZ as l_commitdate_timestamp_tz, + l_comment as l_comment_string, + gen_random_uuid()::VARCHAR as uuid, + l_comment::BLOB as l_comment_blob + FROM + lineitem;""") + +duckdb_con.execute(f"copy test_table to '{PARQUET_SRC_FILE}' (FORMAT PARQUET)") \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q00.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q00.sql new file mode 100644 index 0000000..ba1bc38 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q00.sql @@ -0,0 +1 @@ +CREATE or REPLACE TABLE iceberg_catalog.pyspark_iceberg_table_v2 TBLPROPERTIES ('format-version'='2', 'write.update.mode'='merge-on-read') AS SELECT * FROM parquet_file_view; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q01.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q01.sql new file mode 100644 index 0000000..41d8198 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q01.sql @@ -0,0 +1,13 @@ +update iceberg_catalog.pyspark_iceberg_table_v2 +set l_orderkey_bool=NULL, + l_partkey_int=NULL, + l_suppkey_long=NULL, + l_extendedprice_float=NULL, + l_extendedprice_double=NULL, + l_shipdate_date=NULL, + l_partkey_time=NULL, + l_commitdate_timestamp=NULL, + l_commitdate_timestamp_tz=NULL, + l_comment_string=NULL, + l_comment_blob=NULL +where l_partkey_int % 2 = 0; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q02.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q02.sql new file mode 100644 index 0000000..491e5c5 --- /dev/null +++ 
b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q02.sql @@ -0,0 +1,3 @@ +insert into iceberg_catalog.pyspark_iceberg_table_v2 +select * FROM iceberg_catalog.pyspark_iceberg_table_v2 +where l_extendedprice_double < 30000 \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q03.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q03.sql new file mode 100644 index 0000000..a481c56 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q03.sql @@ -0,0 +1,2 @@ +update iceberg_catalog.pyspark_iceberg_table_v2 +set l_orderkey_bool = not l_orderkey_bool; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q04.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q04.sql new file mode 100644 index 0000000..09465f5 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q04.sql @@ -0,0 +1,3 @@ +delete +from iceberg_catalog.pyspark_iceberg_table_v2 +where l_extendedprice_double < 10000; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q05.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q05.sql new file mode 100644 index 0000000..6d4ea42 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q05.sql @@ -0,0 +1,3 @@ +delete +from iceberg_catalog.pyspark_iceberg_table_v2 +where l_extendedprice_double > 70000; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q06.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q06.sql new file mode 100644 index 0000000..bf215d5 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q06.sql @@ -0,0 +1,2 @@ +ALTER TABLE 
iceberg_catalog.pyspark_iceberg_table_v2 + ADD COLUMN schema_evol_added_col_1 INT DEFAULT 42; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q07.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q07.sql new file mode 100644 index 0000000..af30771 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q07.sql @@ -0,0 +1,3 @@ +UPDATE iceberg_catalog.pyspark_iceberg_table_v2 +SET schema_evol_added_col_1 = l_partkey_int +WHERE l_partkey_int % 5 = 0; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q08.sql b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q08.sql new file mode 100644 index 0000000..2c51dc9 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/q08.sql @@ -0,0 +1,2 @@ +ALTER TABLE iceberg_catalog.pyspark_iceberg_table_v2 +ALTER COLUMN schema_evol_added_col_1 TYPE BIGINT; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/setup.py b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/setup.py new file mode 100644 index 0000000..63e3bf4 --- /dev/null +++ b/scripts/data_generators/generate_spark_local/pyspark_iceberg_table_v2/setup.py @@ -0,0 +1,28 @@ +import duckdb +import os + +PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE') + +duckdb_con = duckdb.connect() +duckdb_con.execute("call dbgen(sf=0.001)") +duckdb_con.query("""CREATE VIEW test_table as + SELECT + (l_orderkey%2=0) as l_orderkey_bool, + l_partkey::INT32 as l_partkey_int, + l_suppkey::INT64 as l_suppkey_long, + l_extendedprice::FLOAT as l_extendedprice_float, + l_extendedprice::DOUBLE as l_extendedprice_double, + l_extendedprice::DECIMAL(9,2) as l_extendedprice_dec9_2, + l_extendedprice::DECIMAL(18,6) as l_extendedprice_dec18_6, + l_extendedprice::DECIMAL(38,10) as 
l_extendedprice_dec38_10, + l_shipdate::DATE as l_shipdate_date, + l_partkey as l_partkey_time, + l_commitdate::TIMESTAMP as l_commitdate_timestamp, + l_commitdate::TIMESTAMPTZ as l_commitdate_timestamp_tz, + l_comment as l_comment_string, + gen_random_uuid()::VARCHAR as uuid, + l_comment::BLOB as l_comment_blob + FROM + lineitem;""") + +duckdb_con.execute(f"copy test_table to '{PARQUET_SRC_FILE}' (FORMAT PARQUET)") \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_rest/__init__.py b/scripts/data_generators/generate_spark_rest/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/data_generators/generate_spark_rest/generate_iceberg_spark_rest.py b/scripts/data_generators/generate_spark_rest/generate_iceberg_spark_rest.py new file mode 100644 index 0000000..ad030ec --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/generate_iceberg_spark_rest.py @@ -0,0 +1,120 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from pyspark.sql import SparkSession

import pyspark
import pyspark.sql
import sys
import duckdb
import os
import subprocess
from pyspark import SparkContext
from pathlib import Path
import shutil

DATA_GENERATION_DIR = f"./data/generated/iceberg/spark-rest/"
SCRIPT_DIR = f"./scripts/data_generators/"
INTERMEDIATE_DATA = "./data/generated/intermediates/spark-rest/"


class IcebergSparkRest():
    """Generates Iceberg test tables through Spark against a REST catalog.

    Every sub-directory of ``generate_spark_rest`` describes one table: an
    optional setup script produces a source parquet file, after which the
    ``.sql`` files of that directory are executed in sorted order. After each
    query a parquet copy of the table is written below ``INTERMEDIATE_DATA``.
    """

    def __init__(self):
        pass

    ###
    ### Configure everyone's favorite apache product
    ###
    def GetConnection(self):
        """Builds (or reuses) the Spark session configured for the ``demo`` REST catalog.

        Sets the PySpark submit arguments and AWS credentials as process-wide
        environment variables, then makes sure the ``default`` database exists.

        Returns:
            The Spark session returned by ``getOrCreate()``.
        """
        # NOTE(review): --packages pulls iceberg-spark-runtime-3.4_2.12 while
        # 'spark.jars' below points at the 3.5_2.12 jar — confirm which one
        # matches the Spark version that is actually installed.
        os.environ[
            "PYSPARK_SUBMIT_ARGS"
        ] = "--packages org.apache.iceberg:iceberg-spark-runtime-3.4_2.12:1.4.2,org.apache.iceberg:iceberg-aws-bundle:1.4.2 pyspark-shell"
        os.environ["AWS_REGION"] = "us-east-1"
        os.environ["AWS_ACCESS_KEY_ID"] = "admin"
        os.environ["AWS_SECRET_ACCESS_KEY"] = "password"

        spark = (
            SparkSession.builder.appName("DuckDB REST Integeration test")
            # 'spark.sql.extensions' used to be set twice with the same value;
            # it is now configured once.
            .config(
                "spark.sql.extensions",
                "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
            )
            .config("spark.sql.catalog.demo", "org.apache.iceberg.spark.SparkCatalog")
            .config("spark.sql.catalog.demo.type", "rest")
            .config("spark.sql.catalog.demo.uri", "http://127.0.0.1:8181")
            .config("spark.sql.catalog.demo.warehouse", "s3://warehouse/wh/")
            .config("spark.sql.catalog.demo.s3.endpoint", "http://127.0.0.1:9000")
            .config("spark.sql.catalog.demo.s3.path-style-access", "true")
            .config("spark.sql.defaultCatalog", "demo")
            .config('spark.driver.memory', '10g')
            .config("spark.sql.catalogImplementation", "in-memory")
            .config("spark.sql.catalog.demo.io-impl", "org.apache.iceberg.aws.s3.S3FileIO")
            .config('spark.jars', f'{SCRIPT_DIR}/iceberg-spark-runtime-3.5_2.12-1.4.2.jar')
            .getOrCreate()
        )
        spark.sql("CREATE DATABASE IF NOT EXISTS default;")
        return spark

    def GetSQLFiles(self, table_dir):
        """Returns the names of the ``.sql`` files in ``table_dir``, sorted by name.

        The sort order is the execution order (q00, q01, ...).
        """
        return sorted(f for f in os.listdir(table_dir) if f.endswith('.sql'))

    def GetTableDirs(self):
        """Returns the table sub-directory names of this generator, sorted by name.

        ``__pycache__`` is skipped. Sorting makes the processing order
        independent of the order in which ``os.listdir`` reports entries.
        """
        base_dir = "./scripts/data_generators/generate_spark_rest/"
        return sorted(
            d for d in os.listdir(base_dir)
            if os.path.isdir(base_dir + d) and d != "__pycache__"
        )

    def GetSetupFile(self, dir):
        """Returns the name of the setup file in ``dir``, or ``""`` if there is none.

        A setup file is any entry whose name contains ``setup`` (case-insensitive).
        When several match, the alphabetically first one is returned so the
        choice is deterministic.
        """
        setup_files = sorted(f for f in os.listdir(dir) if 'setup' in f.lower())
        if not setup_files:
            return ""
        return setup_files[0]

    def GenerateTables(self, con):
        """Creates and evolves every table of this generator.

        Args:
            con: the Spark session obtained from ``GetConnection()``.

        Raises:
            subprocess.CalledProcessError: if a table's setup script exits
                with a non-zero status.
        """
        # con is spark_session
        # first get the sub_directories in the current directory
        for table_dir in self.GetTableDirs():
            full_table_dir = f"./scripts/data_generators/generate_spark_rest/{table_dir}"
            setup_script = self.GetSetupFile(full_table_dir)

            # should mimic generate_base_parquet
            PARQUET_SRC_FILE = f"scripts/data_generators/tmp_data/tmp.parquet"
            if setup_script != "":
                # Make sure the directory the setup script writes into exists.
                os.makedirs(os.path.dirname(PARQUET_SRC_FILE), exist_ok=True)
                # Run the setup script without a shell and fail loudly: a
                # failed setup must not fall through to a missing or stale
                # parquet file.
                subprocess.run(
                    ["python3", f"{full_table_dir}/{os.path.basename(setup_script)}"],
                    env={**os.environ, "PARQUET_SRC_FILE": PARQUET_SRC_FILE},
                    check=True,
                )
                # NOTE(review): the flattened patch lost its indentation; the
                # view registration is assumed to belong to the setup branch,
                # since the file only exists once a setup script has run.
                con.read.parquet(PARQUET_SRC_FILE).createOrReplaceTempView('parquet_file_view')

            # Reset for every table so a directory without .sql files neither
            # raises UnboundLocalError nor reuses the previous table's value.
            last_file = ""
            for path in self.GetSQLFiles(full_table_dir):
                file_name = os.path.basename(path)
                file_trimmed = os.path.splitext(file_name)[0]
                with open(f"{full_table_dir}/{file_name}", 'r') as file:
                    query = file.read()
                last_file = file_trimmed

                # Run spark query
                con.sql(query)

                # Create a parquet copy of table
                df = con.read.table(f"default.{table_dir}")
                df.write.mode("overwrite").parquet(f"{INTERMEDIATE_DATA}/{table_dir}/{file_trimmed}/data.parquet")

            if last_file != "":
                ### Finally, copy the latest results to a "final" dir for easy test writing
                shutil.copytree(
                    f"{INTERMEDIATE_DATA}/{table_dir}/{last_file}/data.parquet",
                    f"{INTERMEDIATE_DATA}/{table_dir}/last/data.parquet",
                    dirs_exist_ok=True,
                )

    def CloseConnection(self, con):
        """Drops the local reference to ``con``.

        This only unbinds the local name; the Spark session itself is not stopped.
        """
        del con
a/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q00.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q00.sql new file mode 100644 index 0000000..cb12b68 --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q00.sql @@ -0,0 +1 @@ +CREATE or REPLACE TABLE default.pyspark_iceberg_table_v1 TBLPROPERTIES ('format-version'='1') AS SELECT * FROM parquet_file_view; \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v1/q01.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q01.sql similarity index 88% rename from scripts/test_data_generator/updates_v1/q01.sql rename to scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q01.sql index d9119c5..b84bc2a 100644 --- a/scripts/test_data_generator/updates_v1/q01.sql +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q01.sql @@ -1,4 +1,4 @@ -update iceberg_catalog.pyspark_iceberg_table +update default.pyspark_iceberg_table_v1 set l_orderkey_bool=NULL, l_partkey_int=NULL, l_suppkey_long=NULL, diff --git a/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q02.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q02.sql new file mode 100644 index 0000000..e31e759 --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q02.sql @@ -0,0 +1,3 @@ +insert into default.pyspark_iceberg_table_v1 +select * FROM default.pyspark_iceberg_table_v1 +where l_extendedprice_double < 30000 \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q03.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q03.sql new file mode 100644 index 0000000..637d421 --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q03.sql @@ -0,0 +1,2 @@ +update default.pyspark_iceberg_table_v1 +set l_orderkey_bool = not 
l_orderkey_bool; \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v1/q04.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q04.sql similarity index 55% rename from scripts/test_data_generator/updates_v1/q04.sql rename to scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q04.sql index e64ea11..9460593 100644 --- a/scripts/test_data_generator/updates_v1/q04.sql +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q04.sql @@ -1,3 +1,3 @@ -update iceberg_catalog.pyspark_iceberg_table +update default.pyspark_iceberg_table_v1 set l_orderkey_bool = false where l_partkey_int % 4 = 0; \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v1/q05.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q05.sql similarity index 55% rename from scripts/test_data_generator/updates_v1/q05.sql rename to scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q05.sql index c5eb8cb..13dd2e3 100644 --- a/scripts/test_data_generator/updates_v1/q05.sql +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q05.sql @@ -1,3 +1,3 @@ -update iceberg_catalog.pyspark_iceberg_table +update default.pyspark_iceberg_table_v1 set l_orderkey_bool = false where l_partkey_int % 5 = 0; \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v1/q06.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q06.sql similarity index 51% rename from scripts/test_data_generator/updates_v1/q06.sql rename to scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q06.sql index 0f454b8..12b8c7c 100644 --- a/scripts/test_data_generator/updates_v1/q06.sql +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q06.sql @@ -1,2 +1,2 @@ -ALTER TABLE iceberg_catalog.pyspark_iceberg_table +ALTER TABLE default.pyspark_iceberg_table_v1 ADD COLUMN schema_evol_added_col_1 INT DEFAULT 
42; \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v1/q07.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q07.sql similarity index 61% rename from scripts/test_data_generator/updates_v1/q07.sql rename to scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q07.sql index 3de5491..b3b4097 100644 --- a/scripts/test_data_generator/updates_v1/q07.sql +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q07.sql @@ -1,3 +1,3 @@ -UPDATE iceberg_catalog.pyspark_iceberg_table +UPDATE default.pyspark_iceberg_table_v1 SET schema_evol_added_col_1 = l_partkey_int WHERE l_partkey_int % 5 = 0; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q08.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q08.sql new file mode 100644 index 0000000..f018d3f --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/q08.sql @@ -0,0 +1,2 @@ +ALTER TABLE default.pyspark_iceberg_table_v1 +ALTER COLUMN schema_evol_added_col_1 TYPE BIGINT; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/setup.py b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/setup.py new file mode 100644 index 0000000..63e3bf4 --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v1/setup.py @@ -0,0 +1,28 @@ +import duckdb +import os + +PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE') + +duckdb_con = duckdb.connect() +duckdb_con.execute("call dbgen(sf=0.001)") +duckdb_con.query("""CREATE VIEW test_table as + SELECT + (l_orderkey%2=0) as l_orderkey_bool, + l_partkey::INT32 as l_partkey_int, + l_suppkey::INT64 as l_suppkey_long, + l_extendedprice::FLOAT as l_extendedprice_float, + l_extendedprice::DOUBLE as l_extendedprice_double, + l_extendedprice::DECIMAL(9,2) as l_extendedprice_dec9_2, + 
l_extendedprice::DECIMAL(18,6) as l_extendedprice_dec18_6, + l_extendedprice::DECIMAL(38,10) as l_extendedprice_dec38_10, + l_shipdate::DATE as l_shipdate_date, + l_partkey as l_partkey_time, + l_commitdate::TIMESTAMP as l_commitdate_timestamp, + l_commitdate::TIMESTAMPTZ as l_commitdate_timestamp_tz, + l_comment as l_comment_string, + gen_random_uuid()::VARCHAR as uuid, + l_comment::BLOB as l_comment_blob + FROM + lineitem;""") + +duckdb_con.execute(f"copy test_table to '{PARQUET_SRC_FILE}' (FORMAT PARQUET)") \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q00.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q00.sql new file mode 100644 index 0000000..1743f22 --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q00.sql @@ -0,0 +1 @@ +CREATE or REPLACE TABLE default.pyspark_iceberg_table_v2 TBLPROPERTIES ('format-version'='2', 'write.update.mode'='merge-on-read') AS SELECT * FROM parquet_file_view; \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v2/q01.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q01.sql similarity index 88% rename from scripts/test_data_generator/updates_v2/q01.sql rename to scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q01.sql index d9119c5..205669c 100644 --- a/scripts/test_data_generator/updates_v2/q01.sql +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q01.sql @@ -1,4 +1,4 @@ -update iceberg_catalog.pyspark_iceberg_table +update default.pyspark_iceberg_table_v2 set l_orderkey_bool=NULL, l_partkey_int=NULL, l_suppkey_long=NULL, diff --git a/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q02.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q02.sql new file mode 100644 index 0000000..1a7aa13 --- /dev/null +++ 
b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q02.sql @@ -0,0 +1,3 @@ +insert into default.pyspark_iceberg_table_v2 +select * FROM default.pyspark_iceberg_table_v2 +where l_extendedprice_double < 30000 \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q03.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q03.sql new file mode 100644 index 0000000..133da8b --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q03.sql @@ -0,0 +1,2 @@ +update default.pyspark_iceberg_table_v2 +set l_orderkey_bool = not l_orderkey_bool; \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v2/q04.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q04.sql similarity index 50% rename from scripts/test_data_generator/updates_v2/q04.sql rename to scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q04.sql index 950b4b2..6bbbb05 100644 --- a/scripts/test_data_generator/updates_v2/q04.sql +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q04.sql @@ -1,3 +1,3 @@ delete -from iceberg_catalog.pyspark_iceberg_table +from default.pyspark_iceberg_table_v2 where l_extendedprice_double < 10000; \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v2/q05.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q05.sql similarity index 50% rename from scripts/test_data_generator/updates_v2/q05.sql rename to scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q05.sql index da6b6e4..2780a8a 100644 --- a/scripts/test_data_generator/updates_v2/q05.sql +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q05.sql @@ -1,3 +1,3 @@ delete -from iceberg_catalog.pyspark_iceberg_table +from default.pyspark_iceberg_table_v2 where l_extendedprice_double > 70000; \ No newline at end of file diff --git 
a/scripts/test_data_generator/updates_v2/q06.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q06.sql similarity index 51% rename from scripts/test_data_generator/updates_v2/q06.sql rename to scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q06.sql index 0f454b8..1a9eafd 100644 --- a/scripts/test_data_generator/updates_v2/q06.sql +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q06.sql @@ -1,2 +1,2 @@ -ALTER TABLE iceberg_catalog.pyspark_iceberg_table +ALTER TABLE default.pyspark_iceberg_table_v2 ADD COLUMN schema_evol_added_col_1 INT DEFAULT 42; \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v2/q07.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q07.sql similarity index 61% rename from scripts/test_data_generator/updates_v2/q07.sql rename to scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q07.sql index 3de5491..66ed123 100644 --- a/scripts/test_data_generator/updates_v2/q07.sql +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q07.sql @@ -1,3 +1,3 @@ -UPDATE iceberg_catalog.pyspark_iceberg_table +UPDATE default.pyspark_iceberg_table_v2 SET schema_evol_added_col_1 = l_partkey_int WHERE l_partkey_int % 5 = 0; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q08.sql b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q08.sql new file mode 100644 index 0000000..99a53dd --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/q08.sql @@ -0,0 +1,2 @@ +ALTER TABLE default.pyspark_iceberg_table_v2 +ALTER COLUMN schema_evol_added_col_1 TYPE BIGINT; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/setup.py b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/setup.py new file mode 100644 index 0000000..63e3bf4 --- 
/dev/null +++ b/scripts/data_generators/generate_spark_rest/pyspark_iceberg_table_v2/setup.py @@ -0,0 +1,28 @@ +import duckdb +import os + +PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE') + +duckdb_con = duckdb.connect() +duckdb_con.execute("call dbgen(sf=0.001)") +duckdb_con.query("""CREATE VIEW test_table as + SELECT + (l_orderkey%2=0) as l_orderkey_bool, + l_partkey::INT32 as l_partkey_int, + l_suppkey::INT64 as l_suppkey_long, + l_extendedprice::FLOAT as l_extendedprice_float, + l_extendedprice::DOUBLE as l_extendedprice_double, + l_extendedprice::DECIMAL(9,2) as l_extendedprice_dec9_2, + l_extendedprice::DECIMAL(18,6) as l_extendedprice_dec18_6, + l_extendedprice::DECIMAL(38,10) as l_extendedprice_dec38_10, + l_shipdate::DATE as l_shipdate_date, + l_partkey as l_partkey_time, + l_commitdate::TIMESTAMP as l_commitdate_timestamp, + l_commitdate::TIMESTAMPTZ as l_commitdate_timestamp_tz, + l_comment as l_comment_string, + gen_random_uuid()::VARCHAR as uuid, + l_comment::BLOB as l_comment_blob + FROM + lineitem;""") + +duckdb_con.execute(f"copy test_table to '{PARQUET_SRC_FILE}' (FORMAT PARQUET)") \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_rest/table_more_deletes/q00.sql b/scripts/data_generators/generate_spark_rest/table_more_deletes/q00.sql new file mode 100644 index 0000000..fae1cc3 --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/table_more_deletes/q00.sql @@ -0,0 +1,12 @@ +CREATE OR REPLACE TABLE default.table_more_deletes ( + dt date, + number integer, + letter string + ) + USING iceberg + TBLPROPERTIES ( + 'write.delete.mode'='merge-on-read', + 'write.update.mode'='merge-on-read', + 'write.merge.mode'='merge-on-read', + 'format-version'='2' + ); \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_rest/table_more_deletes/q01.sql b/scripts/data_generators/generate_spark_rest/table_more_deletes/q01.sql new file mode 100644 index 0000000..912dae7 --- /dev/null +++ 
b/scripts/data_generators/generate_spark_rest/table_more_deletes/q01.sql @@ -0,0 +1,14 @@ +INSERT INTO default.table_more_deletes +VALUES + (CAST('2023-03-01' AS date), 1, 'a'), + (CAST('2023-03-02' AS date), 2, 'b'), + (CAST('2023-03-03' AS date), 3, 'c'), + (CAST('2023-03-04' AS date), 4, 'd'), + (CAST('2023-03-05' AS date), 5, 'e'), + (CAST('2023-03-06' AS date), 6, 'f'), + (CAST('2023-03-07' AS date), 7, 'g'), + (CAST('2023-03-08' AS date), 8, 'h'), + (CAST('2023-03-09' AS date), 9, 'i'), + (CAST('2023-03-10' AS date), 10, 'j'), + (CAST('2023-03-11' AS date), 11, 'k'), + (CAST('2023-03-12' AS date), 12, 'l'); diff --git a/scripts/data_generators/generate_spark_rest/table_more_deletes/q02.sql b/scripts/data_generators/generate_spark_rest/table_more_deletes/q02.sql new file mode 100644 index 0000000..0b85ca8 --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/table_more_deletes/q02.sql @@ -0,0 +1,2 @@ +Delete from default.table_more_deletes +where number > 3 and number < 10; \ No newline at end of file diff --git a/scripts/data_generators/generate_spark_rest/table_partitioned/q00.sql b/scripts/data_generators/generate_spark_rest/table_partitioned/q00.sql new file mode 100644 index 0000000..2aba8da --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/table_partitioned/q00.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE TABLE default.table_partitioned ( + dt date, + number integer, + letter string +) +USING iceberg +PARTITIONED BY (days(dt)) diff --git a/scripts/data_generators/generate_spark_rest/table_partitioned/q01.sql b/scripts/data_generators/generate_spark_rest/table_partitioned/q01.sql new file mode 100644 index 0000000..ecbcd5e --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/table_partitioned/q01.sql @@ -0,0 +1,14 @@ +INSERT INTO default.table_partitioned +VALUES + (CAST('2023-03-01' AS date), 1, 'a'), + (CAST('2023-03-02' AS date), 2, 'b'), + (CAST('2023-03-03' AS date), 3, 'c'), + (CAST('2023-03-04' AS date), 4, 'd'), + 
(CAST('2023-03-05' AS date), 5, 'e'), + (CAST('2023-03-06' AS date), 6, 'f'), + (CAST('2023-03-07' AS date), 7, 'g'), + (CAST('2023-03-08' AS date), 8, 'h'), + (CAST('2023-03-09' AS date), 9, 'i'), + (CAST('2023-03-10' AS date), 10, 'j'), + (CAST('2023-03-11' AS date), 11, 'k'), + (CAST('2023-03-12' AS date), 12, 'l'); diff --git a/scripts/data_generators/generate_spark_rest/table_unpartitioned/q00.sql b/scripts/data_generators/generate_spark_rest/table_unpartitioned/q00.sql new file mode 100644 index 0000000..b7f2c4c --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/table_unpartitioned/q00.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE TABLE default.table_unpartitioned ( + dt date, + number integer, + letter string +) +USING iceberg +; diff --git a/scripts/data_generators/generate_spark_rest/table_unpartitioned/q01.sql b/scripts/data_generators/generate_spark_rest/table_unpartitioned/q01.sql new file mode 100644 index 0000000..cb58794 --- /dev/null +++ b/scripts/data_generators/generate_spark_rest/table_unpartitioned/q01.sql @@ -0,0 +1,14 @@ +INSERT INTO default.table_unpartitioned + VALUES + (CAST('2023-03-01' AS date), 1, 'a'), + (CAST('2023-03-02' AS date), 2, 'b'), + (CAST('2023-03-03' AS date), 3, 'c'), + (CAST('2023-03-04' AS date), 4, 'd'), + (CAST('2023-03-05' AS date), 5, 'e'), + (CAST('2023-03-06' AS date), 6, 'f'), + (CAST('2023-03-07' AS date), 7, 'g'), + (CAST('2023-03-08' AS date), 8, 'h'), + (CAST('2023-03-09' AS date), 9, 'i'), + (CAST('2023-03-10' AS date), 10, 'j'), + (CAST('2023-03-11' AS date), 11, 'k'), + (CAST('2023-03-12' AS date), 12, 'l'); diff --git a/scripts/test_data_generator/iceberg-spark-runtime-3.5_2.12-1.4.2.jar b/scripts/data_generators/iceberg-spark-runtime-3.5_2.12-1.4.2.jar similarity index 100% rename from scripts/test_data_generator/iceberg-spark-runtime-3.5_2.12-1.4.2.jar rename to scripts/data_generators/iceberg-spark-runtime-3.5_2.12-1.4.2.jar diff --git a/scripts/data_generators/tmp_data/tmp.parquet 
b/scripts/data_generators/tmp_data/tmp.parquet new file mode 100644 index 0000000..5475f1e Binary files /dev/null and b/scripts/data_generators/tmp_data/tmp.parquet differ diff --git a/scripts/docker-compose.yml b/scripts/docker-compose.yml index 6a14e64..0621258 100644 --- a/scripts/docker-compose.yml +++ b/scripts/docker-compose.yml @@ -1,5 +1,3 @@ -version: "3" - services: rest: image: tabulario/iceberg-rest @@ -26,6 +24,10 @@ services: iceberg_net: aliases: - warehouse.minio + volumes: + - type: bind + source: ../data/generated/iceberg/spark-rest/ + target: /data ports: - 9001:9001 - 9000:9000 @@ -41,12 +43,13 @@ services: - AWS_ACCESS_KEY_ID=admin - AWS_SECRET_ACCESS_KEY=password - AWS_REGION=us-east-1 - entrypoint: > + entrypoint: | /bin/sh -c " until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; /usr/bin/mc rm -r --force minio/warehouse; /usr/bin/mc mb minio/warehouse; /usr/bin/mc policy set public minio/warehouse; + tail -f /dev/null " networks: iceberg_net: \ No newline at end of file diff --git a/scripts/provision.py b/scripts/provision.py deleted file mode 100644 index 53f8932..0000000 --- a/scripts/provision.py +++ /dev/null @@ -1,244 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.sql import SparkSession - -import os -import duckdb - -in_scripts_dir = os.path.basename(os.path.dirname(__file__)) == 'scripts' -if not in_scripts_dir: - print("please run provision.py from duckdb-iceberg/scripts dir") - exit(1) - - -os.environ[ - "PYSPARK_SUBMIT_ARGS" -] = "--packages org.apache.iceberg:iceberg-spark-runtime-3.4_2.12:1.4.2,org.apache.iceberg:iceberg-aws-bundle:1.4.2 pyspark-shell" -os.environ["AWS_REGION"] = "us-east-1" -os.environ["AWS_ACCESS_KEY_ID"] = "admin" -os.environ["AWS_SECRET_ACCESS_KEY"] = "password" - -spark = ( - SparkSession.builder.appName("DuckDB REST Integeration test") - .config( - "spark.sql.extensions", - "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions", - ) - .config("spark.sql.catalog.demo", "org.apache.iceberg.spark.SparkCatalog") - .config("spark.sql.catalog.demo.type", "rest") - .config("spark.sql.catalog.demo.uri", "http://127.0.0.1:8181") - .config("spark.sql.catalog.demo.io-impl", "org.apache.iceberg.aws.s3.S3FileIO") - .config("spark.sql.catalog.demo.warehouse", "s3://warehouse/wh/") - .config("spark.sql.catalog.demo.s3.endpoint", "http://127.0.0.1:9000") - .config("spark.sql.catalog.demo.s3.path-style-access", "true") - .config("spark.sql.defaultCatalog", "demo") - .config('spark.driver.memory', '10g') - .config("spark.sql.catalogImplementation", "in-memory") - .config('spark.jars', f'test_data_generator/iceberg-spark-runtime-3.5_2.12-1.4.2.jar') - .config('spark.sql.extensions', 'org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions') - .getOrCreate() -) - -spark.sql( - """ - CREATE DATABASE IF NOT EXISTS default; -""" -) - -spark.sql( - """ -CREATE OR REPLACE TABLE default.table_unpartitioned ( - dt date, - number integer, - letter string -) -USING iceberg -""" -) - - -spark.sql( - """ - INSERT INTO default.table_unpartitioned - VALUES - (CAST('2023-03-01' AS date), 
1, 'a'), - (CAST('2023-03-02' AS date), 2, 'b'), - (CAST('2023-03-03' AS date), 3, 'c'), - (CAST('2023-03-04' AS date), 4, 'd'), - (CAST('2023-03-05' AS date), 5, 'e'), - (CAST('2023-03-06' AS date), 6, 'f'), - (CAST('2023-03-07' AS date), 7, 'g'), - (CAST('2023-03-08' AS date), 8, 'h'), - (CAST('2023-03-09' AS date), 9, 'i'), - (CAST('2023-03-10' AS date), 10, 'j'), - (CAST('2023-03-11' AS date), 11, 'k'), - (CAST('2023-03-12' AS date), 12, 'l'); - """ -) - - -spark.sql( - """ -CREATE OR REPLACE TABLE default.table_partitioned ( - dt date, - number integer, - letter string -) -USING iceberg -PARTITIONED BY (days(dt)) -""" -) - -spark.sql( - """ - INSERT INTO default.table_partitioned - VALUES - (CAST('2023-03-01' AS date), 1, 'a'), - (CAST('2023-03-02' AS date), 2, 'b'), - (CAST('2023-03-03' AS date), 3, 'c'), - (CAST('2023-03-04' AS date), 4, 'd'), - (CAST('2023-03-05' AS date), 5, 'e'), - (CAST('2023-03-06' AS date), 6, 'f'), - (CAST('2023-03-07' AS date), 7, 'g'), - (CAST('2023-03-08' AS date), 8, 'h'), - (CAST('2023-03-09' AS date), 9, 'i'), - (CAST('2023-03-10' AS date), 10, 'j'), - (CAST('2023-03-11' AS date), 11, 'k'), - (CAST('2023-03-12' AS date), 12, 'l'); - """ -) - -# By default, Spark uses merge on write deletes -# which optimize for read-performance - -spark.sql( - """ -CREATE OR REPLACE TABLE default.table_mor_deletes ( - dt date, - number integer, - letter string -) -USING iceberg -TBLPROPERTIES ( - 'write.delete.mode'='merge-on-read', - 'write.update.mode'='merge-on-read', - 'write.merge.mode'='merge-on-read', - 'format-version'='2' -); -""" -) - -spark.sql( - """ - INSERT INTO default.table_mor_deletes - VALUES - (CAST('2023-03-01' AS date), 1, 'a'), - (CAST('2023-03-02' AS date), 2, 'b'), - (CAST('2023-03-03' AS date), 3, 'c'), - (CAST('2023-03-04' AS date), 4, 'd'), - (CAST('2023-03-05' AS date), 5, 'e'), - (CAST('2023-03-06' AS date), 6, 'f'), - (CAST('2023-03-07' AS date), 7, 'g'), - (CAST('2023-03-08' AS date), 8, 'h'), - (CAST('2023-03-09' 
AS date), 9, 'i'), - (CAST('2023-03-10' AS date), 10, 'j'), - (CAST('2023-03-11' AS date), 11, 'k'), - (CAST('2023-03-12' AS date), 12, 'l'); - """ -) - -spark.sql( - """ - Delete from default.table_mor_deletes - where number > 3 and number < 10; - """ -) - -# TODO find better script to generate deletes in iceberg -CWD=".." -DEST_PATH='data/iceberg/generated_spec1_0_001' -os.system(f"python3 test_data_generator/generate_base_parquet.py 001 {CWD}/{DEST_PATH} spark") -location = "../data/iceberg/generated_spec1_0_001/base_file/file.parquet" -spark.read.parquet(location).createOrReplaceTempView('parquet_lineitem_view'); - -spark.sql( - """ - CREATE OR REPLACE TABLE default.pyspark_iceberg_table - USING ICEBERG - TBLPROPERTIES ( - 'format-version'='2', - 'write.update.mode'='merge-on-read' - ) - As select * from parquet_lineitem_view - """ -) - -spark.sql(""" -update default.pyspark_iceberg_table -set l_orderkey_bool=NULL, - l_partkey_int=NULL, - l_suppkey_long=NULL, - l_extendedprice_float=NULL, - l_extendedprice_double=NULL, - l_shipdate_date=NULL, - l_partkey_time=NULL, - l_commitdate_timestamp=NULL, - l_commitdate_timestamp_tz=NULL, - l_comment_string=NULL, - l_comment_blob=NULL -where l_partkey_int % 2 = 0;""") - -spark.sql(""" -insert into default.pyspark_iceberg_table -select * FROM default.pyspark_iceberg_table -where l_extendedprice_double < 30000 -""") - -spark.sql(""" -update default.pyspark_iceberg_table -set l_orderkey_bool = not l_orderkey_bool; -""") - - -spark.sql(""" -delete -from default.pyspark_iceberg_table -where l_extendedprice_double < 10000; -""") - -spark.sql(""" -delete -from default.pyspark_iceberg_table -where l_extendedprice_double > 70000; -""") - -spark.sql(""" -ALTER TABLE default.pyspark_iceberg_table -ADD COLUMN schema_evol_added_col_1 INT DEFAULT 42; -""") - -spark.sql(""" -UPDATE default.pyspark_iceberg_table -SET schema_evol_added_col_1 = l_partkey_int -WHERE l_partkey_int % 5 = 0; -""") - -spark.sql(""" -ALTER TABLE 
default.pyspark_iceberg_table -ALTER COLUMN schema_evol_added_col_1 TYPE BIGINT; - """) \ No newline at end of file diff --git a/scripts/requirements.txt b/scripts/requirements.txt index eea13ae..f5cb241 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,2 +1,2 @@ pyspark==3.5.0 -duckdb \ No newline at end of file +duckdb diff --git a/scripts/start-rest-catalog.sh b/scripts/start-rest-catalog.sh index ba8fca8..47924b6 100755 --- a/scripts/start-rest-catalog.sh +++ b/scripts/start-rest-catalog.sh @@ -1,38 +1,20 @@ -set -ex - -docker-compose kill -docker-compose rm -f -docker-compose up -d -docker-compose logs -f mc - -pip3 install -r requirements.txt - -python3 provision.py - -# Would be nice to have rest support in there :) -UNPARTITIONED_TABLE_PATH=$(curl -s http://127.0.0.1:8181/v1/namespaces/default/tables/table_unpartitioned | jq -r '."metadata-location"') - -SQL=$(cat <<-END +if test ! -f "./scripts/docker-compose.yml" +then + # in CI + echo "Please run from duckdb root." + exit 1 +fi -CREATE SECRET ( - TYPE S3, - KEY_ID 'admin', - SECRET 'password', - ENDPOINT '127.0.0.1:9000', - URL_STYLE 'path', - USE_SSL 0 -); +mkdir -p data/generated/iceberg/spark-rest +mkdir -p data/generated/intermediates -SELECT * FROM iceberg_scan('${UNPARTITIONED_TABLE_PATH}'); -END +# cd into scripts where docker-compose file is. 
+cd scripts -) +# need to have this happen in the background +set -ex -if test -f "../build/release/duckdb" -then - # in CI - ../build/release/duckdb -s "$SQL" -else - duckdb -s "$SQL" -fi +docker compose kill +docker compose rm -f +docker compose up --detach diff --git a/scripts/test_data_generator/generate_base_parquet.py b/scripts/test_data_generator/generate_base_parquet.py deleted file mode 100755 index 55aad73..0000000 --- a/scripts/test_data_generator/generate_base_parquet.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/python3 -import pyspark.sql -import sys -import duckdb -import os -from pyspark import SparkContext -from pathlib import Path - -if (len(sys.argv) < 3): - print("Usage: generate_base_parquet.py ()") - exit(1) - -SCALE = sys.argv[1] -DEST_PATH = sys.argv[2] -MODE = sys.argv[3] if len(sys.argv) > 3 else "default"; -PARQUET_SRC_FILE = f'{DEST_PATH}/base_file/file.parquet' -CWD = os.getcwd() -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - -### -### Generate dataset from DuckDB -### -con = duckdb.connect() -con.query("INSTALL tpch") -con.query("LOAD tpch") -con.query(f"SELECT setseed(0.42);") -con.query(f"CALL dbgen(sf={SCALE});") - -# TODO: we're missing the fixed byte array, as we don't have one in duckdb - -# UUID not supported in spark https://github.com/apache/iceberg/issues/4038, is a STRING now -if (MODE.lower() == "spark"): - con.query("""CREATE VIEW test_table as - SELECT - (l_orderkey%2=0) as l_orderkey_bool, - l_partkey::INT32 as l_partkey_int, - l_suppkey::INT64 as l_suppkey_long, - l_extendedprice::FLOAT as l_extendedprice_float, - l_extendedprice::DOUBLE as l_extendedprice_double, - l_extendedprice::DECIMAL(9,2) as l_extendedprice_dec9_2, - l_extendedprice::DECIMAL(18,6) as l_extendedprice_dec18_6, - l_extendedprice::DECIMAL(38,10) as l_extendedprice_dec38_10, - l_shipdate::DATE as l_shipdate_date, - l_partkey as l_partkey_time, - l_commitdate::TIMESTAMP as l_commitdate_timestamp, - l_commitdate::TIMESTAMPTZ as 
l_commitdate_timestamp_tz, - l_comment as l_comment_string, - gen_random_uuid()::VARCHAR as uuid, - l_comment::BLOB as l_comment_blob - FROM - lineitem;"""); -elif (MODE.lower() == "default"): - con.query("""CREATE VIEW test_table as - SELECT - (l_orderkey%2=0) as l_orderkey_bool, - l_partkey::INT32 as l_partkey_int, - l_suppkey::INT64 as l_suppkey_long, - l_extendedprice::FLOAT as l_extendedprice_float, - l_extendedprice::DOUBLE as l_extendedprice_double, - l_extendedprice::DECIMAL(9,2) as l_extendedprice_dec9_2, - l_extendedprice::DECIMAL(18,6) as l_extendedprice_dec18_6, - l_extendedprice::DECIMAL(38,10) as l_extendedprice_dec38_10, - l_shipdate::DATE as l_shipdate_date, - make_time(l_partkey%24, l_partkey%60, 0) as l_partkey_time, - make_time(l_partkey%24, l_partkey%60, 0)::TIMETZ as l_partkey_time_tz, - l_commitdate::TIMESTAMP as l_commitdate_timestamp, - l_commitdate::TIMESTAMPTZ as l_commitdate_timestamp_tz, - l_comment as l_comment_string, - gen_random_uuid()::UUID as uuid, - l_comment::BLOB as l_comment_blob - FROM - lineitem;"""); -else: - print(f"Unknown mode '{MODE}'") - exit(1) - -os.makedirs(os.path.dirname(PARQUET_SRC_FILE), exist_ok=True) -con.query(f"COPY test_table TO '{PARQUET_SRC_FILE}'"); - diff --git a/scripts/test_data_generator/generate_iceberg.py b/scripts/test_data_generator/generate_iceberg.py deleted file mode 100755 index 5f6afe7..0000000 --- a/scripts/test_data_generator/generate_iceberg.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/python3 -import pyspark -import pyspark.sql -import sys -import duckdb -import os -from pyspark import SparkContext -from pathlib import Path - -if (len(sys.argv) != 4 ): - print("Usage: generate_iceberg.py ") - exit(1) - -SCALE = sys.argv[1] -DEST_PATH = sys.argv[2] -ICEBERG_SPEC_VERSION = sys.argv[3] - -PARQUET_SRC_FILE = f'{DEST_PATH}/base_file/file.parquet' -TABLE_NAME = "iceberg_catalog.pyspark_iceberg_table"; -CWD = os.getcwd() -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - -### -### 
Generate dataset -### -os.system(f"python3 {SCRIPT_DIR}/generate_base_parquet.py {SCALE} {CWD}/{DEST_PATH} spark") - -### -### Configure everyone's favorite apache product -### -conf = pyspark.SparkConf() -conf.setMaster('local[*]') -conf.set('spark.sql.catalog.iceberg_catalog', 'org.apache.iceberg.spark.SparkCatalog') -conf.set('spark.sql.catalog.iceberg_catalog.type', 'hadoop') -conf.set('spark.sql.catalog.iceberg_catalog.warehouse', DEST_PATH) -conf.set('spark.sql.parquet.outputTimestampType', 'TIMESTAMP_MICROS') -conf.set('spark.driver.memory', '10g') -conf.set('spark.jars', f'{SCRIPT_DIR}/iceberg-spark-runtime-3.5_2.12-1.4.2.jar') -conf.set('spark.sql.extensions', 'org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions') -spark = pyspark.sql.SparkSession.builder.config(conf=conf).getOrCreate() -sc = spark.sparkContext -sc.setLogLevel("ERROR") - -### -### Create Iceberg table from dataset -### -spark.read.parquet(PARQUET_SRC_FILE).createOrReplaceTempView('parquet_file_view'); - -if ICEBERG_SPEC_VERSION == '1': - spark.sql(f"CREATE or REPLACE TABLE {TABLE_NAME} TBLPROPERTIES ('format-version'='{ICEBERG_SPEC_VERSION}') AS SELECT * FROM parquet_file_view"); -elif ICEBERG_SPEC_VERSION == '2': - spark.sql(f"CREATE or REPLACE TABLE {TABLE_NAME} TBLPROPERTIES ('format-version'='{ICEBERG_SPEC_VERSION}', 'write.update.mode'='merge-on-read') AS SELECT * FROM parquet_file_view"); -else: - print(f"Are you from the future? 
Iceberg spec version '{ICEBERG_SPEC_VERSION}' is unbeknownst to me") - exit(1) - -### -### Apply modifications to base table generating verification results between each step -### -update_files = [str(path) for path in Path(f'{SCRIPT_DIR}/updates_v{ICEBERG_SPEC_VERSION}').rglob('*.sql')] -update_files.sort() # Order matters obviously -last_file = "" - -for path in update_files: - full_file_path = f"{SCRIPT_DIR}/updates_v{ICEBERG_SPEC_VERSION}/{os.path.basename(path)}" - with open(full_file_path, 'r') as file: - file_trimmed = os.path.basename(path)[:-4] - last_file = file_trimmed - print(f"Applying {file_trimmed} to DB") - query = file.read() - # Run spark query - spark.sql(query) - print(f"Writing verification data") - - # Write total count - ret = spark.sql(f"SELECT COUNT(*) FROM {TABLE_NAME}") - out_path = f'{DEST_PATH}/expected_results/{file_trimmed}/count.csv' - os.makedirs(os.path.dirname(out_path), exist_ok=True) - with open(out_path, 'w') as f: - f.write("count\n") - f.write('%d' % ret.collect()[0][0]) - - # Create copy of table - df = spark.read.table(TABLE_NAME) - df.write.parquet(f"{DEST_PATH}/expected_results/{file_trimmed}/data"); - - # For documentation, also write the query we executed to the data - query_path = f'{DEST_PATH}/expected_results/{file_trimmed}/query.sql' - with open(query_path, 'w') as f: - f.write("-- The query executed at this step:\n") - f.write(query) - - -### -### Finally, we copy the latest results to a "final" dir for easy test writing -### -import shutil -shutil.copytree(f"{DEST_PATH}/expected_results/{last_file}", f"{DEST_PATH}/expected_results/last") diff --git a/scripts/test_data_generator/updates_v1/q02.sql b/scripts/test_data_generator/updates_v1/q02.sql deleted file mode 100644 index 63a7b39..0000000 --- a/scripts/test_data_generator/updates_v1/q02.sql +++ /dev/null @@ -1,3 +0,0 @@ -insert into iceberg_catalog.pyspark_iceberg_table -select * FROM iceberg_catalog.pyspark_iceberg_table -where l_extendedprice_double < 30000 \ 
No newline at end of file diff --git a/scripts/test_data_generator/updates_v1/q03.sql b/scripts/test_data_generator/updates_v1/q03.sql deleted file mode 100644 index 952e135..0000000 --- a/scripts/test_data_generator/updates_v1/q03.sql +++ /dev/null @@ -1,2 +0,0 @@ -update iceberg_catalog.pyspark_iceberg_table -set l_orderkey_bool = not l_orderkey_bool; \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v1/q08.sql b/scripts/test_data_generator/updates_v1/q08.sql deleted file mode 100644 index 151167d..0000000 --- a/scripts/test_data_generator/updates_v1/q08.sql +++ /dev/null @@ -1,2 +0,0 @@ -ALTER TABLE iceberg_catalog.pyspark_iceberg_table -ALTER COLUMN schema_evol_added_col_1 TYPE BIGINT; \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v2/q02.sql b/scripts/test_data_generator/updates_v2/q02.sql deleted file mode 100644 index 63a7b39..0000000 --- a/scripts/test_data_generator/updates_v2/q02.sql +++ /dev/null @@ -1,3 +0,0 @@ -insert into iceberg_catalog.pyspark_iceberg_table -select * FROM iceberg_catalog.pyspark_iceberg_table -where l_extendedprice_double < 30000 \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v2/q03.sql b/scripts/test_data_generator/updates_v2/q03.sql deleted file mode 100644 index 952e135..0000000 --- a/scripts/test_data_generator/updates_v2/q03.sql +++ /dev/null @@ -1,2 +0,0 @@ -update iceberg_catalog.pyspark_iceberg_table -set l_orderkey_bool = not l_orderkey_bool; \ No newline at end of file diff --git a/scripts/test_data_generator/updates_v2/q08.sql b/scripts/test_data_generator/updates_v2/q08.sql deleted file mode 100644 index 151167d..0000000 --- a/scripts/test_data_generator/updates_v2/q08.sql +++ /dev/null @@ -1,2 +0,0 @@ -ALTER TABLE iceberg_catalog.pyspark_iceberg_table -ALTER COLUMN schema_evol_added_col_1 TYPE BIGINT; \ No newline at end of file diff --git a/scripts/upload_iceberg_to_s3_test_server.sh b/scripts/upload_iceberg_to_s3_test_server.sh 
index 52a5208..2518af3 100644 --- a/scripts/upload_iceberg_to_s3_test_server.sh +++ b/scripts/upload_iceberg_to_s3_test_server.sh @@ -1,2 +1,2 @@ #!/bin/bash -AWS_ACCESS_KEY_ID=duckdb_minio_admin AWS_SECRET_ACCESS_KEY=duckdb_minio_admin_password aws --endpoint-url http://duckdb-minio.com:9000 s3 sync data/iceberg/generated_0_01 s3://test-bucket-public/iceberg_0_01 \ No newline at end of file +AWS_ACCESS_KEY_ID=duckdb_minio_admin AWS_SECRET_ACCESS_KEY=duckdb_minio_admin_password aws --endpoint-url http://duckdb-minio.com:9000 s3 sync data/generated/iceberg/generated_0_01 s3://test-bucket-public/iceberg_0_01 \ No newline at end of file diff --git a/test/sql/local/iceberg_catalog_read.test b/test/sql/local/iceberg_catalog_read.test index f076912..538b9f1 100644 --- a/test/sql/local/iceberg_catalog_read.test +++ b/test/sql/local/iceberg_catalog_read.test @@ -33,8 +33,9 @@ ATTACH '' AS my_datalake (TYPE ICEBERG); query IIIIII Show all tables; ---- -my_datalake default pyspark_iceberg_table [__] [INTEGER] false -my_datalake default table_mor_deletes [__] [INTEGER] false +my_datalake default pyspark_iceberg_table_v1 [__] [INTEGER] false +my_datalake default pyspark_iceberg_table_v2 [__] [INTEGER] false +my_datalake default table_more_deletes [__] [INTEGER] false my_datalake default table_partitioned [__] [INTEGER] false my_datalake default table_unpartitioned [__] [INTEGER] false @@ -64,9 +65,9 @@ select * from my_datalake.default.table_unpartitioned order by all; 2023-03-11 11 k 2023-03-12 12 l -# test deletes (see provision.py for where deletes occur) +# test deletes (see generate_iceberg_spark_rest.py for where deletes occur) query III -select * from my_datalake.default.table_mor_deletes order by all; +select * from my_datalake.default.table_more_deletes order by all; ---- 2023-03-01 1 a 2023-03-02 2 b @@ -76,7 +77,7 @@ select * from my_datalake.default.table_mor_deletes order by all; 2023-03-12 12 l statement error -select * from 
my_datalake.default.pyspark_iceberg_table; +select * from my_datalake.default.pyspark_iceberg_table_v2; ---- :.*Not implemented Error.* @@ -101,12 +102,12 @@ Create table my_datalake.default.new_table (a int, b varchar); :.*Not implemented Error.* statement error -Alter table my_datalake.default.table_mor_deletes add column new_column INTEGER default 10; +Alter table my_datalake.default.table_more_deletes add column new_column INTEGER default 10; ---- :.*Not implemented Error.* statement error -Drop table my_datalake.default.table_mor_deletes; +Drop table my_datalake.default.table_more_deletes; ---- :.*Not implemented Error.* diff --git a/test/sql/local/iceberg_metadata.test b/test/sql/local/iceberg_metadata.test index caf2bf8..e4ef083 100644 --- a/test/sql/local/iceberg_metadata.test +++ b/test/sql/local/iceberg_metadata.test @@ -4,57 +4,57 @@ # Before we load the extension, this will fail statement error -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg'); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg'); ---- Catalog Error require iceberg query IIIIIIII -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg', ALLOW_MOVED_PATHS=TRUE); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg', ALLOW_MOVED_PATHS=TRUE); ---- lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro 2 DATA ADDED EXISTING lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet PARQUET 51793 lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro 2 DATA DELETED EXISTING lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet PARQUET 60175 query IIIIIIII -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg', ALLOW_MOVED_PATHS=TRUE, version='1'); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg', ALLOW_MOVED_PATHS=TRUE, version='1'); ---- lineitem_iceberg/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro 
1 DATA ADDED EXISTING lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet PARQUET 60175 query IIIIIIII -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg', ALLOW_MOVED_PATHS=TRUE, version_name_format='v%s%s.metadata.json'); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg', ALLOW_MOVED_PATHS=TRUE, version_name_format='v%s%s.metadata.json'); ---- lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro 2 DATA ADDED EXISTING lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet PARQUET 51793 lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro 2 DATA DELETED EXISTING lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet PARQUET 60175 query IIIIIIII -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg', ALLOW_MOVED_PATHS=TRUE, version='2', version_name_format='v%s%s.metadata.json'); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg', ALLOW_MOVED_PATHS=TRUE, version='2', version_name_format='v%s%s.metadata.json'); ---- lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro 2 DATA ADDED EXISTING lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet PARQUET 51793 lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro 2 DATA DELETED EXISTING lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet PARQUET 60175 statement error -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE); ---- IO Error: Iceberg metadata file not found for table version '2' using 'none' compression and format(s): 'v%s%s.metadata.json,%s%s.metadata.json' statement error -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, 
METADATA_COMPRESSION_CODEC="blarg", version_name_format='blat%s%s'); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="blarg", version_name_format='blat%s%s'); ---- IO Error: Iceberg metadata file not found for table version '2' using 'blarg' compression and format(s): 'blat%s%s' query IIIIIIII -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip"); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip"); ---- lineitem_iceberg_gz/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro 0 DATA ADDED EXISTING lineitem_iceberg_gz/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet PARQUET 111968 statement error -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_nonexistent'); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_nonexistent'); ---- IO Error: Failed to read iceberg table. 
No version was provided and no version-hint could be found, statement error -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE); ---- :.*SET unsafe_enable_version_guessing.* @@ -62,35 +62,35 @@ statement ok SET unsafe_enable_version_guessing = true; query IIIIIIII -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE); ---- lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro 2 DATA ADDED EXISTING lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet PARQUET 51793 lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro 2 DATA DELETED EXISTING lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet PARQUET 60175 query IIIIIIII -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE, version='1'); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE, version='1'); ---- lineitem_iceberg/metadata/cf3d0be5-cf70-453d-ad8f-48fdc412e608-m0.avro 1 DATA ADDED EXISTING lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet PARQUET 60175 query IIIIIIII -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE, version_name_format='v%s%s.metadata.json'); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE, version_name_format='v%s%s.metadata.json'); ---- lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro 2 DATA ADDED EXISTING lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet PARQUET 51793 
lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro 2 DATA DELETED EXISTING lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet PARQUET 60175 query IIIIIIII -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE, version='?', version_name_format='v%s%s.metadata.json'); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE, version='?', version_name_format='v%s%s.metadata.json'); ---- lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m1.avro 2 DATA ADDED EXISTING lineitem_iceberg/data/00041-414-f3c73457-bbd6-4b92-9c15-17b241171b16-00001.parquet PARQUET 51793 lineitem_iceberg/metadata/10eaca8a-1e1c-421e-ad6d-b232e5ee23d3-m0.avro 2 DATA DELETED EXISTING lineitem_iceberg/data/00000-411-0792dcfe-4e25-4ca3-8ada-175286069a47-00001.parquet PARQUET 60175 query IIIIIIII -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC='gzip', version='?'); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC='gzip', version='?'); ---- lineitem_iceberg_gz/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro 0 DATA ADDED EXISTING lineitem_iceberg_gz/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet PARQUET 111968 query IIIIIIII -SELECT * FROM ICEBERG_METADATA('data/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC='gzip'); +SELECT * FROM ICEBERG_METADATA('data/persistent/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC='gzip'); ---- lineitem_iceberg_gz/metadata/23f9dbea-1e7f-4694-a82c-dc3c9a94953e-m0.avro 0 DATA ADDED EXISTING lineitem_iceberg_gz/data/00000-2-371a340c-ded5-4e85-aa49-9c788d6f21cd-00001.parquet PARQUET 111968 diff --git a/test/sql/local/iceberg_scan.test 
b/test/sql/local/iceberg_scan.test index 73610f7..1d6bfa1 100644 --- a/test/sql/local/iceberg_scan.test +++ b/test/sql/local/iceberg_scan.test @@ -1,4 +1,4 @@ -# name: test/sql/local/iceberg.test +# name: test/sql/local/iceberg_scan.test # description: test iceberg extension # group: [iceberg] @@ -15,25 +15,25 @@ require iceberg ### Scanning latest snapshot query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg', ALLOW_MOVED_PATHS=TRUE); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg', ALLOW_MOVED_PATHS=TRUE); ---- 51793 # Scanning 1st snapshot query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg', 3776207205136740581::UBIGINT, ALLOW_MOVED_PATHS=TRUE); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg', 3776207205136740581::UBIGINT, ALLOW_MOVED_PATHS=TRUE); ---- 60175 # Scanning 2nd snapshot query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg', 7635660646343998149::UBIGINT, ALLOW_MOVED_PATHS=TRUE); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg', 7635660646343998149::UBIGINT, ALLOW_MOVED_PATHS=TRUE); ---- 51793 # Scanning 2nd snapshot query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg', version='2', ALLOW_MOVED_PATHS=TRUE); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg', version='2', ALLOW_MOVED_PATHS=TRUE); ---- 51793 @@ -42,47 +42,47 @@ SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg', version='2', # 1 = 2023-02-15 15:07:54.504 # 2 = 2023-02-15 15:08:14.73 query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg', '2023-02-15 15:07:54.504'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg', '2023-02-15 15:07:54.504'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); ---- 60175 query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg', '2023-02-15 
15:07:54.729'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg', '2023-02-15 15:07:54.729'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); ---- 60175 query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg', '2023-02-15 15:08:14.73'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg', '2023-02-15 15:08:14.73'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); ---- 51793 statement error -FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg', '2023-02-15 15:07:54.503'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); +FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg', '2023-02-15 15:07:54.503'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); ---- IO Error: Could not find latest snapshots for timestamp 2023-02-15 15:07:54.503 statement error -SELECT * FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE); +SELECT * FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE); ---- IO Error: Iceberg metadata file not found for table version '2' using 'none' compression and format(s): 'v%s%s.metadata.json,%s%s.metadata.json' query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip"); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip"); ---- 111968 statement error -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip", version='1'); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip", version='1'); ---- IO Error: No snapshots found query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip", version='2', 
version_name_format='v%s%s.metadata.json'); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_gz', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip", version='2', version_name_format='v%s%s.metadata.json'); ---- 111968 statement error -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_no_hint', ALLOW_MOVED_PATHS=TRUE); ---- :.*SET unsafe_enable_version_guessing.* @@ -90,91 +90,93 @@ statement ok SET unsafe_enable_version_guessing=true; query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_no_hint', '2023-02-15 15:07:54.504'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_no_hint', '2023-02-15 15:07:54.504'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); ---- 60175 query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_no_hint', '2023-02-15 15:07:54.729'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_no_hint', '2023-02-15 15:07:54.729'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); ---- 60175 query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_no_hint', '2023-02-15 15:08:14.73'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_no_hint', '2023-02-15 15:08:14.73'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); ---- 51793 statement error -FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_no_hint', '2023-02-15 15:07:54.503'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); +FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_no_hint', '2023-02-15 15:07:54.503'::TIMESTAMP, ALLOW_MOVED_PATHS=TRUE); ---- IO Error: Could not find latest snapshots for timestamp 2023-02-15 15:07:54.503 query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, 
METADATA_COMPRESSION_CODEC="gzip"); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip"); ---- 111968 query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip", version='2', version_name_format='v%s%s.metadata.json'); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/iceberg/lineitem_iceberg_gz_no_hint', ALLOW_MOVED_PATHS=TRUE, METADATA_COMPRESSION_CODEC="gzip", version='2', version_name_format='v%s%s.metadata.json'); ---- 111968 +require-env DUCKDB_ICEBERG_HAVE_GENERATED_DATA + query I -SELECT typeof(l_orderkey_bool) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; +SELECT typeof(l_orderkey_bool) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; ---- BOOLEAN query I -SELECT typeof(l_partkey_int) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; +SELECT typeof(l_partkey_int) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; ---- INTEGER query I -SELECT typeof(l_suppkey_long) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; +SELECT typeof(l_suppkey_long) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; ---- BIGINT query I -SELECT typeof(l_extendedprice_float) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; +SELECT typeof(l_extendedprice_float) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; ---- FLOAT query I -SELECT typeof(l_extendedprice_double) FROM 
ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; +SELECT typeof(l_extendedprice_double) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; ---- DOUBLE query I -SELECT typeof(l_extendedprice_dec9_2) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; +SELECT typeof(l_extendedprice_dec9_2) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; ---- DECIMAL(9,2) query I -SELECT typeof(l_shipdate_date) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; +SELECT typeof(l_shipdate_date) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; ---- DATE -query I -SELECT typeof(l_partkey_time) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; ----- -INTEGER +# query I +# SELECT typeof(l_partkey_time) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; +# ---- +# INTEGER query I -SELECT typeof(l_commitdate_timestamp) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; +SELECT typeof(l_commitdate_timestamp) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; ---- TIMESTAMP query I -SELECT typeof(l_commitdate_timestamp_tz) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; +SELECT typeof(l_commitdate_timestamp_tz) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; ---- TIMESTAMP WITH TIME ZONE query I -SELECT typeof(l_comment_string) FROM 
ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; +SELECT typeof(l_comment_string) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; ---- VARCHAR query I -SELECT typeof(l_comment_blob) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; +SELECT typeof(l_comment_blob) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1', ALLOW_MOVED_PATHS=TRUE) LIMIT 1; ---- BLOB diff --git a/test/sql/local/iceberg_scan_generated_data_0_001.test b/test/sql/local/iceberg_scan_generated_data_0_001.test index 3736fa2..1a3584f 100644 --- a/test/sql/local/iceberg_scan_generated_data_0_001.test +++ b/test/sql/local/iceberg_scan_generated_data_0_001.test @@ -8,113 +8,117 @@ require iceberg ### Invalid iceberg metadata leads to failed statement statement error -SELECT count(*) FROM ICEBERG_SCAN('data/bad_data/bad_iceberg_metadata.json'); +SELECT count(*) FROM ICEBERG_SCAN('data/persistent/bad_data/bad_iceberg_metadata.json'); ---- -Invalid Input Error: Fails to parse iceberg metadata from data/bad_data/bad_iceberg_metadata.json +Invalid Input Error: Fails to parse iceberg metadata from data/persistent/bad_data/bad_iceberg_metadata.json ### Iceberg spec v1 -# Check count matches -query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table'); +require-env DUCKDB_ICEBERG_HAVE_GENERATED_DATA + +# Check count matches the same as last file +query I nosort table_v1_count +SELECT count(*) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1'); ---- -:data/iceberg/generated_spec1_0_001/expected_results/last/count.csv + +query I nosort table_v1_count +SELECT count(*) FROM PARQUET_SCAN('data/generated/intermediates/spark-local/pyspark_iceberg_table_v1/last/data.parquet/*.parquet'); +---- + # Check schema is identical, sorting by uuid 
to guarantee unique order query I nosort q1-schema -DESCRIBE SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table') ORDER BY uuid; +DESCRIBE SELECT * FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1') ORDER BY uuid; ---- query I nosort q1-schema -DESCRIBE SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; +DESCRIBE SELECT * FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1/metadata/v9.metadata.json') ORDER BY uuid; ---- query I nosort q1-schema -DESCRIBE SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec1_0_001/expected_results/last/data/*.parquet') ORDER BY uuid; +DESCRIBE SELECT * FROM PARQUET_SCAN('data/generated/intermediates/spark-local/pyspark_iceberg_table_v1/last/data.parquet/*.parquet') ORDER BY uuid; ---- # Check data is identical, sorting by uuid to guarantee unique order query I nosort q1-data -SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table') ORDER BY uuid; +SELECT * FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1') ORDER BY uuid; ---- query I nosort q1-data -SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; +SELECT * FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1/metadata/v9.metadata.json') ORDER BY uuid; ---- query I nosort q1-data -SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec1_0_001/expected_results/last/data/*.parquet') ORDER BY uuid; +SELECT * FROM PARQUET_SCAN('data/generated/intermediates/spark-local/pyspark_iceberg_table_v1/last/data.parquet/*.parquet') ORDER BY uuid; ---- # Confirm the type matches that of the iceberg schema query IIIIII -DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table') ORDER BY uuid; +DESCRIBE SELECT 
schema_evol_added_col_1 FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1') ORDER BY uuid; ---- schema_evol_added_col_1 BIGINT YES NULL NULL NULL ### Iceberg spec v2 # Check count matches -query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table'); +query I nosort count_match_r1 +SELECT count(*) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v2'); ---- -:data/iceberg/generated_spec2_0_001/expected_results/last/count.csv # We should also be able to scan the metadata file directly -query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json'); +query I nosort count_match_r1 +SELECT count(*) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v2/metadata/v9.metadata.json'); ---- -:data/iceberg/generated_spec2_0_001/expected_results/last/count.csv # Check schema is identical, sorting by uuid to guarantee unique order query I nosort q2-schema -DESCRIBE SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table') ORDER BY uuid; +DESCRIBE SELECT * FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v2') ORDER BY uuid; ---- query I nosort q2-schema -DESCRIBE SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; +DESCRIBE SELECT * FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v2/metadata/v9.metadata.json') ORDER BY uuid; ---- query I nosort q2-schema -DESCRIBE SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec2_0_001/expected_results/last/data/*.parquet') ORDER BY uuid; +DESCRIBE SELECT * FROM PARQUET_SCAN('data/generated/intermediates/spark-local/pyspark_iceberg_table_v2/last/data.parquet/*.parquet') ORDER BY uuid; ---- # Check data is identical, sorting by uuid to guarantee unique order query I nosort q2-data -SELECT * FROM 
ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table') ORDER BY uuid; +SELECT * FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v2') ORDER BY uuid; ---- # Check data is identical, sorting by uuid to guarantee unique order query I nosort q2-data -SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; +SELECT * FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v2/metadata/v9.metadata.json') ORDER BY uuid; ---- query I nosort q2-data -SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec2_0_001/expected_results/last/data/*.parquet') ORDER BY uuid; +SELECT * FROM PARQUET_SCAN('data/generated/intermediates/spark-local/pyspark_iceberg_table_v2/last/data.parquet/*.parquet') ORDER BY uuid; ---- ### Test schema evolution # Latest metadata version has correct type query IIIIII -DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v9.metadata.json') ORDER BY uuid; +DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v2/metadata/v9.metadata.json') ORDER BY uuid; ---- schema_evol_added_col_1 BIGINT YES NULL NULL NULL # One before has the old type query IIIIII -DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v8.metadata.json') ORDER BY uuid; +DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v2/metadata/v8.metadata.json') ORDER BY uuid; ---- schema_evol_added_col_1 INTEGER YES NULL NULL NULL # Even older: it did not exist yet statement error -DESCRIBE SELECT schema_evol_added_col_1 FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table/metadata/v6.metadata.json') ORDER BY uuid; +DESCRIBE SELECT schema_evol_added_col_1 FROM 
ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v2/metadata/v6.metadata.json') ORDER BY uuid; ---- Binder Error # Check that there are injected cardinality query II -EXPLAIN SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/generated_spec2_0_001/pyspark_iceberg_table'); +EXPLAIN SELECT count(*) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v2'); ---- physical_plan :.* ANTI .*PARQUET_SCAN.*Rows.*Rows.* \ No newline at end of file diff --git a/test/sql/local/iceberg_scan_generated_data_1.test_slow b/test/sql/local/iceberg_scan_generated_data_1.test_slow index b5b5a10..a9a9f47 100644 --- a/test/sql/local/iceberg_scan_generated_data_1.test_slow +++ b/test/sql/local/iceberg_scan_generated_data_1.test_slow @@ -1,4 +1,4 @@ -# name: test/sql/iceberg_scan_generated_data_1.test_slow +# name: test/sql/local/iceberg_scan_generated_data_1.test_slow # description: test iceberg extension with the sf1 generated test set # group: [iceberg] @@ -6,36 +6,38 @@ require parquet require iceberg -require-env DUCKDB_ICEBERG_HAVE_TEST_DATA +require-env DUCKDB_ICEBERG_HAVE_GENERATED_DATA # Check count matches -query I -SELECT count(*) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table'); ----- -:data/iceberg/generated_spec1_0_001/expected_results/last/count.csv +query I nosort count_iceberg_scan +SELECT count(*) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1'); + + +query I nosort count_iceberg_scan +select count(*) from read_parquet('data/generated/intermediates/spark-local/pyspark_iceberg_table_v1/last/data.parquet/*.parquet'); # Check data is identical, sorting by uuid to guarantee unique order. 
query I nosort q1 -SELECT COUNT(*) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table'); +SELECT COUNT(*) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1'); ---- query I nosort q1 -SELECT COUNT(*) FROM PARQUET_SCAN('data/iceberg/generated_spec1_0_001/expected_results/last/data/*.parquet'); +SELECT COUNT(*) FROM PARQUET_SCAN('data/generated/intermediates/spark-local/pyspark_iceberg_table_v1/last/data.parquet/*.parquet'); ---- query I nosort q2 -SELECT COUNT(*), MIN(l_suppkey_long), MAX(l_suppkey_long), SUM(l_suppkey_long) FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table'); +SELECT COUNT(*), MIN(l_suppkey_long), MAX(l_suppkey_long), SUM(l_suppkey_long) FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1'); ---- query I nosort q2 -SELECT COUNT(*), MIN(l_suppkey_long), MAX(l_suppkey_long), SUM(l_suppkey_long) FROM PARQUET_SCAN('data/iceberg/generated_spec1_0_001/expected_results/last/data/*.parquet'); +SELECT COUNT(*), MIN(l_suppkey_long), MAX(l_suppkey_long), SUM(l_suppkey_long) FROM PARQUET_SCAN('data/generated/intermediates/spark-local/pyspark_iceberg_table_v1/last/data.parquet/*.parquet'); ---- # Full table compare: very slow query I nosort q3 -SELECT * FROM ICEBERG_SCAN('data/iceberg/generated_spec1_0_001/pyspark_iceberg_table') WHERE uuid NOT NULL ORDER BY uuid; +SELECT * FROM ICEBERG_SCAN('data/generated/iceberg/spark-local/pyspark_iceberg_table_v1') WHERE uuid NOT NULL ORDER BY uuid; ---- query I nosort q3 -SELECT * FROM PARQUET_SCAN('data/iceberg/generated_spec1_0_001/expected_results/last/data/*.parquet') WHERE uuid NOT NULL ORDER BY uuid; +SELECT * FROM PARQUET_SCAN('data/generated/intermediates/spark-local/pyspark_iceberg_table_v1/last/data.parquet/*.parquet') WHERE uuid NOT NULL ORDER BY uuid; ---- diff --git a/test/sql/local/iceberg_snapshots.test b/test/sql/local/iceberg_snapshots.test index ac05270..3fc61e1 100644 --- 
a/test/sql/local/iceberg_snapshots.test +++ b/test/sql/local/iceberg_snapshots.test @@ -1,10 +1,10 @@ -# name: test/sql/iceberg_snapshots.test +# name: test/sql/local/iceberg_snapshots.test # description: test iceberg snapshots function # group: [iceberg] # Before we load the extension, this will fail statement error -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg'); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg'); ---- Catalog Error @@ -13,48 +13,48 @@ require notwindows require iceberg query IIII -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg'); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg'); ---- 1 3776207205136740581 2023-02-15 15:07:54.504 lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro 2 7635660646343998149 2023-02-15 15:08:14.73 lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro query IIII -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg', version='1'); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg', version='1'); ---- 1 3776207205136740581 2023-02-15 15:07:54.504 lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro statement error -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg', version="1", version_name_format='v%s%s.metadata.gz'); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg', version="1", version_name_format='v%s%s.metadata.gz'); ---- IO Error: Iceberg metadata file not found for table version '1' using 'none' compression and format(s): 'v%s%s.metadata.gz' query IIII -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg', version="1", version_name_format='v%s%s.metadata.json'); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg', version="1", version_name_format='v%s%s.metadata.json'); ---- 1 
3776207205136740581 2023-02-15 15:07:54.504 lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro query IIII -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg', version='1'); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg', version='1'); ---- 1 3776207205136740581 2023-02-15 15:07:54.504 lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro statement error -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_nonexistent'); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg_nonexistent'); ---- IO Error: Failed to read iceberg table. No version was provided and no version-hint could be found, statement error -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_gz'); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg_gz'); ---- IO Error: Iceberg metadata file not found for table version '2' using 'none' compression and format(s): 'v%s%s.metadata.json,%s%s.metadata.json' query IIII -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_gz', metadata_compression_codec="gzip"); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg_gz', metadata_compression_codec="gzip"); ---- 0 4468019210336628573 2024-03-13 18:38:58.602 lineitem_iceberg_gz/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro query IIII -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_gz', metadata_compression_codec="gzip", version='2'); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg_gz', metadata_compression_codec="gzip", version='2'); ---- 0 4468019210336628573 2024-03-13 18:38:58.602 lineitem_iceberg_gz/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro @@ -62,24 +62,24 @@ statement ok SET unsafe_enable_version_guessing=true; query IIII -SELECT * FROM 
ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_no_hint'); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg_no_hint'); ---- 1 3776207205136740581 2023-02-15 15:07:54.504 lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro 2 7635660646343998149 2023-02-15 15:08:14.73 lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro query IIII -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_no_hint', version='1'); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg_no_hint', version='1'); ---- 1 3776207205136740581 2023-02-15 15:07:54.504 lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro query IIII -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_no_hint', version="?"); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg_no_hint', version="?"); ---- 1 3776207205136740581 2023-02-15 15:07:54.504 lineitem_iceberg/metadata/snap-3776207205136740581-1-cf3d0be5-cf70-453d-ad8f-48fdc412e608.avro 2 7635660646343998149 2023-02-15 15:08:14.73 lineitem_iceberg/metadata/snap-7635660646343998149-1-10eaca8a-1e1c-421e-ad6d-b232e5ee23d3.avro query IIII -SELECT * FROM ICEBERG_SNAPSHOTS('data/iceberg/lineitem_iceberg_gz_no_hint', metadata_compression_codec="gzip"); +SELECT * FROM ICEBERG_SNAPSHOTS('data/persistent/iceberg/lineitem_iceberg_gz_no_hint', metadata_compression_codec="gzip"); ---- 0 4468019210336628573 2024-03-13 18:38:58.602 lineitem_iceberg_gz/metadata/snap-4468019210336628573-1-23f9dbea-1e7f-4694-a82c-dc3c9a94953e.avro