Skip to content

Commit

Permalink
add more tests, especially tests that test delete vectors
Browse files Browse the repository at this point in the history
  • Loading branch information
Tmonster committed Feb 24, 2025
1 parent 15a1e37 commit d71eb18
Show file tree
Hide file tree
Showing 49 changed files with 1,238 additions and 4 deletions.
4 changes: 0 additions & 4 deletions .github/workflows/Rest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ jobs:
run: |
make release
- name: Start Rest Catalog
run: |
make start-rest-catalog
- name: Generate data
run: |
make data
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- (Re)build the table from every row of parquet_file_view.
-- Table properties request format version 2 and merge-on-read UPDATEs.
CREATE OR REPLACE TABLE iceberg_catalog.lineitem_001_deletes
    TBLPROPERTIES (
        'format-version' = '2',
        'write.update.mode' = 'merge-on-read'
    )
AS
SELECT *
FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Null out the listed columns on every row whose l_partkey is even.
UPDATE iceberg_catalog.lineitem_001_deletes
SET
    l_orderkey      = NULL,
    l_partkey       = NULL,
    l_suppkey       = NULL,
    l_linenumber    = NULL,
    l_quantity      = NULL,
    l_extendedprice = NULL,
    l_discount      = NULL,
    l_shipdate      = NULL,
    l_comment       = NULL
WHERE l_partkey % 2 = 0;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=0.01) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 0.01.
duckdb_con.execute("call dbgen(sf=0.01)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- (Re)build the table from every row of parquet_file_view.
-- Table properties request format version 2 and merge-on-read UPDATEs.
CREATE OR REPLACE TABLE iceberg_catalog.lineitem_001_deletes
    TBLPROPERTIES (
        'format-version' = '2',
        'write.update.mode' = 'merge-on-read'
    )
AS
SELECT *
FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Null out the listed columns on every row whose l_partkey is even.
UPDATE iceberg_catalog.lineitem_001_deletes
SET
    l_orderkey      = NULL,
    l_partkey       = NULL,
    l_suppkey       = NULL,
    l_linenumber    = NULL,
    l_quantity      = NULL,
    l_extendedprice = NULL,
    l_discount      = NULL,
    l_shipdate      = NULL,
    l_comment       = NULL
WHERE l_partkey % 2 = 0;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=0.01) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 0.01.
duckdb_con.execute("call dbgen(sf=0.01)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- (Re)build an Iceberg table partitioned by l_shipmode from every row of
-- parquet_file_view; format version 2 with merge-on-read UPDATEs.
CREATE OR REPLACE TABLE iceberg_catalog.lineitem_partitioned_l_shipmode
    USING iceberg
    PARTITIONED BY (l_shipmode)
    TBLPROPERTIES (
        'format-version' = '2',
        'write.update.mode' = 'merge-on-read'
    )
AS
SELECT *
FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Remove every row whose l_shipmode (the partition column) is 'TRUCK'.
DELETE FROM iceberg_catalog.lineitem_partitioned_l_shipmode
WHERE l_shipmode = 'TRUCK';
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=0.01) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 0.01.
duckdb_con.execute("call dbgen(sf=0.01)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- (Re)build an Iceberg table partitioned by l_shipmode from every row of
-- parquet_file_view; format version 2 with merge-on-read UPDATEs.
CREATE OR REPLACE TABLE iceberg_catalog.lineitem_partitioned_l_shipmode_deletes
    USING iceberg
    PARTITIONED BY (l_shipmode)
    TBLPROPERTIES (
        'format-version' = '2',
        'write.update.mode' = 'merge-on-read'
    )
AS
SELECT *
FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Null out four columns on every row with line number 3, 4 or 5.
UPDATE iceberg_catalog.lineitem_partitioned_l_shipmode_deletes
SET
    l_comment    = NULL,
    l_quantity   = NULL,
    l_discount   = NULL,
    l_linestatus = NULL
WHERE l_linenumber IN (3, 4, 5);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=0.01) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 0.01.
duckdb_con.execute("call dbgen(sf=0.01)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- (Re)build the table from every row of parquet_file_view.
-- Table properties request format version 2 and merge-on-read UPDATEs.
CREATE OR REPLACE TABLE iceberg_catalog.lineitem_sf1_deletes
    TBLPROPERTIES (
        'format-version' = '2',
        'write.update.mode' = 'merge-on-read'
    )
AS
SELECT *
FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Null out the listed columns on every row whose l_partkey is even.
UPDATE iceberg_catalog.lineitem_sf1_deletes
SET
    l_orderkey      = NULL,
    l_partkey       = NULL,
    l_suppkey       = NULL,
    l_linenumber    = NULL,
    l_quantity      = NULL,
    l_extendedprice = NULL,
    l_discount      = NULL,
    l_shipdate      = NULL,
    l_comment       = NULL
WHERE l_partkey % 2 = 0;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=1) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 1.
duckdb_con.execute("call dbgen(sf=1)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- (Re)build the table from every row of parquet_file_view.
-- Iceberg's 'write.delete.mode' defaults to copy-on-write independently of
-- 'write.update.mode', so it is set explicitly here: a row-level DELETE on
-- this table then writes a delete file instead of rewriting data files.
-- NOTE(review): assumes this table is meant to exercise delete files - confirm.
CREATE OR REPLACE TABLE iceberg_catalog.lineitem_sf_01_1_delete
    TBLPROPERTIES (
        'format-version'='2',
        'write.update.mode'='merge-on-read',
        'write.delete.mode'='merge-on-read'
    )
AS SELECT * FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Remove the row(s) with order key 10053 and part key 77.
DELETE FROM iceberg_catalog.lineitem_sf_01_1_delete
WHERE l_orderkey = 10053
  AND l_partkey = 77;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=0.01) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 0.01.
duckdb_con.execute("call dbgen(sf=0.01)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- (Re)build the table from every row of parquet_file_view.
-- Table properties request format version 2 and merge-on-read UPDATEs.
CREATE OR REPLACE TABLE iceberg_catalog.lineitem_sf_01_no_deletes
    TBLPROPERTIES (
        'format-version' = '2',
        'write.update.mode' = 'merge-on-read'
    )
AS
SELECT *
FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=0.01) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 0.01.
duckdb_con.execute("call dbgen(sf=0.01)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- (Re)build the table from every row of parquet_file_view.
-- Table properties request format version 2 and merge-on-read UPDATEs.
CREATE OR REPLACE TABLE iceberg_catalog.table_with_deletes
    TBLPROPERTIES (
        'format-version' = '2',
        'write.update.mode' = 'merge-on-read'
    )
AS
SELECT *
FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Null out the listed columns on every row whose l_partkey is even.
UPDATE iceberg_catalog.table_with_deletes
SET
    l_orderkey      = NULL,
    l_partkey       = NULL,
    l_suppkey       = NULL,
    l_linenumber    = NULL,
    l_quantity      = NULL,
    l_extendedprice = NULL,
    l_discount      = NULL,
    l_shipdate      = NULL,
    l_comment       = NULL
WHERE l_partkey % 2 = 0;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=0.01) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 0.01.
duckdb_con.execute("call dbgen(sf=0.01)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- (Re)build the table from every row of parquet_file_view.
-- Table properties request format version 2 and merge-on-read UPDATEs.
CREATE OR REPLACE TABLE default.lineitem_001_deletes
    TBLPROPERTIES (
        'format-version' = '2',
        'write.update.mode' = 'merge-on-read'
    )
AS
SELECT *
FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Null out the listed columns on every row whose l_partkey is even.
UPDATE default.lineitem_001_deletes
SET
    l_orderkey      = NULL,
    l_partkey       = NULL,
    l_suppkey       = NULL,
    l_linenumber    = NULL,
    l_quantity      = NULL,
    l_extendedprice = NULL,
    l_discount      = NULL,
    l_shipdate      = NULL,
    l_comment       = NULL
WHERE l_partkey % 2 = 0;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=0.01) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 0.01.
duckdb_con.execute("call dbgen(sf=0.01)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- (Re)build an Iceberg table partitioned by l_shipmode from every row of
-- parquet_file_view; format version 2 with merge-on-read UPDATEs.
CREATE OR REPLACE TABLE default.lineitem_partitioned_l_shipmode
    USING iceberg
    PARTITIONED BY (l_shipmode)
    TBLPROPERTIES (
        'format-version' = '2',
        'write.update.mode' = 'merge-on-read'
    )
AS
SELECT *
FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Remove every row whose l_shipmode (the partition column) is 'TRUCK'.
DELETE FROM default.lineitem_partitioned_l_shipmode
WHERE l_shipmode = 'TRUCK';
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=0.01) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 0.01.
duckdb_con.execute("call dbgen(sf=0.01)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- (Re)build an Iceberg table partitioned by l_shipmode from every row of
-- parquet_file_view; format version 2 with merge-on-read UPDATEs.
CREATE OR REPLACE TABLE default.lineitem_partitioned_l_shipmode_deletes
    USING iceberg
    PARTITIONED BY (l_shipmode)
    TBLPROPERTIES (
        'format-version' = '2',
        'write.update.mode' = 'merge-on-read'
    )
AS
SELECT *
FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Null out four columns on every row with line number 3, 4 or 5.
UPDATE default.lineitem_partitioned_l_shipmode_deletes
SET
    l_comment    = NULL,
    l_quantity   = NULL,
    l_discount   = NULL,
    l_linestatus = NULL
WHERE l_linenumber IN (3, 4, 5);
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=0.01) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 0.01.
duckdb_con.execute("call dbgen(sf=0.01)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- (Re)build the table from every row of parquet_file_view.
-- Table properties request format version 2 and merge-on-read UPDATEs.
CREATE OR REPLACE TABLE default.lineitem_sf1_deletes
    TBLPROPERTIES (
        'format-version' = '2',
        'write.update.mode' = 'merge-on-read'
    )
AS
SELECT *
FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Null out the listed columns on every row whose l_partkey is even.
UPDATE default.lineitem_sf1_deletes
SET
    l_orderkey      = NULL,
    l_partkey       = NULL,
    l_suppkey       = NULL,
    l_linenumber    = NULL,
    l_quantity      = NULL,
    l_extendedprice = NULL,
    l_discount      = NULL,
    l_shipdate      = NULL,
    l_comment       = NULL
WHERE l_partkey % 2 = 0;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=1) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 1.
duckdb_con.execute("call dbgen(sf=1)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- (Re)build the table from every row of parquet_file_view.
-- Iceberg's 'write.delete.mode' defaults to copy-on-write independently of
-- 'write.update.mode', so it is set explicitly here: a row-level DELETE on
-- this table then writes a delete file instead of rewriting data files.
-- NOTE(review): assumes this table is meant to exercise delete files - confirm.
CREATE OR REPLACE TABLE default.lineitem_sf_01_1_delete
    TBLPROPERTIES (
        'format-version'='2',
        'write.update.mode'='merge-on-read',
        'write.delete.mode'='merge-on-read'
    )
AS SELECT * FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Remove the row(s) with order key 10053 and part key 77.
DELETE FROM default.lineitem_sf_01_1_delete
WHERE l_orderkey = 10053
  AND l_partkey = 77;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=0.01) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 0.01.
duckdb_con.execute("call dbgen(sf=0.01)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- (Re)build the table from every row of parquet_file_view.
-- Table properties request format version 2 and merge-on-read UPDATEs.
CREATE OR REPLACE TABLE default.lineitem_sf_01_no_deletes
    TBLPROPERTIES (
        'format-version' = '2',
        'write.update.mode' = 'merge-on-read'
    )
AS
SELECT *
FROM parquet_file_view;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Generate the TPC-H ``lineitem`` table (sf=0.01) and export it to Parquet.

The destination path is taken from the ``PARQUET_SRC_FILE`` environment
variable, which must be set before this script runs.
"""
import duckdb
import os

PARQUET_SRC_FILE = os.getenv('PARQUET_SRC_FILE')
if not PARQUET_SRC_FILE:
    # os.getenv returns None when the variable is unset; interpolating that
    # below would silently export to a file literally named 'None'.
    raise RuntimeError("PARQUET_SRC_FILE environment variable must be set")

# Double single quotes so the path remains a valid SQL string literal.
escaped_path = PARQUET_SRC_FILE.replace("'", "''")

duckdb_con = duckdb.connect()
# Populate the database with TPC-H tables at scale factor 0.01.
duckdb_con.execute("call dbgen(sf=0.01)")
duckdb_con.execute(f"copy lineitem to '{escaped_path}' (FORMAT PARQUET)")
Loading

0 comments on commit d71eb18

Please sign in to comment.