From aa5abb94774faa9a607361232c6e5784b5ac1744 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 30 Jan 2025 06:54:41 -0800 Subject: [PATCH] add test for conflicting column names --- datafusion/core/tests/sql/select.rs | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index b76259a29d153..c33304b48be8a 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -491,4 +491,41 @@ async fn test_select_system_column() { "+--------------+-----------+", ]; assert_batches_sorted_eq!(expected, &batchs); + + let batch = record_batch!( + ("other_id", UInt8, [1, 2, 3]), + ("bank_account", UInt64, [9, 10, 11]), + ("_row_id", UInt32, [10, 11, 12]) + ) + .unwrap(); + let schema = Schema::new(vec![ + Field::new("other_id", DataType::UInt8, true), + Field::new("bank_account", DataType::UInt64, true), + Field::new("_row_id", DataType::UInt32, true), // not a system column! + ]); + let batch = batch.with_schema(Arc::new(schema)).unwrap(); + let _ = ctx.register_batch("test2", batch); + + // Normally _row_id would be a name conflict + // But when it's a conflict between a metadata column and a non-metadata column, the non metadata column should be used + let select7 = + "SELECT id, other_id, _row_id FROM test INNER JOIN test2 ON id = other_id"; + let df = ctx.sql(select7).await.unwrap(); + let batchs = df.collect().await.unwrap(); + #[rustfmt::skip] + let expected = [ + "+----+----------+---------+", + "| id | other_id | _row_id |", + "+----+----------+---------+", + "| 1 | 1 | 10 |", + "| 2 | 2 | 11 |", + "| 3 | 3 | 12 |", + "+----+----------+---------+", + ]; + assert_batches_sorted_eq!(expected, &batchs); + + // Demonstrate that for other columns we get a conflict + let select7 = + "SELECT id, other_id, bank_account FROM test INNER JOIN test2 ON id = other_id"; + assert!(ctx.sql(select7).await.is_err()); }