From d4357ec42b7759402a60020266ca64508f8fa99a Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh
Date: Sat, 17 Feb 2024 11:15:43 -0800
Subject: [PATCH 1/4] Add more doc to InputOrderMode (#9255)

---
 datafusion/physical-plan/src/ordering.rs    | 11 +++++++----
 datafusion/physical-plan/src/windows/mod.rs |  8 ++++----
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/datafusion/physical-plan/src/ordering.rs b/datafusion/physical-plan/src/ordering.rs
index 047f89eef193..8b596b9cb23e 100644
--- a/datafusion/physical-plan/src/ordering.rs
+++ b/datafusion/physical-plan/src/ordering.rs
@@ -28,11 +28,14 @@
 /// - A `PARTITION BY a, b` or a `PARTITION BY b, a` can use `Sorted` mode.
 ///
 /// ## Aggregations
-/// - A `GROUP BY b` clause can use `Linear` mode.
-/// - A `GROUP BY a, c` or a `GROUP BY BY c, a` can use
-/// `PartiallySorted([0])` or `PartiallySorted([1])` modes, respectively.
+/// - A `GROUP BY b` clause can use `Linear` mode, as the only permutation `[b]`
+/// cannot satisfy the existing ordering.
+/// - A `GROUP BY a, c` or a `GROUP BY c, a` can use
+/// `PartiallySorted([0])` or `PartiallySorted([1])` modes, respectively, as
+/// the permutation `[a]` satisfies the existing ordering.
 /// (The vector stores the index of `a` in the respective PARTITION BY expression.)
-/// - A `GROUP BY a, b` or a `GROUP BY b, a` can use `Sorted` mode.
+/// - A `GROUP BY a, b` or a `GROUP BY b, a` can use `Sorted` mode, as the
+/// full permutation `[a, b]` satisfies the existing ordering.
 ///
 /// Note these are the same examples as above, but with `GROUP BY` instead of
 /// `PARTITION BY` to make the examples easier to read.
diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs
index 01818405b810..693d20e90a66 100644
--- a/datafusion/physical-plan/src/windows/mod.rs
+++ b/datafusion/physical-plan/src/windows/mod.rs
@@ -329,11 +329,11 @@ pub(crate) fn calc_requirements<
     (!sort_reqs.is_empty()).then_some(sort_reqs)
 }
 
-/// This function calculates the indices such that when partition by expressions reordered with this indices
+/// This function calculates the indices such that when partition by expressions are reordered with the indices
 /// resulting expressions define a preset for existing ordering.
-// For instance, if input is ordered by a, b, c and PARTITION BY b, a is used
-// This vector will be [1, 0]. It means that when we iterate b,a columns with the order [1, 0]
-// resulting vector (a, b) is a preset of the existing ordering (a, b, c).
+/// For instance, if input is ordered by a, b, c and PARTITION BY b, a is used,
+/// this vector will be [1, 0]. It means that when we iterate the b, a columns with the order [1, 0],
+/// the resulting vector (a, b) is a preset of the existing ordering (a, b, c).
 pub(crate) fn get_ordered_partition_by_indices(
     partition_by_exprs: &[Arc<dyn PhysicalExpr>],
     input: &Arc<dyn ExecutionPlan>,

From 497cb9d46c6f68de6762998c241d0860072c7909 Mon Sep 17 00:00:00 2001
From: ZHENGLIN LI <63448884+ZhengLin-Li@users.noreply.github.com>
Date: Sun, 18 Feb 2024 19:34:49 -0600
Subject: [PATCH 2/4] fix: fix `GLIBC_2.35' not found error in ci (#9243)

---
 datafusion-cli/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion-cli/Dockerfile b/datafusion-cli/Dockerfile
index 67de1f0c8009..5ddedad2a6f4 100644
--- a/datafusion-cli/Dockerfile
+++ b/datafusion-cli/Dockerfile
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-FROM rust:1.72 as builder
+FROM rust:1.72-bullseye as builder
 
 COPY . /usr/src/arrow-datafusion
 COPY ./datafusion /usr/src/arrow-datafusion/datafusion
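For the `InputOrderMode` examples documented in patch 1 above: with an input ordered by (a, b, c), the three modes can be written out directly. A minimal sketch, assuming `InputOrderMode` is re-exported at the root of `datafusion-physical-plan` (check the exact path in your version):

    use datafusion_physical_plan::InputOrderMode;

    fn main() {
        // Input ordered by (a, b, c); the index in PartiallySorted refers to
        // the position of the already-ordered column in the GROUP BY list.
        let group_by_b = InputOrderMode::Linear; // GROUP BY b
        let group_by_a_c = InputOrderMode::PartiallySorted(vec![0]); // GROUP BY a, c
        let group_by_a_b = InputOrderMode::Sorted; // GROUP BY a, b
        println!("{group_by_b:?} {group_by_a_c:?} {group_by_a_b:?}");
    }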
From 60ee91e2d0011eed4e150a1c2aff83e42087cba5 Mon Sep 17 00:00:00 2001
From: Trent Hauck
Date: Sun, 18 Feb 2024 22:58:43 -0800
Subject: [PATCH 3/4] fix: add `ArrayAndElementAndOptionalIndex` for proper
 casting in `array_position` (#9233)

* fix: use `array_and_element` for proper casting in array_position

* fix: fix typo

* feat: add ArrayAndElementAndOptionalIndex

* refactor: cleanup

* docs: add docs to enum variants

* doc: fix cargo doc formatting snafu

* test: add a couple of tests

* refactor: update names, early exit logic

* test: add null test for array_position
---
 datafusion/expr/src/built_in_function.rs     |  2 +-
 datafusion/expr/src/signature.rs             | 17 +++++++++-
 .../expr/src/type_coercion/functions.rs      | 32 +++++++++++++++++++
 datafusion/sqllogictest/test_files/array.slt | 25 +++++++++++++++
 4 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index 09077a557a7f..f92ae87d6e6c 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -953,7 +953,7 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::ArrayNdims => Signature::any(1, self.volatility()),
             BuiltinScalarFunction::ArrayDistinct => Signature::any(1, self.volatility()),
             BuiltinScalarFunction::ArrayPosition => {
-                Signature::variadic_any(self.volatility())
+                Signature::array_and_element_and_optional_index(self.volatility())
             }
             BuiltinScalarFunction::ArrayPositions => {
                 Signature::array_and_element(self.volatility())
diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs
index 48f4c996cb5d..e8d9d8fb3966 100644
--- a/datafusion/expr/src/signature.rs
+++ b/datafusion/expr/src/signature.rs
@@ -91,7 +91,7 @@ pub enum TypeSignature {
     /// DataFusion attempts to coerce all argument types to match the first argument's type
     ///
     /// # Examples
-    /// Given types in signature should be coericible to the same final type.
+    /// Given types in signature should be coercible to the same final type.
     /// A function such as `make_array` is `VariadicEqual`.
     ///
     /// `make_array(i32, i64) -> make_array(i64, i64)`
@@ -132,7 +132,10 @@ pub enum ArrayFunctionSignature {
     /// The first argument should be non-list or list, and the second argument should be List/LargeList.
     /// The first argument's list dimension should be one dimension less than the second argument's list dimension.
     ElementAndArray,
+    /// Specialized Signature for Array functions of the form (List/LargeList, Index)
     ArrayAndIndex,
+    /// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index)
+    ArrayAndElementAndOptionalIndex,
 }
 
 impl std::fmt::Display for ArrayFunctionSignature {
@@ -141,6 +144,9 @@ impl std::fmt::Display for ArrayFunctionSignature {
             ArrayFunctionSignature::ArrayAndElement => {
                 write!(f, "array, element")
             }
+            ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {
+                write!(f, "array, element, [index]")
+            }
             ArrayFunctionSignature::ElementAndArray => {
                 write!(f, "element, array")
             }
@@ -292,6 +298,15 @@ impl Signature {
             volatility,
         }
     }
+    /// Specialized Signature for Array functions with an optional index
+    pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
+        Signature {
+            type_signature: TypeSignature::ArraySignature(
+                ArrayFunctionSignature::ArrayAndElementAndOptionalIndex,
+            ),
+            volatility,
+        }
+    }
     /// Specialized Signature for ArrayPrepend and similar functions
     pub fn element_and_array(volatility: Volatility) -> Self {
         Signature {
diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs
index 806fdaaa5246..9cab04bc7605 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -130,6 +130,35 @@ fn get_valid_types(
             _ => Ok(vec![vec![]]),
         }
     }
+    fn array_element_and_optional_index(
+        current_types: &[DataType],
+    ) -> Result<Vec<Vec<DataType>>> {
+        // make sure there's 2 or 3 arguments
+        if !(current_types.len() == 2 || current_types.len() == 3) {
+            return Ok(vec![vec![]]);
+        }
+
+        let first_two_types = &current_types[0..2];
+        let mut valid_types = array_append_or_prepend_valid_types(first_two_types, true)?;
+
+        // Early return if there are only 2 arguments
+        if current_types.len() == 2 {
+            return Ok(valid_types);
+        }
+
+        let valid_types_with_index = valid_types
+            .iter()
+            .map(|t| {
+                let mut t = t.clone();
+                t.push(DataType::Int64);
+                t
+            })
+            .collect::<Vec<_>>();
+
+        valid_types.extend(valid_types_with_index);
+
+        Ok(valid_types)
+    }
     fn array_and_index(current_types: &[DataType]) -> Result<Vec<Vec<DataType>>> {
         if current_types.len() != 2 {
             return Ok(vec![vec![]]);
         }
@@ -184,6 +213,9 @@ fn get_valid_types(
             ArrayFunctionSignature::ArrayAndElement => {
                 return array_append_or_prepend_valid_types(current_types, true)
             }
+            ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {
+                return array_element_and_optional_index(current_types)
+            }
             ArrayFunctionSignature::ArrayAndIndex => {
                 return array_and_index(current_types)
             }
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index 286cc1a30ca6..88bae310fbe5 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -2603,6 +2603,31 @@ select array_position(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4,
 ----
 2 2
 
+query I
+SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5)
+----
+1
+
+query I
+SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5, 2)
+----
+5
+
+query I
+SELECT array_position(arrow_cast([1, 1, 100, 1, 1], 'LargeList(Int32)'), 100)
+----
+3
+
+query I
+SELECT array_position([1, 2, 3], 'foo')
+----
+NULL
+
+query I
+SELECT array_position([1, 2, 3], 'foo', 2)
+----
+NULL
+
 # list_position scalar function #5 (function alias `array_position`)
 query III
 select list_position(['h', 'e', 'l', 'l', 'o'], 'l'), list_position([1, 2, 3, 4, 5], 5), list_position([1, 1, 1], 1);
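Taken together, the signature and coercion changes in patch 3 admit exactly two call shapes for `array_position`. A minimal sketch of constructing the new signature, using the constructor this patch adds (`Volatility::Immutable` is an arbitrary choice here):

    use datafusion_expr::{Signature, Volatility};

    fn main() {
        // the constructor added in patch 3
        let sig = Signature::array_and_element_and_optional_index(Volatility::Immutable);
        // Accepted argument shapes after coercion:
        //   (List(T), T)        -- two-argument form, element unified with T
        //   (List(T), T, Int64) -- three-argument form, index pinned to Int64
        println!("{sig:?}");
    }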
From b2a04519da97c2ff81789ef41dd652870794a73a Mon Sep 17 00:00:00 2001
From: Devin D'Angelo
Date: Mon, 19 Feb 2024 02:32:27 -0500
Subject: [PATCH 4/4] Support Copy To Partitioned Files (#9240)

* support copy to partitioned files

* remove print statements

* fmt

* fix tests and use err macro

* cargo doc fix

* add partition directory specific test

* try to support columns with single quotes in name
---
 datafusion/common/src/file_options/mod.rs     | 25 ++++++
 datafusion/core/src/dataframe/mod.rs          | 12 +++
 datafusion/core/src/dataframe/parquet.rs      |  1 +
 .../src/datasource/file_format/write/demux.rs | 28 ++++---
 datafusion/core/src/physical_planner.rs       | 10 ++-
 datafusion/expr/src/logical_plan/builder.rs   |  2 +
 datafusion/expr/src/logical_plan/dml.rs       |  2 +
 datafusion/expr/src/logical_plan/plan.rs      |  4 +-
 datafusion/proto/src/logical_plan/mod.rs      |  2 +
 .../tests/cases/roundtrip_logical_plan.rs     |  4 +
 datafusion/sql/src/statement.rs               |  2 +
 datafusion/sqllogictest/test_files/copy.slt   | 84 +++++++++++++++++++
 12 files changed, 163 insertions(+), 13 deletions(-)

diff --git a/datafusion/common/src/file_options/mod.rs b/datafusion/common/src/file_options/mod.rs
index 1d661b17eb1c..3a48f188fb97 100644
--- a/datafusion/common/src/file_options/mod.rs
+++ b/datafusion/common/src/file_options/mod.rs
@@ -97,6 +97,31 @@ impl StatementOptions {
         maybe_option.map(|(_, v)| v)
     }
 
+    /// Finds the partition_by option if it exists and parses it into a `Vec<String>`.
+    /// If the option doesn't exist, returns an empty `vec![]`.
+    /// E.g. (partition_by 'colA, colB, colC') -> `vec!['colA','colB','colC']`
+    pub fn take_partition_by(&mut self) -> Vec<String> {
+        let partition_by = self.take_str_option("partition_by");
+        match partition_by {
+            Some(part_cols) => {
+                let dequoted = part_cols
+                    .chars()
+                    .enumerate()
+                    .filter(|(idx, c)| {
+                        !((*idx == 0 || *idx == part_cols.len() - 1)
+                            && (*c == '\'' || *c == '"'))
+                    })
+                    .map(|(_idx, c)| c)
+                    .collect::<String>();
+                dequoted
+                    .split(',')
+                    .map(|s| s.trim().replace("''", "'"))
+                    .collect::<Vec<_>>()
+            }
+            None => vec![],
+        }
+    }
+
     /// Infers the file_type given a target and arbitrary options.
     /// If the options contain an explicit "format" option, that will be used.
     /// Otherwise, attempt to infer file_type from the extension of target.
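For the simple unquoted case, the parsing above reduces to a trimmed split on commas. A small sketch, assuming `StatementOptions::new` takes the option list as a `Vec<(String, String)>` (verify the constructor signature in your version):

    use datafusion_common::file_options::StatementOptions;

    fn main() {
        // the option value as it arrives from `COPY ... (partition_by 'colA, colB')`
        let opts = vec![("partition_by".to_string(), "colA, colB".to_string())];
        let mut statement_options = StatementOptions::new(opts);
        // whitespace around each name is trimmed; a doubled '' unescapes to '
        assert_eq!(statement_options.take_partition_by(), vec!["colA", "colB"]);
    }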
diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs
index 237f14d2c046..81247908dfe1 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -73,6 +73,9 @@ pub struct DataFrameWriteOptions {
     /// Allows compression of CSV and JSON.
     /// Not supported for parquet.
     compression: CompressionTypeVariant,
+    /// Sets which columns should be used for hive-style partitioned writes by name.
+    /// Can be set to empty vec![] for non-partitioned writes.
+    partition_by: Vec<String>,
 }
 
 impl DataFrameWriteOptions {
@@ -82,6 +85,7 @@ impl DataFrameWriteOptions {
             overwrite: false,
             single_file_output: false,
             compression: CompressionTypeVariant::UNCOMPRESSED,
+            partition_by: vec![],
         }
     }
     /// Set the overwrite option to true or false
@@ -101,6 +105,12 @@ impl DataFrameWriteOptions {
         self.compression = compression;
         self
     }
+
+    /// Sets the partition_by columns for output partitioning
+    pub fn with_partition_by(mut self, partition_by: Vec<String>) -> Self {
+        self.partition_by = partition_by;
+        self
+    }
 }
 
 impl Default for DataFrameWriteOptions {
@@ -1176,6 +1186,7 @@ impl DataFrame {
             self.plan,
             path.into(),
             FileType::CSV,
+            options.partition_by,
             copy_options,
         )?
         .build()?;
@@ -1219,6 +1230,7 @@ impl DataFrame {
             self.plan,
             path.into(),
             FileType::JSON,
+            options.partition_by,
             copy_options,
         )?
         .build()?;
diff --git a/datafusion/core/src/dataframe/parquet.rs b/datafusion/core/src/dataframe/parquet.rs
index 00a0e780d51f..184d3c8cb25a 100644
--- a/datafusion/core/src/dataframe/parquet.rs
+++ b/datafusion/core/src/dataframe/parquet.rs
@@ -73,6 +73,7 @@ impl DataFrame {
             self.plan,
             path.into(),
             FileType::PARQUET,
+            options.partition_by,
             copy_options,
         )?
         .build()?;
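From the user's side, the same plumbing is reachable through the `DataFrame` write methods shown above. A usage sketch (the output path is hypothetical, and the third argument — the optional writer properties — should be checked against your release):

    use datafusion::dataframe::{DataFrame, DataFrameWriteOptions};
    use datafusion::error::Result;

    // write `df` as hive-style partitioned parquet, one directory per
    // distinct value of col2 (e.g. out/col2=Bar/<file>.parquet)
    async fn write_partitioned(df: DataFrame) -> Result<()> {
        df.write_parquet(
            "out/",
            DataFrameWriteOptions::new().with_partition_by(vec!["col2".to_string()]),
            None, // default writer properties
        )
        .await?;
        Ok(())
    }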
diff --git a/datafusion/core/src/datasource/file_format/write/demux.rs b/datafusion/core/src/datasource/file_format/write/demux.rs
index 94d915827e4f..1f7c243e980d 100644
--- a/datafusion/core/src/datasource/file_format/write/demux.rs
+++ b/datafusion/core/src/datasource/file_format/write/demux.rs
@@ -32,7 +32,7 @@ use arrow_array::cast::AsArray;
 use arrow_array::{downcast_dictionary_array, RecordBatch, StringArray, StructArray};
 use arrow_schema::{DataType, Schema};
 use datafusion_common::cast::as_string_array;
-use datafusion_common::DataFusionError;
+use datafusion_common::{exec_datafusion_err, DataFusionError};
 
 use datafusion_execution::TaskContext;
 
@@ -319,14 +319,20 @@ fn compute_partition_keys_by_row<'a>(
 ) -> Result<Vec<Vec<&'a str>>> {
     let mut all_partition_values = vec![];
 
-    for (col, dtype) in partition_by.iter() {
+    // For the purposes of writing partitioned data, we can rely on schema inference
+    // to determine the type of the partition cols in order to provide a more ergonomic
+    // UI which does not require specifying DataTypes manually. So, we ignore the
+    // DataType within the partition_by array and infer the correct type from the
+    // batch schema instead.
+    let schema = rb.schema();
+    for (col, _) in partition_by.iter() {
         let mut partition_values = vec![];
-        let col_array =
-            rb.column_by_name(col)
-                .ok_or(DataFusionError::Execution(format!(
-                    "PartitionBy Column {} does not exist in source data!",
-                    col
-                )))?;
+
+        let dtype = schema.field_with_name(col)?.data_type();
+        let col_array = rb.column_by_name(col).ok_or(exec_datafusion_err!(
+            "PartitionBy Column {} does not exist in source data! Got schema {schema}.",
+            col
+        ))?;
 
         match dtype {
             DataType::Utf8 => {
@@ -339,12 +345,12 @@ fn compute_partition_keys_by_row<'a>(
             downcast_dictionary_array!(
                 col_array => {
                     let array = col_array.downcast_dict::<StringArray>()
-                        .ok_or(DataFusionError::Execution(format!("it is not yet supported to write to hive partitions with datatype {}",
-                        dtype)))?;
+                        .ok_or(exec_datafusion_err!("it is not yet supported to write to hive partitions with datatype {}",
+                        dtype))?;
 
                     for val in array.values() {
                         partition_values.push(
-                            val.ok_or(DataFusionError::Execution(format!("Cannot partition by null value for column {}", col)))?
+                            val.ok_or(exec_datafusion_err!("Cannot partition by null value for column {}", col))?
                         );
                     }
                 },
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs
index 463d0cde8282..dabf0a91b2d3 100644
--- a/datafusion/core/src/physical_planner.rs
+++ b/datafusion/core/src/physical_planner.rs
@@ -568,6 +568,7 @@ impl DefaultPhysicalPlanner {
                 output_url,
                 file_format,
                 copy_options,
+                partition_by,
             }) => {
                 let input_exec = self.create_initial_plan(input, session_state).await?;
                 let parsed_url = ListingTableUrl::parse(output_url)?;
@@ -585,13 +586,20 @@ impl DefaultPhysicalPlanner {
                     CopyOptions::WriterOptions(writer_options) => *writer_options.clone()
                 };
 
+                // Note: the DataType passed here is ignored for the purposes of writing and inferred instead
+                // from the schema of the RecordBatch being written. This allows COPY statements to specify only
+                // the column name rather than column name + explicit data type.
+                let table_partition_cols = partition_by.iter()
+                    .map(|s| (s.to_string(), arrow_schema::DataType::Null))
+                    .collect::<Vec<_>>();
+
                 // Set file sink related options
                 let config = FileSinkConfig {
                     object_store_url,
                     table_paths: vec![parsed_url],
                     file_groups: vec![],
                     output_schema: Arc::new(schema),
-                    table_partition_cols: vec![],
+                    table_partition_cols,
                     overwrite: false,
                     file_type_writer_options
                 };
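The `DataType::Null` entries above are placeholders: as the demux commentary earlier in this patch explains, the concrete type is re-inferred from the schema of each `RecordBatch` at write time. A standalone sketch of that resolution step (an illustration, not code from the patch):

    use arrow_schema::{ArrowError, DataType, Schema};

    // replace placeholder partition-column types with the concrete types
    // found in the schema of the data actually being written
    fn resolve_partition_types(
        partition_by: &[String],
        schema: &Schema,
    ) -> Result<Vec<(String, DataType)>, ArrowError> {
        partition_by
            .iter()
            .map(|name| {
                let field = schema.field_with_name(name)?;
                Ok((name.clone(), field.data_type().clone()))
            })
            .collect()
    }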
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index 39df96d61f45..0662396f611b 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -263,12 +263,14 @@ impl LogicalPlanBuilder {
         input: LogicalPlan,
         output_url: String,
         file_format: FileType,
+        partition_by: Vec<String>,
         copy_options: CopyOptions,
     ) -> Result<Self> {
         Ok(Self::from(LogicalPlan::Copy(CopyTo {
             input: Arc::new(input),
             output_url,
             file_format,
+            partition_by,
             copy_options,
         })))
     }
diff --git a/datafusion/expr/src/logical_plan/dml.rs b/datafusion/expr/src/logical_plan/dml.rs
index 794c64998935..a55781eda643 100644
--- a/datafusion/expr/src/logical_plan/dml.rs
+++ b/datafusion/expr/src/logical_plan/dml.rs
@@ -36,6 +36,8 @@ pub struct CopyTo {
     pub output_url: String,
     /// The file format to output (explicitly defined or inferred from file extension)
     pub file_format: FileType,
+    /// Determines which, if any, columns should be used for hive-style partitioned writes
+    pub partition_by: Vec<String>,
     /// Arbitrary options as tuples
     pub copy_options: CopyOptions,
 }
diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs
index ba768cf3c6d6..aa5dff25efd8 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -615,12 +615,13 @@ impl LogicalPlan {
                 input: _,
                 output_url,
                 file_format,
+                partition_by,
                 copy_options,
             }) => Ok(LogicalPlan::Copy(CopyTo {
                 input: Arc::new(inputs.swap_remove(0)),
                 output_url: output_url.clone(),
                 file_format: file_format.clone(),
-
+                partition_by: partition_by.clone(),
                 copy_options: copy_options.clone(),
             })),
             LogicalPlan::Values(Values { schema, .. }) => {
@@ -1551,6 +1552,7 @@ impl LogicalPlan {
                 input: _,
                 output_url,
                 file_format,
+                partition_by: _,
                 copy_options,
             }) => {
                 let op_str = match copy_options {
diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs
index 7a6dab85de34..aaaf165e3276 100644
--- a/datafusion/proto/src/logical_plan/mod.rs
+++ b/datafusion/proto/src/logical_plan/mod.rs
@@ -918,6 +918,7 @@ impl AsLogicalPlan for LogicalPlanNode {
                     input: Arc::new(input),
                     output_url: copy.output_url.clone(),
                     file_format: FileType::from_str(&copy.file_type)?,
+                    partition_by: vec![],
                     copy_options,
                 },
             ))
@@ -1641,6 +1642,7 @@ impl AsLogicalPlan for LogicalPlanNode {
                 output_url,
                 file_format,
                 copy_options,
+                partition_by: _,
             }) => {
                 let input = protobuf::LogicalPlanNode::try_from_logical_plan(
                     input,
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index 68a318b5a6d5..81f59975476f 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -324,6 +324,7 @@ async fn roundtrip_logical_plan_copy_to_sql_options() -> Result<()> {
         input: Arc::new(input),
         output_url: "test.csv".to_string(),
         file_format: FileType::CSV,
+        partition_by: vec![],
         copy_options: CopyOptions::SQLOptions(StatementOptions::from(&options)),
     });
@@ -354,6 +355,7 @@ async fn roundtrip_logical_plan_copy_to_writer_options() -> Result<()> {
         input: Arc::new(input),
         output_url: "test.parquet".to_string(),
         file_format: FileType::PARQUET,
+        partition_by: vec![],
         copy_options: CopyOptions::WriterOptions(Box::new(
             FileTypeWriterOptions::Parquet(ParquetWriterOptions::new(writer_properties)),
         )),
@@ -402,6 +404,7 @@ async fn roundtrip_logical_plan_copy_to_arrow() -> Result<()> {
         input: Arc::new(input),
         output_url: "test.arrow".to_string(),
         file_format: FileType::ARROW,
+        partition_by: vec![],
         copy_options: CopyOptions::WriterOptions(Box::new(FileTypeWriterOptions::Arrow(
             ArrowWriterOptions::new(),
         ))),
@@ -447,6 +450,7 @@ async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> {
         input: Arc::new(input),
         output_url: "test.csv".to_string(),
         file_format: FileType::CSV,
+        partition_by: vec![],
         copy_options: CopyOptions::WriterOptions(Box::new(FileTypeWriterOptions::CSV(
             CsvWriterOptions::new(
                 writer_properties,
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 47eca70ef3e2..bf15146a92f7 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -718,6 +718,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
         let mut statement_options = StatementOptions::new(options);
         let file_format = statement_options.try_infer_file_type(&statement.target)?;
+        let partition_by = statement_options.take_partition_by();
 
         let copy_options = CopyOptions::SQLOptions(statement_options);
 
@@ -725,6 +726,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             input: Arc::new(input),
             output_url: statement.target,
             file_format,
+            partition_by,
             copy_options,
         }))
     }
diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt
index dd2ce16a632e..51b46d710bd8 100644
--- a/datafusion/sqllogictest/test_files/copy.slt
+++ b/datafusion/sqllogictest/test_files/copy.slt
@@ -25,6 +25,90 @@ COPY source_table TO 'test_files/scratch/copy/table/' (format parquet, compressi
 ----
 2
 
+# Copy to directory as partitioned files
+query IT
+COPY source_table TO 'test_files/scratch/copy/partitioned_table1/' (format parquet, compression 'zstd(10)', partition_by 'col2');
+----
+2
+
+# validate multiple partitioned parquet file output
+statement ok
+CREATE EXTERNAL TABLE validate_partitioned_parquet STORED AS PARQUET
+LOCATION 'test_files/scratch/copy/partitioned_table1/' PARTITIONED BY (col2);
+
+query I?
+select * from validate_partitioned_parquet order by col1, col2;
+----
+1 Foo
+2 Bar
+
+# validate partition paths were actually generated
+statement ok
+CREATE EXTERNAL TABLE validate_partitioned_parquet_bar STORED AS PARQUET
+LOCATION 'test_files/scratch/copy/partitioned_table1/col2=Bar';
+
+query I
+select * from validate_partitioned_parquet_bar order by col1;
+----
+2
+
+# Copy to directory as partitioned files
+query ITT
+COPY (values (1, 'a', 'x'), (2, 'b', 'y'), (3, 'c', 'z')) TO 'test_files/scratch/copy/partitioned_table2/'
+(format parquet, compression 'zstd(10)', partition_by 'column2, column3');
+----
+3
+
+# validate multiple partitioned parquet file output
+statement ok
+CREATE EXTERNAL TABLE validate_partitioned_parquet2 STORED AS PARQUET
+LOCATION 'test_files/scratch/copy/partitioned_table2/' PARTITIONED BY (column2, column3);
+
+query I??
+select * from validate_partitioned_parquet2 order by column1,column2,column3;
+----
+1 a x
+2 b y
+3 c z
+
+statement ok
+CREATE EXTERNAL TABLE validate_partitioned_parquet_a_x STORED AS PARQUET
+LOCATION 'test_files/scratch/copy/partitioned_table2/column2=a/column3=x';
+
+query I
+select * from validate_partitioned_parquet_a_x order by column1;
+----
+1
+
+statement ok
+create table test ("'test'" varchar, "'test2'" varchar, "'test3'" varchar);
+
+query TTT
+insert into test VALUES ('a', 'x', 'aa'), ('b','y', 'bb'), ('c', 'z', 'cc')
+----
+3
+
+query T
+select "'test'" from test
+----
+a
+b
+c
+
+# Note: to place a single ' inside of a literal string, escape it by putting two ''
+query TTT
+copy test to 'test_files/scratch/copy/escape_quote' (format csv, partition_by '''test2'',''test3''')
+----
+3
+
+statement ok
+CREATE EXTERNAL TABLE validate_partitioned_escape_quote STORED AS CSV
+LOCATION 'test_files/scratch/copy/escape_quote/' PARTITIONED BY ("'test2'", "'test3'");
+
+# This triggers a panic (index out of bounds)
+#query
+#select * from validate_partitioned_escape_quote;
+
 query TT
 EXPLAIN COPY source_table TO 'test_files/scratch/copy/table/' (format parquet, compression 'zstd(10)');
 ----