From 355b76ddcccaaa30f3cf9b2fae6d9ea9eff98858 Mon Sep 17 00:00:00 2001
From: "xudong.w" <wxd963996380@gmail.com>
Date: Sat, 25 Jan 2025 19:26:25 +0800
Subject: [PATCH] Extract useful methods from sqllogictest bin (#14267)

---
 datafusion/sqllogictest/bin/sqllogictests.rs | 109 +++++++++----------
 datafusion/sqllogictest/src/lib.rs           |   3 +
 datafusion/sqllogictest/src/util.rs          | 108 ++++++++++++++++++
 3 files changed, 161 insertions(+), 59 deletions(-)
 create mode 100644 datafusion/sqllogictest/src/util.rs
diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs
index c3e739d146c6..8739a208f239 100644
--- a/datafusion/sqllogictest/bin/sqllogictests.rs
+++ b/datafusion/sqllogictest/bin/sqllogictests.rs
@@ -15,8 +15,32 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use clap::Parser;
+use datafusion_common::instant::Instant;
+use datafusion_common::utils::get_available_parallelism;
+use datafusion_common::{exec_err, DataFusionError, Result};
+use datafusion_common_runtime::SpawnedTask;
+use datafusion_sqllogictest::{
+    df_value_validator, read_dir_recursive, setup_scratch_dir, value_normalizer,
+    DataFusion, TestContext,
+};
+use futures::stream::StreamExt;
+use indicatif::{
+    HumanDuration, MultiProgress, ProgressBar, ProgressDrawTarget, ProgressStyle,
+};
+use itertools::Itertools;
+use log::Level::Info;
+use log::{info, log_enabled};
+use sqllogictest::{
+    parse_file, strict_column_validator, AsyncDB, Condition, Normalizer, Record,
+    Validator,
+};
+
+#[cfg(feature = "postgres")]
+use crate::postgres_container::{
+    initialize_postgres_container, terminate_postgres_container,
+};
 use std::ffi::OsStr;
-use std::fs;
 use std::path::{Path, PathBuf};
 
 use clap::Parser;
@@ -40,39 +64,33 @@ pub fn main() -> Result<()> {
         .block_on(run_tests())
 }
 
-fn value_validator(actual: &[Vec<String>], expected: &[String]) -> bool {
-    let expected = expected
+/// Compares `actual` rows with `expected` lines for sqlite-style tests: every
+/// value and every expected line goes through `normalizer`, each row's values
+/// are joined with a single space, and the two row lists must be equal.
+fn sqlite_value_validator(
+    normalizer: Normalizer,
+    actual: &[Vec<String>],
+    expected: &[String],
+) -> bool {
+    let normalized_expected = expected.iter().map(normalizer).collect::<Vec<_>>();
+    let normalized_actual = actual
         .iter()
-        // Trailing whitespace from lines in SLT will typically be removed, but do not fail if it is not
-        // If particular test wants to cover trailing whitespace on a value,
-        // it should project additional non-whitespace column on the right.
-        .map(|s| s.trim_end().to_owned())
-        .collect::<Vec<_>>();
-    let actual = actual
-        .iter()
-        .map(|strs| strs.iter().join(" "))
-        // Editors do not preserve trailing whitespace, so expected may or may not lack it included
-        .map(|s| s.trim_end().to_owned())
-        .collect::<Vec<_>>();
-    actual == expected
-}
-
-/// Sets up an empty directory at test_files/scratch/<name>
-/// creating it if needed and clearing any file contents if it exists
-/// This allows tests for inserting to external tables or copy to
-/// to persist data to disk and have consistent state when running
-/// a new test
-fn setup_scratch_dir(name: &Path) -> Result<()> {
-    // go from copy.slt --> copy
-    let file_stem = name.file_stem().expect("File should have a stem");
-    let path = PathBuf::from("test_files").join("scratch").join(file_stem);
-
-    info!("Creating scratch dir in {path:?}");
-    if path.exists() {
-        fs::remove_dir_all(&path)?;
+        .map(|strs| strs.iter().map(normalizer).join(" "))
+        .collect_vec();
+
+    if log_enabled!(Info) && normalized_actual != normalized_expected {
+        info!("sqlite validation failed. actual vs expected:");
+        for (i, actual_row) in normalized_actual.iter().enumerate() {
+            // `get`: `expected` may be shorter than `actual`; indexing would panic.
+            let expected_row = normalized_expected
+                .get(i)
+                .map_or("No more results", String::as_str);
+            info!("[{i}] {actual_row}<eol>");
+            info!("[{i}] {expected_row}<eol>");
+        }
     }
-    fs::create_dir_all(&path)?;
-    Ok(())
+
+    normalized_actual == normalized_expected
 }
 
 async fn run_tests() -> Result<()> {
@@ -275,33 +293,6 @@ fn read_test_files<'a>(
     ))
 }
 
-fn read_dir_recursive<P: AsRef<Path>>(path: P) -> Result<Vec<PathBuf>> {
-    let mut dst = vec![];
-    read_dir_recursive_impl(&mut dst, path.as_ref())?;
-    Ok(dst)
-}
-
-/// Append all paths recursively to dst
-fn read_dir_recursive_impl(dst: &mut Vec<PathBuf>, path: &Path) -> Result<()> {
-    let entries = fs::read_dir(path)
-        .map_err(|e| exec_datafusion_err!("Error reading directory {path:?}: {e}"))?;
-    for entry in entries {
-        let path = entry
-            .map_err(|e| {
-                exec_datafusion_err!("Error reading entry in directory {path:?}: {e}")
-            })?
-            .path();
-
-        if path.is_dir() {
-            read_dir_recursive_impl(dst, &path)?;
-        } else {
-            dst.push(path);
-        }
-    }
-
-    Ok(())
-}
-
 /// Parsed command line options
 ///
 /// This structure attempts to mimic the command line options of the built in rust test runner
diff --git a/datafusion/sqllogictest/src/lib.rs b/datafusion/sqllogictest/src/lib.rs
index 30a882011dd5..82f194321a8e 100644
--- a/datafusion/sqllogictest/src/lib.rs
+++ b/datafusion/sqllogictest/src/lib.rs
@@ -28,4 +28,7 @@ pub use engines::DataFusion;
 pub use engines::Postgres;
 
 mod test_context;
+mod util;
+
 pub use test_context::TestContext;
+pub use util::*;
diff --git a/datafusion/sqllogictest/src/util.rs b/datafusion/sqllogictest/src/util.rs
new file mode 100644
index 000000000000..1bdfdd03360f
--- /dev/null
+++ b/datafusion/sqllogictest/src/util.rs
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion_common::{exec_datafusion_err, Result};
+use itertools::Itertools;
+use log::Level::Warn;
+use log::{info, log_enabled, warn};
+use sqllogictest::Normalizer;
+use std::fs;
+use std::path::{Path, PathBuf};
+
+/// Prepares a fresh, empty directory at `test_files/scratch/<stem>`, where
+/// `<stem>` is the file stem of `name` (e.g. `copy.slt` --> `copy`).
+/// Whatever is already stored there is deleted first, so tests that write
+/// to disk start from the same state on every run.
+/// Panics if `name` has no file stem.
+pub fn setup_scratch_dir(name: &Path) -> Result<()> {
+    let mut path = PathBuf::from("test_files");
+    path.push("scratch");
+    path.push(name.file_stem().expect("File should have a stem"));
+
+    info!("Creating scratch dir in {path:?}");
+    if path.exists() {
+        fs::remove_dir_all(&path)?;
+    }
+    fs::create_dir_all(&path)?;
+    Ok(())
+}
+
+/// Normalizes one SLT value by dropping its trailing whitespace, so a line
+/// whose trailing whitespace was not removed still compares equal. A test that
+/// must cover trailing whitespace should project a non-whitespace column after it.
+#[allow(clippy::ptr_arg)] // NOTE(review): `&String` presumably matches `Normalizer` — confirm
+pub fn value_normalizer(s: &String) -> String {
+    String::from(s.trim_end())
+}
+
+/// Collects all non-directory paths under `path`, descending into subdirectories.
+pub fn read_dir_recursive<P: AsRef<Path>>(path: P) -> Result<Vec<PathBuf>> {
+    let mut found = Vec::new();
+    read_dir_recursive_impl(&mut found, path.as_ref()).map(|()| found)
+}
+
+/// Walks the directory `path` and appends every non-directory entry found
+/// in it, or in any directory below it, to `dst`. The first I/O error stops
+/// the walk and is returned through `exec_datafusion_err!`.
+fn read_dir_recursive_impl(dst: &mut Vec<PathBuf>, path: &Path) -> Result<()> {
+    let listing = fs::read_dir(path)
+        .map_err(|e| exec_datafusion_err!("Error reading directory {path:?}: {e}"))?;
+    for item in listing {
+        let item = item.map_err(|e| {
+            exec_datafusion_err!("Error reading entry in directory {path:?}: {e}")
+        })?;
+        let child = item.path();
+        if !child.is_dir() {
+            dst.push(child);
+            continue;
+        }
+        read_dir_recursive_impl(dst, &child)?;
+    }
+
+    Ok(())
+}
+
+/// Validate the actual and expected values.
+///
+/// Returns whether the `normalizer`-processed `expected` lines equal the `actual`
+/// rows (values joined by spaces, right-trimmed). Mismatches are logged at `warn`.
+pub fn df_value_validator(
+    normalizer: Normalizer,
+    actual: &[Vec<String>],
+    expected: &[String],
+) -> bool {
+    let normalized_expected = expected.iter().map(normalizer).collect::<Vec<_>>();
+    let normalized_actual = actual
+        .iter()
+        .map(|strs| strs.iter().join(" "))
+        .map(|str| str.trim_end().to_string())
+        .collect_vec();
+
+    if log_enabled!(Warn) && normalized_actual != normalized_expected {
+        warn!("df validation failed. actual vs expected:");
+        for (i, actual_row) in normalized_actual.iter().enumerate() {
+            // `get`: `expected` may be shorter than `actual`; indexing would panic.
+            let expected_row = normalized_expected
+                .get(i)
+                .map_or("No more results", String::as_str);
+            warn!("[{i}] {actual_row}<eol>");
+            warn!("[{i}] {expected_row}<eol>");
+        }
+    }
+
+    normalized_actual == normalized_expected
+}