From 355b76ddcccaaa30f3cf9b2fae6d9ea9eff98858 Mon Sep 17 00:00:00 2001
From: "xudong.w"
Date: Sat, 25 Jan 2025 19:26:25 +0800
Subject: [PATCH] Extract useful methods from sqllogictest bin (#14267)

---
 datafusion/sqllogictest/bin/sqllogictests.rs | 109 +++++++++----------
 datafusion/sqllogictest/src/lib.rs           |   3 +
 datafusion/sqllogictest/src/util.rs          | 108 ++++++++++++++++++
 3 files changed, 161 insertions(+), 59 deletions(-)
 create mode 100644 datafusion/sqllogictest/src/util.rs

diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs
index c3e739d146c6..8739a208f239 100644
--- a/datafusion/sqllogictest/bin/sqllogictests.rs
+++ b/datafusion/sqllogictest/bin/sqllogictests.rs
@@ -15,8 +15,32 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use clap::Parser;
+use datafusion_common::instant::Instant;
+use datafusion_common::utils::get_available_parallelism;
+use datafusion_common::{exec_err, DataFusionError, Result};
+use datafusion_common_runtime::SpawnedTask;
+use datafusion_sqllogictest::{
+    df_value_validator, read_dir_recursive, setup_scratch_dir, value_normalizer,
+    DataFusion, TestContext,
+};
+use futures::stream::StreamExt;
+use indicatif::{
+    HumanDuration, MultiProgress, ProgressBar, ProgressDrawTarget, ProgressStyle,
+};
+use itertools::Itertools;
+use log::Level::Info;
+use log::{info, log_enabled};
+use sqllogictest::{
+    parse_file, strict_column_validator, AsyncDB, Condition, Normalizer, Record,
+    Validator,
+};
+
+#[cfg(feature = "postgres")]
+use crate::postgres_container::{
+    initialize_postgres_container, terminate_postgres_container,
+};
 use std::ffi::OsStr;
-use std::fs;
 use std::path::{Path, PathBuf};
 
 use clap::Parser;
@@ -40,39 +64,33 @@ pub fn main() -> Result<()> {
         .block_on(run_tests())
 }
 
-fn value_validator(actual: &[Vec<String>], expected: &[String]) -> bool {
-    let expected = expected
+/// Validates sqlite test results: every `expected` line and every cell of each
+/// `actual` row goes through `normalizer` (row cells are then joined by a space).
+/// On mismatch, both sides are logged at `info` level when that level is enabled.
+///
+/// Returns `true` only if the normalized sequences are equal.
+fn sqlite_value_validator(
+    normalizer: Normalizer,
+    actual: &[Vec<String>],
+    expected: &[String],
+) -> bool {
+    let normalized_expected = expected.iter().map(normalizer).collect::<Vec<_>>();
+    let normalized_actual = actual
         .iter()
-        // Trailing whitespace from lines in SLT will typically be removed, but do not fail if it is not
-        // If particular test wants to cover trailing whitespace on a value,
-        // it should project additional non-whitespace column on the right.
-        .map(|s| s.trim_end().to_owned())
-        .collect::<Vec<_>>();
-    let actual = actual
-        .iter()
-        .map(|strs| strs.iter().join(" "))
-        // Editors do not preserve trailing whitespace, so expected may or may not lack it included
-        .map(|s| s.trim_end().to_owned())
-        .collect::<Vec<_>>();
-    actual == expected
-}
-
-/// Sets up an empty directory at test_files/scratch/<name>
-/// creating it if needed and clearing any file contents if it exists
-/// This allows tests for inserting to external tables or copy to
-/// to persist data to disk and have consistent state when running
-/// a new test
-fn setup_scratch_dir(name: &Path) -> Result<()> {
-    // go from copy.slt --> copy
-    let file_stem = name.file_stem().expect("File should have a stem");
-    let path = PathBuf::from("test_files").join("scratch").join(file_stem);
-
-    info!("Creating scratch dir in {path:?}");
-    if path.exists() {
-        fs::remove_dir_all(&path)?;
+        .map(|strs| strs.iter().map(normalizer).join(" "))
+        .collect_vec();
+
+    if log_enabled!(Info) && normalized_actual != normalized_expected {
+        info!("sqlite validation failed. actual vs expected:");
+        for (i, actual_row) in normalized_actual.iter().enumerate() {
+            info!("[{i}] {}", actual_row);
+            // `get` is `None` once `expected` runs out (`len() >= i` + indexing panicked).
+            let expected_row = normalized_expected.get(i).map(String::as_str);
+            info!("[{i}] {}", expected_row.unwrap_or("No more results"));
+        }
     }
-    fs::create_dir_all(&path)?;
-    Ok(())
+
+    normalized_actual == normalized_expected
 }
 
 async fn run_tests() -> Result<()> {
@@ -275,33 +293,6 @@ fn read_test_files<'a>(
     ))
 }
 
-fn read_dir_recursive<P: AsRef<Path>>(path: P) -> Result<Vec<PathBuf>> {
-    let mut dst = vec![];
-    read_dir_recursive_impl(&mut dst, path.as_ref())?;
-    Ok(dst)
-}
-
-/// Append all paths recursively to dst
-fn read_dir_recursive_impl(dst: &mut Vec<PathBuf>, path: &Path) -> Result<()> {
-    let entries = fs::read_dir(path)
-        .map_err(|e| exec_datafusion_err!("Error reading directory {path:?}: {e}"))?;
-    for entry in entries {
-        let path = entry
-            .map_err(|e| {
-                exec_datafusion_err!("Error reading entry in directory {path:?}: {e}")
-            })?
-            .path();
-
-        if path.is_dir() {
-            read_dir_recursive_impl(dst, &path)?;
-        } else {
-            dst.push(path);
-        }
-    }
-
-    Ok(())
-}
-
 /// Parsed command line options
 ///
 /// This structure attempts to mimic the command line options of the built in rust test runner
diff --git a/datafusion/sqllogictest/src/lib.rs b/datafusion/sqllogictest/src/lib.rs
index 30a882011dd5..82f194321a8e 100644
--- a/datafusion/sqllogictest/src/lib.rs
+++ b/datafusion/sqllogictest/src/lib.rs
@@ -28,4 +28,7 @@ pub use engines::DataFusion;
 pub use engines::Postgres;
 
 mod test_context;
+mod util;
+
 pub use test_context::TestContext;
+pub use util::*;
diff --git a/datafusion/sqllogictest/src/util.rs b/datafusion/sqllogictest/src/util.rs
new file mode 100644
index 000000000000..1bdfdd03360f
--- /dev/null
+++ b/datafusion/sqllogictest/src/util.rs
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion_common::{exec_datafusion_err, Result};
+use itertools::Itertools;
+use log::Level::Warn;
+use log::{info, log_enabled, warn};
+use sqllogictest::Normalizer;
+use std::fs;
+use std::path::{Path, PathBuf};
+
+/// Sets up an empty directory at `test_files/scratch/<file stem of name>`,
+/// creating it if needed and clearing any previous contents if it exists, so tests
+/// that insert into external tables or copy to disk start from a consistent state.
+/// Returns an error if `name` has no file stem or the directory cannot be reset.
+pub fn setup_scratch_dir(name: &Path) -> Result<()> {
+    // go from copy.slt --> copy; a stem-less path is an error rather than a panic
+    let file_stem = name
+        .file_stem()
+        .ok_or_else(|| exec_datafusion_err!("File should have a stem: {name:?}"))?;
+    let path = PathBuf::from("test_files").join("scratch").join(file_stem);
+    info!("Creating scratch dir in {path:?}");
+    if path.exists() {
+        fs::remove_dir_all(&path)?;
+    }
+    fs::create_dir_all(&path)?;
+    Ok(())
+}
+
+/// Trailing whitespace from lines in SLT will typically be removed, but do not fail if it is not
+/// If particular test wants to cover trailing whitespace on a value,
+/// it should project additional non-whitespace column on the right.
+// Takes `&String` rather than `&str`, presumably to match the `Normalizer` fn type.
+#[allow(clippy::ptr_arg)]
+pub fn value_normalizer(s: &String) -> String {
+    s.trim_end().to_string()
+}
+
+/// Returns every non-directory path found under `path`, descending into sub-directories.
+/// Fails if a directory, or one of the entries inside it, cannot be read.
+pub fn read_dir_recursive<P: AsRef<Path>>(path: P) -> Result<Vec<PathBuf>> {
+    let mut dst = vec![];
+    read_dir_recursive_impl(&mut dst, path.as_ref())?;
+    Ok(dst)
+}
+
+/// Appends every non-directory path below `path` to `dst`, recursing into directories.
+/// I/O failures are wrapped in an execution error that names the directory being read.
+fn read_dir_recursive_impl(dst: &mut Vec<PathBuf>, path: &Path) -> Result<()> {
+    let entries = fs::read_dir(path)
+        .map_err(|e| exec_datafusion_err!("Error reading directory {path:?}: {e}"))?;
+    for entry in entries {
+        let path = entry
+            .map_err(|e| {
+                exec_datafusion_err!("Error reading entry in directory {path:?}: {e}")
+            })?
+            .path();
+
+        if path.is_dir() {
+            read_dir_recursive_impl(dst, &path)?;
+        } else {
+            dst.push(path);
+        }
+    }
+
+    Ok(())
+}
+
+/// Checks `actual` rows against `expected`, returning `true` only on an exact match.
+/// `expected` lines go through `normalizer`; each `actual` row is joined with single
+/// spaces and right-trimmed. On mismatch both sides are logged at `warn` level.
+pub fn df_value_validator(
+    normalizer: Normalizer,
+    actual: &[Vec<String>],
+    expected: &[String],
+) -> bool {
+    let normalized_expected = expected.iter().map(normalizer).collect::<Vec<_>>();
+    let normalized_actual = actual
+        .iter()
+        .map(|strs| strs.iter().join(" ").trim_end().to_string())
+        .collect_vec();
+
+    if log_enabled!(Warn) && normalized_actual != normalized_expected {
+        warn!("df validation failed. actual vs expected:");
+        for (i, actual_row) in normalized_actual.iter().enumerate() {
+            warn!("[{i}] {}", actual_row);
+            // `get` is `None` once `expected` runs out (`len() >= i` + indexing panicked).
+            let expected_row = normalized_expected.get(i).map(String::as_str);
+            warn!("[{i}] {}", expected_row.unwrap_or("No more results"));
+        }
+    }
+
+    normalized_actual == normalized_expected
+}