Skip to content

Commit

Permalink
feat: add hint for missing fields
Browse files Browse the repository at this point in the history
  • Loading branch information
Lordworms committed Feb 6, 2025
1 parent 5239d1a commit 527fbd1
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 6 deletions.
2 changes: 1 addition & 1 deletion datafusion/common/src/dfschema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,7 @@ mod tests {
Column names are case sensitive. \
You can use double quotes to refer to the \"\"t1.c0\"\" column \
or set the datafusion.sql_parser.enable_ident_normalization configuration. \
Valid fields are t1.c0, t1.c1.";
Did you mean 't1.c0'?.";
assert_eq!(err.strip_backtrace(), expected);
Ok(())
}
Expand Down
16 changes: 15 additions & 1 deletion datafusion/common/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use std::io;
use std::result;
use std::sync::Arc;

use crate::utils::datafusion_strsim::normalized_levenshtein;
use crate::utils::quote_identifier;
use crate::{Column, DFSchema, Diagnostic, TableReference};
#[cfg(feature = "avro")]
Expand Down Expand Up @@ -176,6 +177,11 @@ impl Display for SchemaError {
.iter()
.map(|column| column.flat_name().to_lowercase())
.collect::<Vec<String>>();

let valid_fields_names = valid_fields
.iter()
.map(|column| column.flat_name())
.collect::<Vec<String>>();
if lower_valid_fields.contains(&field.flat_name().to_lowercase()) {
write!(
f,
Expand All @@ -184,7 +190,15 @@ impl Display for SchemaError {
field.quoted_flat_name()
)?;
}
if !valid_fields.is_empty() {
let field_name = field.name();
if let Some(matched) = valid_fields_names
.iter()
.filter(|str| normalized_levenshtein(str, field_name) > 0.5)
.collect::<Vec<&String>>()
.first()
{
write!(f, ". Did you mean '{matched}'?")?;
} else if !valid_fields.is_empty() {
write!(
f,
". Valid fields are {}",
Expand Down
21 changes: 21 additions & 0 deletions datafusion/common/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,27 @@ pub mod datafusion_strsim {
pub fn levenshtein(a: &str, b: &str) -> usize {
// Delegate to `generic_levenshtein`, wrapping each `&str` in `StringWrapper`
// so both inputs are passed in the form that generic routine accepts.
generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
}

/// Computes a normalized Levenshtein similarity score for two strings.
///
/// The score is `1.0 - levenshtein(a, b) / max(len(a), len(b))`, with lengths
/// measured in `char`s. A score of 1.0 means the strings are identical and
/// 0.0 means no similarity. Two empty strings are treated as identical and
/// score 1.0.
///
/// ```
/// use datafusion_common::utils::datafusion_strsim::normalized_levenshtein;
///
/// assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
///
/// assert!(normalized_levenshtein("", "second").abs() < 0.00001);
///
/// assert!((normalized_levenshtein("kitten", "sitten") - 0.833).abs() < 0.001);
/// ```
pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
    // Character count of the longer input; this is the normalizing denominator.
    let longest = a.chars().count().max(b.chars().count());
    if longest == 0 {
        // Both inputs are empty: report a perfect match instead of dividing by zero.
        return 1.0;
    }
    let distance = levenshtein(a, b) as f64;
    1.0 - distance / (longest as f64)
}
}

/// Merges collections `first` and `second`, removes duplicates and sorts the
Expand Down
10 changes: 10 additions & 0 deletions datafusion/sqllogictest/test_files/errors.slt
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,13 @@ create table records (timestamp timestamp, value float) as values (
'2021-01-01 00:00:00', 1.0,
'2021-01-01 00:00:00', 2.0
);


statement ok
create table a(timestamp int, birthday int);

query error DataFusion error: Schema error: No field named timetamp\. Did you mean 'a\.timestamp'\?\.
select timetamp from a;

query error DataFusion error: Schema error: No field named dadsada\. Valid fields are a\.timestamp, a\.birthday\.
select dadsada from a;
8 changes: 4 additions & 4 deletions datafusion/sqllogictest/test_files/identifiers.slt
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,16 @@ drop table case_insensitive_test
statement ok
CREATE TABLE test("Column1" string) AS VALUES ('content1');

statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\.
SELECT COLumn1 from test

statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\.
SELECT Column1 from test

statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\.
SELECT column1 from test

statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\.
SELECT "column1" from test

statement ok
Expand Down

0 comments on commit 527fbd1

Please sign in to comment.